1
Fork 0
simple-sql-parser/tools/Language/SQL/SimpleSQL/LexerTests.lhs

234 lines
9.1 KiB
Plaintext
Raw Normal View History

2015-07-31 23:04:18 +02:00
Test for the lexer
> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where
> import Language.SQL.SimpleSQL.TestTypes
> import Language.SQL.SimpleSQL.Lex (Token(..))
2015-07-31 23:04:18 +02:00
> --import Debug.Trace
> import Data.Char (isAlpha)
2015-07-31 23:04:18 +02:00
> lexerTests :: TestItem
> lexerTests = Group "lexerTests" $
> [Group "lexer token tests" [ansiLexerTests
> ,postgresLexerTests]]
> ansiLexerTable :: [(String,[Token])]
> ansiLexerTable =
2015-07-31 23:04:18 +02:00
> -- single char symbols
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
> -- multi char symbols
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
> -- simple identifiers
2016-02-12 13:13:47 +01:00
> in map (\i -> (i, [Identifier Nothing i])) idens
> ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
2015-07-31 23:04:18 +02:00
> -- todo: in order to make lex . pretty id, need to
> -- preserve the case of the u
2016-02-12 13:13:47 +01:00
> ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
2015-07-31 23:04:18 +02:00
> -- host param
> ++ map (\i -> (':':i, [HostParam i])) idens
> )
> -- quoted identifiers with embedded double quotes
> -- the lexer doesn't unescape the quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
2015-07-31 23:04:18 +02:00
> -- strings
> -- the lexer doesn't apply escapes at all
> ++ [("'string'", [SqlString "'" "'" "string"])
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])]
2015-07-31 23:04:18 +02:00
> -- csstrings
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
2015-07-31 23:04:18 +02:00
> ["n", "N","b", "B","x", "X", "u&"]
> -- numbers
> ++ [("10", [SqlNumber "10"])
> ,(".1", [SqlNumber ".1"])
> ,("5e3", [SqlNumber "5e3"])
> ,("5e+3", [SqlNumber "5e+3"])
> ,("5e-3", [SqlNumber "5e-3"])
> ,("10.2", [SqlNumber "10.2"])
> ,("10.2e7", [SqlNumber "10.2e7"])]
> -- whitespace
> ++ concat [[([a],[Whitespace [a]])
> ,([a,b], [Whitespace [a,b]])]
> | a <- " \n\t", b <- " \n\t"]
> -- line comment
> ++ map (\c -> (c, [LineComment c]))
2015-08-02 14:58:09 +02:00
> ["--", "-- ", "-- this is a comment", "-- line com\n"]
2015-07-31 23:04:18 +02:00
> -- block comment
> ++ map (\c -> (c, [BlockComment c]))
> ["/**/", "/* */","/* this is a comment */"
> ,"/* this *is/ a comment */"
2015-07-31 23:04:18 +02:00
> ]
> ansiLexerTests :: TestItem
> ansiLexerTests = Group "ansiLexerTests" $
> [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable]
> ,Group "ansi generated combination lexer tests" $
> [ LexerTest ansi2011 (s ++ s1) (t ++ t1)
> | (s,t) <- ansiLexerTable
> , (s1,t1) <- ansiLexerTable
2015-07-31 23:04:18 +02:00
which combinations won't work:
<> <= >= || two single symbols which make a double char symbol
identifier + identifier if both are quoted or unquoted
string string
csstring string
line comment anything (can add newline?)
number number (todo: double check more carefully)
> , isGood $ t ++ t1
> ]
> ,Group "adhoc lexer tests" $
> map (uncurry $ LexerTest ansi2011)
2015-07-31 23:04:18 +02:00
> [("", [])
2016-02-12 13:13:47 +01:00
> ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier Nothing "stuff"])
2015-07-31 23:04:18 +02:00
> ]
> ]
2015-07-31 23:04:18 +02:00
> where
> isGood :: [Token] -> Bool
> isGood l = {-let b =-} and $ map not [p l | p <- map listPred badCombos]
> -- in trace ("isGood " ++ show (l,b)) b
> badCombos :: [((Token -> Bool),(Token -> Bool))]
> badCombos = [symbolPair "<" ">"
> ,symbolPair "<" "="
> ,symbolPair ">" "="
> ,symbolPair "!" "="
> ,symbolPair "|" "|"
> ,symbolPair "||" "|"
> ,symbolPair "|" "||"
> ,symbolPair "||" "||"
> ,symbolPair "<" ">="
> ,symbolPair "-" "-"
> ,symbolPair "/" "*"
> ,symbolPair "*" "/"
> ,(isIdentifier, isIdentifier)
> ,(isDQIdentifier, isDQIdentifier)
> ,(isCQIdentifier, isDQIdentifier)
> ,(isString, isNonCsString)
2015-08-02 14:58:09 +02:00
> ,(isEofLineComment, const True)
2015-07-31 23:04:18 +02:00
> ,(isNumber, isNumber)
> ,(isHostParam,isIdentifier)
> ,(isHostParam,isCsString)
> ,(isHostParam,isCQIdentifier)
2015-07-31 23:04:18 +02:00
> ,(isIdentifier,isCsString)
> ,(isIdentifier,isCQIdentifier)
2015-07-31 23:04:18 +02:00
> ,(isWhitespace, isWhitespace)
> ,(isIdentifier, isNumber)
> ,(isHostParam, isNumber)
> ,(isMinus, isLineComment)
> ]
2016-02-12 13:13:47 +01:00
> isIdentifier (Identifier Nothing _) = True
2015-07-31 23:04:18 +02:00
> isIdentifier _ = False
2016-02-12 13:13:47 +01:00
> isDQIdentifier (Identifier (Just ("\"",_)) _) = True
> isDQIdentifier _ = False
2016-02-12 13:13:47 +01:00
> isCQIdentifier (Identifier (Just ((x:_),_)) _) | isAlpha x = True
> isCQIdentifier _ = False
> isCsString (SqlString (x:_) _ _) | isAlpha x = True
2015-07-31 23:04:18 +02:00
> isCsString _ = False
> isString (SqlString _ _ _) = True
> isString _ = False
> isNonCsString (SqlString [] _ _) = True
> isNonCsString (SqlString (x:_) _ _) | not (isAlpha x) = True
> isNonCsString _ = False
2015-08-02 14:58:09 +02:00
> isEofLineComment (LineComment s) = last s /= '\n'
> isEofLineComment _ = False
> isLineComment (LineComment {}) = True
2015-07-31 23:04:18 +02:00
> isLineComment _ = False
> isNumber (SqlNumber{}) = True
> isNumber _ = False
> isHostParam (HostParam{}) = True
> isHostParam _ = False
> isWhitespace (Whitespace{}) = True
> isWhitespace _ = False
> isMinus (Symbol "-") = True
> isMinus _ = False
> symbolPair a b = ((==Symbol a), (==Symbol b))
> listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool
> listPred _ [] = False
> listPred _ [_] = False
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
> | otherwise = listPred (p,p1) (t1:ts)
todo: lexing tests
do quickcheck testing:
can try to generate valid tokens then check they parse
same as above: can also try to pair tokens, create an accurate
function to say which ones can appear adjacent, and test
I think this plus the explicit lists of tokens like above which do
basic sanity + explicit edge casts will provide a high level of
assurance.
> postgresLexerTable :: [(String,[Token])]
> postgresLexerTable =
> -- single char symbols
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
> -- multi char symbols
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
> -- todo: add many examples of generic symbols
> -- also: do the testing for the ansi compatibility special cases
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
> -- simple identifiers
> in map (\i -> (i, [Identifier Nothing i])) idens
> ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
> -- todo: in order to make lex . pretty id, need to
> -- preserve the case of the u
> ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
> -- host param
> ++ map (\i -> (':':i, [HostParam i])) idens
> )
> -- positional var
> ++ [("$1", [PositionalArg 1])]
> -- quoted identifiers with embedded double quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
> -- strings
> ++ [("'string'", [SqlString "'" "'" "string"])
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
> ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
> ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
> ,("'not this \\' quote", [SqlString "'" "'" "not this \\"
> ,Whitespace " "
> ,Identifier Nothing "quote"])
> ,("$$ string 1 $$", [SqlString "$$" "$$" " string 1 "])
> ,("$$ string $ 2 $$", [SqlString "$$" "$$" " string $ 2 "])
> ,("$a$ $$string 3$$ $a$", [SqlString "$a$" "$a$" " $$string 3$$ "])
> ]
> -- csstrings
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
> ["n", "N","b", "B","x", "X", "u&", "e", "E"]
> -- numbers
> ++ [("10", [SqlNumber "10"])
> ,(".1", [SqlNumber ".1"])
> ,("5e3", [SqlNumber "5e3"])
> ,("5e+3", [SqlNumber "5e+3"])
> ,("5e-3", [SqlNumber "5e-3"])
> ,("10.2", [SqlNumber "10.2"])
> ,("10.2e7", [SqlNumber "10.2e7"])]
> -- whitespace
> ++ concat [[([a],[Whitespace [a]])
> ,([a,b], [Whitespace [a,b]])]
> | a <- " \n\t", b <- " \n\t"]
> -- line comment
> ++ map (\c -> (c, [LineComment c]))
> ["--", "-- ", "-- this is a comment", "-- line com\n"]
> -- block comment
> ++ map (\c -> (c, [BlockComment c]))
> ["/**/", "/* */","/* this is a comment */"
> ,"/* this *is/ a comment */"
> ]
> postgresLexerTests :: TestItem
> postgresLexerTests = Group "postgresLexerTests" $
> [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
> ]