Tests for the lexer

> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where

> import Language.SQL.SimpleSQL.TestTypes
> import Language.SQL.SimpleSQL.Lex (Token(..))
> --import Debug.Trace
> import Data.Char (isAlpha)
> lexerTests :: TestItem
> lexerTests = Group "lexerTests" $
>     [Group "lexer token tests" [ansiLexerTests
>                                ,postgresLexerTests]]
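> -- The lexer tables below map example source strings to the token lists
> -- they should lex to; the ansi table is also reused to generate the
> -- pairwise combination tests in ansiLexerTests.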
> ansiLexerTable :: [(String,[Token])]
> ansiLexerTable =
>     -- single char symbols
>     map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
>     -- multi char symbols
>     ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
>     ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
>         -- simple identifiers
>         in map (\i -> (i, [Identifier Nothing i])) idens
>            ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
>            -- todo: in order to make lex . pretty id, need to
>            -- preserve the case of the u
>            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
>            -- host param
>            ++ map (\i -> (':':i, [HostParam i])) idens
>        )
>     -- quoted identifiers with embedded double quotes
>     -- the lexer doesn't unescape the quotes
>     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
>     -- strings
>     -- the lexer doesn't apply escapes at all
>     ++ [("'string'", [SqlString "'" "'" "string"])
>        ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
>        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])]
>     -- csstrings
>     ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
>            ["n", "N","b", "B","x", "X", "u&"]
>     -- numbers
>     ++ [("10", [SqlNumber "10"])
>        ,(".1", [SqlNumber ".1"])
>        ,("5e3", [SqlNumber "5e3"])
>        ,("5e+3", [SqlNumber "5e+3"])
>        ,("5e-3", [SqlNumber "5e-3"])
>        ,("10.2", [SqlNumber "10.2"])
>        ,("10.2e7", [SqlNumber "10.2e7"])]
>     -- whitespace
>     ++ concat [[([a],[Whitespace [a]])
>                ,([a,b], [Whitespace [a,b]])]
>               | a <- " \n\t", b <- " \n\t"]
>     -- line comment
>     ++ map (\c -> (c, [LineComment c]))
>            ["--", "-- ", "-- this is a comment", "-- line com\n"]
>     -- block comment
>     ++ map (\c -> (c, [BlockComment c]))
>            ["/**/", "/* */","/* this is a comment */"
>            ,"/* this *is/ a comment */"
>            ]
> ansiLexerTests :: TestItem
> ansiLexerTests = Group "ansiLexerTests" $
>     [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable]
>     ,Group "ansi generated combination lexer tests" $
>         [ LexerTest ansi2011 (s ++ s1) (t ++ t1)
>         | (s,t) <- ansiLexerTable
>         , (s1,t1) <- ansiLexerTable

which combinations won't work:

<> <= >= ||   two single-char symbols which together make one multi-char symbol
identifier + identifier, if both are quoted or both are unquoted
string + string
csstring + string
line comment + anything (could be fixed by adding a newline?)
number + number (todo: double check this more carefully)
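For example (an illustrative check using the helpers defined in the where
clause below): "<" followed by ">" would lex back as the single symbol
"<>", so that pairing is filtered out, while the same symbols separated by
whitespace are fine:

    listPred (symbolPair "<" ">") [Symbol "<", Symbol ">"]
        -- True, so isGood rejects the combination
    listPred (symbolPair "<" ">") [Symbol "<", Whitespace " ", Symbol ">"]
        -- False, the combination is kept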
>         , isGood $ t ++ t1
>         ]
>     ,Group "adhoc lexer tests" $
>         map (uncurry $ LexerTest ansi2011)
>         [("", [])
>         ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier Nothing "stuff"])
>         ]
>     ]
>   where
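>     -- isGood: True when a concatenated token list contains none of the
>     -- bad adjacent pairs listed in badCombos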
>     isGood :: [Token] -> Bool
>     isGood l = {-let b =-} and $ map not [p l | p <- map listPred badCombos]
>       -- in trace ("isGood " ++ show (l,b)) b
>     badCombos :: [((Token -> Bool),(Token -> Bool))]
>     badCombos = [symbolPair "<" ">"
>                 ,symbolPair "<" "="
>                 ,symbolPair ">" "="
>                 ,symbolPair "!" "="
>                 ,symbolPair "|" "|"
>                 ,symbolPair "||" "|"
>                 ,symbolPair "|" "||"
>                 ,symbolPair "||" "||"
>                 ,symbolPair "<" ">="
>                 ,symbolPair "-" "-"
>                 ,symbolPair "/" "*"
>                 ,symbolPair "*" "/"
>                 ,(isIdentifier, isIdentifier)
>                 ,(isDQIdentifier, isDQIdentifier)
>                 ,(isCQIdentifier, isDQIdentifier)
>                 ,(isString, isNonCsString)
>                 ,(isEofLineComment, const True)
>                 ,(isNumber, isNumber)
>                 ,(isHostParam,isIdentifier)
>                 ,(isHostParam,isCsString)
>                 ,(isHostParam,isCQIdentifier)
>                 ,(isIdentifier,isCsString)
>                 ,(isIdentifier,isCQIdentifier)
>                 ,(isWhitespace, isWhitespace)
>                 ,(isIdentifier, isNumber)
>                 ,(isHostParam, isNumber)
>                 ,(isMinus, isLineComment)
>                 ]
>     isIdentifier (Identifier Nothing _) = True
>     isIdentifier _ = False
>     isDQIdentifier (Identifier (Just ("\"",_)) _) = True
>     isDQIdentifier _ = False
>     isCQIdentifier (Identifier (Just ((x:_),_)) _) | isAlpha x = True
>     isCQIdentifier _ = False
>     isCsString (SqlString (x:_) _ _) | isAlpha x = True
>     isCsString _ = False
>     isString (SqlString _ _ _) = True
>     isString _ = False
>     isNonCsString (SqlString [] _ _) = True
>     isNonCsString (SqlString (x:_) _ _) | not (isAlpha x) = True
>     isNonCsString _ = False
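>     -- a line comment that doesn't end in a newline absorbs whatever
>     -- follows it, so it can't be followed by any other token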
>     isEofLineComment (LineComment s) = last s /= '\n'
>     isEofLineComment _ = False
>     isLineComment (LineComment {}) = True
>     isLineComment _ = False
>     isNumber (SqlNumber{}) = True
>     isNumber _ = False
>     isHostParam (HostParam{}) = True
>     isHostParam _ = False
>     isWhitespace (Whitespace{}) = True
>     isWhitespace _ = False
>     isMinus (Symbol "-") = True
>     isMinus _ = False
>     symbolPair a b = ((==Symbol a), (==Symbol b))
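>     -- listPred (p,p1) ts is True if any adjacent pair of tokens in ts
>     -- matches the two predicates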
>     listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool
>     listPred _ [] = False
>     listPred _ [_] = False
>     listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
>                               | otherwise = listPred (p,p1) (t1:ts)

todo: more lexing tests

do quickcheck testing:
can try to generate valid tokens, then check that they parse

same as above: can also try to pair up tokens, write an accurate
function which says which ones can appear adjacent to each other, and
test those combinations

I think this, plus the explicit lists of tokens above which cover basic
sanity and explicit edge cases, will provide a high level of assurance.
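A rough sketch of the round-trip property described above. The names
genToken, prettyToks, and lexToks are placeholders which don't exist in
this codebase yet: genToken would generate a single valid token,
prettyToks would print a token list back to SQL source, and lexToks
would run the lexer over that source:

    -- hypothetical QuickCheck property, not wired up to the real lexer
    prop_lexRoundTrip :: Property
    prop_lexRoundTrip =
        forAll (listOf genToken) $ \ts ->
            -- pretty print the generated tokens, re-lex the output, and
            -- check the original tokens come back
            lexToks (prettyToks ts) == Just ts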
> postgresLexerTable :: [(String,[Token])]
> postgresLexerTable =
>     -- single char symbols
>     map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
>     -- multi char symbols
>     ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
>     -- todo: add many examples of generic symbols
>     -- also: do the testing for the ansi compatibility special cases
>     ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
>         -- simple identifiers
>         in map (\i -> (i, [Identifier Nothing i])) idens
>            ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
>            -- todo: in order to make lex . pretty id, need to
>            -- preserve the case of the u
>            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
>            -- host param
>            ++ map (\i -> (':':i, [HostParam i])) idens
>        )
>     -- positional var
>     ++ [("$1", [PositionalArg 1])]
>     -- quoted identifiers with embedded double quotes
>     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
>     -- strings
>     ++ [("'string'", [SqlString "'" "'" "string"])
>        ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
>        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
>        ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
>        ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
>        -- todo: implement only allowing \' in e quoted strings
>        {-,("'not this \\' quote", [SqlString "'" "'" "not this \\"
>          ,Whitespace " "
>          ,Identifier Nothing "quote"])-}
>        ]
>     -- csstrings
>     ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
>            ["n", "N","b", "B","x", "X", "u&", "e", "E"]
>     -- numbers
>     ++ [("10", [SqlNumber "10"])
>        ,(".1", [SqlNumber ".1"])
>        ,("5e3", [SqlNumber "5e3"])
>        ,("5e+3", [SqlNumber "5e+3"])
>        ,("5e-3", [SqlNumber "5e-3"])
>        ,("10.2", [SqlNumber "10.2"])
>        ,("10.2e7", [SqlNumber "10.2e7"])]
>     -- whitespace
>     ++ concat [[([a],[Whitespace [a]])
>                ,([a,b], [Whitespace [a,b]])]
>               | a <- " \n\t", b <- " \n\t"]
>     -- line comment
>     ++ map (\c -> (c, [LineComment c]))
>            ["--", "-- ", "-- this is a comment", "-- line com\n"]
>     -- block comment
>     ++ map (\c -> (c, [BlockComment c]))
>            ["/**/", "/* */","/* this is a comment */"
>            ,"/* this *is/ a comment */"
>            ]
> postgresLexerTests :: TestItem
> postgresLexerTests = Group "postgresLexerTests" $
>     [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
>     ]