simple-sql-parser/tools/Language/SQL/SimpleSQL/LexerTests.lhs



Tests for the lexer.

> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where

> import Language.SQL.SimpleSQL.TestTypes
> import Language.SQL.SimpleSQL.Lex (Token(..))
> --import Debug.Trace
> import Data.Char (isAlpha)

> -- | Table of source snippets, each paired with the exact token list the
> -- lexer is expected to produce for it. Every entry lexes to a single
> -- token. The table is used both entry by entry and, pairwise
> -- concatenated, to generate combination tests.
> ansiLexerTable :: [(String,[Token])]
> ansiLexerTable =
>     -- single char symbols
>     [([c], [Symbol [c]]) | c <- "+-^*/%~&|?<>[]=,;()"]
>     -- multi char symbols
>     ++ [(s, [Symbol s]) | s <- [">=","<=","!=","<>","||"]]
>     -- simple identifiers
>     ++ [(i, [Identifier i]) | i <- idens]
>     -- double quoted identifiers
>     ++ [("\"" ++ i ++ "\"", [QuotedIdentifier "\"" "\"" i]) | i <- idens]
>     -- u& prefixed quoted identifiers
>     -- todo: in order to make lex . pretty id, need to
>     -- preserve the case of the u
>     ++ [("u&\"" ++ i ++ "\"", [QuotedIdentifier "u&\"" "\"" i]) | i <- idens]
>     -- host param
>     ++ [(':':i, [HostParam i]) | i <- idens]
>     -- quoted identifiers with embedded double quotes
>     ++ [("\"normal \"\" iden\"", [QuotedIdentifier "\"" "\"" "normal \" iden"])]
>     -- strings
>     ++ [("'string'", [SqlString "'" "'" "string"])
>        ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"])
>        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])]
>     -- csstrings: a string literal with a prefix in front of the opening quote
>     ++ [(p ++ "'test'", [SqlString (p ++ "'") "'" "test"])
>        | p <- ["n", "N","b", "B","x", "X", "u&"]]
>     -- numbers
>     ++ [(n, [SqlNumber n])
>        | n <- ["10", ".1", "5e3", "5e+3", "5e-3", "10.2", "10.2e7"]]
>     -- whitespace: each single whitespace char once, followed by every
>     -- two char sequence starting with it (the single char entry is
>     -- emitted per 'a', not per (a,b) pair, so it is not duplicated)
>     ++ concat [ ([a], [Whitespace [a]])
>                 : [([a,b], [Whitespace [a,b]]) | b <- ws]
>               | a <- ws]
>     -- line comment
>     ++ [(c, [LineComment c])
>        | c <- ["--", "-- ", "-- this is a comment", "-- line com\n"]]
>     -- block comment
>     ++ [(c, [BlockComment c])
>        | c <- ["/**/", "/* */","/* this is a comment */"
>               ,"/* this *is/ a comment */"
>               ]]
>   where
>     -- names reused for the plain, quoted and host param variants
>     idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
>     -- the whitespace characters exercised by the table
>     ws = " \n\t"

> -- | Root of the lexer test tree exported by this module. It currently
> -- holds a single sub-group containing the ansi lexer tests.
> lexerTests :: TestItem
> lexerTests =
>     Group "lexerTests"
>         [Group "lexer token tests" [ansiLexerTests]]


> -- | Lexer tests for the ansi2011 dialect, in three groups: every entry of
> -- 'ansiLexerTable' on its own, every ordered pair of table entries
> -- concatenated (except the pairs rejected by @isGood@), and a few
> -- hand written cases.
> ansiLexerTests :: TestItem
> ansiLexerTests = Group "ansiLexerTests" $
>     [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t |  (s,t) <- ansiLexerTable]
>     ,Group "ansi generated combination lexer tests" $
>     [ LexerTest ansi2011 (s ++ s1) (t ++ t1)
>     | (s,t) <- ansiLexerTable
>     , (s1,t1) <- ansiLexerTable

Some combinations cannot be tested by simply gluing the two source
snippets together, because the joined text does not lex back to the two
original tokens. These are filtered out:

* two symbols which together make a multi char symbol (<> <= >= ||)
* identifier + identifier, if both are quoted or both are unquoted
* string string
* csstring string
* line comment followed by anything (can add newline?)
* number number (todo: double check more carefully)

>     , isGood $ t ++ t1

>     ]
>     ,Group "adhoc lexer tests" $
>        map (uncurry $ LexerTest ansi2011)
>        [("", [])
>        ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier "stuff"])
>        ]
>      ]

>  where
>    -- A token list is good when no two neighbouring tokens match any of
>    -- the bad combinations. (Wrap the result in Debug.Trace.trace to see
>    -- which lists get rejected.)
>    isGood :: [Token] -> Bool
>    isGood l = not (any (`listPred` l) badCombos)
>    -- Pairs of predicates (first token, following token) describing
>    -- neighbours which must not be generated.
>    badCombos :: [((Token -> Bool),(Token -> Bool))]
>    badCombos = [-- symbols which run together into a different symbol
>                 symbolPair "<" ">"
>                ,symbolPair "<" "="
>                ,symbolPair ">" "="
>                ,symbolPair "!" "="
>                ,symbolPair "|" "|"
>                ,symbolPair "||" "|"
>                ,symbolPair "|" "||"
>                ,symbolPair "||" "||"
>                ,symbolPair "<" ">="
>                 -- symbols which run together into comment delimiters:
>                 -- "--" and "/*" are how the comments in the table start;
>                 -- "*/" presumably is not accepted on its own (confirm
>                 -- against the lexer)
>                ,symbolPair "-" "-"
>                ,symbolPair "/" "*"
>                ,symbolPair "*" "/"
>                 -- names, strings and numbers which would run together
>                ,(isIdentifier, isIdentifier)
>                ,(isDQIdentifier, isDQIdentifier)
>                ,(isCQIdentifier, isDQIdentifier)
>                ,(isString, isNonCsString)
>                 -- a line comment without a trailing newline takes in
>                 -- whatever follows it
>                ,(isEofLineComment, const True)
>                ,(isNumber, isNumber)
>                ,(isHostParam,isIdentifier)
>                ,(isHostParam,isCsString)
>                ,(isHostParam,isCQIdentifier)
>                ,(isIdentifier,isCsString)
>                ,(isIdentifier,isCQIdentifier)
>                ,(isWhitespace, isWhitespace)
>                ,(isIdentifier, isNumber)
>                ,(isHostParam, isNumber)
>                 -- presumably the minus joins up with the leading dashes
>                 -- of the comment
>                ,(isMinus, isLineComment)
>                ]
>    isIdentifier (Identifier{}) = True
>    isIdentifier _ = False
>    -- quoted identifier whose opening delimiter is exactly a double quote
>    isDQIdentifier (QuotedIdentifier "\"" _ _) = True
>    isDQIdentifier _ = False
>    -- quoted identifier whose opening delimiter starts with a letter
>    isCQIdentifier (QuotedIdentifier (x:_) _ _) | isAlpha x = True
>    isCQIdentifier _ = False
>    -- string whose opening delimiter starts with a letter
>    isCsString (SqlString (x:_) _ _) | isAlpha x = True
>    isCsString _ = False
>    isString (SqlString{}) = True
>    isString _ = False
>    -- string whose opening delimiter is empty or does not start with a
>    -- letter
>    isNonCsString (SqlString [] _ _) = True
>    isNonCsString (SqlString (x:_) _ _) | not (isAlpha x) = True
>    isNonCsString _ = False
>    -- line comment which is not terminated by a newline; total, so an
>    -- empty comment text counts as unterminated instead of crashing
>    isEofLineComment (LineComment s) = take 1 (reverse s) /= "\n"
>    isEofLineComment _ = False
>    isLineComment (LineComment{}) = True
>    isLineComment _ = False
>    isNumber (SqlNumber{}) = True
>    isNumber _ = False
>    isHostParam (HostParam{}) = True
>    isHostParam _ = False
>    isWhitespace (Whitespace{}) = True
>    isWhitespace _ = False
>    isMinus (Symbol "-") = True
>    isMinus _ = False
>    -- predicate pair matching exactly symbol a followed by symbol b
>    symbolPair a b = ((==Symbol a), (==Symbol b))
>    -- True when some token satisfying the first predicate is immediately
>    -- followed by a token satisfying the second one; lists with fewer
>    -- than two tokens never match
>    listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool
>    listPred (p,p1) ts = or $ zipWith (\a b -> p a && p1 b) ts (drop 1 ts)
add separate lexer 2015-07-31 23:04:18 +02:00

			`Test for the lexer`

			`> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where`

			`> import Language.SQL.SimpleSQL.TestTypes`
rename modules lexer->lex, parser->parse 2016-02-12 11:22:19 +01:00			`> import Language.SQL.SimpleSQL.Lex (Token(..))`
add separate lexer 2015-07-31 23:04:18 +02:00			`> --import Debug.Trace`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> import Data.Char (isAlpha)`
add separate lexer 2015-07-31 23:04:18 +02:00
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ansiLexerTable :: [(String,[Token])]`
			`> ansiLexerTable =`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- single char symbols`
			`> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&\|?<>[]=,;()"`
			`> -- multi char symbols`
			`> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","\|\|"]`
			`> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]`
			`> -- simple identifiers`
			`> in map (\i -> (i, [Identifier i])) idens`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ++ map (\i -> ("\"" ++ i ++ "\"", [QuotedIdentifier "\"" "\"" i])) idens`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- todo: in order to make lex . pretty id, need to`
			`> -- preserve the case of the u`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ++ map (\i -> ("u&\"" ++ i ++ "\"", [QuotedIdentifier "u&\"" "\"" i])) idens`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- host param`
			`> ++ map (\i -> (':':i, [HostParam i])) idens`
			`> )`
			`> -- quoted identifiers with embedded double quotes`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ++ [("\"normal \"\" iden\"", [QuotedIdentifier "\"" "\"" "normal \" iden"])]`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- strings`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ++ [("'string'", [SqlString "'" "'" "string"])`
			`> ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"])`
			`> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])]`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- csstrings`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ["n", "N","b", "B","x", "X", "u&"]`
			`> -- numbers`
			`> ++ [("10", [SqlNumber "10"])`
			`> ,(".1", [SqlNumber ".1"])`
			`> ,("5e3", [SqlNumber "5e3"])`
			`> ,("5e+3", [SqlNumber "5e+3"])`
			`> ,("5e-3", [SqlNumber "5e-3"])`
			`> ,("10.2", [SqlNumber "10.2"])`
			`> ,("10.2e7", [SqlNumber "10.2e7"])]`
			`> -- whitespace`
			`> ++ concat [[([a],[Whitespace [a]])`
			`> ,([a,b], [Whitespace [a,b]])]`
			`> \| a <- " \n\t", b <- " \n\t"]`
			`> -- line comment`
			`> ++ map (\c -> (c, [LineComment c]))`
tweak lexer tests 2015-08-02 14:58:09 +02:00			`> ["--", "-- ", "-- this is a comment", "-- line com\n"]`
add separate lexer 2015-07-31 23:04:18 +02:00			`> -- block comment`
			`> ++ map (\c -> (c, [BlockComment c]))`
			`> ["/*/", "/ /","/ this is a comment */"`
small fixes fix positions? small fixes to haddock add notes to top of lexer module simplify line comment lexer remove some trys from lexer fix the block comment parser to return all the comment text when there are embedded / * in the comment refactor the symbol, keyword and identifier blacklist checking into the low level token parsers instead of a separate step using guard 2015-08-01 12:22:07 +02:00			`> ,"/* this is/ a comment /"`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ]`

			`> lexerTests :: TestItem`
			`> lexerTests = Group "lexerTests" $`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> [Group "lexer token tests" [ansiLexerTests]]`


			`> ansiLexerTests :: TestItem`
			`> ansiLexerTests = Group "ansiLexerTests" $`
			`> [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t \| (s,t) <- ansiLexerTable]`
			`> ,Group "ansi generated combination lexer tests" $`
refactor dialect into a non enum and separate to own file 2016-02-12 11:51:06 +01:00			`> [ LexerTest ansi2011 (s ++ s1) (t ++ t1)`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> \| (s,t) <- ansiLexerTable`
			`> , (s1,t1) <- ansiLexerTable`
add separate lexer 2015-07-31 23:04:18 +02:00
			`which combinations won't work:`
			`<> <= >= \|\| two single symbols which make a double char symbol`
			`identifier + identifier if both are quoted or unquoted`
			`string string`
			`csstring string`
			`line comment anything (can add newline?)`
			`number number (todo: double check more carefully)`

			`> , isGood $ t ++ t1`

			`> ]`
rearrange tests slightly and hide/show tests in the website a bit better 2015-08-15 18:04:29 +02:00			`> ,Group "adhoc lexer tests" $`
refactor dialect into a non enum and separate to own file 2016-02-12 11:51:06 +01:00			`> map (uncurry $ LexerTest ansi2011)`
add separate lexer 2015-07-31 23:04:18 +02:00			`> [("", [])`
fix positioning in lexer? and fix line comment token missing trailing \n 2015-08-02 14:29:35 +02:00			`> ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier "stuff"])`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ]`
rearrange tests slightly and hide/show tests in the website a bit better 2015-08-15 18:04:29 +02:00			`> ]`
add separate lexer 2015-07-31 23:04:18 +02:00
			`> where`
			`> isGood :: [Token] -> Bool`
			`> isGood l = {-let b =-} and $ map not [p l \| p <- map listPred badCombos]`
			`> -- in trace ("isGood " ++ show (l,b)) b`
			`> badCombos :: [((Token -> Bool),(Token -> Bool))]`
			`> badCombos = [symbolPair "<" ">"`
			`> ,symbolPair "<" "="`
			`> ,symbolPair ">" "="`
			`> ,symbolPair "!" "="`
			`> ,symbolPair "\|" "\|"`
			`> ,symbolPair "\|\|" "\|"`
			`> ,symbolPair "\|" "\|\|"`
			`> ,symbolPair "\|\|" "\|\|"`
			`> ,symbolPair "<" ">="`

			`> ,symbolPair "-" "-"`
			`> ,symbolPair "/" "*"`
			`> ,symbolPair "*" "/"`

			`> ,(isIdentifier, isIdentifier)`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ,(isDQIdentifier, isDQIdentifier)`
			`> ,(isCQIdentifier, isDQIdentifier)`
			`> ,(isString, isNonCsString)`
tweak lexer tests 2015-08-02 14:58:09 +02:00			`> ,(isEofLineComment, const True)`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ,(isNumber, isNumber)`
			`> ,(isHostParam,isIdentifier)`
			`> ,(isHostParam,isCsString)`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ,(isHostParam,isCQIdentifier)`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ,(isIdentifier,isCsString)`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> ,(isIdentifier,isCQIdentifier)`
add separate lexer 2015-07-31 23:04:18 +02:00			`> ,(isWhitespace, isWhitespace)`
			`> ,(isIdentifier, isNumber)`
			`> ,(isHostParam, isNumber)`
			`> ,(isMinus, isLineComment)`
			`> ]`
			`> isIdentifier (Identifier _) = True`
			`> isIdentifier _ = False`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> isDQIdentifier (QuotedIdentifier "\"" _ _) = True`
			`> isDQIdentifier _ = False`
			`> isCQIdentifier (QuotedIdentifier (x:_) _ _) \| isAlpha x = True`
			`> isCQIdentifier _ = False`
			`> isCsString (SqlString (x:_) _ _) \| isAlpha x = True`
add separate lexer 2015-07-31 23:04:18 +02:00			`> isCsString _ = False`
new syntax for names and string literals 2016-02-12 12:09:58 +01:00			`> isString (SqlString _ _ _) = True`
			`> isString _ = False`
			`> isNonCsString (SqlString [] _ _) = True`
			`> isNonCsString (SqlString (x:_) _ _) \| not (isAlpha x) = True`
			`> isNonCsString _ = False`
tweak lexer tests 2015-08-02 14:58:09 +02:00			`> isEofLineComment (LineComment s) = last s /= '\n'`
			`> isEofLineComment _ = False`
			`> isLineComment (LineComment {}) = True`
add separate lexer 2015-07-31 23:04:18 +02:00			`> isLineComment _ = False`
			`> isNumber (SqlNumber{}) = True`
			`> isNumber _ = False`
			`> isHostParam (HostParam{}) = True`
			`> isHostParam _ = False`
			`> isWhitespace (Whitespace{}) = True`
			`> isWhitespace _ = False`
			`> isMinus (Symbol "-") = True`
			`> isMinus _ = False`
			`> symbolPair a b = ((==Symbol a), (==Symbol b))`
			`> listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool`
			`> listPred _ [] = False`
			`> listPred _ [_] = False`
			`> listPred (p,p1) (t:t1:ts) \| p t && p1 t1 = True`
			`> \| otherwise = listPred (p,p1) (t1:ts)`