1
Fork 0

don't unescape quotes in string literals and identifiers

This commit is contained in:
Jake Wheat 2016-02-13 15:54:40 +02:00
parent a892d6d2ee
commit a59f19aae9
5 changed files with 89 additions and 58 deletions

View file

@ -9,6 +9,10 @@ Test for the lexer
> --import Debug.Trace
> import Data.Char (isAlpha)
> -- Top-level group for the lexer tests; it currently wraps only the
> -- ANSI lexer tests.
> lexerTests :: TestItem
> lexerTests =
>     Group "lexerTests"
>         [Group "lexer token tests" [ansiLexerTests]]
> ansiLexerTable :: [(String,[Token])]
> ansiLexerTable =
> -- single char symbols
@ -26,11 +30,13 @@ Test for the lexer
> ++ map (\i -> (':':i, [HostParam i])) idens
> )
> -- quoted identifiers with embedded double quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
> -- the lexer doesn't unescape the quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
> -- strings
> -- the lexer doesn't apply escapes at all
> ++ [("'string'", [SqlString "'" "'" "string"])
> ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])]
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])]
> -- csstrings
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
> ["n", "N","b", "B","x", "X", "u&"]
@ -55,11 +61,6 @@ Test for the lexer
> ,"/* this *is/ a comment */"
> ]
> lexerTests :: TestItem
> lexerTests = Group "lexerTests" $
> [Group "lexer token tests" [ansiLexerTests]]
> ansiLexerTests :: TestItem
> ansiLexerTests = Group "ansiLexerTests" $
> [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable]
@ -152,3 +153,68 @@ number number (todo: double check more carefully)
> listPred _ [_] = False
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
> | otherwise = listPred (p,p1) (t1:ts)
todo: lexing tests
do quickcheck testing:
can try to generate valid tokens then check they parse
same as above: can also try to pair tokens, create an accurate
function to say which ones can appear adjacent, and test
I think this, plus the explicit lists of tokens like the ones above (which
cover basic sanity checks and explicit edge cases), will provide a high
level of assurance.
> -- Lexer test table for the postgres dialect: each entry pairs a source
> -- string with the list of tokens the lexer is expected to produce.
> --
> -- As in the ansi table, the expected tokens carry the raw source text:
> -- the lexer does not unescape doubled quotes (or apply any other
> -- escapes) inside string literals and quoted identifiers.
> postgresLexerTable :: [(String,[Token])]
> postgresLexerTable =
>     -- single char symbols
>     map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
>     -- multi char symbols
>     ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
>     -- symbols to add: :, ::, .. :=
>     -- plus generic symbols
>     ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
>         -- simple identifiers
>         in map (\i -> (i, [Identifier Nothing i])) idens
>            ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
>            -- todo: in order to make lex . pretty id, need to
>            -- preserve the case of the u
>            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
>            -- host param
>            ++ map (\i -> (':':i, [HostParam i])) idens
>        )
>     -- todo: positional var
>     -- quoted identifiers with embedded double quotes
>     -- the lexer doesn't unescape the quotes
>     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
>     -- strings
>     -- the lexer doesn't apply escapes at all
>     ++ [("'string'", [SqlString "'" "'" "string"])
>        ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
>        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
>        ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
>        -- note the doubled backslash: in a Haskell string "\'" is just a
>        -- quote character, so "\\'" is needed to give the lexer a real
>        -- backslash followed by a quote
>        -- NOTE(review): assumes the postgres lexer treats \' inside an
>        -- e'...' string as part of the string body - confirm
>        ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
>        ]
>     -- csstrings
>     ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
>        ["n", "N","b", "B","x", "X", "u&", "e", "E"]
>     -- numbers
>     ++ [("10", [SqlNumber "10"])
>        ,(".1", [SqlNumber ".1"])
>        ,("5e3", [SqlNumber "5e3"])
>        ,("5e+3", [SqlNumber "5e+3"])
>        ,("5e-3", [SqlNumber "5e-3"])
>        ,("10.2", [SqlNumber "10.2"])
>        ,("10.2e7", [SqlNumber "10.2e7"])]
>     -- whitespace
>     ++ concat [[([a],[Whitespace [a]])
>                ,([a,b], [Whitespace [a,b]])]
>               | a <- " \n\t", b <- " \n\t"]
>     -- line comment
>     ++ map (\c -> (c, [LineComment c]))
>        ["--", "-- ", "-- this is a comment", "-- line com\n"]
>     -- block comment
>     ++ map (\c -> (c, [BlockComment c]))
>        ["/**/", "/* */","/* this is a comment */"
>        ,"/* this *is/ a comment */"
>        ]

View file

@ -514,7 +514,7 @@ Specify a non-null value.
> ,("'something' -- a comment\n ' some more' /*another comment*/ 'and more'"
> ,StringLit "'" "'" "something some moreand more")
> ,("'a quote: '', stuff'"
> ,StringLit "'" "'" "a quote: ', stuff")
> ,StringLit "'" "'" "a quote: '', stuff")
> ,("''"
> ,StringLit "'" "'" "")
@ -754,9 +754,9 @@ Specify names.
> ,("a.b",Iden [Name Nothing "a", Name Nothing "b"])
> ,("a.b.c",Iden [Name Nothing "a", Name Nothing "b", Name Nothing "c"])
> ,("\"quoted iden\"", Iden [Name (Just ("\"","\"")) "quoted iden"])
> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \" iden"])
> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \"\" iden"])
> ,("U&\"quoted iden\"", Iden [Name (Just ("U&\"","\"")) "quoted iden"])
> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \" iden"])
> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \"\" iden"])
> ]
TODO: more identifiers, e.g. unicode escapes?, mixed quoted/unquoted

View file

@ -35,7 +35,7 @@ Tests for parsing value expressions
> ,("3e+3", NumLit "3e+3")
> ,("3e-3", NumLit "3e-3")
> ,("'string'", StringLit "'" "'" "string")
> ,("'string with a '' quote'", StringLit "'" "'" "string with a ' quote")
> ,("'string with a '' quote'", StringLit "'" "'" "string with a '' quote")
> ,("'1'", StringLit "'" "'" "1")
> ,("interval '3' day"
> ,IntervalLit Nothing "3" (Itf "day" Nothing) Nothing)