don't unescape quotes in string literals and identifiers
This commit is contained in:
parent
a892d6d2ee
commit
a59f19aae9
|
@ -112,22 +112,9 @@ todo: public documentation on dialect definition - and dialect flags
|
||||||
> prettyToken :: Dialect -> Token -> String
|
> prettyToken :: Dialect -> Token -> String
|
||||||
> prettyToken _ (Symbol s) = s
|
> prettyToken _ (Symbol s) = s
|
||||||
> prettyToken _ (Identifier Nothing t) = t
|
> prettyToken _ (Identifier Nothing t) = t
|
||||||
> prettyToken _ (Identifier (Just (q1,q2)) t) =
|
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
|
||||||
> q1 ++
|
|
||||||
> -- todo: a bit hacky, do a better design
|
|
||||||
> -- the dialect will know how to escape and unescape
|
|
||||||
> -- contents, but the parser here also needs to know
|
|
||||||
> -- about parsing escaped quotes
|
|
||||||
> (if '"' `elem` q1 then doubleChars '"' t else t)
|
|
||||||
> ++ q2
|
|
||||||
> --prettyToken _ (UQIdentifier t) =
|
|
||||||
> -- "u&\"" ++ doubleChars '"' t ++ "\""
|
|
||||||
> --prettyToken _ (DQIdentifier s e t) =
|
|
||||||
> -- s ++ t ++ e
|
|
||||||
> prettyToken _ (HostParam p) = ':':p
|
> prettyToken _ (HostParam p) = ':':p
|
||||||
> prettyToken _ (SqlString s e t) =
|
> prettyToken _ (SqlString s e t) = s ++ t ++ e
|
||||||
> s ++ (if '\'' `elem` s then doubleChars '\'' t else t) ++ e
|
|
||||||
> --prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
|
|
||||||
> prettyToken _ (SqlNumber r) = r
|
> prettyToken _ (SqlNumber r) = r
|
||||||
> prettyToken _ (Whitespace t) = t
|
> prettyToken _ (Whitespace t) = t
|
||||||
> prettyToken _ (LineComment l) = l
|
> prettyToken _ (LineComment l) = l
|
||||||
|
@ -136,18 +123,6 @@ todo: public documentation on dialect definition - and dialect flags
|
||||||
> prettyTokens :: Dialect -> [Token] -> String
|
> prettyTokens :: Dialect -> [Token] -> String
|
||||||
> prettyTokens d ts = concat $ map (prettyToken d) ts
|
> prettyTokens d ts = concat $ map (prettyToken d) ts
|
||||||
|
|
||||||
When parsing a quoted identifier, you can have a double quote
|
|
||||||
character in the identifier like this: "quotes""identifier" ->
|
|
||||||
quoted"identifer. The double double quotes character is changed to a
|
|
||||||
single character in the lexer and expanded back to two characters in
|
|
||||||
the pretty printer. This also applies to strings, which can embed a
|
|
||||||
single quote like this: 'string''with quote'.
|
|
||||||
|
|
||||||
> doubleChars :: Char -> String -> String
|
|
||||||
> doubleChars _ [] = []
|
|
||||||
> doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
|
|
||||||
> | otherwise = d:doubleChars c ds
|
|
||||||
|
|
||||||
TODO: try to make all parsers applicative only
|
TODO: try to make all parsers applicative only
|
||||||
|
|
||||||
> -- | Lex some SQL to a list of tokens.
|
> -- | Lex some SQL to a list of tokens.
|
||||||
|
@ -222,7 +197,7 @@ u&"unicode quoted identifier"
|
||||||
> -- deal with "" as literal double quote character
|
> -- deal with "" as literal double quote character
|
||||||
> choice [do
|
> choice [do
|
||||||
> void $ char '"'
|
> void $ char '"'
|
||||||
> qidenSuffix $ concat [t,s,"\""]
|
> qidenSuffix $ concat [t,s,"\"\""]
|
||||||
> ,return $ concat [t,s]]
|
> ,return $ concat [t,s]]
|
||||||
> -- mysql can quote identifiers with `
|
> -- mysql can quote identifiers with `
|
||||||
> mySqlQIden = do
|
> mySqlQIden = do
|
||||||
|
@ -259,12 +234,16 @@ x'hexidecimal string'
|
||||||
> -- deal with '' as literal quote character
|
> -- deal with '' as literal quote character
|
||||||
> choice [do
|
> choice [do
|
||||||
> void $ char '\''
|
> void $ char '\''
|
||||||
> normalStringSuffix $ concat [t,s,"'"]
|
> normalStringSuffix $ concat [t,s,"''"]
|
||||||
> ,return $ concat [t,s]]
|
> ,return $ concat [t,s]]
|
||||||
> -- try is used to to avoid conflicts with
|
> -- try is used to to avoid conflicts with
|
||||||
> -- identifiers which can start with n,b,x,u
|
> -- identifiers which can start with n,b,x,u
|
||||||
> -- once we read the quote type and the starting '
|
> -- once we read the quote type and the starting '
|
||||||
> -- then we commit to a string
|
> -- then we commit to a string
|
||||||
|
> -- it's possible that this will reject some valid syntax
|
||||||
|
> -- but only pathalogical stuff, and I think the improved
|
||||||
|
> -- error messages and user predictability make it a good
|
||||||
|
> -- pragmatic choice
|
||||||
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
|
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
|
||||||
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
|
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
|
||||||
> ++ [string "u&'"
|
> ++ [string "u&'"
|
||||||
|
|
|
@ -40,8 +40,7 @@ which have been changed to try to improve the layout of the output.
|
||||||
= value expressions
|
= value expressions
|
||||||
|
|
||||||
> valueExpr :: Dialect -> ValueExpr -> Doc
|
> valueExpr :: Dialect -> ValueExpr -> Doc
|
||||||
> valueExpr _ (StringLit s e t) =
|
> valueExpr _ (StringLit s e t) = text s <> text t <> text e
|
||||||
> text (s ++ (if '\'' `elem` s then doubleUpQuotes t else t) ++ e)
|
|
||||||
|
|
||||||
> valueExpr _ (NumLit s) = text s
|
> valueExpr _ (NumLit s) = text s
|
||||||
> valueExpr _ (IntervalLit s v f t) =
|
> valueExpr _ (IntervalLit s v f t) =
|
||||||
|
@ -227,22 +226,10 @@ which have been changed to try to improve the layout of the output.
|
||||||
> valueExpr d (VEComment cmt v) =
|
> valueExpr d (VEComment cmt v) =
|
||||||
> vcat $ map comment cmt ++ [valueExpr d v]
|
> vcat $ map comment cmt ++ [valueExpr d v]
|
||||||
|
|
||||||
> doubleUpQuotes :: String -> String
|
|
||||||
> doubleUpQuotes [] = []
|
|
||||||
> doubleUpQuotes ('\'':cs) = '\'':'\'':doubleUpQuotes cs
|
|
||||||
> doubleUpQuotes (c:cs) = c:doubleUpQuotes cs
|
|
||||||
|
|
||||||
> doubleUpDoubleQuotes :: String -> String
|
|
||||||
> doubleUpDoubleQuotes [] = []
|
|
||||||
> doubleUpDoubleQuotes ('"':cs) = '"':'"':doubleUpDoubleQuotes cs
|
|
||||||
> doubleUpDoubleQuotes (c:cs) = c:doubleUpDoubleQuotes cs
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
> unname :: Name -> String
|
> unname :: Name -> String
|
||||||
> unname (Name Nothing n) = n
|
> unname (Name Nothing n) = n
|
||||||
> unname (Name (Just (s,e)) n) =
|
> unname (Name (Just (s,e)) n) =
|
||||||
> s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e
|
> s ++ n ++ e
|
||||||
|
|
||||||
> unnames :: [Name] -> String
|
> unnames :: [Name] -> String
|
||||||
> unnames ns = intercalate "." $ map unname ns
|
> unnames ns = intercalate "." $ map unname ns
|
||||||
|
@ -250,8 +237,7 @@ which have been changed to try to improve the layout of the output.
|
||||||
|
|
||||||
> name :: Name -> Doc
|
> name :: Name -> Doc
|
||||||
> name (Name Nothing n) = text n
|
> name (Name Nothing n) = text n
|
||||||
> name (Name (Just (s,e)) n) =
|
> name (Name (Just (s,e)) n) = text s <> text n <> text e
|
||||||
> text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e
|
|
||||||
|
|
||||||
> names :: [Name] -> Doc
|
> names :: [Name] -> Doc
|
||||||
> names ns = hcat $ punctuate (text ".") $ map name ns
|
> names ns = hcat $ punctuate (text ".") $ map name ns
|
||||||
|
|
|
@ -9,6 +9,10 @@ Test for the lexer
|
||||||
> --import Debug.Trace
|
> --import Debug.Trace
|
||||||
> import Data.Char (isAlpha)
|
> import Data.Char (isAlpha)
|
||||||
|
|
||||||
|
> lexerTests :: TestItem
|
||||||
|
> lexerTests = Group "lexerTests" $
|
||||||
|
> [Group "lexer token tests" [ansiLexerTests]]
|
||||||
|
|
||||||
> ansiLexerTable :: [(String,[Token])]
|
> ansiLexerTable :: [(String,[Token])]
|
||||||
> ansiLexerTable =
|
> ansiLexerTable =
|
||||||
> -- single char symbols
|
> -- single char symbols
|
||||||
|
@ -26,11 +30,13 @@ Test for the lexer
|
||||||
> ++ map (\i -> (':':i, [HostParam i])) idens
|
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||||
> )
|
> )
|
||||||
> -- quoted identifiers with embedded double quotes
|
> -- quoted identifiers with embedded double quotes
|
||||||
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
|
> -- the lexer doesn't unescape the quotes
|
||||||
|
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
|
||||||
> -- strings
|
> -- strings
|
||||||
|
> -- the lexer doesn't apply escapes at all
|
||||||
> ++ [("'string'", [SqlString "'" "'" "string"])
|
> ++ [("'string'", [SqlString "'" "'" "string"])
|
||||||
> ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"])
|
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
|
||||||
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])]
|
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])]
|
||||||
> -- csstrings
|
> -- csstrings
|
||||||
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
||||||
> ["n", "N","b", "B","x", "X", "u&"]
|
> ["n", "N","b", "B","x", "X", "u&"]
|
||||||
|
@ -55,11 +61,6 @@ Test for the lexer
|
||||||
> ,"/* this *is/ a comment */"
|
> ,"/* this *is/ a comment */"
|
||||||
> ]
|
> ]
|
||||||
|
|
||||||
> lexerTests :: TestItem
|
|
||||||
> lexerTests = Group "lexerTests" $
|
|
||||||
> [Group "lexer token tests" [ansiLexerTests]]
|
|
||||||
|
|
||||||
|
|
||||||
> ansiLexerTests :: TestItem
|
> ansiLexerTests :: TestItem
|
||||||
> ansiLexerTests = Group "ansiLexerTests" $
|
> ansiLexerTests = Group "ansiLexerTests" $
|
||||||
> [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable]
|
> [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable]
|
||||||
|
@ -152,3 +153,68 @@ number number (todo: double check more carefully)
|
||||||
> listPred _ [_] = False
|
> listPred _ [_] = False
|
||||||
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
|
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
|
||||||
> | otherwise = listPred (p,p1) (t1:ts)
|
> | otherwise = listPred (p,p1) (t1:ts)
|
||||||
|
|
||||||
|
todo: lexing tests
|
||||||
|
do quickcheck testing:
|
||||||
|
can try to generate valid tokens then check they parse
|
||||||
|
|
||||||
|
same as above: can also try to pair tokens, create an accurate
|
||||||
|
function to say which ones can appear adjacent, and test
|
||||||
|
|
||||||
|
I think this plus the explicit lists of tokens like above which do
|
||||||
|
basic sanity + explicit edge casts will provide a high level of
|
||||||
|
assurance.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
> postgresLexerTable :: [(String,[Token])]
|
||||||
|
> postgresLexerTable =
|
||||||
|
> -- single char symbols
|
||||||
|
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
||||||
|
> -- multi char symbols
|
||||||
|
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
||||||
|
> -- symbols to add: :, ::, .. :=
|
||||||
|
> -- plus generic symbols
|
||||||
|
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||||
|
> -- simple identifiers
|
||||||
|
> in map (\i -> (i, [Identifier Nothing i])) idens
|
||||||
|
> ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
|
||||||
|
> -- todo: in order to make lex . pretty id, need to
|
||||||
|
> -- preserve the case of the u
|
||||||
|
> ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
|
||||||
|
> -- host param
|
||||||
|
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||||
|
> )
|
||||||
|
> -- positional var
|
||||||
|
> -- quoted identifiers with embedded double quotes
|
||||||
|
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
|
||||||
|
> -- strings
|
||||||
|
> ++ [("'string'", [SqlString "'" "'" "string"])
|
||||||
|
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
|
||||||
|
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])
|
||||||
|
> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"])
|
||||||
|
> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"])
|
||||||
|
> ]
|
||||||
|
> -- csstrings
|
||||||
|
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
||||||
|
> ["n", "N","b", "B","x", "X", "u&", "e", "E"]
|
||||||
|
> -- numbers
|
||||||
|
> ++ [("10", [SqlNumber "10"])
|
||||||
|
> ,(".1", [SqlNumber ".1"])
|
||||||
|
> ,("5e3", [SqlNumber "5e3"])
|
||||||
|
> ,("5e+3", [SqlNumber "5e+3"])
|
||||||
|
> ,("5e-3", [SqlNumber "5e-3"])
|
||||||
|
> ,("10.2", [SqlNumber "10.2"])
|
||||||
|
> ,("10.2e7", [SqlNumber "10.2e7"])]
|
||||||
|
> -- whitespace
|
||||||
|
> ++ concat [[([a],[Whitespace [a]])
|
||||||
|
> ,([a,b], [Whitespace [a,b]])]
|
||||||
|
> | a <- " \n\t", b <- " \n\t"]
|
||||||
|
> -- line comment
|
||||||
|
> ++ map (\c -> (c, [LineComment c]))
|
||||||
|
> ["--", "-- ", "-- this is a comment", "-- line com\n"]
|
||||||
|
> -- block comment
|
||||||
|
> ++ map (\c -> (c, [BlockComment c]))
|
||||||
|
> ["/**/", "/* */","/* this is a comment */"
|
||||||
|
> ,"/* this *is/ a comment */"
|
||||||
|
> ]
|
||||||
|
|
|
@ -514,7 +514,7 @@ Specify a non-null value.
|
||||||
> ,("'something' -- a comment\n ' some more' /*another comment*/ 'and more'"
|
> ,("'something' -- a comment\n ' some more' /*another comment*/ 'and more'"
|
||||||
> ,StringLit "'" "'" "something some moreand more")
|
> ,StringLit "'" "'" "something some moreand more")
|
||||||
> ,("'a quote: '', stuff'"
|
> ,("'a quote: '', stuff'"
|
||||||
> ,StringLit "'" "'" "a quote: ', stuff")
|
> ,StringLit "'" "'" "a quote: '', stuff")
|
||||||
> ,("''"
|
> ,("''"
|
||||||
> ,StringLit "'" "'" "")
|
> ,StringLit "'" "'" "")
|
||||||
|
|
||||||
|
@ -754,9 +754,9 @@ Specify names.
|
||||||
> ,("a.b",Iden [Name Nothing "a", Name Nothing "b"])
|
> ,("a.b",Iden [Name Nothing "a", Name Nothing "b"])
|
||||||
> ,("a.b.c",Iden [Name Nothing "a", Name Nothing "b", Name Nothing "c"])
|
> ,("a.b.c",Iden [Name Nothing "a", Name Nothing "b", Name Nothing "c"])
|
||||||
> ,("\"quoted iden\"", Iden [Name (Just ("\"","\"")) "quoted iden"])
|
> ,("\"quoted iden\"", Iden [Name (Just ("\"","\"")) "quoted iden"])
|
||||||
> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \" iden"])
|
> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \"\" iden"])
|
||||||
> ,("U&\"quoted iden\"", Iden [Name (Just ("U&\"","\"")) "quoted iden"])
|
> ,("U&\"quoted iden\"", Iden [Name (Just ("U&\"","\"")) "quoted iden"])
|
||||||
> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \" iden"])
|
> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \"\" iden"])
|
||||||
> ]
|
> ]
|
||||||
|
|
||||||
TODO: more identifiers, e.g. unicode escapes?, mixed quoted/unquoted
|
TODO: more identifiers, e.g. unicode escapes?, mixed quoted/unquoted
|
||||||
|
|
|
@ -35,7 +35,7 @@ Tests for parsing value expressions
|
||||||
> ,("3e+3", NumLit "3e+3")
|
> ,("3e+3", NumLit "3e+3")
|
||||||
> ,("3e-3", NumLit "3e-3")
|
> ,("3e-3", NumLit "3e-3")
|
||||||
> ,("'string'", StringLit "'" "'" "string")
|
> ,("'string'", StringLit "'" "'" "string")
|
||||||
> ,("'string with a '' quote'", StringLit "'" "'" "string with a ' quote")
|
> ,("'string with a '' quote'", StringLit "'" "'" "string with a '' quote")
|
||||||
> ,("'1'", StringLit "'" "'" "1")
|
> ,("'1'", StringLit "'" "'" "1")
|
||||||
> ,("interval '3' day"
|
> ,("interval '3' day"
|
||||||
> ,IntervalLit Nothing "3" (Itf "day" Nothing) Nothing)
|
> ,IntervalLit Nothing "3" (Itf "day" Nothing) Nothing)
|
||||||
|
|
Loading…
Reference in a new issue