don't unescape quotes in string literals and identifiers

2016-02-13 15:54:40 +02:00 · 2016-02-13 15:54:40 +02:00 · a59f19aae9
commit a59f19aae9
parent a892d6d2ee
5 changed files with 89 additions and 58 deletions
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@ -112,22 +112,9 @@ todo: public documentation on dialect definition - and dialect flags
 > prettyToken :: Dialect -> Token -> String
 > prettyToken _ (Symbol s) = s
 > prettyToken _ (Identifier Nothing t) = t
-> prettyToken _ (Identifier (Just (q1,q2)) t) =
+> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
 >     q1 ++
 >     -- todo: a bit hacky, do a better design
 >     -- the dialect will know how to escape and unescape
 >     -- contents, but the parser here also needs to know
 >     -- about parsing escaped quotes
 >     (if '"' `elem` q1 then doubleChars '"' t else t)
 >     ++ q2
 > --prettyToken _ (UQIdentifier t) =
 > --    "u&\"" ++ doubleChars '"' t ++ "\""
 > --prettyToken _ (DQIdentifier s e t) =
 > --    s ++ t ++ e
 > prettyToken _ (HostParam p) = ':':p
-> prettyToken _ (SqlString s e t) =
+> prettyToken _ (SqlString s e t) = s ++ t ++ e
 >     s ++ (if '\'' `elem` s then doubleChars '\'' t else t) ++ e
 > --prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
 > prettyToken _ (SqlNumber r) = r
 > prettyToken _ (Whitespace t) = t
 > prettyToken _ (LineComment l) = l
@ -136,18 +123,6 @@ todo: public documentation on dialect definition - and dialect flags
 > prettyTokens :: Dialect -> [Token] -> String
 > prettyTokens d ts = concat $ map (prettyToken d) ts
 When parsing a quoted identifier, you can have a double quote
 character in the identifier like this: "quotes""identifier" ->
 quoted"identifer. The double double quotes character is changed to a
 single character in the lexer and expanded back to two characters in
 the pretty printer. This also applies to strings, which can embed a
 single quote like this: 'string''with quote'.
 > doubleChars :: Char -> String -> String
 > doubleChars _ [] = []
 > doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
 >                      | otherwise = d:doubleChars c ds
 TODO: try to make all parsers applicative only
 > -- | Lex some SQL to a list of tokens.
@ -222,7 +197,7 @@ u&"unicode quoted identifier"
 >         -- deal with "" as literal double quote character
 >         choice [do
 >                 void $ char '"'
->                 qidenSuffix $ concat [t,s,"\""]
+>                 qidenSuffix $ concat [t,s,"\"\""]
 >                ,return $ concat [t,s]]
 >     -- mysql can quote identifiers with `
 >     mySqlQIden = do
@ -259,12 +234,16 @@ x'hexidecimal string'
 >         -- deal with '' as literal quote character
 >         choice [do
 >                 void $ char '\''
->                 normalStringSuffix $ concat [t,s,"'"]
+>                 normalStringSuffix $ concat [t,s,"''"]
 >                ,return $ concat [t,s]]
 >     -- try is used to to avoid conflicts with
 >     -- identifiers which can start with n,b,x,u
 >     -- once we read the quote type and the starting '
 >     -- then we commit to a string
 >     -- it's possible that this will reject some valid syntax
 >     -- but only pathalogical stuff, and I think the improved
 >     -- error messages and user predictability make it a good
 >     -- pragmatic choice
 >     csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
 >     cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
 >                   ++ [string "u&'"
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@ -40,8 +40,7 @@ which have been changed to try to improve the layout of the output.
 = value expressions
 > valueExpr :: Dialect -> ValueExpr -> Doc
-> valueExpr _ (StringLit s e t) =
+> valueExpr _ (StringLit s e t) = text s <> text t <> text e
 >     text (s ++ (if '\'' `elem` s then doubleUpQuotes t else t) ++ e)
 > valueExpr _ (NumLit s) = text s
 > valueExpr _ (IntervalLit s v f t) =
@ -227,22 +226,10 @@ which have been changed to try to improve the layout of the output.
 > valueExpr d (VEComment cmt v) =
 >     vcat $ map comment cmt ++ [valueExpr d v]
 > doubleUpQuotes :: String -> String
 > doubleUpQuotes [] = []
 > doubleUpQuotes ('\'':cs) = '\'':'\'':doubleUpQuotes cs
 > doubleUpQuotes (c:cs) = c:doubleUpQuotes cs
 > doubleUpDoubleQuotes :: String -> String
 > doubleUpDoubleQuotes [] = []
 > doubleUpDoubleQuotes ('"':cs) = '"':'"':doubleUpDoubleQuotes cs
 > doubleUpDoubleQuotes (c:cs) = c:doubleUpDoubleQuotes cs
 > unname :: Name -> String
 > unname (Name Nothing n) = n
 > unname (Name (Just (s,e)) n) =
->     s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e
+>     s ++ n ++ e
 > unnames :: [Name] -> String
 > unnames ns = intercalate "." $ map unname ns
@ -250,8 +237,7 @@ which have been changed to try to improve the layout of the output.
 > name :: Name -> Doc
 > name (Name Nothing n) = text n
-> name (Name (Just (s,e)) n) =
+> name (Name (Just (s,e)) n) = text s <> text n <> text e
 >     text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e
 > names :: [Name] -> Doc
 > names ns = hcat $ punctuate (text ".") $ map name ns
--- a/tools/Language/SQL/SimpleSQL/LexerTests.lhs
+++ b/tools/Language/SQL/SimpleSQL/LexerTests.lhs
@ -9,6 +9,10 @@ Test for the lexer
 > --import Debug.Trace
 > import Data.Char (isAlpha)
 > lexerTests :: TestItem
 > lexerTests = Group "lexerTests" $
 >     [Group "lexer token tests" [ansiLexerTests]]
 > ansiLexerTable :: [(String,[Token])]
 > ansiLexerTable =
 >     -- single char symbols
@ -26,11 +30,13 @@ Test for the lexer
 >            ++ map (\i -> (':':i, [HostParam i])) idens
 >        )
 >     -- quoted identifiers with embedded double quotes
->     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
+>     -- the lexer doesn't unescape the quotes
 >     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
 >     -- strings
 >     -- the lexer doesn't apply escapes at all
 >     ++ [("'string'", [SqlString "'" "'" "string"])
->        ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"])
+>        ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
->        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])]
+>        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])]
 >     -- csstrings
 >     ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
 >        ["n", "N","b", "B","x", "X", "u&"]
@ -55,11 +61,6 @@ Test for the lexer
 >        ,"/* this *is/ a comment */"
 >        ]
 > lexerTests :: TestItem
 > lexerTests = Group "lexerTests" $
 >     [Group "lexer token tests" [ansiLexerTests]]
 > ansiLexerTests :: TestItem
 > ansiLexerTests = Group "ansiLexerTests" $
 >     [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t |  (s,t) <- ansiLexerTable]
@ -152,3 +153,68 @@ number number (todo: double check more carefully)
 >    listPred _ [_] = False
 >    listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
 >                              | otherwise = listPred (p,p1) (t1:ts)
 todo: lexing tests
 do quickcheck testing:
 can try to generate valid tokens then check they parse
 same as above: can also try to pair tokens, create an accurate
  function to say which ones can appear adjacent, and test
 I think this plus the explicit lists of tokens like above which do
 basic sanity + explicit edge casts will provide a high level of
 assurance.
 > postgresLexerTable :: [(String,[Token])]
 > postgresLexerTable =
 >     -- single char symbols
 >     map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
 >     -- multi char symbols
 >     ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
 >     -- symbols to add: :, ::, .. :=
 >     -- plus generic symbols
 >     ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
 >         -- simple identifiers
 >         in map (\i -> (i, [Identifier Nothing i])) idens
 >            ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
 >            -- todo: in order to make lex . pretty id, need to
 >            -- preserve the case of the u
 >            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
 >            -- host param
 >            ++ map (\i -> (':':i, [HostParam i])) idens
 >        )
 >     -- positional var
 >     -- quoted identifiers with embedded double quotes
 >     ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
 >     -- strings
 >     ++ [("'string'", [SqlString "'" "'" "string"])
 >        ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
 >        ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])
 >        ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"])
 >        ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"])
 >        ]
 >     -- csstrings
 >     ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
 >        ["n", "N","b", "B","x", "X", "u&", "e", "E"]
 >     -- numbers
 >     ++ [("10", [SqlNumber "10"])
 >        ,(".1", [SqlNumber ".1"])
 >        ,("5e3", [SqlNumber "5e3"])
 >        ,("5e+3", [SqlNumber "5e+3"])
 >        ,("5e-3", [SqlNumber "5e-3"])
 >        ,("10.2", [SqlNumber "10.2"])
 >        ,("10.2e7", [SqlNumber "10.2e7"])]
 >     -- whitespace
 >     ++ concat [[([a],[Whitespace [a]])
 >                ,([a,b], [Whitespace [a,b]])]
 >               | a <- " \n\t", b <- " \n\t"]
 >     -- line comment
 >     ++ map (\c -> (c, [LineComment c]))
 >        ["--", "-- ", "-- this is a comment", "-- line com\n"]
 >     -- block comment
 >     ++ map (\c -> (c, [BlockComment c]))
 >        ["/**/", "/* */","/* this is a comment */"
 >        ,"/* this *is/ a comment */"
 >        ]
--- a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
@ -514,7 +514,7 @@ Specify a non-null value.
 >     ,("'something' -- a comment\n ' some more' /*another comment*/ 'and more'"
 >      ,StringLit "'" "'" "something some moreand more")
 >     ,("'a quote: '', stuff'"
->      ,StringLit "'" "'" "a quote: ', stuff")
+>      ,StringLit "'" "'" "a quote: '', stuff")
 >     ,("''"
 >      ,StringLit "'" "'" "")
@ -754,9 +754,9 @@ Specify names.
 >     ,("a.b",Iden [Name Nothing "a", Name Nothing "b"])
 >     ,("a.b.c",Iden [Name Nothing "a", Name Nothing "b", Name Nothing "c"])
 >     ,("\"quoted iden\"", Iden [Name (Just ("\"","\"")) "quoted iden"])
->     ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \" iden"])
+>     ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \"\" iden"])
 >     ,("U&\"quoted iden\"", Iden [Name (Just ("U&\"","\"")) "quoted iden"])
->     ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \" iden"])
+>     ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \"\" iden"])
 >     ]
 TODO: more identifiers, e.g. unicode escapes?, mixed quoted/unquoted
--- a/tools/Language/SQL/SimpleSQL/ValueExprs.lhs
+++ b/tools/Language/SQL/SimpleSQL/ValueExprs.lhs
@ -35,7 +35,7 @@ Tests for parsing value expressions
 >      ,("3e+3", NumLit "3e+3")
 >      ,("3e-3", NumLit "3e-3")
 >      ,("'string'", StringLit "'" "'" "string")
->      ,("'string with a '' quote'", StringLit "'" "'" "string with a ' quote")
+>      ,("'string with a '' quote'", StringLit "'" "'" "string with a '' quote")
 >      ,("'1'", StringLit "'" "'" "1")
 >      ,("interval '3' day"
 >       ,IntervalLit Nothing "3" (Itf "day" Nothing) Nothing)