diff --git a/Language/SQL/SimpleSQL/Lex.lhs b/Language/SQL/SimpleSQL/Lex.lhs index 799b7d6..ce40377 100644 --- a/Language/SQL/SimpleSQL/Lex.lhs +++ b/Language/SQL/SimpleSQL/Lex.lhs @@ -112,22 +112,9 @@ todo: public documentation on dialect definition - and dialect flags > prettyToken :: Dialect -> Token -> String > prettyToken _ (Symbol s) = s > prettyToken _ (Identifier Nothing t) = t -> prettyToken _ (Identifier (Just (q1,q2)) t) = -> q1 ++ -> -- todo: a bit hacky, do a better design -> -- the dialect will know how to escape and unescape -> -- contents, but the parser here also needs to know -> -- about parsing escaped quotes -> (if '"' `elem` q1 then doubleChars '"' t else t) -> ++ q2 -> --prettyToken _ (UQIdentifier t) = -> -- "u&\"" ++ doubleChars '"' t ++ "\"" -> --prettyToken _ (DQIdentifier s e t) = -> -- s ++ t ++ e +> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2 > prettyToken _ (HostParam p) = ':':p -> prettyToken _ (SqlString s e t) = -> s ++ (if '\'' `elem` s then doubleChars '\'' t else t) ++ e -> --prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'" +> prettyToken _ (SqlString s e t) = s ++ t ++ e > prettyToken _ (SqlNumber r) = r > prettyToken _ (Whitespace t) = t > prettyToken _ (LineComment l) = l @@ -136,18 +123,6 @@ todo: public documentation on dialect definition - and dialect flags > prettyTokens :: Dialect -> [Token] -> String > prettyTokens d ts = concat $ map (prettyToken d) ts -When parsing a quoted identifier, you can have a double quote -character in the identifier like this: "quotes""identifier" -> -quoted"identifer. The double double quotes character is changed to a -single character in the lexer and expanded back to two characters in -the pretty printer. This also applies to strings, which can embed a -single quote like this: 'string''with quote'. - -> doubleChars :: Char -> String -> String -> doubleChars _ [] = [] -> doubleChars c (d:ds) | c == d = c:d:doubleChars c ds -> | otherwise = d:doubleChars c ds - TODO: try to make all parsers applicative only > -- | Lex some SQL to a list of tokens. @@ -222,7 +197,7 @@ u&"unicode quoted identifier" > -- deal with "" as literal double quote character > choice [do > void $ char '"' -> qidenSuffix $ concat [t,s,"\""] +> qidenSuffix $ concat [t,s,"\"\""] > ,return $ concat [t,s]] > -- mysql can quote identifiers with ` > mySqlQIden = do @@ -259,12 +234,16 @@ x'hexidecimal string' > -- deal with '' as literal quote character > choice [do > void $ char '\'' -> normalStringSuffix $ concat [t,s,"'"] +> normalStringSuffix $ concat [t,s,"''"] > ,return $ concat [t,s]] > -- try is used to to avoid conflicts with > -- identifiers which can start with n,b,x,u > -- once we read the quote type and the starting ' > -- then we commit to a string +> -- it's possible that this will reject some valid syntax +> -- but only pathalogical stuff, and I think the improved +> -- error messages and user predictability make it a good +> -- pragmatic choice > csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix "" > cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX") > ++ [string "u&'" diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index bc2ca9a..fb80b8c 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -40,8 +40,7 @@ which have been changed to try to improve the layout of the output. = value expressions > valueExpr :: Dialect -> ValueExpr -> Doc -> valueExpr _ (StringLit s e t) = -> text (s ++ (if '\'' `elem` s then doubleUpQuotes t else t) ++ e) +> valueExpr _ (StringLit s e t) = text s <> text t <> text e > valueExpr _ (NumLit s) = text s > valueExpr _ (IntervalLit s v f t) = @@ -227,22 +226,10 @@ which have been changed to try to improve the layout of the output. > valueExpr d (VEComment cmt v) = > vcat $ map comment cmt ++ [valueExpr d v] -> doubleUpQuotes :: String -> String -> doubleUpQuotes [] = [] -> doubleUpQuotes ('\'':cs) = '\'':'\'':doubleUpQuotes cs -> doubleUpQuotes (c:cs) = c:doubleUpQuotes cs - -> doubleUpDoubleQuotes :: String -> String -> doubleUpDoubleQuotes [] = [] -> doubleUpDoubleQuotes ('"':cs) = '"':'"':doubleUpDoubleQuotes cs -> doubleUpDoubleQuotes (c:cs) = c:doubleUpDoubleQuotes cs - - - > unname :: Name -> String > unname (Name Nothing n) = n > unname (Name (Just (s,e)) n) = -> s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e +> s ++ n ++ e > unnames :: [Name] -> String > unnames ns = intercalate "." $ map unname ns @@ -250,8 +237,7 @@ which have been changed to try to improve the layout of the output. > name :: Name -> Doc > name (Name Nothing n) = text n -> name (Name (Just (s,e)) n) = -> text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e +> name (Name (Just (s,e)) n) = text s <> text n <> text e > names :: [Name] -> Doc > names ns = hcat $ punctuate (text ".") $ map name ns diff --git a/tools/Language/SQL/SimpleSQL/LexerTests.lhs b/tools/Language/SQL/SimpleSQL/LexerTests.lhs index 63499c5..0946340 100644 --- a/tools/Language/SQL/SimpleSQL/LexerTests.lhs +++ b/tools/Language/SQL/SimpleSQL/LexerTests.lhs @@ -9,6 +9,10 @@ Test for the lexer > --import Debug.Trace > import Data.Char (isAlpha) +> lexerTests :: TestItem +> lexerTests = Group "lexerTests" $ +> [Group "lexer token tests" [ansiLexerTests]] + > ansiLexerTable :: [(String,[Token])] > ansiLexerTable = > -- single char symbols @@ -26,11 +30,13 @@ Test for the lexer > ++ map (\i -> (':':i, [HostParam i])) idens > ) > -- quoted identifiers with embedded double quotes -> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])] +> -- the lexer doesn't unescape the quotes +> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])] > -- strings +> -- the lexer doesn't apply escapes at all > ++ [("'string'", [SqlString "'" "'" "string"]) -> ,("'normal '' quote'", [SqlString "'" "'" "normal ' quote"]) -> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])] +> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"]) +> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])] > -- csstrings > ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"])) > ["n", "N","b", "B","x", "X", "u&"] @@ -55,11 +61,6 @@ Test for the lexer > ,"/* this *is/ a comment */" > ] -> lexerTests :: TestItem -> lexerTests = Group "lexerTests" $ -> [Group "lexer token tests" [ansiLexerTests]] - - > ansiLexerTests :: TestItem > ansiLexerTests = Group "ansiLexerTests" $ > [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t | (s,t) <- ansiLexerTable] @@ -152,3 +153,68 @@ number number (todo: double check more carefully) > listPred _ [_] = False > listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True > | otherwise = listPred (p,p1) (t1:ts) + +todo: lexing tests +do quickcheck testing: +can try to generate valid tokens then check they parse + +same as above: can also try to pair tokens, create an accurate + function to say which ones can appear adjacent, and test + +I think this plus the explicit lists of tokens like above which do +basic sanity + explicit edge casts will provide a high level of +assurance. + + + +> postgresLexerTable :: [(String,[Token])] +> postgresLexerTable = +> -- single char symbols +> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()" +> -- multi char symbols +> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"] +> -- symbols to add: :, ::, .. := +> -- plus generic symbols +> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"] +> -- simple identifiers +> in map (\i -> (i, [Identifier Nothing i])) idens +> ++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens +> -- todo: in order to make lex . pretty id, need to +> -- preserve the case of the u +> ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens +> -- host param +> ++ map (\i -> (':':i, [HostParam i])) idens +> ) +> -- positional var +> -- quoted identifiers with embedded double quotes +> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])] +> -- strings +> ++ [("'string'", [SqlString "'" "'" "string"]) +> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"]) +> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"]) +> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"]) +> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"]) +> ] +> -- csstrings +> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"])) +> ["n", "N","b", "B","x", "X", "u&", "e", "E"] +> -- numbers +> ++ [("10", [SqlNumber "10"]) +> ,(".1", [SqlNumber ".1"]) +> ,("5e3", [SqlNumber "5e3"]) +> ,("5e+3", [SqlNumber "5e+3"]) +> ,("5e-3", [SqlNumber "5e-3"]) +> ,("10.2", [SqlNumber "10.2"]) +> ,("10.2e7", [SqlNumber "10.2e7"])] +> -- whitespace +> ++ concat [[([a],[Whitespace [a]]) +> ,([a,b], [Whitespace [a,b]])] +> | a <- " \n\t", b <- " \n\t"] +> -- line comment +> ++ map (\c -> (c, [LineComment c])) +> ["--", "-- ", "-- this is a comment", "-- line com\n"] +> -- block comment +> ++ map (\c -> (c, [BlockComment c])) +> ["/**/", "/* */","/* this is a comment */" +> ,"/* this *is/ a comment */" +> ] diff --git a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs index d437d80..339afdd 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs @@ -514,7 +514,7 @@ Specify a non-null value. > ,("'something' -- a comment\n ' some more' /*another comment*/ 'and more'" > ,StringLit "'" "'" "something some moreand more") > ,("'a quote: '', stuff'" -> ,StringLit "'" "'" "a quote: ', stuff") +> ,StringLit "'" "'" "a quote: '', stuff") > ,("''" > ,StringLit "'" "'" "") @@ -754,9 +754,9 @@ Specify names. > ,("a.b",Iden [Name Nothing "a", Name Nothing "b"]) > ,("a.b.c",Iden [Name Nothing "a", Name Nothing "b", Name Nothing "c"]) > ,("\"quoted iden\"", Iden [Name (Just ("\"","\"")) "quoted iden"]) -> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \" iden"]) +> ,("\"quoted \"\" iden\"", Iden [Name (Just ("\"","\"")) "quoted \"\" iden"]) > ,("U&\"quoted iden\"", Iden [Name (Just ("U&\"","\"")) "quoted iden"]) -> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \" iden"]) +> ,("U&\"quoted \"\" iden\"", Iden [Name (Just ("U&\"","\"")) "quoted \"\" iden"]) > ] TODO: more identifiers, e.g. unicode escapes?, mixed quoted/unquoted diff --git a/tools/Language/SQL/SimpleSQL/ValueExprs.lhs b/tools/Language/SQL/SimpleSQL/ValueExprs.lhs index 1eaf9d2..d88695f 100644 --- a/tools/Language/SQL/SimpleSQL/ValueExprs.lhs +++ b/tools/Language/SQL/SimpleSQL/ValueExprs.lhs @@ -35,7 +35,7 @@ Tests for parsing value expressions > ,("3e+3", NumLit "3e+3") > ,("3e-3", NumLit "3e-3") > ,("'string'", StringLit "'" "'" "string") -> ,("'string with a '' quote'", StringLit "'" "'" "string with a ' quote") +> ,("'string with a '' quote'", StringLit "'" "'" "string with a '' quote") > ,("'1'", StringLit "'" "'" "1") > ,("interval '3' day" > ,IntervalLit Nothing "3" (Itf "day" Nothing) Nothing)