1
Fork 0

work on postgresql lexing

add positional arg
add e' quoted strings
add \' escaping in strings (not finished)
add ::, :=, : symbols
This commit is contained in:
Jake Wheat 2016-02-13 16:31:20 +02:00
parent a59f19aae9
commit b41803427d
2 changed files with 60 additions and 12 deletions

View file

@ -81,6 +81,10 @@ todo: public documentation on dialect definition - and dialect flags
> -- | This is a host param symbol, e.g. :param
> | HostParam String
> -- | This is a positional arg identifier e.g. $1
> | PositionalArg Int
>
> -- | This is a string literal. The first two fields are the --
> -- start and end quotes, which are usually both ', but can be
@ -114,6 +118,7 @@ todo: public documentation on dialect definition - and dialect flags
> prettyToken _ (Identifier Nothing t) = t
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
> prettyToken _ (HostParam p) = ':':p
> prettyToken _ (PositionalArg p) = '$':show p
> prettyToken _ (SqlString s e t) = s ++ t ++ e
> prettyToken _ (SqlNumber r) = r
> prettyToken _ (Whitespace t) = t
@ -166,6 +171,7 @@ this is also tried before symbol (a .1 will be parsed as a number, but
> ,lineComment d
> ,blockComment d
> ,sqlNumber d
> ,positionalArg d
> ,symbol d
> ,sqlWhitespace d]
@ -222,13 +228,23 @@ x'hexidecimal string'
> sqlString :: Dialect -> Parser Token
> sqlString _ =
> sqlString d =
> choice [csString
> ,normalString
> ]
> where
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
> normalStringSuffix t = do
> normalStringSuffix t
> | diSyntaxFlavour d == Postgres = do
> s <- takeTill (`elem` "'\\")
> -- deal with '' or \' as literal quote character
> choice [do
> ctu <- choice ["''" <$ try (string "''")
> ,"\\'" <$ string "\\'"
> ,"\\" <$ char '\\']
> normalStringSuffix $ concat [t,s,ctu]
> ,concat [t,s] <$ char '\'']
> | otherwise = do
> s <- takeTill (=='\'')
> void $ char '\''
> -- deal with '' as literal quote character
@ -245,13 +261,27 @@ x'hexidecimal string'
> -- error messages and user predictability make it a good
> -- pragmatic choice
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
> csPrefixes | diSyntaxFlavour d == Postgres = "nNbBxXEe"
> | otherwise = "nNbBxX"
> cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
> ++ [string "u&'"
> ,string "U&'"]
> hostParam :: Dialect -> Parser Token
use try for postgres because we also support : and :: as symbols
There might be a problem with parsing e.g. a[1:b]
> hostParam d | diSyntaxFlavour d == Postgres =
> HostParam <$> try (char ':' *> identifierString)
> hostParam _ = HostParam <$> (char ':' *> identifierString)
> positionalArg :: Dialect -> Parser Token
> positionalArg d | diSyntaxFlavour d == Postgres =
> -- use try to avoid ambiguities with other syntax which starts with dollar
> PositionalArg <$> try (char '$' *> (read <$> many1 digit))
> positionalArg _ = guard False *> error "unpossible"
digits
@ -288,6 +318,13 @@ A symbol is one of the two character symbols, or one of the single
character symbols in the two lists below.
> symbol :: Dialect -> Parser Token
> symbol d | diSyntaxFlavour d == Postgres =
> Symbol <$>
> choice (
> many1 (char '.') :
> map (try . string) [">=","<=","!=","<>","||", "::", ":="]
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;():")
> symbol _ = Symbol <$>
> choice (
> many1 (char '.') :

View file

@ -11,7 +11,8 @@ Test for the lexer
> lexerTests :: TestItem
> lexerTests = Group "lexerTests" $
> [Group "lexer token tests" [ansiLexerTests]]
> [Group "lexer token tests" [ansiLexerTests
> ,postgresLexerTests]]
> ansiLexerTable :: [(String,[Token])]
> ansiLexerTable =
@ -170,11 +171,11 @@ assurance.
> postgresLexerTable :: [(String,[Token])]
> postgresLexerTable =
> -- single char symbols
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
> -- multi char symbols
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
> -- symbols to add: :, ::, .. :=
> -- plus generic symbols
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
> -- todo: add many examples of generic symbols
> -- also: do the testing for the ansi compatibility special cases
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
> -- simple identifiers
> in map (\i -> (i, [Identifier Nothing i])) idens
@ -186,14 +187,19 @@ assurance.
> ++ map (\i -> (':':i, [HostParam i])) idens
> )
> -- positional var
> ++ [("$1", [PositionalArg 1])]
> -- quoted identifiers with embedded double quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
> -- strings
> ++ [("'string'", [SqlString "'" "'" "string"])
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])
> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"])
> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
> ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
> ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
> -- todo: implement only allowing \' in e quoted strings
> {-,("'not this \\' quote", [SqlString "'" "'" "not this \\"
> ,Whitespace " "
> ,Identifier Nothing "quote"])-}
> ]
> -- csstrings
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
@ -218,3 +224,8 @@ assurance.
> ["/**/", "/* */","/* this is a comment */"
> ,"/* this *is/ a comment */"
> ]
> postgresLexerTests :: TestItem
> postgresLexerTests = Group "postgresLexerTests" $
> [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
> ]