work on postgresql lexing
add positional arg add e' quoted strings add \' escaping in strings (not finished) add ::, :=, : symbols
This commit is contained in:
parent
a59f19aae9
commit
b41803427d
|
@ -81,6 +81,10 @@ todo: public documentation on dialect definition - and dialect flags
|
|||
|
||||
> -- | This is a host param symbol, e.g. :param
|
||||
> | HostParam String
|
||||
|
||||
|
||||
> -- | This is a positional arg identifier e.g. $1
|
||||
> | PositionalArg Int
|
||||
>
|
||||
> -- | This is a string literal. The first two fields are the --
|
||||
> -- start and end quotes, which are usually both ', but can be
|
||||
|
@ -114,6 +118,7 @@ todo: public documentation on dialect definition - and dialect flags
|
|||
> prettyToken _ (Identifier Nothing t) = t
|
||||
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
|
||||
> prettyToken _ (HostParam p) = ':':p
|
||||
> prettyToken _ (PositionalArg p) = '$':show p
|
||||
> prettyToken _ (SqlString s e t) = s ++ t ++ e
|
||||
> prettyToken _ (SqlNumber r) = r
|
||||
> prettyToken _ (Whitespace t) = t
|
||||
|
@ -166,6 +171,7 @@ this is also tried before symbol (a .1 will be parsed as a number, but
|
|||
> ,lineComment d
|
||||
> ,blockComment d
|
||||
> ,sqlNumber d
|
||||
> ,positionalArg d
|
||||
> ,symbol d
|
||||
> ,sqlWhitespace d]
|
||||
|
||||
|
@ -222,13 +228,23 @@ x'hexidecimal string'
|
|||
|
||||
|
||||
> sqlString :: Dialect -> Parser Token
|
||||
> sqlString _ =
|
||||
> sqlString d =
|
||||
> choice [csString
|
||||
> ,normalString
|
||||
> ]
|
||||
> where
|
||||
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
|
||||
> normalStringSuffix t = do
|
||||
> normalStringSuffix t
|
||||
> | diSyntaxFlavour d == Postgres = do
|
||||
> s <- takeTill (`elem` "'\\")
|
||||
> -- deal with '' or \' as literal quote character
|
||||
> choice [do
|
||||
> ctu <- choice ["''" <$ try (string "''")
|
||||
> ,"\\'" <$ string "\\'"
|
||||
> ,"\\" <$ char '\\']
|
||||
> normalStringSuffix $ concat [t,s,ctu]
|
||||
> ,concat [t,s] <$ char '\'']
|
||||
> | otherwise = do
|
||||
> s <- takeTill (=='\'')
|
||||
> void $ char '\''
|
||||
> -- deal with '' as literal quote character
|
||||
|
@ -245,13 +261,27 @@ x'hexidecimal string'
|
|||
> -- error messages and user predictability make it a good
|
||||
> -- pragmatic choice
|
||||
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
|
||||
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
|
||||
> csPrefixes | diSyntaxFlavour d == Postgres = "nNbBxXEe"
|
||||
> | otherwise = "nNbBxX"
|
||||
> cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
|
||||
> ++ [string "u&'"
|
||||
> ,string "U&'"]
|
||||
|
||||
> hostParam :: Dialect -> Parser Token
|
||||
|
||||
use try for postgres because we also support : and :: as symbols
|
||||
There might be a problem with parsing e.g. a[1:b]
|
||||
|
||||
> hostParam d | diSyntaxFlavour d == Postgres =
|
||||
> HostParam <$> try (char ':' *> identifierString)
|
||||
|
||||
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
||||
|
||||
> positionalArg :: Dialect -> Parser Token
|
||||
> positionalArg d | diSyntaxFlavour d == Postgres =
|
||||
> -- use try to avoid ambiguities with other syntax which starts with dollar
|
||||
> PositionalArg <$> try (char '$' *> (read <$> many1 digit))
|
||||
> positionalArg _ = guard False *> error "unpossible"
|
||||
|
||||
|
||||
digits
|
||||
|
@ -288,6 +318,13 @@ A symbol is one of the two character symbols, or one of the single
|
|||
character symbols in the two lists below.
|
||||
|
||||
> symbol :: Dialect -> Parser Token
|
||||
> symbol d | diSyntaxFlavour d == Postgres =
|
||||
> Symbol <$>
|
||||
> choice (
|
||||
> many1 (char '.') :
|
||||
> map (try . string) [">=","<=","!=","<>","||", "::", ":="]
|
||||
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;():")
|
||||
|
||||
> symbol _ = Symbol <$>
|
||||
> choice (
|
||||
> many1 (char '.') :
|
||||
|
|
|
@ -11,7 +11,8 @@ Test for the lexer
|
|||
|
||||
> lexerTests :: TestItem
|
||||
> lexerTests = Group "lexerTests" $
|
||||
> [Group "lexer token tests" [ansiLexerTests]]
|
||||
> [Group "lexer token tests" [ansiLexerTests
|
||||
> ,postgresLexerTests]]
|
||||
|
||||
> ansiLexerTable :: [(String,[Token])]
|
||||
> ansiLexerTable =
|
||||
|
@ -170,11 +171,11 @@ assurance.
|
|||
> postgresLexerTable :: [(String,[Token])]
|
||||
> postgresLexerTable =
|
||||
> -- single char symbols
|
||||
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
||||
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
|
||||
> -- multi char symbols
|
||||
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
||||
> -- symbols to add: :, ::, .. :=
|
||||
> -- plus generic symbols
|
||||
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
|
||||
> -- todo: add many examples of generic symbols
|
||||
> -- also: do the testing for the ansi compatibility special cases
|
||||
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||
> -- simple identifiers
|
||||
> in map (\i -> (i, [Identifier Nothing i])) idens
|
||||
|
@ -186,14 +187,19 @@ assurance.
|
|||
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||
> )
|
||||
> -- positional var
|
||||
> ++ [("$1", [PositionalArg 1])]
|
||||
> -- quoted identifiers with embedded double quotes
|
||||
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
|
||||
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
|
||||
> -- strings
|
||||
> ++ [("'string'", [SqlString "'" "'" "string"])
|
||||
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
|
||||
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])
|
||||
> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"])
|
||||
> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"])
|
||||
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
|
||||
> ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
|
||||
> ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
|
||||
> -- todo: implement only allowing \' in e quoted strings
|
||||
> {-,("'not this \\' quote", [SqlString "'" "'" "not this \\"
|
||||
> ,Whitespace " "
|
||||
> ,Identifier Nothing "quote"])-}
|
||||
> ]
|
||||
> -- csstrings
|
||||
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
||||
|
@ -218,3 +224,8 @@ assurance.
|
|||
> ["/**/", "/* */","/* this is a comment */"
|
||||
> ,"/* this *is/ a comment */"
|
||||
> ]
|
||||
|
||||
> postgresLexerTests :: TestItem
|
||||
> postgresLexerTests = Group "postgresLexerTests" $
|
||||
> [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
|
||||
> ]
|
||||
|
|
Loading…
Reference in a new issue