work on postgresql lexing
add positional arg add e' quoted strings add \' escaping in strings (not finished) add ::, :=, : symbols
This commit is contained in:
parent
a59f19aae9
commit
b41803427d
|
@ -81,6 +81,10 @@ todo: public documentation on dialect definition - and dialect flags
|
||||||
|
|
||||||
> -- | This is a host param symbol, e.g. :param
|
> -- | This is a host param symbol, e.g. :param
|
||||||
> | HostParam String
|
> | HostParam String
|
||||||
|
|
||||||
|
|
||||||
|
> -- | This is a positional arg identifier e.g. $1
|
||||||
|
> | PositionalArg Int
|
||||||
>
|
>
|
||||||
> -- | This is a string literal. The first two fields are the --
|
> -- | This is a string literal. The first two fields are the --
|
||||||
> -- start and end quotes, which are usually both ', but can be
|
> -- start and end quotes, which are usually both ', but can be
|
||||||
|
@ -114,6 +118,7 @@ todo: public documentation on dialect definition - and dialect flags
|
||||||
> prettyToken _ (Identifier Nothing t) = t
|
> prettyToken _ (Identifier Nothing t) = t
|
||||||
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
|
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
|
||||||
> prettyToken _ (HostParam p) = ':':p
|
> prettyToken _ (HostParam p) = ':':p
|
||||||
|
> prettyToken _ (PositionalArg p) = '$':show p
|
||||||
> prettyToken _ (SqlString s e t) = s ++ t ++ e
|
> prettyToken _ (SqlString s e t) = s ++ t ++ e
|
||||||
> prettyToken _ (SqlNumber r) = r
|
> prettyToken _ (SqlNumber r) = r
|
||||||
> prettyToken _ (Whitespace t) = t
|
> prettyToken _ (Whitespace t) = t
|
||||||
|
@ -166,6 +171,7 @@ this is also tried before symbol (a .1 will be parsed as a number, but
|
||||||
> ,lineComment d
|
> ,lineComment d
|
||||||
> ,blockComment d
|
> ,blockComment d
|
||||||
> ,sqlNumber d
|
> ,sqlNumber d
|
||||||
|
> ,positionalArg d
|
||||||
> ,symbol d
|
> ,symbol d
|
||||||
> ,sqlWhitespace d]
|
> ,sqlWhitespace d]
|
||||||
|
|
||||||
|
@ -222,13 +228,23 @@ x'hexidecimal string'
|
||||||
|
|
||||||
|
|
||||||
> sqlString :: Dialect -> Parser Token
|
> sqlString :: Dialect -> Parser Token
|
||||||
> sqlString _ =
|
> sqlString d =
|
||||||
> choice [csString
|
> choice [csString
|
||||||
> ,normalString
|
> ,normalString
|
||||||
> ]
|
> ]
|
||||||
> where
|
> where
|
||||||
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
|
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
|
||||||
> normalStringSuffix t = do
|
> normalStringSuffix t
|
||||||
|
> | diSyntaxFlavour d == Postgres = do
|
||||||
|
> s <- takeTill (`elem` "'\\")
|
||||||
|
> -- deal with '' or \' as literal quote character
|
||||||
|
> choice [do
|
||||||
|
> ctu <- choice ["''" <$ try (string "''")
|
||||||
|
> ,"\\'" <$ string "\\'"
|
||||||
|
> ,"\\" <$ char '\\']
|
||||||
|
> normalStringSuffix $ concat [t,s,ctu]
|
||||||
|
> ,concat [t,s] <$ char '\'']
|
||||||
|
> | otherwise = do
|
||||||
> s <- takeTill (=='\'')
|
> s <- takeTill (=='\'')
|
||||||
> void $ char '\''
|
> void $ char '\''
|
||||||
> -- deal with '' as literal quote character
|
> -- deal with '' as literal quote character
|
||||||
|
@ -245,13 +261,27 @@ x'hexidecimal string'
|
||||||
> -- error messages and user predictability make it a good
|
> -- error messages and user predictability make it a good
|
||||||
> -- pragmatic choice
|
> -- pragmatic choice
|
||||||
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
|
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
|
||||||
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
|
> csPrefixes | diSyntaxFlavour d == Postgres = "nNbBxXEe"
|
||||||
|
> | otherwise = "nNbBxX"
|
||||||
|
> cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
|
||||||
> ++ [string "u&'"
|
> ++ [string "u&'"
|
||||||
> ,string "U&'"]
|
> ,string "U&'"]
|
||||||
|
|
||||||
> hostParam :: Dialect -> Parser Token
|
> hostParam :: Dialect -> Parser Token
|
||||||
|
|
||||||
|
use try for postgres because we also support : and :: as symbols
|
||||||
|
There might be a problem with parsing e.g. a[1:b]
|
||||||
|
|
||||||
|
> hostParam d | diSyntaxFlavour d == Postgres =
|
||||||
|
> HostParam <$> try (char ':' *> identifierString)
|
||||||
|
|
||||||
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
||||||
|
|
||||||
|
> positionalArg :: Dialect -> Parser Token
|
||||||
|
> positionalArg d | diSyntaxFlavour d == Postgres =
|
||||||
|
> -- use try to avoid ambiguities with other syntax which starts with dollar
|
||||||
|
> PositionalArg <$> try (char '$' *> (read <$> many1 digit))
|
||||||
|
> positionalArg _ = guard False *> error "unpossible"
|
||||||
|
|
||||||
|
|
||||||
digits
|
digits
|
||||||
|
@ -288,6 +318,13 @@ A symbol is one of the two character symbols, or one of the single
|
||||||
character symbols in the two lists below.
|
character symbols in the two lists below.
|
||||||
|
|
||||||
> symbol :: Dialect -> Parser Token
|
> symbol :: Dialect -> Parser Token
|
||||||
|
> symbol d | diSyntaxFlavour d == Postgres =
|
||||||
|
> Symbol <$>
|
||||||
|
> choice (
|
||||||
|
> many1 (char '.') :
|
||||||
|
> map (try . string) [">=","<=","!=","<>","||", "::", ":="]
|
||||||
|
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;():")
|
||||||
|
|
||||||
> symbol _ = Symbol <$>
|
> symbol _ = Symbol <$>
|
||||||
> choice (
|
> choice (
|
||||||
> many1 (char '.') :
|
> many1 (char '.') :
|
||||||
|
|
|
@ -11,7 +11,8 @@ Test for the lexer
|
||||||
|
|
||||||
> lexerTests :: TestItem
|
> lexerTests :: TestItem
|
||||||
> lexerTests = Group "lexerTests" $
|
> lexerTests = Group "lexerTests" $
|
||||||
> [Group "lexer token tests" [ansiLexerTests]]
|
> [Group "lexer token tests" [ansiLexerTests
|
||||||
|
> ,postgresLexerTests]]
|
||||||
|
|
||||||
> ansiLexerTable :: [(String,[Token])]
|
> ansiLexerTable :: [(String,[Token])]
|
||||||
> ansiLexerTable =
|
> ansiLexerTable =
|
||||||
|
@ -170,11 +171,11 @@ assurance.
|
||||||
> postgresLexerTable :: [(String,[Token])]
|
> postgresLexerTable :: [(String,[Token])]
|
||||||
> postgresLexerTable =
|
> postgresLexerTable =
|
||||||
> -- single char symbols
|
> -- single char symbols
|
||||||
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
|
||||||
> -- multi char symbols
|
> -- multi char symbols
|
||||||
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
|
||||||
> -- symbols to add: :, ::, .. :=
|
> -- todo: add many examples of generic symbols
|
||||||
> -- plus generic symbols
|
> -- also: do the testing for the ansi compatibility special cases
|
||||||
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||||
> -- simple identifiers
|
> -- simple identifiers
|
||||||
> in map (\i -> (i, [Identifier Nothing i])) idens
|
> in map (\i -> (i, [Identifier Nothing i])) idens
|
||||||
|
@ -186,14 +187,19 @@ assurance.
|
||||||
> ++ map (\i -> (':':i, [HostParam i])) idens
|
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||||
> )
|
> )
|
||||||
> -- positional var
|
> -- positional var
|
||||||
|
> ++ [("$1", [PositionalArg 1])]
|
||||||
> -- quoted identifiers with embedded double quotes
|
> -- quoted identifiers with embedded double quotes
|
||||||
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])]
|
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
|
||||||
> -- strings
|
> -- strings
|
||||||
> ++ [("'string'", [SqlString "'" "'" "string"])
|
> ++ [("'string'", [SqlString "'" "'" "string"])
|
||||||
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
|
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
|
||||||
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"])
|
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
|
||||||
> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"])
|
> ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
|
||||||
> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"])
|
> ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
|
||||||
|
> -- todo: implement only allowing \' in e quoted strings
|
||||||
|
> {-,("'not this \\' quote", [SqlString "'" "'" "not this \\"
|
||||||
|
> ,Whitespace " "
|
||||||
|
> ,Identifier Nothing "quote"])-}
|
||||||
> ]
|
> ]
|
||||||
> -- csstrings
|
> -- csstrings
|
||||||
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
||||||
|
@ -218,3 +224,8 @@ assurance.
|
||||||
> ["/**/", "/* */","/* this is a comment */"
|
> ["/**/", "/* */","/* this is a comment */"
|
||||||
> ,"/* this *is/ a comment */"
|
> ,"/* this *is/ a comment */"
|
||||||
> ]
|
> ]
|
||||||
|
|
||||||
|
> postgresLexerTests :: TestItem
|
||||||
|
> postgresLexerTests = Group "postgresLexerTests" $
|
||||||
|
> [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
|
||||||
|
> ]
|
||||||
|
|
Loading…
Reference in a new issue