1
Fork 0

work on postgresql lexing

add positional arg
add e' quoted strings
add \' escaping in strings (not finished)
add ::, :=, : symbols
This commit is contained in:
Jake Wheat 2016-02-13 16:31:20 +02:00
parent a59f19aae9
commit b41803427d
2 changed files with 60 additions and 12 deletions

View file

@ -81,6 +81,10 @@ todo: public documentation on dialect definition - and dialect flags
> -- | This is a host param symbol, e.g. :param > -- | This is a host param symbol, e.g. :param
> | HostParam String > | HostParam String
> -- | This is a positional arg identifier e.g. $1
> | PositionalArg Int
> >
> -- | This is a string literal. The first two fields are the -- > -- | This is a string literal. The first two fields are the --
> -- start and end quotes, which are usually both ', but can be > -- start and end quotes, which are usually both ', but can be
@ -114,6 +118,7 @@ todo: public documentation on dialect definition - and dialect flags
> prettyToken _ (Identifier Nothing t) = t > prettyToken _ (Identifier Nothing t) = t
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2 > prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
> prettyToken _ (HostParam p) = ':':p > prettyToken _ (HostParam p) = ':':p
> prettyToken _ (PositionalArg p) = '$':show p
> prettyToken _ (SqlString s e t) = s ++ t ++ e > prettyToken _ (SqlString s e t) = s ++ t ++ e
> prettyToken _ (SqlNumber r) = r > prettyToken _ (SqlNumber r) = r
> prettyToken _ (Whitespace t) = t > prettyToken _ (Whitespace t) = t
@ -166,6 +171,7 @@ this is also tried before symbol (a .1 will be parsed as a number, but
> ,lineComment d > ,lineComment d
> ,blockComment d > ,blockComment d
> ,sqlNumber d > ,sqlNumber d
> ,positionalArg d
> ,symbol d > ,symbol d
> ,sqlWhitespace d] > ,sqlWhitespace d]
@ -222,13 +228,23 @@ x'hexidecimal string'
> sqlString :: Dialect -> Parser Token > sqlString :: Dialect -> Parser Token
> sqlString _ = > sqlString d =
> choice [csString > choice [csString
> ,normalString > ,normalString
> ] > ]
> where > where
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "") > normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
> normalStringSuffix t = do > normalStringSuffix t
> | diSyntaxFlavour d == Postgres = do
> s <- takeTill (`elem` "'\\")
> -- deal with '' or \' as literal quote character
> choice [do
> ctu <- choice ["''" <$ try (string "''")
> ,"\\'" <$ string "\\'"
> ,"\\" <$ char '\\']
> normalStringSuffix $ concat [t,s,ctu]
> ,concat [t,s] <$ char '\'']
> | otherwise = do
> s <- takeTill (=='\'') > s <- takeTill (=='\'')
> void $ char '\'' > void $ char '\''
> -- deal with '' as literal quote character > -- deal with '' as literal quote character
@ -245,13 +261,27 @@ x'hexidecimal string'
> -- error messages and user predictability make it a good > -- error messages and user predictability make it a good
> -- pragmatic choice > -- pragmatic choice
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix "" > csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX") > csPrefixes | diSyntaxFlavour d == Postgres = "nNbBxXEe"
> | otherwise = "nNbBxX"
> cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
> ++ [string "u&'" > ++ [string "u&'"
> ,string "U&'"] > ,string "U&'"]
> hostParam :: Dialect -> Parser Token > hostParam :: Dialect -> Parser Token
use try for postgres because we also support : and :: as symbols
There might be a problem with parsing e.g. a[1:b]
> hostParam d | diSyntaxFlavour d == Postgres =
> HostParam <$> try (char ':' *> identifierString)
> hostParam _ = HostParam <$> (char ':' *> identifierString) > hostParam _ = HostParam <$> (char ':' *> identifierString)
> positionalArg :: Dialect -> Parser Token
> positionalArg d | diSyntaxFlavour d == Postgres =
> -- use try to avoid ambiguities with other syntax which starts with dollar
> PositionalArg <$> try (char '$' *> (read <$> many1 digit))
> positionalArg _ = guard False *> error "unpossible"
digits digits
@ -288,6 +318,13 @@ A symbol is one of the two character symbols, or one of the single
character symbols in the two lists below. character symbols in the two lists below.
> symbol :: Dialect -> Parser Token > symbol :: Dialect -> Parser Token
> symbol d | diSyntaxFlavour d == Postgres =
> Symbol <$>
> choice (
> many1 (char '.') :
> map (try . string) [">=","<=","!=","<>","||", "::", ":="]
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;():")
> symbol _ = Symbol <$> > symbol _ = Symbol <$>
> choice ( > choice (
> many1 (char '.') : > many1 (char '.') :

View file

@ -11,7 +11,8 @@ Test for the lexer
> lexerTests :: TestItem > lexerTests :: TestItem
> lexerTests = Group "lexerTests" $ > lexerTests = Group "lexerTests" $
> [Group "lexer token tests" [ansiLexerTests]] > [Group "lexer token tests" [ansiLexerTests
> ,postgresLexerTests]]
> ansiLexerTable :: [(String,[Token])] > ansiLexerTable :: [(String,[Token])]
> ansiLexerTable = > ansiLexerTable =
@ -170,11 +171,11 @@ assurance.
> postgresLexerTable :: [(String,[Token])] > postgresLexerTable :: [(String,[Token])]
> postgresLexerTable = > postgresLexerTable =
> -- single char symbols > -- single char symbols
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()" > map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
> -- multi char symbols > -- multi char symbols
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"] > ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
> -- symbols to add: :, ::, .. := > -- todo: add many examples of generic symbols
> -- plus generic symbols > -- also: do the testing for the ansi compatibility special cases
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"] > ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
> -- simple identifiers > -- simple identifiers
> in map (\i -> (i, [Identifier Nothing i])) idens > in map (\i -> (i, [Identifier Nothing i])) idens
@ -186,14 +187,19 @@ assurance.
> ++ map (\i -> (':':i, [HostParam i])) idens > ++ map (\i -> (':':i, [HostParam i])) idens
> ) > )
> -- positional var > -- positional var
> ++ [("$1", [PositionalArg 1])]
> -- quoted identifiers with embedded double quotes > -- quoted identifiers with embedded double quotes
> ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \" iden"])] > ++ [("\"normal \"\" iden\"", [Identifier (Just ("\"","\"")) "normal \"\" iden"])]
> -- strings > -- strings
> ++ [("'string'", [SqlString "'" "'" "string"]) > ++ [("'string'", [SqlString "'" "'" "string"])
> ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"]) > ,("'normal '' quote'", [SqlString "'" "'" "normal '' quote"])
> ,("'normalendquote '''", [SqlString "'" "'" "normalendquote '"]) > ,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
> ,("e'this '' quote''", [SqlString "e'" "'" "this '' quote '"]) > ,("e'this '' quote'", [SqlString "e'" "'" "this '' quote"])
> ,("e'this \' quote''", [SqlString "e'" "'" "this \' quote '"]) > ,("e'this \\' quote'", [SqlString "e'" "'" "this \\' quote"])
> -- todo: implement only allowing \' in e quoted strings
> {-,("'not this \\' quote", [SqlString "'" "'" "not this \\"
> ,Whitespace " "
> ,Identifier Nothing "quote"])-}
> ] > ]
> -- csstrings > -- csstrings
> ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"])) > ++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
@ -218,3 +224,8 @@ assurance.
> ["/**/", "/* */","/* this is a comment */" > ["/**/", "/* */","/* this is a comment */"
> ,"/* this *is/ a comment */" > ,"/* this *is/ a comment */"
> ] > ]
> postgresLexerTests :: TestItem
> postgresLexerTests = Group "postgresLexerTests" $
> [Group "postgres lexer token tests" $ [LexerTest postgres s t | (s,t) <- postgresLexerTable]
> ]