From 9457720d2c6ab259af8521d0bfe49530773c4975 Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Mon, 22 Feb 2016 23:23:42 +0200 Subject: [PATCH] refactor the symbol lexer more --- Language/SQL/SimpleSQL/Lex.lhs | 110 +++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/Language/SQL/SimpleSQL/Lex.lhs b/Language/SQL/SimpleSQL/Lex.lhs index 8f3d2ac..6fecbfe 100644 --- a/Language/SQL/SimpleSQL/Lex.lhs +++ b/Language/SQL/SimpleSQL/Lex.lhs @@ -327,13 +327,69 @@ constant. > sInt = (++) <$> option "" (string "+" <|> string "-") <*> int > pp = (<$$> (++)) +Symbols -A symbol is one of the two character symbols, or one of the single -character symbols in the two lists below. +A symbol is an operator, or one of the misc symbols which include: +. +.. +:= +: +:: +( +) +? +; +, +{ (for odbc) +} + +The postgresql operator syntax allows a huge range of operators +compared with ansi and other dialects > symbol :: Dialect -> Parser Token -> symbol d | diSyntaxFlavour d == Postgres = -> Symbol <$> choice (otherSymbol ++ [singlePlusMinus,opMoreChars]) +> symbol d = Symbol <$> choice (concat +> [dots +> ,if (diSyntaxFlavour d == Postgres) +> then postgresExtraSymbols +> else [] +> ,miscSymbol +> ,if allowOdbc d then odbcSymbol else [] +> ,if (diSyntaxFlavour d == Postgres) +> then generalizedPostgresqlOperator +> else basicAnsiOps +> ]) +> where +> dots = [many1 (char '.')] +> odbcSymbol = [string "{", string "}"] +> postgresExtraSymbols = +> [try (string ":=") +> -- parse :: and : and avoid allowing ::: or more +> ,try (string "::" <* notFollowedBy (char ':')) +> ,try (string ":" <* notFollowedBy (char ':'))] +> miscSymbol = map (string . (:[])) $ +> case diSyntaxFlavour d of +> SQLServer -> ",;():?" +> Postgres -> "[],;()" +> _ -> "[],;():?" + +try is used because most of the first characters of the two character +symbols can also be part of a single character symbol + +> basicAnsiOps = map (try . string) [">=","<=","!=","<>"] +> ++ map (string . (:[])) "+-^*/%~&<>=" +> ++ pipes +> pipes = -- what about using many1 (char '|'), then it will +> -- fail in the parser? Not sure exactly how +> -- standalone the lexer should be +> [char '|' *> +> choice ["||" <$ char '|' <* notFollowedBy (char '|') +> ,return "|"]] + +postgresql generalized operators + +this includes the custom operators that postgres supports, +plus all the standard operators which could be custom operators +according to their grammar rules @@ -348,27 +404,12 @@ A multiple-character operator name cannot end in + or -, unless the name also co ~ ! @ # % ^ & | ` ? -> where -> -- other symbols are all the tokens which parse as symbols in -> -- this lexer which aren't considered operators in postgresql -> -- a single ? is parsed as a operator here instead of an other -> -- symbol because this is the least complex way to do it -> otherSymbol = many1 (char '.') : -> try (string ":=") : -> -- parse :: and : and avoid allowing ::: or more -> try (string "::" <* notFollowedBy (char ':')) : -> try (string ":" <* notFollowedBy (char ':')) : -> (map (string . (:[])) "[],;()" -> ++ if allowOdbc d -> then [string "{", string "}"] -> else [] -> ) - -exception char is one of: -~ ! @ # % ^ & | ` ? which allows the last character of a multi character symbol to be + or - +> generalizedPostgresqlOperator :: [Parser String] +> generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars] +> where > allOpSymbols = "+-*/<>=~!@#%^&|`?" > -- these are the symbols when if part of a multi character > -- operator permit the operator to end with a + or - symbol @@ -416,31 +457,6 @@ which allows the last character of a multi character symbol to be + or > <*> option [] opMoreChars > ] -> symbol d = Symbol <$> choice (otherSymbol ++ regularOp) -> where -> otherSymbol = many1 (char '.') : -> (map (string . (:[])) otherSymbolChars -> ++ if allowOdbc d -> then [string "{", string "}"] -> else []) -> otherSymbolChars = -> case diSyntaxFlavour d of -> SQLServer -> ",;():?" -> _ -> "[],;():?" - -try is used because most of the first characters of the two character -symbols can also be part of a single character symbol - -> regularOp = map (try . string) [">=","<=","!=","<>"] -> ++ map (string . (:[])) "+-^*/%~&<>=" -> -- what about using many1 (char '|'), then it will -> -- fail in the parser? Not sure exactly how -> -- standalone the lexer should be -> ++ [char '|' *> -> choice ["||" <$ char '|' <* notFollowedBy (char '|') -> ,return "|"]] - - > sqlWhitespace :: Dialect -> Parser Token > sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)