From 9457720d2c6ab259af8521d0bfe49530773c4975 Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Mon, 22 Feb 2016 23:23:42 +0200
Subject: [PATCH] refactor the symbol lexer more

---
 Language/SQL/SimpleSQL/Lex.lhs | 110 +++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 47 deletions(-)

diff --git a/Language/SQL/SimpleSQL/Lex.lhs b/Language/SQL/SimpleSQL/Lex.lhs
index 8f3d2ac..6fecbfe 100644
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@@ -327,13 +327,69 @@ constant.
 >     sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
 >     pp = (<$$> (++))
 
+Symbols
 
-A symbol is one of the two character symbols, or one of the single
-character symbols in the two lists below.
+A symbol is either an operator or one of the miscellaneous symbols, which include:
+.
+..
+:=
+:
+::
+(
+)
+?
+;
+,
+{ (for odbc)
+}
+
+The PostgreSQL operator syntax allows a far larger range of operators
+than ANSI SQL and the other dialects do.
 
 > symbol :: Dialect -> Parser Token
-> symbol d | diSyntaxFlavour d == Postgres =
->     Symbol <$> choice (otherSymbol ++ [singlePlusMinus,opMoreChars])
+> symbol d  = Symbol <$> choice (concat -- lex one symbol token; alternatives are tried in the order listed
+>    [dots
+>    ,if (diSyntaxFlavour d == Postgres) -- ":=", "::" and ":" are separate alternatives for Postgres only
+>     then postgresExtraSymbols
+>     else []
+>    ,miscSymbol
+>    ,if allowOdbc d then odbcSymbol else [] -- braces are symbols only when the dialect allows ODBC syntax
+>    ,if (diSyntaxFlavour d == Postgres) -- operators: generalized operator rules for Postgres, fixed list otherwise
+>     then generalizedPostgresqlOperator
+>     else basicAnsiOps
+>    ])
+>  where
+>    dots = [many1 (char '.')] -- a run of one or more '.' is lexed as a single symbol
+>    odbcSymbol = [string "{", string "}"]
+>    postgresExtraSymbols =
+>        [try (string ":=")
+>         -- accept "::" and ":" but reject a run of three or more colons
+>        ,try (string "::" <* notFollowedBy (char ':'))
+>        ,try (string ":" <* notFollowedBy (char ':'))]
+>    miscSymbol = map (string . (:[])) $ -- single-character symbols, chosen per dialect
+>        case diSyntaxFlavour d of
+>            SQLServer -> ",;():?" -- no '[' or ']' for SQL Server
+>            Postgres -> "[],;()" -- ':' is covered by postgresExtraSymbols, '?' by the operator lexer
+>            _ -> "[],;():?"
+
+try is used because the first character of most two-character symbols
+is also a valid single-character symbol, so a failed match must backtrack
+
+>    basicAnsiOps = map (try . string) [">=","<=","!=","<>"] -- two-character operators are tried first
+>                   ++ map (string . (:[])) "+-^*/%~&<>=" -- then the single-character operators
+>                   ++ pipes
+>    pipes = -- Open question: would many1 (char '|') be better here, leaving
+>            -- over-long runs of '|' to fail later in the parser? It is not
+>            -- yet settled how standalone the lexer should be.
+>            [char '|' *> -- yields "|" or "||"; "||" must not be followed by another '|'
+>             choice ["||" <$ char '|' <* notFollowedBy (char '|')
+>                    ,return "|"]]
+
+postgresql generalized operators
+
+This includes the custom operators that PostgreSQL supports, plus all
+the standard operators, which are also valid custom operator names
+according to the PostgreSQL grammar.
 
 rules
 
@@ -348,27 +404,12 @@ A multiple-character operator name cannot end in + or -, unless the name also co
 
 ~ ! @ # % ^ & | ` ?
 
->   where
->     -- other symbols are all the tokens which parse as symbols in
->     -- this lexer which aren't considered operators in postgresql
->     -- a single ? is parsed as a operator here instead of an other
->     -- symbol because this is the least complex way to do it
->     otherSymbol = many1 (char '.') :
->                   try (string ":=") :
->                   -- parse :: and : and avoid allowing ::: or more
->                   try (string "::" <* notFollowedBy (char ':')) :
->                   try (string ":" <* notFollowedBy (char ':')) :
->                   (map (string . (:[])) "[],;()"
->                    ++ if allowOdbc d
->                       then [string "{", string "}"]
->                       else []
->                   )
-
-exception char is one of:
-~ ! @ # % ^ & | ` ?
 which allows the last character of a multi character symbol to be + or
 -
 
+> generalizedPostgresqlOperator :: [Parser String]
+> generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
+>   where
 >     allOpSymbols = "+-*/<>=~!@#%^&|`?"
 >     -- these are the symbols when if part of a multi character
 >     -- operator permit the operator to end with a + or - symbol
@@ -416,31 +457,6 @@ which allows the last character of a multi character symbol to be + or
 >         <*> option [] opMoreChars
 >        ]
 
-> symbol d  = Symbol <$> choice (otherSymbol ++ regularOp)
->  where
->    otherSymbol = many1 (char '.') :
->                  (map (string . (:[])) otherSymbolChars
->                   ++ if allowOdbc d
->                      then [string "{", string "}"]
->                      else [])
->    otherSymbolChars =
->        case diSyntaxFlavour d of
->            SQLServer -> ",;():?"
->            _ -> "[],;():?"
-
-try is used because most of the first characters of the two character
-symbols can also be part of a single character symbol
-
->    regularOp = map (try . string) [">=","<=","!=","<>"]
->                ++ map (string . (:[])) "+-^*/%~&<>="
->                   -- what about using many1 (char '|'), then it will
->                   -- fail in the parser? Not sure exactly how
->                   -- standalone the lexer should be
->                ++ [char '|' *>
->                    choice ["||" <$ char '|' <* notFollowedBy (char '|')
->                           ,return "|"]]
-
-
 > sqlWhitespace :: Dialect -> Parser Token
 > sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)