From 9ee2a1beab3a3204fc237e655e270762434fcc9a Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Wed, 7 May 2014 21:53:24 +0300
Subject: [PATCH] start reworking some of the combinators

---
 Language/SQL/SimpleSQL/Parser.lhs | 125 ++++++++++++++++++------------
 TODO                              |   8 ++
 2 files changed, 85 insertions(+), 48 deletions(-)

diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs
index c5b0e77..ffff720 100644
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@@ -184,7 +184,7 @@ fixing them in the syntax but leaving them till the semantic checking
 
 > import Control.Monad.Identity (Identity)
 > import Control.Monad (guard, void, when)
-> import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>))
+> import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>), (<**>))
 > import Data.Maybe (fromMaybe,catMaybes)
 > import Data.Char (toLower)
 > import Text.Parsec (errorPos,sourceLine,sourceColumn,sourceName
@@ -192,7 +192,7 @@ fixing them in the syntax but leaving them till the semantic checking
 >                    ,option,between,sepBy,sepBy1,string,manyTill,anyChar
 >                    ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
 >                    ,optionMaybe,optional,many,letter,parse
->                    ,chainl1, (<?>),notFollowedBy,alphaNum, lookAhead)
+>                    ,chainl1, chainr1,(<?>),notFollowedBy,alphaNum, lookAhead)
 > import Text.Parsec.String (Parser)
 > import qualified Text.Parsec as P (ParseError)
 > import Text.Parsec.Perm (permute,(<$?>), (<|?>))
@@ -306,11 +306,19 @@ u&"example quoted"
 >               ,UQName <$> uquotedIdentifier
 >               ,Name <$> identifierBlacklist blacklist]
 
+todo: replace (:[]) with a named function all over
+
 > names :: Parser [Name]
-> names = ((:[]) <$> name) >>= optionSuffix another
+> names = reverse <$> repeatPostfix ((:[]) <$> name) anotherName
+>   -- can't use a simple chain here since we
+>   -- want to wrap the . + name in a try
+>   -- this will change when this is left factored
 >   where
->     another n =
->         (((n++) . (:[])) <$> try (symbol "." *> name)) >>= optionSuffix another
+>     anotherName :: Parser ([Name] -> [Name])
+>     anotherName = try ((:) <$> (symbol "." *> name))
+
+> repeatPostfix :: Parser a -> Parser (a -> a) -> Parser a
+> repeatPostfix p q = foldr ($) <$> p <*> (reverse <$> many q)
 
 = Type Names
 
@@ -420,8 +428,9 @@ TODO: this code needs heavy refactoring
 
 > typeName :: Parser TypeName
 > typeName =
->     (rowTypeName <|> intervalTypeName <|> otherTypeName)
->     >>= tnSuffix
+>     repeatPostfix
+>         (rowTypeName <|> intervalTypeName <|> otherTypeName)
+>         tnSuffix
 >     <?> "typename"
 >   where
 >     -- row type names - a little like create table
@@ -438,6 +447,7 @@ TODO: this code needs heavy refactoring
 >     otherTypeName = do
 >         tn <- (try reservedTypeNames <|> names)
 >         choice [try $ timezone tn
+>                 -- todo: use the P (a->a) style
 >                ,try (precscale tn) >>= optionSuffix charSuffix
 >                ,try $ lob tn
 >                ,optionSuffix charSuffix $ TypeName tn]
@@ -480,14 +490,11 @@ TODO: this code needs heavy refactoring
 >     lobUnits = choice [PrecCharacters <$ keyword_ "characters"
 >                       ,PrecOctets <$ keyword_ "octets"]
 >     -- deal with multiset and array suffixes
->     tnSuffix x =
->         multisetSuffix x <|> arrayTNSuffix x <|> return x
->     multisetSuffix x =
->         (MultisetTypeName x <$ keyword_ "multiset") >>= tnSuffix
->     arrayTNSuffix x =
->         (keyword_ "array" >> ArrayTypeName x
->                              <$> optionMaybe (brackets unsignedInteger)
->         ) >>= tnSuffix
+>     tnSuffix :: Parser (TypeName -> TypeName)
+>     tnSuffix = multisetSuffix <|> arrayTNSuffix
+>     multisetSuffix = MultisetTypeName <$ keyword_ "multiset"
+>     arrayTNSuffix = keyword_ "array" >>
+>         flip ArrayTypeName <$> optionMaybe (brackets unsignedInteger)
 >     -- this parser handles the fixed set of multi word
 >     -- type names, plus all the type names which are
 >     -- reserved words
@@ -850,6 +857,9 @@ if there are no value exprs
 >     makeApp i (SQDefault,es,Nothing) = App i es
 >     makeApp i (d,es,od) = AggregateApp i d es (fromMaybe [] od) Nothing
 
+TODO: change all these suffix functions to use type
+Parser (ValueExpr -> ValueExpr)
+
 > app :: [Name] -> Parser ValueExpr
 > app n = aggOrApp n >>= \a -> choice
 >         [windowSuffix a
@@ -940,8 +950,6 @@ in: two variations:
 a in (expr0, expr1, ...)
 a in (queryexpr)
 
-this is parsed as a postfix operator which is why it is in this form
-
 > inSuffix :: Parser (ValueExpr -> ValueExpr)
 > inSuffix =
 >     mkIn <$> inty
@@ -1232,6 +1240,8 @@ tref
 > from :: Parser [TableRef]
 > from = keyword_ "from" *> commaSep1 tref
 >   where
+>     -- TODO: use P (a->) for the join tref suffix
+>     -- chainl or buildexpressionparser
 >     tref = nonJoinTref >>= optionSuffix joinTrefSuffix
 >     nonJoinTref = choice
 >         [parens $ choice
@@ -1243,9 +1253,8 @@ tref
 >          n <- names
 >          choice [TRFunction n
 >                  <$> parens (commaSep valueExpr)
->                 ,return $ TRSimple n]]
->         >>= optionSuffix aliasSuffix
->     aliasSuffix j = option j (TRAlias j <$> fromAlias)
+>                 ,return $ TRSimple n]] <??> aliasSuffix
+>     aliasSuffix = flip TRAlias <$> fromAlias
 >     joinTrefSuffix t =
 >         (TRJoin t <$> option False (True <$ keyword_ "natural")
 >                   <*> joinType
@@ -1356,7 +1365,7 @@ and union, etc..
 > queryExpr :: Parser QueryExpr
 > queryExpr = choice
 >     [with
->     ,choice [values,table, select] >>= optionSuffix queryExprSuffix]
+>     ,chainr1 (choice [values,table, select]) setOp]
 >   where
 >     select = keyword_ "select" >>
 >         mkSelect
@@ -1396,30 +1405,33 @@ be in the public syntax?
 >     mkTe f w g h od (ofs,fe) =
 >         TableExpression f w g h od ofs fe
 
-> queryExprSuffix :: QueryExpr -> Parser QueryExpr
-> queryExprSuffix qe = cqSuffix >>= optionSuffix queryExprSuffix
+> setOp :: Parser (QueryExpr -> QueryExpr -> QueryExpr)
+> setOp = cq
+>         <$> setOpK
+>         <*> (fromMaybe SQDefault <$> duplicates)
+>         <*> corr
 >   where
->     cqSuffix = CombineQueryExpr qe
->                <$> setOp
->                <*> (fromMaybe SQDefault <$> duplicates)
->                <*> corr
->                <*> queryExpr
->     setOp = choice [Union <$ keyword_ "union"
->                    ,Intersect <$ keyword_ "intersect"
->                    ,Except <$ keyword_ "except"]
+>     cq o d c q0 q1 = CombineQueryExpr q0 o d c q1
+>     setOpK = choice [Union <$ keyword_ "union"
+>                     ,Intersect <$ keyword_ "intersect"
+>                     ,Except <$ keyword_ "except"]
 >             <?> "set operator"
 >     corr = option Respectively (Corresponding <$ keyword_ "corresponding")
 
 
 wrapper for query expr which ignores optional trailing semicolon.
 
+TODO: change style
+
 > topLevelQueryExpr :: Parser QueryExpr
-> topLevelQueryExpr = queryExpr >>= optionSuffix ((semi *>) . return)
+> topLevelQueryExpr = queryExpr <??> (id <$ semi)
 
 wrapper to parse a series of query exprs from a single source. They
 must be separated by semicolon, but for the last expression, the
 trailing semicolon is optional.
 
+TODO: change style
+
 > queryExprs :: Parser [QueryExpr]
 > queryExprs = (:[]) <$> queryExpr
 >              >>= optionSuffix ((semi *>) . return)
@@ -1511,24 +1523,15 @@ making a decision on how to represent numbers, the client code can
 make this choice.
 
 > numberLiteral :: Parser String
-> numberLiteral =
->     lexeme (numToken <* notFollowedBy (alphaNum <|> char '.'))
->     <?> "number literal"
+> numberLiteral = lexeme (
+>     int <??> (pp dot <??.> pp int) <??> pp expon
+>     <|> (++) <$> dot <*> int <??> pp expon)
 >   where
->     numToken = choice [int
->                        >>= optionSuffix dot
->                        >>= optionSuffix fracts
->                        >>= optionSuffix expon
->                       ,fract "" >>= optionSuffix expon]
 >     int = many1 digit
->     fract p = dot p >>= fracts
->     dot p = (p++) <$> string "."
->     fracts p = (p++) <$> int
->     expon p = concat <$> sequence
->               [return p
->               ,(:[]) <$> oneOf "eE"
->               ,option "" (string "+" <|> string "-")
->               ,int]
+>     dot = string "."
+>     expon = (:) <$> oneOf "eE" <*> sInt
+>     sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
+>     pp = ((++) <$$>)
 
 
 > identifier :: Parser String
@@ -1666,6 +1669,9 @@ associativity when chaining it recursively. Have to review
 all these uses and figure out if any should be right associative
 instead, and create an alternative suffix parser
 
+This is no good, and should be replaced with chain and <??> which has
+a different type
+
 > optionSuffix :: (a -> Parser a) -> a -> Parser a
 > optionSuffix p a = option a (p a)
 
@@ -2068,3 +2074,26 @@ context
 >                     ++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
 >              _ -> ""
 >     p = errorPos e
+
+parses an optional postfix element and applies its result to its left
+hand result, taken from uu-parsinglib
+
+TODO: make sure the precedence higher than <|> and lower than the
+other operators so it can be used nicely
+
+> (<??>) :: Parser a -> Parser (a -> a) -> Parser a
+> p <??> q = p <**> option id q
+
+
+this is analogous to <**>
+
+> (<$$>) :: (a -> b -> c) -> Parser b -> Parser (a -> c)
+> (<$$>) = (<$>) . flip
+
+
+composing suffix parsers, not sure about the name
+
+> (<??.>) :: Parser (a -> a) -> Parser (a -> a) -> Parser (a -> a)
+> (<??.>) pa pb = (.) <$$> pa <*> option id pb
+
+
diff --git a/TODO b/TODO
index 49b38cf..291e4e3 100644
--- a/TODO
+++ b/TODO
@@ -5,6 +5,14 @@ continue 2011 review and tests
 2. start thinking about automated tests for invalid syntax to catch
    bad parsing
 
+
+fixing the non idiomatic (pun!) suffix parsing:
+  typename parsing
+  identifier/app/agg/window parsing
+  join parsing in trefs (use chain? - tricky because of postfix onExpr)
+  top level and queryexprs parsing
+  number literal
+
 review names in the syntax for correspondence with sql standard, avoid
    gratuitous differences