tidying up

2013-12-14 10:55:44 +02:00 · 2013-12-14 10:55:44 +02:00 · 0de32d3429
commit 0de32d3429
parent 0da39d4498
4 changed files with 327 additions and 186 deletions
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@ -1,4 +1,5 @@

+The parser code

 > module Language.SQL.SimpleSQL.Parser
 >     (parseQueryExpr
@ -6,49 +7,58 @@
 >     ,parseQueryExprs
 >     ,ParseError(..)) where

-> import Text.Groom
-> import Text.Parsec hiding (ParseError)
-> import qualified Text.Parsec as P
 > import Control.Monad.Identity
 > import Control.Applicative hiding (many, (<|>), optional)
-> import qualified Language.Haskell.Exts.Syntax as HSE
-> import qualified Language.Haskell.Exts.Fixity as HSE
 > import Data.Maybe
-> import Data.List
 > import Data.Char
+> import Text.Parsec hiding (ParseError)
+> import qualified Text.Parsec as P

 > import Language.SQL.SimpleSQL.Syntax

+The public api functions.

-> parseQueryExpr :: FilePath
->                -> Maybe (Int,Int)
->                -> String
+> -- | Parses a query expr, trailing semicolon optional.
+> parseQueryExpr :: FilePath -- ^ filename to use in errors
+>                -> Maybe (Int,Int) -- ^ line number and column number to use in errors
+>                -> String -- ^ the sql source to parse
 >                -> Either ParseError QueryExpr
-> parseQueryExpr f p src =
->     either (Left . convParseError src) Right
->     $ parse (setPos p *> whiteSpace *> queryExpr <* eof) f src
+> parseQueryExpr = wrapParse topLevelQueryExpr

-> parseQueryExprs :: FilePath
->                 -> Maybe (Int,Int)
->                 -> String
+> parseQueryExprs :: FilePath -- ^ filename to use in errors
+>                 -> Maybe (Int,Int) -- ^ line number and column number to use in errors
+>                 -> String -- ^ the sql source to parse
 >                 -> Either ParseError [QueryExpr]
-> parseQueryExprs f p src =
->     either (Left . convParseError src) Right
->     $ parse (setPos p *> whiteSpace *> queryExprs <* eof) f src
+> parseQueryExprs = wrapParse queryExprs

-> parseScalarExpr :: FilePath
->                 -> Maybe (Int,Int)
->                 -> String
+> parseScalarExpr :: FilePath -- ^ filename to use in errors
+>                 -> Maybe (Int,Int) -- ^ line number and column number to use in errors
+>                 -> String -- ^ the sql source to parse
 >                 -> Either ParseError ScalarExpr
-> parseScalarExpr f p src =
->     either (Left . convParseError src) Right
->     $ parse (setPos p *> whiteSpace *> scalarExpr <* eof) f src
+> parseScalarExpr = wrapParse scalarExpr

+This helper function takes the parser given and:
+
+sets the position when parsing
+automatically skips leading whitespace
+checks the parser parses all the input using eof
+converts the error return to the nice wrapper
+
+> wrapParse :: P a
+>           -> FilePath
+>           -> Maybe (Int,Int)
+>           -> String
+>           -> Either ParseError a
+> wrapParse parser f p src =
+>     either (Left . convParseError src) Right
+>     $ parse (setPos p *> whiteSpace *> parser <* eof) f src
+
+> -- | Type to represent parse errors.
 > data ParseError = ParseError
->                   {peErrorString :: String
->                   ,peFilename :: FilePath
->                   ,pePosition :: (Int,Int)
->                   ,peFormattedError :: String
+>                   {peErrorString :: String -- ^ contains the error message
+>                   ,peFilename :: FilePath -- ^ filename location for the error
+>                   ,pePosition :: (Int,Int) -- ^ line number and column number location for the error
+>                   ,peFormattedError :: String -- ^ formatted error with the position, error message and source context
 >                   } deriving (Eq,Show)

 ------------------------------------------------
@ -57,35 +67,20 @@

 = scalar expressions

-> stringLiteral :: P String
-> stringLiteral = symbol_ "'" *> manyTill anyChar (symbol_ "'")
+== literals
+
+See the stringLiteral lexer below for notes on string literal syntax.

 > estring :: P ScalarExpr
 > estring = StringLit <$> stringLiteral

-digits
-digits.[digits][e[+-]digits]
-[digits].digits[e[+-]digits]
-digitse[+-]digits
-
 > number :: P ScalarExpr
-> number =
->     NumLit <$> (choice [int
->                         >>= optionSuffix dot
->                         >>= optionSuffix fracts
->                         >>= optionSuffix expon
->                        ,fract "" >>= optionSuffix expon]
->                 <* whiteSpace)
->   where
->     int = many1 digit
->     fract p = dot p >>= fracts
->     dot p = ((p++) . (:[])) <$> char '.'
->     fracts p = (p++) <$> int
->     expon p = do
->         void $ char 'e'
->         s <- option "" ((:[]) <$> (char '+' <|> char '-'))
->         i <- int
->         return (p ++ "e" ++ s ++ i)
+> number = NumLit <$> numberLiteral
+
+parse SQL interval literals, something like
+interval '5' day (3)
+or
+interval '5' month

 > interval :: P ScalarExpr
 > interval = try (keyword_ "interval") >>
@ -97,40 +92,40 @@ digitse[+-]digits
 > literal :: P ScalarExpr
 > literal = number <|> estring <|> interval

-> identifierString :: P String
-> identifierString = do
->     s <- (:) <$> letterOrUnderscore
->              <*> many letterDigitOrUnderscore <* whiteSpace
->     guard (s `notElem` blacklist)
->     return s
->   where
->     letterOrUnderscore = char '_' <|> letter
->     letterDigitOrUnderscore = char '_' <|> alphaNum
+== identifiers

-> blacklist :: [String]
-> blacklist = ["as", "from", "where", "having", "group", "order"
->             ,"inner", "left", "right", "full", "natural", "join"
->             ,"on", "using", "when", "then", "case", "end", "order"
->             ,"limit", "offset", "in"
->             ,"except", "intersect", "union"]
-
-TODO: talk about what must be in the blacklist, and what doesn't need
-to be.
+Uses the identifierString 'lexer'. See this function for notes on identifiers.

 > identifier :: P ScalarExpr
 > identifier = Iden <$> identifierString

+Identifier with one dot in it. This should be extended to any amount
+of dots.
+
 > dottedIden :: P ScalarExpr
 > dottedIden = Iden2 <$> identifierString
 >                    <*> (symbol "." *> identifierString)

+== star
+
+used in select *, select x.*, and agg(*) variations.
+
 > star :: P ScalarExpr
 > star = choice [Star <$ symbol "*"
 >               ,Star2 <$> (identifierString <* symbol "." <* symbol "*")]

+== function application, aggregates and windows

-> app :: P ScalarExpr
-> app = do
+this represents anything which syntactically looks like regular C
+function application: an identifier, parens with comma sep scalar
+expression arguments.
+
+The parsing for the aggregate extensions is here as well:
+
+aggregate([all|distinct] args [order by orderitems])
+
+> aggOrApp :: P ScalarExpr
+> aggOrApp = do
 >       i <- identifierString
 >       _ <- symbol "("
 >       d <- try duplicates
@ -143,6 +138,19 @@ to be.
 >               return $ App i es
 >           _ -> return $ AggregateApp i d es (fromMaybe [] od)

+> duplicates :: P (Maybe Duplicates)
+> duplicates = optionMaybe $ try $
+>     choice [All <$ keyword_ "all"
+>            ,Distinct <$ keyword "distinct"]
+
+parse a window call as a suffix of a regular function call
+
+this looks like this:
+
+functioncall(args) over ([partition by ids] [order by orderitems])
+
+No support for explicit frames yet.
+
 > windowSuffix :: ScalarExpr -> P ScalarExpr
 > windowSuffix e@(App f es) =
 >     choice [try (keyword_ "over")
@ -157,6 +165,11 @@ to be.

 > windowSuffix e = return e

+> app :: P ScalarExpr
+> app = aggOrApp >>= windowSuffix
+
+== case expression
+
 > scase :: P ScalarExpr
 > scase =
 >     Case <$> (try (keyword_ "case") *> optionMaybe (try scalarExpr'))
@ -167,6 +180,15 @@ to be.
 >     swhen = keyword_ "when" *>
 >             ((,) <$> scalarExpr' <*> (keyword_ "then" *> scalarExpr'))

+== miscellaneous keyword operators
+
+These are keyword operators which don't look like normal prefix,
+postfix or infix binary operators. They mostly look like function
+application but with keywords in the argument list instead of commas
+to separate the arguments.
+
+cast: cast(expr as type)
+
 > cast :: P ScalarExpr
 > cast = parensCast <|> prefixCast
 >   where
@ -176,12 +198,18 @@ to be.
 >     prefixCast = try (CastOp <$> typeName
 >                              <*> stringLiteral)

+extract(id from expr)
+
 > extract :: P ScalarExpr
 > extract = try (keyword_ "extract") >>
 >     parens (makeOp <$> identifierString
 >                    <*> (keyword_ "from" *> scalarExpr'))
 >   where makeOp n e = SpecialOp "extract" [Iden n, e]

+substring(x from expr to expr)
+
+todo: also support substring(x from expr)
+
 > substring :: P ScalarExpr
 > substring = try (keyword_ "substring") >>
 >     parens (makeOp <$> scalarExpr'
@ -190,6 +218,10 @@ to be.
 >                    )
 >   where makeOp a b c = SpecialOp "substring" [a,b,c]

+in: two variations:
+a in (expr0, expr1, ...)
+a in (queryexpr)
+
 > inSuffix :: ScalarExpr -> P ScalarExpr
 > inSuffix e =
 >     In <$> inty
@ -201,6 +233,19 @@ to be.
 >     inty = try $ choice [True <$ keyword_ "in"
 >                         ,False <$ keyword_ "not" <* keyword_ "in"]

+between:
+expr between expr and expr
+
+There is a complication when parsing between - when parsing the second
+expression it is ambiguous when you hit an 'and' whether it is a
+binary operator or part of the between. This code follows what
+postgres does, which might be standard across SQL implementations,
+which is that you can't have a binary and operator in the middle
+expression in a between unless it is wrapped in parens. The 'bExpr
+parsing' is used to create alternative scalar expression parser which
+is identical to the normal one expect it doesn't recognise the binary
+and operator. This is the call to scalarExpr'' True.
+
 > betweenSuffix :: ScalarExpr -> P ScalarExpr
 > betweenSuffix e =
 >     makeOp <$> opName
@ -213,6 +258,9 @@ to be.
 >              ,"not between" <$ keyword_ "not" <* keyword_ "between"]
 >     makeOp n a b c = SpecialOp n [a,b,c]

+subquery expression:
+[exists|all|any|some] (queryexpr)
+
 > subquery :: P ScalarExpr
 > subquery =
 >     choice
@ -225,6 +273,9 @@ to be.
 >            ,SqAny <$ keyword_ "any"
 >            ,SqSome <$ keyword_ "some"]

+typename: used in casts. Special cases for the multi keyword typenames
+that SQL supports.
+
 > typeName :: P TypeName
 > typeName = choice
 >     [TypeName "double precision"
@ -233,6 +284,23 @@ to be.
 >      <$ try (keyword_ "character" <* keyword_ "varying")
 >     ,TypeName <$> identifierString]

+== scalar parens
+
+> sparens :: P ScalarExpr
+> sparens = Parens <$> parens scalarExpr'
+
+
+== operator parsing
+
+The 'regular' operators in this parsing and in the abstract syntax are
+unary prefix, unary postfix and binary infix operators. The operators
+can be symbols (a + b), single keywords (a and b) or multiple keywords
+(a is similar to b).
+
+First, the list of the regulars operators split by operator type
+(prefix, postfix, binary) and by symbol/single keyword/ multiple
+keyword.
+
 > binOpSymbolNames :: [String]
 > binOpSymbolNames =
 >     ["=", "<=", ">=", "!=", "<>", "<", ">"
@ -251,18 +319,32 @@ to be.
 >     ,"is distinct from"
 >     ,"is not distinct from"]

-
 used for between parsing

 > binOpKeywordNamesNoAnd :: [String]
 > binOpKeywordNamesNoAnd = filter (/="and") binOpKeywordNames

+There aren't any multi keyword prefix operators currently supported.
+
 > prefixUnOpKeywordNames :: [String]
 > prefixUnOpKeywordNames = ["not"]

 > prefixUnOpSymbolNames :: [String]
 > prefixUnOpSymbolNames = ["+", "-"]

+There aren't any single keyword postfix operators currently supported. Maybe all these 'is's can be left factored?
+
+> postfixOpKeywords :: [String]
+> postfixOpKeywords = ["is null"
+>                     ,"is not null"
+>                     ,"is true"
+>                     ,"is not true"
+>                     ,"is false"
+>                     ,"is not false"
+>                     ,"is unknown"
+>                     ,"is not unknown"]
+
+The parsers:

 > prefixUnaryOp :: P ScalarExpr
 > prefixUnaryOp =
@ -271,31 +353,32 @@ used for between parsing
 >     opSymbol = choice (map (try . symbol) prefixUnOpSymbolNames
 >                       ++ map (try . keyword) prefixUnOpKeywordNames)

-> postfixOp :: ScalarExpr -> P ScalarExpr
-> postfixOp e =
+> postfixOpSuffix :: ScalarExpr -> P ScalarExpr
+> postfixOpSuffix e =
 >     try $ choice $ map makeOp opPairs
 >   where
->     -- could left factor here?
->     ops = ["is null"
->           ,"is not null"
->           ,"is true"
->           ,"is not true"
->           ,"is false"
->           ,"is not false"
->           ,"is unknown"
->           ,"is not unknown"]
->     opPairs = flip map ops $ \o -> (o, words o)
+>     opPairs = flip map postfixOpKeywords $ \o -> (o, words o)
 >     makeOp (o,ws) = try $ PostfixOp o e <$ keywords_ ws
 >     keywords_ = try . mapM_ keyword_

+Wrapper for non 'bExpr' parsing. See the between parser for
+explanation.
+
 > scalarExpr' :: P ScalarExpr
 > scalarExpr' = scalarExpr'' False

-the bexpr is to deal with between x and y
+The main scalar expression parser which includes the binary operator
+parsing. All the binary operators are parsed as same precedence and
+left associativity. This will be fixed in a pass over the abstract
+syntax, which isn't working at the moment.

-when we are parsing the scalar expr for x, we don't allow and as a
-binary operator except nested in parens. This is taken from how
-postgresql handles this
+TODO:
+left factor: stuff which starts with identifier
+
+move app>>=windowSuffix to a separate parser above next to app and
+windowSuffix themselves
+
+split out the binary op parsing from this function to above

 > scalarExpr'' :: Bool -> P ScalarExpr
 > scalarExpr'' bExpr = factor >>= trysuffix
@ -307,7 +390,7 @@ postgresql handles this
 >                     ,substring
 >                     ,subquery
 >                     ,prefixUnaryOp
->                     ,try app >>= windowSuffix
+>                     ,try app
 >                     ,try dottedIden
 >                     ,identifier
 >                     ,sparens]
@ -316,7 +399,7 @@ postgresql handles this
 >                 [BinOp <$> opSymbol <*> return e0 <*> factor
 >                 ,inSuffix e0
 >                 ,betweenSuffix e0
->                 ,postfixOp e0
+>                 ,postfixOpSuffix e0
 >                 ] >>= trysuffix
 >     opSymbol = choice
 >         (map (try . symbol) binOpSymbolNames
@ -327,71 +410,12 @@ postgresql handles this
 >                 else binOpKeywordNames))
 >     keywords ks = unwords <$> mapM keyword ks

-> sparens :: P ScalarExpr
-> sparens = Parens <$> parens scalarExpr'
+TODO: create the fixity adjuster. This should take a list of operators
+with precendence and associativity and adjust a scalar expr tree to
+match these. It shouldn't attempt to descend into scalar expressions
+inside nested query exprs in subqueries.

-attempt to fix the precedence and associativity. Doesn't work
-
-> toHaskell :: ScalarExpr -> HSE.Exp
-> toHaskell e = case e of
->     Iden i -> HSE.Var $ HSE.UnQual $ HSE.Ident i
->     StringLit l -> HSE.Lit $ HSE.String $ 's':l
->     NumLit l -> HSE.Lit $ HSE.String $ 'n':l
->     App n es -> HSE.App (toHaskell $ Iden n) $ ltoh es
->     Cast e0 (TypeName tn) -> toHaskell $ App ("cast:" ++ tn) [e0]
->     CastOp (TypeName tn) s -> toHaskell $ App ("castop:" ++ tn) [StringLit s]
->     --Op n [e0,e1] -> HSE.InfixApp (toHaskell e0)
->     --                             (HSE.QVarOp $ HSE.UnQual $ HSE.Ident n)
->     --                             (toHaskell e1)
->     --Op o [e0] -> toHaskell $ App ("unary:" ++ o) [e0]
->     --Op {} -> error $ "bad args to operator " ++ groom e
->     Star -> HSE.Var $ HSE.UnQual $ HSE.Ident "*"
->     Iden2 a b -> HSE.Var $ HSE.Qual (HSE.ModuleName a) (HSE.Ident b)
->     Star2 q -> HSE.Var $ HSE.Qual (HSE.ModuleName q) (HSE.Ident "*")
->     Parens e0 -> HSE.Paren $ toHaskell e0
->     -- map the two maybes to lists with either 0 or 1 element
->     Case v ts el -> HSE.App (toHaskell $ Iden "$case")
->                     (HSE.List [ltoh $ maybeToList v
->                               ,HSE.List $ map (ltoh . (\(a,b) -> [a,b])) ts
->                               ,ltoh $ maybeToList el])
->     _ -> error "please fix me 1"
->   where
->     ltoh = HSE.List . map toHaskell
-
-> toSql :: HSE.Exp -> ScalarExpr
-> toSql e = case e of
->     HSE.Var (HSE.UnQual (HSE.Ident "*")) -> Star
->     HSE.Var (HSE.Qual (HSE.ModuleName q) (HSE.Ident "*")) -> Star2 q
->     HSE.Var (HSE.Qual (HSE.ModuleName a) (HSE.Ident b)) -> Iden2 a b
->     HSE.Var (HSE.UnQual (HSE.Ident i)) -> Iden i
->     HSE.Lit (HSE.String ('s':l)) -> StringLit l
->     HSE.Lit (HSE.String ('n':l)) -> NumLit l
->     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident "$case"))) (HSE.List [v,ts,el]) ->
->         Case (ltom v) (pairs ts) (ltom el)
->     {-HSE.App (HSE.Var (HSE.UnQual (HSE.Ident x)))
->             (HSE.List [ea])
->         | "unary:" `isPrefixOf` x ->
->           Op (drop 6 x) [toSql ea]
->         | "cast:" `isPrefixOf` x ->
->           Cast (toSql ea) (TypeName $ drop 5 x)-}
->     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident x)))
->             (HSE.List [HSE.Lit (HSE.String ('s':ea))])
->         | "castop:" `isPrefixOf` x ->
->           CastOp (TypeName $ drop 7 x) ea
->     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident i)))
->             (HSE.List es) -> App i $ map toSql es
->     {-HSE.InfixApp e0 (HSE.QVarOp (HSE.UnQual (HSE.Ident n))) e1 ->
->         Op n [toSql e0, toSql e1]-}
->     HSE.Paren e0 -> Parens $ toSql e0
->     _ -> error $ "unsupported haskell " ++ groom e
->   where
->     ltom (HSE.List []) = Nothing
->     ltom (HSE.List [ex]) = Just $ toSql ex
->     ltom ex = error $ "unsupported haskell " ++ groom ex
->     pairs (HSE.List l) = map (\(HSE.List [a,b]) -> (toSql a, toSql b)) l
->     pairs ex = error $ "unsupported haskell " ++ groom ex
-
-> sqlFixities :: [HSE.Fixity]
+> {-sqlFixities :: [HSE.Fixity]
 > sqlFixities = HSE.infixl_ 9 ["*", "/"]
 >               ++ HSE.infixl_ 8 ["+", "-"]
 >               ++ HSE.infixl_ 6 ["<=",">=","!=","<>","||", "like"]
@ -399,25 +423,30 @@ attempt to fix the precedence and associativity. Doesn't work
 >               ++ HSE.infixr_ 3 ["="]
 >               ++ HSE.infixr_ 2 ["or"]
 >               ++ HSE.infixl_ 1 ["and"]
->               ++ HSE.infixl_ 0 ["or"]
+>               ++ HSE.infixl_ 0 ["or"]-}

-> _fixFixity :: ScalarExpr -> ScalarExpr
-> _fixFixity se = runIdentity $
->      toSql <$> HSE.applyFixities sqlFixities (toHaskell se)
+> fixFixities :: ScalarExpr -> ScalarExpr
+> fixFixities = id
+
+The scalarExpr wrapper. The idea is that directly nested scalar
+expressions use the scalarExpr' parser, then other code uses the
+scalarExpr parser and then everyone gets the fixity fixes and it's
+easy to ensure that this fix is only applied once to each scalar
+expression tree (for efficiency and code clarity).

 > scalarExpr :: P ScalarExpr
 > scalarExpr =
 >     choice [try star
->            ,{-fixFixity <$>-} scalarExpr']
+>            ,fixFixities <$> scalarExpr']

 -------------------------------------------------

 = query expressions

-> duplicates :: P (Maybe Duplicates)
-> duplicates = optionMaybe $ try $
->     choice [All <$ keyword_ "all"
->            ,Distinct <$ keyword "distinct"]
+TODO: maybe refactor all the parsers. A parser wouldn't usually be
+optional or use try itself. The caller could do this.
+
+== select lists

 > selectItem :: P (Maybe String, ScalarExpr)
 > selectItem = flip (,) <$> scalarExpr <*> optionMaybe (try alias)
@ -426,6 +455,22 @@ attempt to fix the precedence and associativity. Doesn't work
 > selectList :: P [(Maybe String,ScalarExpr)]
 > selectList = commaSep1 selectItem

+== from
+
+this parser should be refactored, it is very unclear. Here is the
+rough grammar
+
+tref
+(cross | [natural]
+         ([inner]
+          | left [outer]
+          | right [outer]
+          | full [outer]
+         )
+join tref
+[on expr | using (...)]
+
+
 > from :: P [TableRef]
 > from = option [] (try (keyword_ "from") *> commaSep1 tref)
 >   where
@ -467,6 +512,14 @@ attempt to fix the precedence and associativity. Doesn't work
 >                   a2 = optionMaybe (try $ parens (commaSep1 identifierString))
 >               in option j (JoinAlias j <$> try a1 <*> try a2)

+== simple other parts
+
+Parsers for where, group by, having, order by and limit, which are
+pretty trivial.
+
+Here is a helper for parsing a few parts of the query expr (currently
+where, having, limit, offset).
+
 > optionalScalarExpr :: String -> P (Maybe ScalarExpr)
 > optionalScalarExpr k = optionMaybe (try (keyword_ k) *> scalarExpr)

@ -494,6 +547,8 @@ attempt to fix the precedence and associativity. Doesn't work
 > offset :: P (Maybe ScalarExpr)
 > offset = optionalScalarExpr "offset"

+== common table expressions
+
 > with :: P QueryExpr
 > with = try (keyword_ "with") >>
 >     With <$> commaSep1 withQuery <*> queryExpr
@ -502,6 +557,11 @@ attempt to fix the precedence and associativity. Doesn't work
 >         (,) <$> (identifierString <* optional (try $ keyword_ "as"))
 >             <*> parens queryExpr

+== query expression
+
+This parser parses any query expression variant: normal select, cte,
+and union, etc..
+
 > queryExpr :: P QueryExpr
 > queryExpr =
 >   choice [select >>= queryExprSuffix, with]
@ -533,17 +593,113 @@ attempt to fix the precedence and associativity. Doesn't work
 >             >>= queryExprSuffix
 >            ,return qe]

+wrapper for query expr which ignores optional trailing semicolon.
+
+> topLevelQueryExpr :: P QueryExpr
+> topLevelQueryExpr =
+>     queryExpr <* (choice [try $ symbol_ ";", return()])
+
+wrapper to parse a series of query exprs from a single source. They
+must be separated by semicolon, but for the last expression, the
+trailing semicolon is optional.
+
 > queryExprs :: P [QueryExpr]
 > queryExprs = do
 >     qe <- queryExpr
 >     choice [[qe] <$ eof
->            ,symbol ";" *>
+>            ,symbol_ ";" *>
 >             choice [[qe] <$ eof
 >                    ,(:) qe <$> queryExprs]]

 ------------------------------------------------

-= helper parsers
+= lexing parsers
+
+The lexing is a bit 'virtual', in the usual parsec style.
+
+> symbol :: String -> P String
+> symbol s = string s
+>            -- <* notFollowedBy (oneOf "+-/*<>=!|")
+>            <* whiteSpace
+
+> symbol_ :: String -> P ()
+> symbol_ s = symbol s *> return ()
+
+> keyword :: String -> P String
+> keyword s = (map toLower <$> string s)
+>             <* notFollowedBy (char '_' <|> alphaNum)
+>             <* whiteSpace
+
+> keyword_ :: String -> P ()
+> keyword_ s = keyword s *> return ()
+
+Identifiers are very simple at the moment: start with a letter or
+underscore, and continue with letter, underscore or digit. It doesn't
+support quoting other other sorts of identifiers yet. There is a
+blacklist of keywords which aren't supported as identifiers.
+
+> identifierString :: P String
+> identifierString = do
+>     s <- (:) <$> letterOrUnderscore
+>              <*> many letterDigitOrUnderscore <* whiteSpace
+>     guard (s `notElem` blacklist)
+>     return s
+>   where
+>     letterOrUnderscore = char '_' <|> letter
+>     letterDigitOrUnderscore = char '_' <|> alphaNum
+
+> blacklist :: [String]
+> blacklist = ["as", "from", "where", "having", "group", "order"
+>             ,"inner", "left", "right", "full", "natural", "join"
+>             ,"on", "using", "when", "then", "case", "end", "order"
+>             ,"limit", "offset", "in"
+>             ,"except", "intersect", "union"]
+
+TODO: talk about what must be in the blacklist, and what doesn't need
+to be.
+
+String literals: limited at the moment, no escaping \' or other
+variations.
+
+> stringLiteral :: P String
+> stringLiteral = char '\'' *> manyTill anyChar (symbol_ "'")
+
+number literals
+
+here is the rough grammar target:
+
+digits
+digits.[digits][e[+-]digits]
+[digits].digits[e[+-]digits]
+digitse[+-]digits
+
+numbers are parsed to strings, not to a numeric type. This is to aoivd
+making a decision on how to represent numbers, the client code can
+make this choice.
+
+> numberLiteral :: P String
+> numberLiteral =
+>     choice [int
+>             >>= optionSuffix dot
+>             >>= optionSuffix fracts
+>             >>= optionSuffix expon
+>            ,fract "" >>= optionSuffix expon]
+>     <* whiteSpace
+>   where
+>     int = many1 digit
+>     fract p = dot p >>= fracts
+>     -- todo: use some helper functions to improve the
+>     -- unclear pointfree code here
+>     dot p = ((p++) . (:[])) <$> char '.'
+>     fracts p = (p++) <$> int
+>     expon p = do
+>         void $ char 'e'
+>         s <- option "" ((:[]) <$> (char '+' <|> char '-'))
+>         i <- int
+>         return (p ++ "e" ++ s ++ i)
+
+
+whitespace parser which skips comments also

 > whiteSpace :: P ()
 > whiteSpace =
@ -561,6 +717,8 @@ attempt to fix the precedence and associativity. Doesn't work
 >     -- use many1 so we can more easily avoid non terminating loops
 >     simpleWhiteSpace = void $ many1 (oneOf " \t\n")

+= generic parser helpers
+
 > optionSuffix :: (a -> P a) -> a -> P a
 > optionSuffix p a = option a (p a)

@ -571,22 +729,6 @@ attempt to fix the precedence and associativity. Doesn't work
 > commaSep = (`sepBy` symbol_ ",")


-> symbol :: String -> P String
-> symbol s = string s
->            -- <* notFollowedBy (oneOf "+-/*<>=!|")
->            <* whiteSpace
-
-> symbol_ :: String -> P ()
-> symbol_ s = symbol s *> return ()
-
-> keyword :: String -> P String
-> keyword s = (map toLower <$> string s)
->             <* notFollowedBy (char '_' <|> alphaNum)
->             <* whiteSpace
-
-> keyword_ :: String -> P ()
-> keyword_ s = keyword s *> return ()
-
 > commaSep1 :: P a -> P [a]
 > commaSep1 = (`sepBy1` symbol_ ",")

--- a/PrettyIt.lhs
+++ b/PrettyIt.lhs
@ -11,8 +11,7 @@
 >     case args of
 >       [f] -> do
 >              src <- readFile f
->              either (error . peFormattedError
->                     )
+>              either (error . peFormattedError)
 >                     (putStrLn . intercalate "\n" . map prettyQueryExpr)
 >                     $ parseQueryExprs f Nothing src
 >       _ -> error "please pass filename to prettify"
--- a/2
+++ b/2
@ -2,11 +2,13 @@

 first release:

+tests for the queryexprs parser
 check the pretty printer on the tpch queries
 fix the fixity issue
 add automated tests to cabal
 do code documentation and haddock
  check the order of exports, imports and functions/cases in the files
+fix up the import namespaces/explicit names nicelyx
 do some tests for parse errors?
 website with haddock and table of parsing tests

--- a/simple-sql-parser.cabal
+++ b/simple-sql-parser.cabal
@ -27,10 +27,8 @@ library
  -- other-modules:       
  -- other-extensions:    
  build-depends:       base >=4.6 && <4.7,
-                       groom >=0.1 && <0.2,
                       parsec >=3.1 && <3.2,
                       mtl >=2.1 && <2.2,
-                       haskell-src-exts >=1.14 && <1.15,
                       pretty >= 1.1 && < 1.2
  -- hs-source-dirs:      
  default-language:    Haskell2010