tidying up
This commit is contained in:
parent
0da39d4498
commit
0de32d3429
|
@ -1,4 +1,5 @@
|
|||
|
||||
The parser code
|
||||
|
||||
> module Language.SQL.SimpleSQL.Parser
|
||||
> (parseQueryExpr
|
||||
|
@ -6,49 +7,58 @@
|
|||
> ,parseQueryExprs
|
||||
> ,ParseError(..)) where
|
||||
|
||||
> import Text.Groom
|
||||
> import Text.Parsec hiding (ParseError)
|
||||
> import qualified Text.Parsec as P
|
||||
> import Control.Monad.Identity
|
||||
> import Control.Applicative hiding (many, (<|>), optional)
|
||||
> import qualified Language.Haskell.Exts.Syntax as HSE
|
||||
> import qualified Language.Haskell.Exts.Fixity as HSE
|
||||
> import Data.Maybe
|
||||
> import Data.List
|
||||
> import Data.Char
|
||||
> import Text.Parsec hiding (ParseError)
|
||||
> import qualified Text.Parsec as P
|
||||
|
||||
> import Language.SQL.SimpleSQL.Syntax
|
||||
|
||||
The public api functions.
|
||||
|
||||
> parseQueryExpr :: FilePath
|
||||
> -> Maybe (Int,Int)
|
||||
> -> String
|
||||
> -- | Parses a query expr, trailing semicolon optional.
|
||||
> parseQueryExpr :: FilePath -- ^ filename to use in errors
|
||||
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
||||
> -> String -- ^ the sql source to parse
|
||||
> -> Either ParseError QueryExpr
|
||||
> parseQueryExpr f p src =
|
||||
> either (Left . convParseError src) Right
|
||||
> $ parse (setPos p *> whiteSpace *> queryExpr <* eof) f src
|
||||
> parseQueryExpr = wrapParse topLevelQueryExpr
|
||||
|
||||
> parseQueryExprs :: FilePath
|
||||
> -> Maybe (Int,Int)
|
||||
> -> String
|
||||
> parseQueryExprs :: FilePath -- ^ filename to use in errors
|
||||
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
||||
> -> String -- ^ the sql source to parse
|
||||
> -> Either ParseError [QueryExpr]
|
||||
> parseQueryExprs f p src =
|
||||
> either (Left . convParseError src) Right
|
||||
> $ parse (setPos p *> whiteSpace *> queryExprs <* eof) f src
|
||||
> parseQueryExprs = wrapParse queryExprs
|
||||
|
||||
> parseScalarExpr :: FilePath
|
||||
> parseScalarExpr :: FilePath -- ^ filename to use in errors
|
||||
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
||||
> -> String -- ^ the sql source to parse
|
||||
> -> Either ParseError ScalarExpr
|
||||
> parseScalarExpr = wrapParse scalarExpr
|
||||
|
||||
This helper function takes the parser given and:
|
||||
|
||||
sets the position when parsing
|
||||
automatically skips leading whitespace
|
||||
checks the parser parses all the input using eof
|
||||
converts the error return to the nice wrapper
|
||||
|
||||
> wrapParse :: P a
|
||||
> -> FilePath
|
||||
> -> Maybe (Int,Int)
|
||||
> -> String
|
||||
> -> Either ParseError ScalarExpr
|
||||
> parseScalarExpr f p src =
|
||||
> -> Either ParseError a
|
||||
> wrapParse parser f p src =
|
||||
> either (Left . convParseError src) Right
|
||||
> $ parse (setPos p *> whiteSpace *> scalarExpr <* eof) f src
|
||||
> $ parse (setPos p *> whiteSpace *> parser <* eof) f src
|
||||
|
||||
> -- | Type to represent parse errors.
|
||||
> data ParseError = ParseError
|
||||
> {peErrorString :: String
|
||||
> ,peFilename :: FilePath
|
||||
> ,pePosition :: (Int,Int)
|
||||
> ,peFormattedError :: String
|
||||
> {peErrorString :: String -- ^ contains the error message
|
||||
> ,peFilename :: FilePath -- ^ filename location for the error
|
||||
> ,pePosition :: (Int,Int) -- ^ line number and column number location for the error
|
||||
> ,peFormattedError :: String -- ^ formatted error with the position, error message and source context
|
||||
> } deriving (Eq,Show)
|
||||
|
||||
------------------------------------------------
|
||||
|
@ -57,35 +67,20 @@
|
|||
|
||||
= scalar expressions
|
||||
|
||||
> stringLiteral :: P String
|
||||
> stringLiteral = symbol_ "'" *> manyTill anyChar (symbol_ "'")
|
||||
== literals
|
||||
|
||||
See the stringLiteral lexer below for notes on string literal syntax.
|
||||
|
||||
> estring :: P ScalarExpr
|
||||
> estring = StringLit <$> stringLiteral
|
||||
|
||||
digits
|
||||
digits.[digits][e[+-]digits]
|
||||
[digits].digits[e[+-]digits]
|
||||
digitse[+-]digits
|
||||
|
||||
> number :: P ScalarExpr
|
||||
> number =
|
||||
> NumLit <$> (choice [int
|
||||
> >>= optionSuffix dot
|
||||
> >>= optionSuffix fracts
|
||||
> >>= optionSuffix expon
|
||||
> ,fract "" >>= optionSuffix expon]
|
||||
> <* whiteSpace)
|
||||
> where
|
||||
> int = many1 digit
|
||||
> fract p = dot p >>= fracts
|
||||
> dot p = ((p++) . (:[])) <$> char '.'
|
||||
> fracts p = (p++) <$> int
|
||||
> expon p = do
|
||||
> void $ char 'e'
|
||||
> s <- option "" ((:[]) <$> (char '+' <|> char '-'))
|
||||
> i <- int
|
||||
> return (p ++ "e" ++ s ++ i)
|
||||
> number = NumLit <$> numberLiteral
|
||||
|
||||
parse SQL interval literals, something like
|
||||
interval '5' day (3)
|
||||
or
|
||||
interval '5' month
|
||||
|
||||
> interval :: P ScalarExpr
|
||||
> interval = try (keyword_ "interval") >>
|
||||
|
@ -97,40 +92,40 @@ digitse[+-]digits
|
|||
> literal :: P ScalarExpr
|
||||
> literal = number <|> estring <|> interval
|
||||
|
||||
> identifierString :: P String
|
||||
> identifierString = do
|
||||
> s <- (:) <$> letterOrUnderscore
|
||||
> <*> many letterDigitOrUnderscore <* whiteSpace
|
||||
> guard (s `notElem` blacklist)
|
||||
> return s
|
||||
> where
|
||||
> letterOrUnderscore = char '_' <|> letter
|
||||
> letterDigitOrUnderscore = char '_' <|> alphaNum
|
||||
== identifiers
|
||||
|
||||
> blacklist :: [String]
|
||||
> blacklist = ["as", "from", "where", "having", "group", "order"
|
||||
> ,"inner", "left", "right", "full", "natural", "join"
|
||||
> ,"on", "using", "when", "then", "case", "end", "order"
|
||||
> ,"limit", "offset", "in"
|
||||
> ,"except", "intersect", "union"]
|
||||
|
||||
TODO: talk about what must be in the blacklist, and what doesn't need
|
||||
to be.
|
||||
Uses the identifierString 'lexer'. See this function for notes on identifiers.
|
||||
|
||||
> identifier :: P ScalarExpr
|
||||
> identifier = Iden <$> identifierString
|
||||
|
||||
Identifier with one dot in it. This should be extended to any amount
|
||||
of dots.
|
||||
|
||||
> dottedIden :: P ScalarExpr
|
||||
> dottedIden = Iden2 <$> identifierString
|
||||
> <*> (symbol "." *> identifierString)
|
||||
|
||||
== star
|
||||
|
||||
used in select *, select x.*, and agg(*) variations.
|
||||
|
||||
> star :: P ScalarExpr
|
||||
> star = choice [Star <$ symbol "*"
|
||||
> ,Star2 <$> (identifierString <* symbol "." <* symbol "*")]
|
||||
|
||||
== function application, aggregates and windows
|
||||
|
||||
> app :: P ScalarExpr
|
||||
> app = do
|
||||
this represents anything which syntactically looks like regular C
|
||||
function application: an identifier, parens with comma sep scalar
|
||||
expression arguments.
|
||||
|
||||
The parsing for the aggregate extensions is here as well:
|
||||
|
||||
aggregate([all|distinct] args [order by orderitems])
|
||||
|
||||
> aggOrApp :: P ScalarExpr
|
||||
> aggOrApp = do
|
||||
> i <- identifierString
|
||||
> _ <- symbol "("
|
||||
> d <- try duplicates
|
||||
|
@ -143,6 +138,19 @@ to be.
|
|||
> return $ App i es
|
||||
> _ -> return $ AggregateApp i d es (fromMaybe [] od)
|
||||
|
||||
> duplicates :: P (Maybe Duplicates)
|
||||
> duplicates = optionMaybe $ try $
|
||||
> choice [All <$ keyword_ "all"
|
||||
> ,Distinct <$ keyword "distinct"]
|
||||
|
||||
parse a window call as a suffix of a regular function call
|
||||
|
||||
this looks like this:
|
||||
|
||||
functioncall(args) over ([partition by ids] [order by orderitems])
|
||||
|
||||
No support for explicit frames yet.
|
||||
|
||||
> windowSuffix :: ScalarExpr -> P ScalarExpr
|
||||
> windowSuffix e@(App f es) =
|
||||
> choice [try (keyword_ "over")
|
||||
|
@ -157,6 +165,11 @@ to be.
|
|||
|
||||
> windowSuffix e = return e
|
||||
|
||||
> app :: P ScalarExpr
|
||||
> app = aggOrApp >>= windowSuffix
|
||||
|
||||
== case expression
|
||||
|
||||
> scase :: P ScalarExpr
|
||||
> scase =
|
||||
> Case <$> (try (keyword_ "case") *> optionMaybe (try scalarExpr'))
|
||||
|
@ -167,6 +180,15 @@ to be.
|
|||
> swhen = keyword_ "when" *>
|
||||
> ((,) <$> scalarExpr' <*> (keyword_ "then" *> scalarExpr'))
|
||||
|
||||
== miscellaneous keyword operators
|
||||
|
||||
These are keyword operators which don't look like normal prefix,
|
||||
postfix or infix binary operators. They mostly look like function
|
||||
application but with keywords in the argument list instead of commas
|
||||
to separate the arguments.
|
||||
|
||||
cast: cast(expr as type)
|
||||
|
||||
> cast :: P ScalarExpr
|
||||
> cast = parensCast <|> prefixCast
|
||||
> where
|
||||
|
@ -176,12 +198,18 @@ to be.
|
|||
> prefixCast = try (CastOp <$> typeName
|
||||
> <*> stringLiteral)
|
||||
|
||||
extract(id from expr)
|
||||
|
||||
> extract :: P ScalarExpr
|
||||
> extract = try (keyword_ "extract") >>
|
||||
> parens (makeOp <$> identifierString
|
||||
> <*> (keyword_ "from" *> scalarExpr'))
|
||||
> where makeOp n e = SpecialOp "extract" [Iden n, e]
|
||||
|
||||
substring(x from expr to expr)
|
||||
|
||||
todo: also support substring(x from expr)
|
||||
|
||||
> substring :: P ScalarExpr
|
||||
> substring = try (keyword_ "substring") >>
|
||||
> parens (makeOp <$> scalarExpr'
|
||||
|
@ -190,6 +218,10 @@ to be.
|
|||
> )
|
||||
> where makeOp a b c = SpecialOp "substring" [a,b,c]
|
||||
|
||||
in: two variations:
|
||||
a in (expr0, expr1, ...)
|
||||
a in (queryexpr)
|
||||
|
||||
> inSuffix :: ScalarExpr -> P ScalarExpr
|
||||
> inSuffix e =
|
||||
> In <$> inty
|
||||
|
@ -201,6 +233,19 @@ to be.
|
|||
> inty = try $ choice [True <$ keyword_ "in"
|
||||
> ,False <$ keyword_ "not" <* keyword_ "in"]
|
||||
|
||||
between:
|
||||
expr between expr and expr
|
||||
|
||||
There is a complication when parsing between - when parsing the second
|
||||
expression it is ambiguous when you hit an 'and' whether it is a
|
||||
binary operator or part of the between. This code follows what
|
||||
postgres does, which might be standard across SQL implementations,
|
||||
which is that you can't have a binary and operator in the middle
|
||||
expression in a between unless it is wrapped in parens. The 'bExpr
|
||||
parsing' is used to create alternative scalar expression parser which
|
||||
is identical to the normal one expect it doesn't recognise the binary
|
||||
and operator. This is the call to scalarExpr'' True.
|
||||
|
||||
> betweenSuffix :: ScalarExpr -> P ScalarExpr
|
||||
> betweenSuffix e =
|
||||
> makeOp <$> opName
|
||||
|
@ -213,6 +258,9 @@ to be.
|
|||
> ,"not between" <$ keyword_ "not" <* keyword_ "between"]
|
||||
> makeOp n a b c = SpecialOp n [a,b,c]
|
||||
|
||||
subquery expression:
|
||||
[exists|all|any|some] (queryexpr)
|
||||
|
||||
> subquery :: P ScalarExpr
|
||||
> subquery =
|
||||
> choice
|
||||
|
@ -225,6 +273,9 @@ to be.
|
|||
> ,SqAny <$ keyword_ "any"
|
||||
> ,SqSome <$ keyword_ "some"]
|
||||
|
||||
typename: used in casts. Special cases for the multi keyword typenames
|
||||
that SQL supports.
|
||||
|
||||
> typeName :: P TypeName
|
||||
> typeName = choice
|
||||
> [TypeName "double precision"
|
||||
|
@ -233,6 +284,23 @@ to be.
|
|||
> <$ try (keyword_ "character" <* keyword_ "varying")
|
||||
> ,TypeName <$> identifierString]
|
||||
|
||||
== scalar parens
|
||||
|
||||
> sparens :: P ScalarExpr
|
||||
> sparens = Parens <$> parens scalarExpr'
|
||||
|
||||
|
||||
== operator parsing
|
||||
|
||||
The 'regular' operators in this parsing and in the abstract syntax are
|
||||
unary prefix, unary postfix and binary infix operators. The operators
|
||||
can be symbols (a + b), single keywords (a and b) or multiple keywords
|
||||
(a is similar to b).
|
||||
|
||||
First, the list of the regulars operators split by operator type
|
||||
(prefix, postfix, binary) and by symbol/single keyword/ multiple
|
||||
keyword.
|
||||
|
||||
> binOpSymbolNames :: [String]
|
||||
> binOpSymbolNames =
|
||||
> ["=", "<=", ">=", "!=", "<>", "<", ">"
|
||||
|
@ -251,18 +319,32 @@ to be.
|
|||
> ,"is distinct from"
|
||||
> ,"is not distinct from"]
|
||||
|
||||
|
||||
used for between parsing
|
||||
|
||||
> binOpKeywordNamesNoAnd :: [String]
|
||||
> binOpKeywordNamesNoAnd = filter (/="and") binOpKeywordNames
|
||||
|
||||
There aren't any multi keyword prefix operators currently supported.
|
||||
|
||||
> prefixUnOpKeywordNames :: [String]
|
||||
> prefixUnOpKeywordNames = ["not"]
|
||||
|
||||
> prefixUnOpSymbolNames :: [String]
|
||||
> prefixUnOpSymbolNames = ["+", "-"]
|
||||
|
||||
There aren't any single keyword postfix operators currently supported. Maybe all these 'is's can be left factored?
|
||||
|
||||
> postfixOpKeywords :: [String]
|
||||
> postfixOpKeywords = ["is null"
|
||||
> ,"is not null"
|
||||
> ,"is true"
|
||||
> ,"is not true"
|
||||
> ,"is false"
|
||||
> ,"is not false"
|
||||
> ,"is unknown"
|
||||
> ,"is not unknown"]
|
||||
|
||||
The parsers:
|
||||
|
||||
> prefixUnaryOp :: P ScalarExpr
|
||||
> prefixUnaryOp =
|
||||
|
@ -271,31 +353,32 @@ used for between parsing
|
|||
> opSymbol = choice (map (try . symbol) prefixUnOpSymbolNames
|
||||
> ++ map (try . keyword) prefixUnOpKeywordNames)
|
||||
|
||||
> postfixOp :: ScalarExpr -> P ScalarExpr
|
||||
> postfixOp e =
|
||||
> postfixOpSuffix :: ScalarExpr -> P ScalarExpr
|
||||
> postfixOpSuffix e =
|
||||
> try $ choice $ map makeOp opPairs
|
||||
> where
|
||||
> -- could left factor here?
|
||||
> ops = ["is null"
|
||||
> ,"is not null"
|
||||
> ,"is true"
|
||||
> ,"is not true"
|
||||
> ,"is false"
|
||||
> ,"is not false"
|
||||
> ,"is unknown"
|
||||
> ,"is not unknown"]
|
||||
> opPairs = flip map ops $ \o -> (o, words o)
|
||||
> opPairs = flip map postfixOpKeywords $ \o -> (o, words o)
|
||||
> makeOp (o,ws) = try $ PostfixOp o e <$ keywords_ ws
|
||||
> keywords_ = try . mapM_ keyword_
|
||||
|
||||
Wrapper for non 'bExpr' parsing. See the between parser for
|
||||
explanation.
|
||||
|
||||
> scalarExpr' :: P ScalarExpr
|
||||
> scalarExpr' = scalarExpr'' False
|
||||
|
||||
the bexpr is to deal with between x and y
|
||||
The main scalar expression parser which includes the binary operator
|
||||
parsing. All the binary operators are parsed as same precedence and
|
||||
left associativity. This will be fixed in a pass over the abstract
|
||||
syntax, which isn't working at the moment.
|
||||
|
||||
when we are parsing the scalar expr for x, we don't allow and as a
|
||||
binary operator except nested in parens. This is taken from how
|
||||
postgresql handles this
|
||||
TODO:
|
||||
left factor: stuff which starts with identifier
|
||||
|
||||
move app>>=windowSuffix to a separate parser above next to app and
|
||||
windowSuffix themselves
|
||||
|
||||
split out the binary op parsing from this function to above
|
||||
|
||||
> scalarExpr'' :: Bool -> P ScalarExpr
|
||||
> scalarExpr'' bExpr = factor >>= trysuffix
|
||||
|
@ -307,7 +390,7 @@ postgresql handles this
|
|||
> ,substring
|
||||
> ,subquery
|
||||
> ,prefixUnaryOp
|
||||
> ,try app >>= windowSuffix
|
||||
> ,try app
|
||||
> ,try dottedIden
|
||||
> ,identifier
|
||||
> ,sparens]
|
||||
|
@ -316,7 +399,7 @@ postgresql handles this
|
|||
> [BinOp <$> opSymbol <*> return e0 <*> factor
|
||||
> ,inSuffix e0
|
||||
> ,betweenSuffix e0
|
||||
> ,postfixOp e0
|
||||
> ,postfixOpSuffix e0
|
||||
> ] >>= trysuffix
|
||||
> opSymbol = choice
|
||||
> (map (try . symbol) binOpSymbolNames
|
||||
|
@ -327,71 +410,12 @@ postgresql handles this
|
|||
> else binOpKeywordNames))
|
||||
> keywords ks = unwords <$> mapM keyword ks
|
||||
|
||||
> sparens :: P ScalarExpr
|
||||
> sparens = Parens <$> parens scalarExpr'
|
||||
TODO: create the fixity adjuster. This should take a list of operators
|
||||
with precendence and associativity and adjust a scalar expr tree to
|
||||
match these. It shouldn't attempt to descend into scalar expressions
|
||||
inside nested query exprs in subqueries.
|
||||
|
||||
attempt to fix the precedence and associativity. Doesn't work
|
||||
|
||||
> toHaskell :: ScalarExpr -> HSE.Exp
|
||||
> toHaskell e = case e of
|
||||
> Iden i -> HSE.Var $ HSE.UnQual $ HSE.Ident i
|
||||
> StringLit l -> HSE.Lit $ HSE.String $ 's':l
|
||||
> NumLit l -> HSE.Lit $ HSE.String $ 'n':l
|
||||
> App n es -> HSE.App (toHaskell $ Iden n) $ ltoh es
|
||||
> Cast e0 (TypeName tn) -> toHaskell $ App ("cast:" ++ tn) [e0]
|
||||
> CastOp (TypeName tn) s -> toHaskell $ App ("castop:" ++ tn) [StringLit s]
|
||||
> --Op n [e0,e1] -> HSE.InfixApp (toHaskell e0)
|
||||
> -- (HSE.QVarOp $ HSE.UnQual $ HSE.Ident n)
|
||||
> -- (toHaskell e1)
|
||||
> --Op o [e0] -> toHaskell $ App ("unary:" ++ o) [e0]
|
||||
> --Op {} -> error $ "bad args to operator " ++ groom e
|
||||
> Star -> HSE.Var $ HSE.UnQual $ HSE.Ident "*"
|
||||
> Iden2 a b -> HSE.Var $ HSE.Qual (HSE.ModuleName a) (HSE.Ident b)
|
||||
> Star2 q -> HSE.Var $ HSE.Qual (HSE.ModuleName q) (HSE.Ident "*")
|
||||
> Parens e0 -> HSE.Paren $ toHaskell e0
|
||||
> -- map the two maybes to lists with either 0 or 1 element
|
||||
> Case v ts el -> HSE.App (toHaskell $ Iden "$case")
|
||||
> (HSE.List [ltoh $ maybeToList v
|
||||
> ,HSE.List $ map (ltoh . (\(a,b) -> [a,b])) ts
|
||||
> ,ltoh $ maybeToList el])
|
||||
> _ -> error "please fix me 1"
|
||||
> where
|
||||
> ltoh = HSE.List . map toHaskell
|
||||
|
||||
> toSql :: HSE.Exp -> ScalarExpr
|
||||
> toSql e = case e of
|
||||
> HSE.Var (HSE.UnQual (HSE.Ident "*")) -> Star
|
||||
> HSE.Var (HSE.Qual (HSE.ModuleName q) (HSE.Ident "*")) -> Star2 q
|
||||
> HSE.Var (HSE.Qual (HSE.ModuleName a) (HSE.Ident b)) -> Iden2 a b
|
||||
> HSE.Var (HSE.UnQual (HSE.Ident i)) -> Iden i
|
||||
> HSE.Lit (HSE.String ('s':l)) -> StringLit l
|
||||
> HSE.Lit (HSE.String ('n':l)) -> NumLit l
|
||||
> HSE.App (HSE.Var (HSE.UnQual (HSE.Ident "$case"))) (HSE.List [v,ts,el]) ->
|
||||
> Case (ltom v) (pairs ts) (ltom el)
|
||||
> {-HSE.App (HSE.Var (HSE.UnQual (HSE.Ident x)))
|
||||
> (HSE.List [ea])
|
||||
> | "unary:" `isPrefixOf` x ->
|
||||
> Op (drop 6 x) [toSql ea]
|
||||
> | "cast:" `isPrefixOf` x ->
|
||||
> Cast (toSql ea) (TypeName $ drop 5 x)-}
|
||||
> HSE.App (HSE.Var (HSE.UnQual (HSE.Ident x)))
|
||||
> (HSE.List [HSE.Lit (HSE.String ('s':ea))])
|
||||
> | "castop:" `isPrefixOf` x ->
|
||||
> CastOp (TypeName $ drop 7 x) ea
|
||||
> HSE.App (HSE.Var (HSE.UnQual (HSE.Ident i)))
|
||||
> (HSE.List es) -> App i $ map toSql es
|
||||
> {-HSE.InfixApp e0 (HSE.QVarOp (HSE.UnQual (HSE.Ident n))) e1 ->
|
||||
> Op n [toSql e0, toSql e1]-}
|
||||
> HSE.Paren e0 -> Parens $ toSql e0
|
||||
> _ -> error $ "unsupported haskell " ++ groom e
|
||||
> where
|
||||
> ltom (HSE.List []) = Nothing
|
||||
> ltom (HSE.List [ex]) = Just $ toSql ex
|
||||
> ltom ex = error $ "unsupported haskell " ++ groom ex
|
||||
> pairs (HSE.List l) = map (\(HSE.List [a,b]) -> (toSql a, toSql b)) l
|
||||
> pairs ex = error $ "unsupported haskell " ++ groom ex
|
||||
|
||||
> sqlFixities :: [HSE.Fixity]
|
||||
> {-sqlFixities :: [HSE.Fixity]
|
||||
> sqlFixities = HSE.infixl_ 9 ["*", "/"]
|
||||
> ++ HSE.infixl_ 8 ["+", "-"]
|
||||
> ++ HSE.infixl_ 6 ["<=",">=","!=","<>","||", "like"]
|
||||
|
@ -399,25 +423,30 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> ++ HSE.infixr_ 3 ["="]
|
||||
> ++ HSE.infixr_ 2 ["or"]
|
||||
> ++ HSE.infixl_ 1 ["and"]
|
||||
> ++ HSE.infixl_ 0 ["or"]
|
||||
> ++ HSE.infixl_ 0 ["or"]-}
|
||||
|
||||
> _fixFixity :: ScalarExpr -> ScalarExpr
|
||||
> _fixFixity se = runIdentity $
|
||||
> toSql <$> HSE.applyFixities sqlFixities (toHaskell se)
|
||||
> fixFixities :: ScalarExpr -> ScalarExpr
|
||||
> fixFixities = id
|
||||
|
||||
The scalarExpr wrapper. The idea is that directly nested scalar
|
||||
expressions use the scalarExpr' parser, then other code uses the
|
||||
scalarExpr parser and then everyone gets the fixity fixes and it's
|
||||
easy to ensure that this fix is only applied once to each scalar
|
||||
expression tree (for efficiency and code clarity).
|
||||
|
||||
> scalarExpr :: P ScalarExpr
|
||||
> scalarExpr =
|
||||
> choice [try star
|
||||
> ,{-fixFixity <$>-} scalarExpr']
|
||||
> ,fixFixities <$> scalarExpr']
|
||||
|
||||
-------------------------------------------------
|
||||
|
||||
= query expressions
|
||||
|
||||
> duplicates :: P (Maybe Duplicates)
|
||||
> duplicates = optionMaybe $ try $
|
||||
> choice [All <$ keyword_ "all"
|
||||
> ,Distinct <$ keyword "distinct"]
|
||||
TODO: maybe refactor all the parsers. A parser wouldn't usually be
|
||||
optional or use try itself. The caller could do this.
|
||||
|
||||
== select lists
|
||||
|
||||
> selectItem :: P (Maybe String, ScalarExpr)
|
||||
> selectItem = flip (,) <$> scalarExpr <*> optionMaybe (try alias)
|
||||
|
@ -426,6 +455,22 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> selectList :: P [(Maybe String,ScalarExpr)]
|
||||
> selectList = commaSep1 selectItem
|
||||
|
||||
== from
|
||||
|
||||
this parser should be refactored, it is very unclear. Here is the
|
||||
rough grammar
|
||||
|
||||
tref
|
||||
(cross | [natural]
|
||||
([inner]
|
||||
| left [outer]
|
||||
| right [outer]
|
||||
| full [outer]
|
||||
)
|
||||
join tref
|
||||
[on expr | using (...)]
|
||||
|
||||
|
||||
> from :: P [TableRef]
|
||||
> from = option [] (try (keyword_ "from") *> commaSep1 tref)
|
||||
> where
|
||||
|
@ -467,6 +512,14 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> a2 = optionMaybe (try $ parens (commaSep1 identifierString))
|
||||
> in option j (JoinAlias j <$> try a1 <*> try a2)
|
||||
|
||||
== simple other parts
|
||||
|
||||
Parsers for where, group by, having, order by and limit, which are
|
||||
pretty trivial.
|
||||
|
||||
Here is a helper for parsing a few parts of the query expr (currently
|
||||
where, having, limit, offset).
|
||||
|
||||
> optionalScalarExpr :: String -> P (Maybe ScalarExpr)
|
||||
> optionalScalarExpr k = optionMaybe (try (keyword_ k) *> scalarExpr)
|
||||
|
||||
|
@ -494,6 +547,8 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> offset :: P (Maybe ScalarExpr)
|
||||
> offset = optionalScalarExpr "offset"
|
||||
|
||||
== common table expressions
|
||||
|
||||
> with :: P QueryExpr
|
||||
> with = try (keyword_ "with") >>
|
||||
> With <$> commaSep1 withQuery <*> queryExpr
|
||||
|
@ -502,6 +557,11 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> (,) <$> (identifierString <* optional (try $ keyword_ "as"))
|
||||
> <*> parens queryExpr
|
||||
|
||||
== query expression
|
||||
|
||||
This parser parses any query expression variant: normal select, cte,
|
||||
and union, etc..
|
||||
|
||||
> queryExpr :: P QueryExpr
|
||||
> queryExpr =
|
||||
> choice [select >>= queryExprSuffix, with]
|
||||
|
@ -533,17 +593,113 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> >>= queryExprSuffix
|
||||
> ,return qe]
|
||||
|
||||
wrapper for query expr which ignores optional trailing semicolon.
|
||||
|
||||
> topLevelQueryExpr :: P QueryExpr
|
||||
> topLevelQueryExpr =
|
||||
> queryExpr <* (choice [try $ symbol_ ";", return()])
|
||||
|
||||
wrapper to parse a series of query exprs from a single source. They
|
||||
must be separated by semicolon, but for the last expression, the
|
||||
trailing semicolon is optional.
|
||||
|
||||
> queryExprs :: P [QueryExpr]
|
||||
> queryExprs = do
|
||||
> qe <- queryExpr
|
||||
> choice [[qe] <$ eof
|
||||
> ,symbol ";" *>
|
||||
> ,symbol_ ";" *>
|
||||
> choice [[qe] <$ eof
|
||||
> ,(:) qe <$> queryExprs]]
|
||||
|
||||
------------------------------------------------
|
||||
|
||||
= helper parsers
|
||||
= lexing parsers
|
||||
|
||||
The lexing is a bit 'virtual', in the usual parsec style.
|
||||
|
||||
> symbol :: String -> P String
|
||||
> symbol s = string s
|
||||
> -- <* notFollowedBy (oneOf "+-/*<>=!|")
|
||||
> <* whiteSpace
|
||||
|
||||
> symbol_ :: String -> P ()
|
||||
> symbol_ s = symbol s *> return ()
|
||||
|
||||
> keyword :: String -> P String
|
||||
> keyword s = (map toLower <$> string s)
|
||||
> <* notFollowedBy (char '_' <|> alphaNum)
|
||||
> <* whiteSpace
|
||||
|
||||
> keyword_ :: String -> P ()
|
||||
> keyword_ s = keyword s *> return ()
|
||||
|
||||
Identifiers are very simple at the moment: start with a letter or
|
||||
underscore, and continue with letter, underscore or digit. It doesn't
|
||||
support quoting other other sorts of identifiers yet. There is a
|
||||
blacklist of keywords which aren't supported as identifiers.
|
||||
|
||||
> identifierString :: P String
|
||||
> identifierString = do
|
||||
> s <- (:) <$> letterOrUnderscore
|
||||
> <*> many letterDigitOrUnderscore <* whiteSpace
|
||||
> guard (s `notElem` blacklist)
|
||||
> return s
|
||||
> where
|
||||
> letterOrUnderscore = char '_' <|> letter
|
||||
> letterDigitOrUnderscore = char '_' <|> alphaNum
|
||||
|
||||
> blacklist :: [String]
|
||||
> blacklist = ["as", "from", "where", "having", "group", "order"
|
||||
> ,"inner", "left", "right", "full", "natural", "join"
|
||||
> ,"on", "using", "when", "then", "case", "end", "order"
|
||||
> ,"limit", "offset", "in"
|
||||
> ,"except", "intersect", "union"]
|
||||
|
||||
TODO: talk about what must be in the blacklist, and what doesn't need
|
||||
to be.
|
||||
|
||||
String literals: limited at the moment, no escaping \' or other
|
||||
variations.
|
||||
|
||||
> stringLiteral :: P String
|
||||
> stringLiteral = char '\'' *> manyTill anyChar (symbol_ "'")
|
||||
|
||||
number literals
|
||||
|
||||
here is the rough grammar target:
|
||||
|
||||
digits
|
||||
digits.[digits][e[+-]digits]
|
||||
[digits].digits[e[+-]digits]
|
||||
digitse[+-]digits
|
||||
|
||||
numbers are parsed to strings, not to a numeric type. This is to aoivd
|
||||
making a decision on how to represent numbers, the client code can
|
||||
make this choice.
|
||||
|
||||
> numberLiteral :: P String
|
||||
> numberLiteral =
|
||||
> choice [int
|
||||
> >>= optionSuffix dot
|
||||
> >>= optionSuffix fracts
|
||||
> >>= optionSuffix expon
|
||||
> ,fract "" >>= optionSuffix expon]
|
||||
> <* whiteSpace
|
||||
> where
|
||||
> int = many1 digit
|
||||
> fract p = dot p >>= fracts
|
||||
> -- todo: use some helper functions to improve the
|
||||
> -- unclear pointfree code here
|
||||
> dot p = ((p++) . (:[])) <$> char '.'
|
||||
> fracts p = (p++) <$> int
|
||||
> expon p = do
|
||||
> void $ char 'e'
|
||||
> s <- option "" ((:[]) <$> (char '+' <|> char '-'))
|
||||
> i <- int
|
||||
> return (p ++ "e" ++ s ++ i)
|
||||
|
||||
|
||||
whitespace parser which skips comments also
|
||||
|
||||
> whiteSpace :: P ()
|
||||
> whiteSpace =
|
||||
|
@ -561,6 +717,8 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> -- use many1 so we can more easily avoid non terminating loops
|
||||
> simpleWhiteSpace = void $ many1 (oneOf " \t\n")
|
||||
|
||||
= generic parser helpers
|
||||
|
||||
> optionSuffix :: (a -> P a) -> a -> P a
|
||||
> optionSuffix p a = option a (p a)
|
||||
|
||||
|
@ -571,22 +729,6 @@ attempt to fix the precedence and associativity. Doesn't work
|
|||
> commaSep = (`sepBy` symbol_ ",")
|
||||
|
||||
|
||||
> symbol :: String -> P String
|
||||
> symbol s = string s
|
||||
> -- <* notFollowedBy (oneOf "+-/*<>=!|")
|
||||
> <* whiteSpace
|
||||
|
||||
> symbol_ :: String -> P ()
|
||||
> symbol_ s = symbol s *> return ()
|
||||
|
||||
> keyword :: String -> P String
|
||||
> keyword s = (map toLower <$> string s)
|
||||
> <* notFollowedBy (char '_' <|> alphaNum)
|
||||
> <* whiteSpace
|
||||
|
||||
> keyword_ :: String -> P ()
|
||||
> keyword_ s = keyword s *> return ()
|
||||
|
||||
> commaSep1 :: P a -> P [a]
|
||||
> commaSep1 = (`sepBy1` symbol_ ",")
|
||||
|
||||
|
|
|
@ -11,8 +11,7 @@
|
|||
> case args of
|
||||
> [f] -> do
|
||||
> src <- readFile f
|
||||
> either (error . peFormattedError
|
||||
> )
|
||||
> either (error . peFormattedError)
|
||||
> (putStrLn . intercalate "\n" . map prettyQueryExpr)
|
||||
> $ parseQueryExprs f Nothing src
|
||||
> _ -> error "please pass filename to prettify"
|
||||
|
|
2
TODO
2
TODO
|
@ -2,11 +2,13 @@
|
|||
|
||||
first release:
|
||||
|
||||
tests for the queryexprs parser
|
||||
check the pretty printer on the tpch queries
|
||||
fix the fixity issue
|
||||
add automated tests to cabal
|
||||
do code documentation and haddock
|
||||
check the order of exports, imports and functions/cases in the files
|
||||
fix up the import namespaces/explicit names nicelyx
|
||||
do some tests for parse errors?
|
||||
website with haddock and table of parsing tests
|
||||
|
||||
|
|
|
@ -27,10 +27,8 @@ library
|
|||
-- other-modules:
|
||||
-- other-extensions:
|
||||
build-depends: base >=4.6 && <4.7,
|
||||
groom >=0.1 && <0.2,
|
||||
parsec >=3.1 && <3.2,
|
||||
mtl >=2.1 && <2.2,
|
||||
haskell-src-exts >=1.14 && <1.15,
|
||||
pretty >= 1.1 && < 1.2
|
||||
-- hs-source-dirs:
|
||||
default-language: Haskell2010
|
||||
|
|
Loading…
Reference in a new issue