1
Fork 0
simple-sql-parser/Language/SQL/SimpleSQL/Parser.lhs

788 lines
24 KiB
Plaintext
Raw Normal View History

2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
The parser code
2013-12-13 11:39:26 +01:00
2013-12-13 15:04:48 +01:00
> module Language.SQL.SimpleSQL.Parser
> (parseQueryExpr
> ,parseScalarExpr
> ,parseQueryExprs
2013-12-13 18:21:44 +01:00
> ,ParseError(..)) where
2013-12-13 11:39:26 +01:00
> import Control.Monad.Identity
> import Control.Applicative hiding (many, (<|>), optional)
> import Data.Maybe
> import Data.Char
2013-12-14 09:55:44 +01:00
> import Text.Parsec hiding (ParseError)
> import qualified Text.Parsec as P
2013-12-13 11:39:26 +01:00
2013-12-13 15:04:48 +01:00
> import Language.SQL.SimpleSQL.Syntax
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
The public api functions.
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
> -- | Parses a query expr, trailing semicolon optional.
> parseQueryExpr :: FilePath -- ^ filename to use in errors
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
> -> String -- ^ the sql source to parse
2013-12-13 18:21:44 +01:00
> -> Either ParseError QueryExpr
2013-12-14 09:55:44 +01:00
> parseQueryExpr = wrapParse topLevelQueryExpr
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
> parseQueryExprs :: FilePath -- ^ filename to use in errors
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
> -> String -- ^ the sql source to parse
> -> Either ParseError [QueryExpr]
2013-12-14 09:55:44 +01:00
> parseQueryExprs = wrapParse queryExprs
2013-12-14 09:55:44 +01:00
> parseScalarExpr :: FilePath -- ^ filename to use in errors
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
> -> String -- ^ the sql source to parse
2013-12-13 18:21:44 +01:00
> -> Either ParseError ScalarExpr
2013-12-14 09:55:44 +01:00
> parseScalarExpr = wrapParse scalarExpr
This helper function takes the parser given and:
sets the position when parsing
automatically skips leading whitespace
checks the parser parses all the input using eof
converts the error return to the nice wrapper
> wrapParse :: P a
> -> FilePath
> -> Maybe (Int,Int)
> -> String
> -> Either ParseError a
> wrapParse parser f p src =
2013-12-13 18:21:44 +01:00
> either (Left . convParseError src) Right
2013-12-14 09:55:44 +01:00
> $ parse (setPos p *> whiteSpace *> parser <* eof) f src
2013-12-13 18:21:44 +01:00
2013-12-14 09:55:44 +01:00
> -- | Type to represent parse errors.
2013-12-13 18:21:44 +01:00
> data ParseError = ParseError
2013-12-14 09:55:44 +01:00
> {peErrorString :: String -- ^ contains the error message
> ,peFilename :: FilePath -- ^ filename location for the error
> ,pePosition :: (Int,Int) -- ^ line number and column number location for the error
> ,peFormattedError :: String -- ^ formatted error with the position, error message and source context
2013-12-13 19:01:57 +01:00
> } deriving (Eq,Show)
2013-12-14 00:01:07 +01:00
2013-12-14 00:14:23 +01:00
------------------------------------------------
2013-12-13 18:21:44 +01:00
2013-12-13 11:39:26 +01:00
> type P a = ParsecT String () Identity a
= scalar expressions
2013-12-14 09:55:44 +01:00
== literals
See the stringLiteral lexer below for notes on string literal syntax.
2013-12-13 11:39:26 +01:00
> estring :: P ScalarExpr
> estring = StringLit <$> stringLiteral
> number :: P ScalarExpr
2013-12-14 09:55:44 +01:00
> number = NumLit <$> numberLiteral
parse SQL interval literals, something like
interval '5' day (3)
or
interval '5' month
2013-12-13 11:39:26 +01:00
> interval :: P ScalarExpr
> interval = try (keyword_ "interval") >>
> IntervalLit
> <$> stringLiteral
> <*> identifierString
> <*> optionMaybe (try $ parens integerLiteral)
2013-12-13 11:39:26 +01:00
> literal :: P ScalarExpr
> literal = number <|> estring <|> interval
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
== identifiers
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
Uses the identifierString 'lexer'. See this function for notes on identifiers.
2013-12-13 11:39:26 +01:00
> identifier :: P ScalarExpr
> identifier = Iden <$> identifierString
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
Identifier with one dot in it. This should be extended to any amount
of dots.
> dottedIden :: P ScalarExpr
> dottedIden = Iden2 <$> identifierString
2013-12-13 20:13:36 +01:00
> <*> (symbol "." *> identifierString)
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
== star
used in select *, select x.*, and agg(*) variations.
2013-12-13 11:39:26 +01:00
> star :: P ScalarExpr
> star = choice [Star <$ symbol "*"
> ,Star2 <$> (identifierString <* symbol "." <* symbol "*")]
2013-12-14 09:55:44 +01:00
== function application, aggregates and windows
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
this represents anything which syntactically looks like regular C
function application: an identifier, parens with comma sep scalar
expression arguments.
The parsing for the aggregate extensions is here as well:
aggregate([all|distinct] args [order by orderitems])
> aggOrApp :: P ScalarExpr
> aggOrApp = do
> i <- identifierString
> _ <- symbol "("
> d <- try duplicates
> es <- choice [(:[]) <$> try star
> ,commaSep scalarExpr']
> od <- try $ optionMaybe orderBy
> _ <- symbol ")"
> case (d,od) of
> (Nothing,Nothing) ->
> return $ App i es
> _ -> return $ AggregateApp i d es (fromMaybe [] od)
2013-12-14 09:55:44 +01:00
> duplicates :: P (Maybe Duplicates)
> duplicates = optionMaybe $ try $
> choice [All <$ keyword_ "all"
> ,Distinct <$ keyword "distinct"]
parse a window call as a suffix of a regular function call
this looks like this:
functioncall(args) over ([partition by ids] [order by orderitems])
No support for explicit frames yet.
The convention in this file is that the 'Suffix', erm, suffix on
parser names means that they have been left factored.
2013-12-13 22:31:36 +01:00
> windowSuffix :: ScalarExpr -> P ScalarExpr
> windowSuffix (App f es) =
> try (keyword_ "over")
> *> parens (WindowApp f es
> <$> option [] partitionBy
> <*> option [] orderBy)
2013-12-13 22:31:36 +01:00
> where
> partitionBy = try (keyword_ "partition") >>
> keyword_ "by" >>
> commaSep1 scalarExpr'
> windowSuffix _ = fail ""
2013-12-13 22:31:36 +01:00
TODO: review all the suffix functions, use the optionSuffix combinator
to simplify the suffix parsers
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
> app :: P ScalarExpr
> app = aggOrApp >>= optionSuffix windowSuffix
2013-12-14 09:55:44 +01:00
== case expression
2013-12-13 11:39:26 +01:00
> scase :: P ScalarExpr
> scase =
> Case <$> (try (keyword_ "case") *> optionMaybe (try scalarExpr'))
> <*> many1 swhen
> <*> optionMaybe (try (keyword_ "else") *> scalarExpr')
> <* keyword_ "end"
> where
> swhen = keyword_ "when" *>
> ((,) <$> scalarExpr' <*> (keyword_ "then" *> scalarExpr'))
2013-12-14 09:55:44 +01:00
== miscellaneous keyword operators
These are keyword operators which don't look like normal prefix,
postfix or infix binary operators. They mostly look like function
application but with keywords in the argument list instead of commas
to separate the arguments.
cast: cast(expr as type)
> cast :: P ScalarExpr
> cast = parensCast <|> prefixCast
> where
> parensCast = try (keyword_ "cast") >>
2013-12-13 20:13:36 +01:00
> parens (Cast <$> scalarExpr'
> <*> (keyword_ "as" *> typeName))
> prefixCast = try (CastOp <$> typeName
> <*> stringLiteral)
2013-12-14 09:55:44 +01:00
extract(id from expr)
2013-12-13 21:38:43 +01:00
> extract :: P ScalarExpr
> extract = try (keyword_ "extract") >>
> parens (makeOp <$> identifierString
2013-12-14 00:14:23 +01:00
> <*> (keyword_ "from" *> scalarExpr'))
2013-12-13 21:38:43 +01:00
> where makeOp n e = SpecialOp "extract" [Iden n, e]
2013-12-14 09:55:44 +01:00
substring(x from expr to expr)
todo: also support substring(x from expr)
> substring :: P ScalarExpr
> substring = try (keyword_ "substring") >>
> parens (makeOp <$> scalarExpr'
2013-12-14 00:14:23 +01:00
> <*> (keyword_ "from" *> scalarExpr')
> <*> (keyword_ "for" *> scalarExpr')
> )
> where makeOp a b c = SpecialOp "substring" [a,b,c]
2013-12-14 09:55:44 +01:00
in: two variations:
a in (expr0, expr1, ...)
a in (queryexpr)
> inSuffix :: ScalarExpr -> P ScalarExpr
> inSuffix e =
2013-12-14 00:14:23 +01:00
> In <$> inty
> <*> return e
> <*> parens (choice
> [InQueryExpr <$> queryExpr
> ,InList <$> commaSep1 scalarExpr'])
> where
> inty = try $ choice [True <$ keyword_ "in"
> ,False <$ keyword_ "not" <* keyword_ "in"]
2013-12-14 09:55:44 +01:00
between:
expr between expr and expr
There is a complication when parsing between - when parsing the second
expression it is ambiguous when you hit an 'and' whether it is a
binary operator or part of the between. This code follows what
postgres does, which might be standard across SQL implementations,
which is that you can't have a binary and operator in the middle
expression in a between unless it is wrapped in parens. The 'bExpr
parsing' is used to create alternative scalar expression parser which
is identical to the normal one expect it doesn't recognise the binary
and operator. This is the call to scalarExpr'' True.
2013-12-13 20:13:36 +01:00
> betweenSuffix :: ScalarExpr -> P ScalarExpr
> betweenSuffix e =
2013-12-14 00:14:23 +01:00
> makeOp <$> opName
> <*> return e
> <*> scalarExpr'' True
> <*> (keyword_ "and" *> scalarExpr')
2013-12-13 20:13:36 +01:00
> where
> opName = try $ choice
> ["between" <$ keyword_ "between"
> ,"not between" <$ keyword_ "not" <* keyword_ "between"]
> makeOp n a b c = SpecialOp n [a,b,c]
2013-12-13 20:13:36 +01:00
2013-12-14 09:55:44 +01:00
subquery expression:
[exists|all|any|some] (queryexpr)
> subquery :: P ScalarExpr
> subquery =
> choice
> [try $ SubQueryExpr SqSq <$> parens queryExpr
> ,SubQueryExpr <$> try sqkw <*> parens queryExpr]
> where
> sqkw = try $ choice
> [SqExists <$ keyword_ "exists"
> ,SqAll <$ try (keyword_ "all")
> ,SqAny <$ keyword_ "any"
> ,SqSome <$ keyword_ "some"]
2013-12-14 09:55:44 +01:00
typename: used in casts. Special cases for the multi keyword typenames
that SQL supports.
> typeName :: P TypeName
> typeName = choice
> [TypeName "double precision"
> <$ try (keyword_ "double" <* keyword_ "precision")
> ,TypeName "character varying"
> <$ try (keyword_ "character" <* keyword_ "varying")
> ,TypeName <$> identifierString]
2013-12-14 09:55:44 +01:00
== scalar parens
> sparens :: P ScalarExpr
> sparens = Parens <$> parens scalarExpr'
== operator parsing
The 'regular' operators in this parsing and in the abstract syntax are
unary prefix, unary postfix and binary infix operators. The operators
can be symbols (a + b), single keywords (a and b) or multiple keywords
(a is similar to b).
First, the list of the regulars operators split by operator type
(prefix, postfix, binary) and by symbol/single keyword/ multiple
keyword.
> binOpSymbolNames :: [String]
2013-12-14 00:14:23 +01:00
> binOpSymbolNames =
> ["=", "<=", ">=", "!=", "<>", "<", ">"
> ,"*", "/", "+", "-"
> ,"||"]
> binOpKeywordNames :: [String]
2013-12-14 00:14:23 +01:00
> binOpKeywordNames = ["and", "or", "like", "overlaps"]
> binOpMultiKeywordNames :: [[String]]
> binOpMultiKeywordNames = map words
> ["not like"
> ,"not similar"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
2013-12-13 20:13:36 +01:00
used for between parsing
> binOpKeywordNamesNoAnd :: [String]
> binOpKeywordNamesNoAnd = filter (/="and") binOpKeywordNames
2013-12-14 09:55:44 +01:00
There aren't any multi keyword prefix operators currently supported.
> prefixUnOpKeywordNames :: [String]
> prefixUnOpKeywordNames = ["not"]
2013-12-13 19:01:57 +01:00
> prefixUnOpSymbolNames :: [String]
> prefixUnOpSymbolNames = ["+", "-"]
2013-12-13 19:01:57 +01:00
2013-12-14 09:55:44 +01:00
There aren't any single keyword postfix operators currently supported. Maybe all these 'is's can be left factored?
> postfixOpKeywords :: [String]
> postfixOpKeywords = ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
The parsers:
2013-12-13 19:01:57 +01:00
> prefixUnaryOp :: P ScalarExpr
> prefixUnaryOp =
2013-12-13 21:25:22 +01:00
> PrefixOp <$> opSymbol <*> scalarExpr'
2013-12-13 19:01:57 +01:00
> where
> opSymbol = choice (map (try . symbol) prefixUnOpSymbolNames
> ++ map (try . keyword) prefixUnOpKeywordNames)
2013-12-14 09:55:44 +01:00
> postfixOpSuffix :: ScalarExpr -> P ScalarExpr
> postfixOpSuffix e =
> try $ choice $ map makeOp opPairs
> where
2013-12-14 09:55:44 +01:00
> opPairs = flip map postfixOpKeywords $ \o -> (o, words o)
2013-12-14 00:14:23 +01:00
> makeOp (o,ws) = try $ PostfixOp o e <$ keywords_ ws
2013-12-13 21:25:22 +01:00
> keywords_ = try . mapM_ keyword_
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
Wrapper for non 'bExpr' parsing. See the between parser for
explanation.
2013-12-13 11:39:26 +01:00
> scalarExpr' :: P ScalarExpr
2013-12-13 20:13:36 +01:00
> scalarExpr' = scalarExpr'' False
2013-12-14 09:55:44 +01:00
The main scalar expression parser which includes the binary operator
parsing. All the binary operators are parsed as same precedence and
left associativity. This will be fixed in a pass over the abstract
syntax, which isn't working at the moment.
TODO:
left factor: stuff which starts with identifier
move app>>=windowSuffix to a separate parser above next to app and
windowSuffix themselves
2013-12-13 20:13:36 +01:00
2013-12-14 09:55:44 +01:00
split out the binary op parsing from this function to above
2013-12-13 20:13:36 +01:00
> scalarExpr'' :: Bool -> P ScalarExpr
> scalarExpr'' bExpr = factor >>= trysuffix
2013-12-13 11:39:26 +01:00
> where
> factor = choice [literal
> ,scase
> ,cast
2013-12-13 21:38:43 +01:00
> ,extract
> ,substring
> ,subquery
> ,prefixUnaryOp
2013-12-14 09:55:44 +01:00
> ,try app
> ,try dottedIden
2013-12-13 11:39:26 +01:00
> ,identifier
> ,sparens]
2013-12-13 11:39:26 +01:00
> trysuffix e = try (suffix e) <|> return e
> suffix e0 = choice
> [BinOp <$> opSymbol <*> return e0 <*> factor
2013-12-13 20:13:36 +01:00
> ,inSuffix e0
> ,betweenSuffix e0
2013-12-14 09:55:44 +01:00
> ,postfixOpSuffix e0
> ] >>= trysuffix
> opSymbol = choice
> (map (try . symbol) binOpSymbolNames
> ++ map (try . keywords) binOpMultiKeywordNames
> ++ map (try . keyword)
> (if bExpr
> then binOpKeywordNamesNoAnd
> else binOpKeywordNames))
2013-12-13 21:25:22 +01:00
> keywords ks = unwords <$> mapM keyword ks
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
TODO: create the fixity adjuster. This should take a list of operators
with precedence and associativity and adjust a scalar expr tree to
2013-12-14 09:55:44 +01:00
match these. It shouldn't attempt to descend into scalar expressions
inside nested query exprs in subqueries. This way we separate out
parsing from handling the precedence and associativity. Is it a good
idea to separate these? I'm not sure. I think it makes some error
messages potentially a little less helpful without some extra work,
but apart from that, I think it is a win in terms of code clarity. The
errors which are harder to produce nicely I think are limited to
chained binary operators with no parens which have no associativity
which should be a parse error.
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
> {-sqlFixities :: [HSE.Fixity]
2013-12-13 11:39:26 +01:00
> sqlFixities = HSE.infixl_ 9 ["*", "/"]
> ++ HSE.infixl_ 8 ["+", "-"]
> ++ HSE.infixl_ 6 ["<=",">=","!=","<>","||", "like"]
> ++ HSE.infix_ 4 ["<", ">"]
> ++ HSE.infixr_ 3 ["="]
> ++ HSE.infixr_ 2 ["or"]
> ++ HSE.infixl_ 1 ["and"]
2013-12-14 09:55:44 +01:00
> ++ HSE.infixl_ 0 ["or"]-}
> fixFixities :: ScalarExpr -> ScalarExpr
> fixFixities = id
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
The scalarExpr wrapper. The idea is that directly nested scalar
expressions use the scalarExpr' parser, then other code uses the
scalarExpr parser and then everyone gets the fixity fixes and it's
easy to ensure that this fix is only applied once to each scalar
expression tree (for efficiency and code clarity).
2013-12-13 11:39:26 +01:00
> scalarExpr :: P ScalarExpr
> scalarExpr =
> choice [try star
2013-12-14 09:55:44 +01:00
> ,fixFixities <$> scalarExpr']
2013-12-13 11:39:26 +01:00
-------------------------------------------------
= query expressions
2013-12-14 09:55:44 +01:00
TODO: maybe refactor all the parsers. A parser wouldn't usually be
optional or use try itself. The caller could do this.
== select lists
2013-12-13 16:27:02 +01:00
2013-12-13 11:39:26 +01:00
> selectItem :: P (Maybe String, ScalarExpr)
> selectItem = flip (,) <$> scalarExpr <*> optionMaybe (try alias)
> where alias = optional (try (keyword_ "as")) *> identifierString
> selectList :: P [(Maybe String,ScalarExpr)]
2013-12-13 16:27:02 +01:00
> selectList = commaSep1 selectItem
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
== from
this parser should be refactored, it is very unclear. Here is the
rough grammar
tref
(cross | [natural]
([inner]
| left [outer]
| right [outer]
| full [outer]
)
join tref
[on expr | using (...)]
2013-12-13 11:39:26 +01:00
> from :: P [TableRef]
> from = option [] (try (keyword_ "from") *> commaSep1 tref)
> where
> tref = choice [try (JoinQueryExpr <$> parens queryExpr)
> ,JoinParens <$> parens tref
2013-12-13 11:39:26 +01:00
> ,SimpleTableRef <$> identifierString]
> >>= optionSuffix pjoin
2013-12-13 11:39:26 +01:00
> >>= optionSuffix alias
> pjoin tref0 =
2013-12-13 11:39:26 +01:00
> choice
> [try (keyword_ "natural") *> keyword_ "inner"
> *> conditionlessSuffix tref0 Inner (Just JoinNatural)
> ,try (keyword_ "join")
> *> (JoinTableRef Inner tref0 <$> tref <*> joinExpr)
> ,try (keyword_ "inner")
> *> conditionSuffix tref0 Inner
> ,try (choice [JLeft <$ keyword_ "left"
> ,JRight <$ keyword_ "right"
> ,Full <$ keyword_ "full"])
> >>= outerJoinSuffix tref0
> ,try (keyword_ "cross")
> *> conditionlessSuffix tref0 Cross Nothing
> ]
> >>= optionSuffix pjoin
2013-12-13 11:39:26 +01:00
> outerJoinSuffix tref0 jt =
> optional (keyword_ "outer") *> conditionSuffix tref0 jt
> conditionSuffix tref0 jt =
> keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> joinExpr)
> conditionlessSuffix tref0 jt jc =
> keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> return jc)
> joinExpr = choice
> [(Just . JoinUsing)
> <$> (try (keyword_ "using")
> *> parens (commaSep1 identifierString))
2013-12-13 11:39:26 +01:00
> ,(Just . JoinOn) <$> (try (keyword_ "on") *> scalarExpr)
> ,return Nothing
> ]
> alias j = let a1 = optional (try (keyword_ "as")) *> identifierString
2013-12-13 23:37:34 +01:00
> a2 = optionMaybe (try $ parens (commaSep1 identifierString))
> in option j (JoinAlias j <$> try a1 <*> try a2)
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
== simple other parts
Parsers for where, group by, having, order by and limit, which are
pretty trivial.
Here is a helper for parsing a few parts of the query expr (currently
where, having, limit, offset).
2013-12-13 16:27:02 +01:00
> optionalScalarExpr :: String -> P (Maybe ScalarExpr)
> optionalScalarExpr k = optionMaybe (try (keyword_ k) *> scalarExpr)
2013-12-13 11:39:26 +01:00
> swhere :: P (Maybe ScalarExpr)
2013-12-13 16:27:02 +01:00
> swhere = optionalScalarExpr "where"
2013-12-13 11:39:26 +01:00
> sgroupBy :: P [ScalarExpr]
> sgroupBy = option [] (try (keyword_ "group")
> *> keyword_ "by"
> *> commaSep1 scalarExpr)
> having :: P (Maybe ScalarExpr)
2013-12-13 16:27:02 +01:00
> having = optionalScalarExpr "having"
2013-12-13 11:39:26 +01:00
2013-12-13 16:08:10 +01:00
> orderBy :: P [(ScalarExpr,Direction)]
2013-12-14 00:14:23 +01:00
> orderBy = try (keyword_ "order") *> keyword_ "by" *> commaSep1 ob
2013-12-13 16:08:10 +01:00
> where
> ob = (,) <$> scalarExpr
> <*> option Asc (choice [Asc <$ keyword_ "asc"
> ,Desc <$ keyword_ "desc"])
2013-12-13 11:39:26 +01:00
2013-12-13 16:27:02 +01:00
> limit :: P (Maybe ScalarExpr)
> limit = optionalScalarExpr "limit"
> offset :: P (Maybe ScalarExpr)
> offset = optionalScalarExpr "offset"
2013-12-14 09:55:44 +01:00
== common table expressions
> with :: P QueryExpr
> with = try (keyword_ "with") >>
2013-12-14 00:14:23 +01:00
> With <$> commaSep1 withQuery <*> queryExpr
> where
2013-12-14 00:14:23 +01:00
> withQuery =
> (,) <$> (identifierString <* optional (try $ keyword_ "as"))
> <*> parens queryExpr
2013-12-13 16:27:02 +01:00
2013-12-14 09:55:44 +01:00
== query expression
This parser parses any query expression variant: normal select, cte,
and union, etc..
2013-12-13 11:39:26 +01:00
> queryExpr :: P QueryExpr
> queryExpr =
> choice [select >>= queryExprSuffix, with]
> where
> select = try (keyword_ "select") >>
> Select
> <$> (fromMaybe All <$> duplicates)
> <*> selectList
> <*> from
> <*> swhere
> <*> sgroupBy
> <*> having
> <*> option [] orderBy
> <*> limit
> <*> offset
2013-12-13 22:41:12 +01:00
> queryExprSuffix :: QueryExpr -> P QueryExpr
> queryExprSuffix qe =
> choice [(CombineQueryExpr qe
> <$> try (choice
> [Union <$ keyword_ "union"
> ,Intersect <$ keyword_ "intersect"
> ,Except <$ keyword_ "except"])
> <*> (fromMaybe All <$> duplicates)
2013-12-14 00:14:23 +01:00
> <*> option Respectively
> (try (Corresponding
> <$ keyword_ "corresponding"))
> <*> queryExpr)
> >>= queryExprSuffix
2013-12-13 22:41:12 +01:00
> ,return qe]
2013-12-13 11:39:26 +01:00
2013-12-14 09:55:44 +01:00
wrapper for query expr which ignores optional trailing semicolon.
> topLevelQueryExpr :: P QueryExpr
> topLevelQueryExpr =
> queryExpr <* (choice [try $ symbol_ ";", return()])
wrapper to parse a series of query exprs from a single source. They
must be separated by semicolon, but for the last expression, the
trailing semicolon is optional.
> queryExprs :: P [QueryExpr]
> queryExprs = do
> qe <- queryExpr
> choice [[qe] <$ eof
2013-12-14 09:55:44 +01:00
> ,symbol_ ";" *>
> choice [[qe] <$ eof
> ,(:) qe <$> queryExprs]]
2013-12-13 11:39:26 +01:00
------------------------------------------------
2013-12-14 09:55:44 +01:00
= lexing parsers
The lexing is a bit 'virtual', in the usual parsec style.
> symbol :: String -> P String
> symbol s = string s
> -- <* notFollowedBy (oneOf "+-/*<>=!|")
> <* whiteSpace
> symbol_ :: String -> P ()
> symbol_ s = symbol s *> return ()
> keyword :: String -> P String
> keyword s = (map toLower <$> string s)
> <* notFollowedBy (char '_' <|> alphaNum)
> <* whiteSpace
> keyword_ :: String -> P ()
> keyword_ s = keyword s *> return ()
Identifiers are very simple at the moment: start with a letter or
underscore, and continue with letter, underscore or digit. It doesn't
support quoting other other sorts of identifiers yet. There is a
blacklist of keywords which aren't supported as identifiers.
> identifierString :: P String
> identifierString = do
> s <- (:) <$> letterOrUnderscore
> <*> many letterDigitOrUnderscore <* whiteSpace
> guard (s `notElem` blacklist)
> return s
> where
> letterOrUnderscore = char '_' <|> letter
> letterDigitOrUnderscore = char '_' <|> alphaNum
> blacklist :: [String]
> blacklist = ["as", "from", "where", "having", "group", "order"
> ,"inner", "left", "right", "full", "natural", "join"
> ,"on", "using", "when", "then", "case", "end", "order"
> ,"limit", "offset", "in"
> ,"except", "intersect", "union"]
TODO: talk about what must be in the blacklist, and what doesn't need
to be.
String literals: limited at the moment, no escaping \' or other
variations.
> stringLiteral :: P String
> stringLiteral = char '\'' *> manyTill anyChar (symbol_ "'")
number literals
here is the rough grammar target:
digits
digits.[digits][e[+-]digits]
[digits].digits[e[+-]digits]
digitse[+-]digits
numbers are parsed to strings, not to a numeric type. This is to aoivd
making a decision on how to represent numbers, the client code can
make this choice.
> numberLiteral :: P String
> numberLiteral =
> choice [int
> >>= optionSuffix dot
> >>= optionSuffix fracts
> >>= optionSuffix expon
> ,fract "" >>= optionSuffix expon]
> <* whiteSpace
> where
> int = many1 digit
> fract p = dot p >>= fracts
> -- todo: use some helper functions to improve the
> -- unclear pointfree code here
> dot p = ((p++) . (:[])) <$> char '.'
> fracts p = (p++) <$> int
> expon p = do
> void $ char 'e'
> s <- option "" ((:[]) <$> (char '+' <|> char '-'))
> i <- int
> return (p ++ "e" ++ s ++ i)
lexer for integer literals which appear in some places in sql
> integerLiteral :: P Int
> integerLiteral = read <$> many1 digit <* whiteSpace
2013-12-14 09:55:44 +01:00
whitespace parser which skips comments also
2013-12-13 11:39:26 +01:00
> whiteSpace :: P ()
> whiteSpace =
> choice [simpleWhiteSpace *> whiteSpace
> ,lineComment *> whiteSpace
> ,blockComment *> whiteSpace
> ,return ()]
> where
> lineComment = try (string "--")
> *> manyTill anyChar (void (char '\n') <|> eof)
> blockComment = -- no nesting of block comments in SQL
> try (string "/*")
> -- TODO: why is try used herex
> *> manyTill anyChar (try $ string "*/")
> -- use many1 so we can more easily avoid non terminating loops
> simpleWhiteSpace = void $ many1 (oneOf " \t\n")
2013-12-14 09:55:44 +01:00
= generic parser helpers
2013-12-13 11:39:26 +01:00
> optionSuffix :: (a -> P a) -> a -> P a
> optionSuffix p a = option a (p a)
> parens :: P a -> P a
> parens = between (symbol_ "(") (symbol_ ")")
2013-12-13 11:39:26 +01:00
> commaSep :: P a -> P [a]
> commaSep = (`sepBy` symbol_ ",")
> commaSep1 :: P a -> P [a]
> commaSep1 = (`sepBy1` symbol_ ",")
2013-12-14 00:14:23 +01:00
--------------------------------------------
= helper functions
> setPos :: Maybe (Int,Int) -> P ()
> setPos Nothing = return ()
> setPos (Just (l,c)) = fmap f getPosition >>= setPosition
> where f = flip setSourceColumn c
> . flip setSourceLine l
> convParseError :: String -> P.ParseError -> ParseError
> convParseError src e =
> ParseError
> {peErrorString = show e
> ,peFilename = sourceName p
> ,pePosition = (sourceLine p, sourceColumn p)
> ,peFormattedError = formatError src e
> }
> where
> p = errorPos e
format the error more nicely: emacs format for positioning, plus
context
> formatError :: String -> P.ParseError -> String
> formatError src e =
> sourceName p ++ ":" ++ show (sourceLine p)
> ++ ":" ++ show (sourceColumn p) ++ ":"
> ++ context
> ++ show e
> where
> context =
> let lns = take 1 $ drop (sourceLine p - 1) $ lines src
> in case lns of
> [x] -> "\n" ++ x ++ "\n"
> ++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
> _ -> ""
> p = errorPos e