2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
The parser code
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-13 15:04:48 +01:00
|
|
|
> module Language.SQL.SimpleSQL.Parser
|
|
|
|
> (parseQueryExpr
|
|
|
|
> ,parseScalarExpr
|
2013-12-13 23:34:05 +01:00
|
|
|
> ,parseQueryExprs
|
2013-12-13 18:21:44 +01:00
|
|
|
> ,ParseError(..)) where
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> import Control.Monad.Identity
|
|
|
|
> import Control.Applicative hiding (many, (<|>), optional)
|
|
|
|
> import Data.Maybe
|
2013-12-13 23:07:45 +01:00
|
|
|
> import Data.Char
|
2013-12-14 09:55:44 +01:00
|
|
|
> import Text.Parsec hiding (ParseError)
|
|
|
|
> import qualified Text.Parsec as P
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-13 15:04:48 +01:00
|
|
|
> import Language.SQL.SimpleSQL.Syntax
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
The public api functions.
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> -- | Parses a query expr, trailing semicolon optional.
|
|
|
|
> parseQueryExpr :: FilePath -- ^ filename to use in errors
|
|
|
|
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
|
|
|
> -> String -- ^ the sql source to parse
|
2013-12-13 18:21:44 +01:00
|
|
|
> -> Either ParseError QueryExpr
|
2013-12-14 09:55:44 +01:00
|
|
|
> parseQueryExpr = wrapParse topLevelQueryExpr
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> parseQueryExprs :: FilePath -- ^ filename to use in errors
|
|
|
|
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
|
|
|
> -> String -- ^ the sql source to parse
|
2013-12-13 23:34:05 +01:00
|
|
|
> -> Either ParseError [QueryExpr]
|
2013-12-14 09:55:44 +01:00
|
|
|
> parseQueryExprs = wrapParse queryExprs
|
2013-12-13 23:34:05 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> parseScalarExpr :: FilePath -- ^ filename to use in errors
|
|
|
|
> -> Maybe (Int,Int) -- ^ line number and column number to use in errors
|
|
|
|
> -> String -- ^ the sql source to parse
|
2013-12-13 18:21:44 +01:00
|
|
|
> -> Either ParseError ScalarExpr
|
2013-12-14 09:55:44 +01:00
|
|
|
> parseScalarExpr = wrapParse scalarExpr
|
|
|
|
|
|
|
|
This helper function takes the parser given and:
|
|
|
|
|
|
|
|
sets the position when parsing
|
|
|
|
automatically skips leading whitespace
|
|
|
|
checks the parser parses all the input using eof
|
|
|
|
converts the error return to the nice wrapper
|
|
|
|
|
|
|
|
> wrapParse :: P a
|
|
|
|
> -> FilePath
|
|
|
|
> -> Maybe (Int,Int)
|
|
|
|
> -> String
|
|
|
|
> -> Either ParseError a
|
|
|
|
> wrapParse parser f p src =
|
2013-12-13 18:21:44 +01:00
|
|
|
> either (Left . convParseError src) Right
|
2013-12-14 09:55:44 +01:00
|
|
|
> $ parse (setPos p *> whiteSpace *> parser <* eof) f src
|
2013-12-13 18:21:44 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> -- | Type to represent parse errors.
|
2013-12-13 18:21:44 +01:00
|
|
|
> data ParseError = ParseError
|
2013-12-14 09:55:44 +01:00
|
|
|
> {peErrorString :: String -- ^ contains the error message
|
|
|
|
> ,peFilename :: FilePath -- ^ filename location for the error
|
|
|
|
> ,pePosition :: (Int,Int) -- ^ line number and column number location for the error
|
|
|
|
> ,peFormattedError :: String -- ^ formatted error with the position, error message and source context
|
2013-12-13 19:01:57 +01:00
|
|
|
> } deriving (Eq,Show)
|
2013-12-14 00:01:07 +01:00
|
|
|
|
2013-12-14 00:14:23 +01:00
|
|
|
------------------------------------------------
|
2013-12-13 18:21:44 +01:00
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> type P a = ParsecT String () Identity a
|
|
|
|
|
|
|
|
= scalar expressions
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== literals
|
|
|
|
|
|
|
|
See the stringLiteral lexer below for notes on string literal syntax.
|
2013-12-13 19:24:20 +01:00
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> estring :: P ScalarExpr
|
2013-12-13 19:24:20 +01:00
|
|
|
> estring = StringLit <$> stringLiteral
|
2013-12-13 16:00:22 +01:00
|
|
|
|
|
|
|
> number :: P ScalarExpr
|
2013-12-14 09:55:44 +01:00
|
|
|
> number = NumLit <$> numberLiteral
|
|
|
|
|
|
|
|
parse SQL interval literals, something like
|
|
|
|
interval '5' day (3)
|
|
|
|
or
|
|
|
|
interval '5' month
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-13 23:07:45 +01:00
|
|
|
> interval :: P ScalarExpr
|
|
|
|
> interval = try (keyword_ "interval") >>
|
|
|
|
> IntervalLit
|
|
|
|
> <$> stringLiteral
|
|
|
|
> <*> identifierString
|
2013-12-14 10:23:58 +01:00
|
|
|
> <*> optionMaybe (try $ parens integerLiteral)
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> literal :: P ScalarExpr
|
2013-12-13 23:07:45 +01:00
|
|
|
> literal = number <|> estring <|> interval
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== identifiers
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
Uses the identifierString 'lexer'. See this function for notes on identifiers.
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> identifier :: P ScalarExpr
|
2013-12-13 16:00:22 +01:00
|
|
|
> identifier = Iden <$> identifierString
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
Identifier with one dot in it. This should be extended to any amount
|
|
|
|
of dots.
|
|
|
|
|
2013-12-13 16:00:22 +01:00
|
|
|
> dottedIden :: P ScalarExpr
|
|
|
|
> dottedIden = Iden2 <$> identifierString
|
2013-12-13 20:13:36 +01:00
|
|
|
> <*> (symbol "." *> identifierString)
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== star
|
|
|
|
|
|
|
|
used in select *, select x.*, and agg(*) variations.
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> star :: P ScalarExpr
|
|
|
|
> star = choice [Star <$ symbol "*"
|
|
|
|
> ,Star2 <$> (identifierString <* symbol "." <* symbol "*")]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== function application, aggregates and windows
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
this represents anything which syntactically looks like regular C
|
|
|
|
function application: an identifier, parens with comma sep scalar
|
|
|
|
expression arguments.
|
|
|
|
|
|
|
|
The parsing for the aggregate extensions is here as well:
|
|
|
|
|
|
|
|
aggregate([all|distinct] args [order by orderitems])
|
|
|
|
|
|
|
|
> aggOrApp :: P ScalarExpr
|
|
|
|
> aggOrApp = do
|
2013-12-13 22:18:30 +01:00
|
|
|
> i <- identifierString
|
|
|
|
> _ <- symbol "("
|
|
|
|
> d <- try duplicates
|
|
|
|
> es <- choice [(:[]) <$> try star
|
|
|
|
> ,commaSep scalarExpr']
|
|
|
|
> od <- try $ optionMaybe orderBy
|
|
|
|
> _ <- symbol ")"
|
|
|
|
> case (d,od) of
|
|
|
|
> (Nothing,Nothing) ->
|
|
|
|
> return $ App i es
|
|
|
|
> _ -> return $ AggregateApp i d es (fromMaybe [] od)
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> duplicates :: P (Maybe Duplicates)
|
|
|
|
> duplicates = optionMaybe $ try $
|
|
|
|
> choice [All <$ keyword_ "all"
|
|
|
|
> ,Distinct <$ keyword "distinct"]
|
|
|
|
|
|
|
|
parse a window call as a suffix of a regular function call
|
|
|
|
|
|
|
|
this looks like this:
|
|
|
|
|
|
|
|
functioncall(args) over ([partition by ids] [order by orderitems])
|
|
|
|
|
|
|
|
No support for explicit frames yet.
|
|
|
|
|
2013-12-14 10:23:58 +01:00
|
|
|
The convention in this file is that the 'Suffix', erm, suffix on
|
|
|
|
parser names means that they have been left factored.
|
|
|
|
|
2013-12-13 22:31:36 +01:00
|
|
|
> windowSuffix :: ScalarExpr -> P ScalarExpr
|
2013-12-14 10:23:58 +01:00
|
|
|
> windowSuffix (App f es) =
|
|
|
|
> try (keyword_ "over")
|
|
|
|
> *> parens (WindowApp f es
|
|
|
|
> <$> option [] partitionBy
|
|
|
|
> <*> option [] orderBy)
|
2013-12-13 22:31:36 +01:00
|
|
|
> where
|
|
|
|
> partitionBy = try (keyword_ "partition") >>
|
|
|
|
> keyword_ "by" >>
|
|
|
|
> commaSep1 scalarExpr'
|
2013-12-14 10:23:58 +01:00
|
|
|
> windowSuffix _ = fail ""
|
2013-12-13 22:31:36 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> app :: P ScalarExpr
|
2013-12-14 10:23:58 +01:00
|
|
|
> app = aggOrApp >>= optionSuffix windowSuffix
|
2013-12-14 09:55:44 +01:00
|
|
|
|
|
|
|
== case expression
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> scase :: P ScalarExpr
|
|
|
|
> scase =
|
|
|
|
> Case <$> (try (keyword_ "case") *> optionMaybe (try scalarExpr'))
|
|
|
|
> <*> many1 swhen
|
|
|
|
> <*> optionMaybe (try (keyword_ "else") *> scalarExpr')
|
|
|
|
> <* keyword_ "end"
|
|
|
|
> where
|
|
|
|
> swhen = keyword_ "when" *>
|
|
|
|
> ((,) <$> scalarExpr' <*> (keyword_ "then" *> scalarExpr'))
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== miscellaneous keyword operators
|
|
|
|
|
|
|
|
These are keyword operators which don't look like normal prefix,
|
|
|
|
postfix or infix binary operators. They mostly look like function
|
|
|
|
application but with keywords in the argument list instead of commas
|
|
|
|
to separate the arguments.
|
|
|
|
|
|
|
|
cast: cast(expr as type)
|
|
|
|
|
2013-12-13 19:24:20 +01:00
|
|
|
> cast :: P ScalarExpr
|
|
|
|
> cast = parensCast <|> prefixCast
|
|
|
|
> where
|
|
|
|
> parensCast = try (keyword_ "cast") >>
|
2013-12-13 20:13:36 +01:00
|
|
|
> parens (Cast <$> scalarExpr'
|
2013-12-13 19:24:20 +01:00
|
|
|
> <*> (keyword_ "as" *> typeName))
|
|
|
|
> prefixCast = try (CastOp <$> typeName
|
|
|
|
> <*> stringLiteral)
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
extract(id from expr)
|
|
|
|
|
2013-12-13 21:38:43 +01:00
|
|
|
> extract :: P ScalarExpr
|
|
|
|
> extract = try (keyword_ "extract") >>
|
|
|
|
> parens (makeOp <$> identifierString
|
2013-12-14 00:14:23 +01:00
|
|
|
> <*> (keyword_ "from" *> scalarExpr'))
|
2013-12-13 21:38:43 +01:00
|
|
|
> where makeOp n e = SpecialOp "extract" [Iden n, e]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
substring(x from expr to expr)
|
|
|
|
|
|
|
|
todo: also support substring(x from expr)
|
|
|
|
|
2013-12-13 23:34:05 +01:00
|
|
|
> substring :: P ScalarExpr
|
|
|
|
> substring = try (keyword_ "substring") >>
|
|
|
|
> parens (makeOp <$> scalarExpr'
|
2013-12-14 00:14:23 +01:00
|
|
|
> <*> (keyword_ "from" *> scalarExpr')
|
|
|
|
> <*> (keyword_ "for" *> scalarExpr')
|
2013-12-13 23:34:05 +01:00
|
|
|
> )
|
|
|
|
> where makeOp a b c = SpecialOp "substring" [a,b,c]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
in: two variations:
|
|
|
|
a in (expr0, expr1, ...)
|
|
|
|
a in (queryexpr)
|
|
|
|
|
2013-12-13 20:00:06 +01:00
|
|
|
> inSuffix :: ScalarExpr -> P ScalarExpr
|
|
|
|
> inSuffix e =
|
2013-12-14 00:14:23 +01:00
|
|
|
> In <$> inty
|
|
|
|
> <*> return e
|
|
|
|
> <*> parens (choice
|
|
|
|
> [InQueryExpr <$> queryExpr
|
|
|
|
> ,InList <$> commaSep1 scalarExpr'])
|
2013-12-13 20:00:06 +01:00
|
|
|
> where
|
|
|
|
> inty = try $ choice [True <$ keyword_ "in"
|
|
|
|
> ,False <$ keyword_ "not" <* keyword_ "in"]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
between:
|
|
|
|
expr between expr and expr
|
|
|
|
|
|
|
|
There is a complication when parsing between - when parsing the second
|
|
|
|
expression it is ambiguous when you hit an 'and' whether it is a
|
|
|
|
binary operator or part of the between. This code follows what
|
|
|
|
postgres does, which might be standard across SQL implementations,
|
|
|
|
which is that you can't have a binary and operator in the middle
|
|
|
|
expression in a between unless it is wrapped in parens. The 'bExpr
|
|
|
|
parsing' is used to create alternative scalar expression parser which
|
|
|
|
is identical to the normal one expect it doesn't recognise the binary
|
|
|
|
and operator. This is the call to scalarExpr'' True.
|
|
|
|
|
2013-12-13 20:13:36 +01:00
|
|
|
> betweenSuffix :: ScalarExpr -> P ScalarExpr
|
|
|
|
> betweenSuffix e =
|
2013-12-14 00:14:23 +01:00
|
|
|
> makeOp <$> opName
|
|
|
|
> <*> return e
|
|
|
|
> <*> scalarExpr'' True
|
|
|
|
> <*> (keyword_ "and" *> scalarExpr')
|
2013-12-13 20:13:36 +01:00
|
|
|
> where
|
|
|
|
> opName = try $ choice
|
|
|
|
> ["between" <$ keyword_ "between"
|
|
|
|
> ,"not between" <$ keyword_ "not" <* keyword_ "between"]
|
2013-12-13 20:26:14 +01:00
|
|
|
> makeOp n a b c = SpecialOp n [a,b,c]
|
2013-12-13 20:13:36 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
subquery expression:
|
|
|
|
[exists|all|any|some] (queryexpr)
|
|
|
|
|
2013-12-13 19:43:28 +01:00
|
|
|
> subquery :: P ScalarExpr
|
|
|
|
> subquery =
|
|
|
|
> choice
|
|
|
|
> [try $ SubQueryExpr SqSq <$> parens queryExpr
|
|
|
|
> ,SubQueryExpr <$> try sqkw <*> parens queryExpr]
|
|
|
|
> where
|
|
|
|
> sqkw = try $ choice
|
|
|
|
> [SqExists <$ keyword_ "exists"
|
|
|
|
> ,SqAll <$ try (keyword_ "all")
|
|
|
|
> ,SqAny <$ keyword_ "any"
|
|
|
|
> ,SqSome <$ keyword_ "some"]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
typename: used in casts. Special cases for the multi keyword typenames
|
|
|
|
that SQL supports.
|
|
|
|
|
2013-12-13 19:24:20 +01:00
|
|
|
> typeName :: P TypeName
|
|
|
|
> typeName = choice
|
|
|
|
> [TypeName "double precision"
|
2013-12-13 23:07:45 +01:00
|
|
|
> <$ try (keyword_ "double" <* keyword_ "precision")
|
2013-12-13 19:24:20 +01:00
|
|
|
> ,TypeName "character varying"
|
2013-12-13 23:07:45 +01:00
|
|
|
> <$ try (keyword_ "character" <* keyword_ "varying")
|
2013-12-13 19:24:20 +01:00
|
|
|
> ,TypeName <$> identifierString]
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== scalar parens
|
|
|
|
|
|
|
|
> sparens :: P ScalarExpr
|
|
|
|
> sparens = Parens <$> parens scalarExpr'
|
|
|
|
|
|
|
|
|
|
|
|
== operator parsing
|
|
|
|
|
|
|
|
The 'regular' operators in this parsing and in the abstract syntax are
|
|
|
|
unary prefix, unary postfix and binary infix operators. The operators
|
|
|
|
can be symbols (a + b), single keywords (a and b) or multiple keywords
|
|
|
|
(a is similar to b).
|
|
|
|
|
|
|
|
First, the list of the regulars operators split by operator type
|
|
|
|
(prefix, postfix, binary) and by symbol/single keyword/ multiple
|
|
|
|
keyword.
|
|
|
|
|
2013-12-13 13:08:33 +01:00
|
|
|
> binOpSymbolNames :: [String]
|
2013-12-14 00:14:23 +01:00
|
|
|
> binOpSymbolNames =
|
|
|
|
> ["=", "<=", ">=", "!=", "<>", "<", ">"
|
|
|
|
> ,"*", "/", "+", "-"
|
|
|
|
> ,"||"]
|
2013-12-13 13:08:33 +01:00
|
|
|
|
|
|
|
> binOpKeywordNames :: [String]
|
2013-12-14 00:14:23 +01:00
|
|
|
> binOpKeywordNames = ["and", "or", "like", "overlaps"]
|
2013-12-13 20:41:50 +01:00
|
|
|
|
|
|
|
> binOpMultiKeywordNames :: [[String]]
|
|
|
|
> binOpMultiKeywordNames = map words
|
|
|
|
> ["not like"
|
|
|
|
> ,"not similar"
|
|
|
|
> ,"is similar to"
|
|
|
|
> ,"is not similar to"
|
|
|
|
> ,"is distinct from"
|
|
|
|
> ,"is not distinct from"]
|
|
|
|
|
2013-12-13 20:13:36 +01:00
|
|
|
used for between parsing
|
|
|
|
|
|
|
|
> binOpKeywordNamesNoAnd :: [String]
|
|
|
|
> binOpKeywordNamesNoAnd = filter (/="and") binOpKeywordNames
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
There aren't any multi keyword prefix operators currently supported.
|
|
|
|
|
2013-12-13 20:26:14 +01:00
|
|
|
> prefixUnOpKeywordNames :: [String]
|
|
|
|
> prefixUnOpKeywordNames = ["not"]
|
2013-12-13 19:01:57 +01:00
|
|
|
|
2013-12-13 20:26:14 +01:00
|
|
|
> prefixUnOpSymbolNames :: [String]
|
|
|
|
> prefixUnOpSymbolNames = ["+", "-"]
|
2013-12-13 19:01:57 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
There aren't any single keyword postfix operators currently supported. Maybe all these 'is's can be left factored?
|
|
|
|
|
|
|
|
> postfixOpKeywords :: [String]
|
|
|
|
> postfixOpKeywords = ["is null"
|
|
|
|
> ,"is not null"
|
|
|
|
> ,"is true"
|
|
|
|
> ,"is not true"
|
|
|
|
> ,"is false"
|
|
|
|
> ,"is not false"
|
|
|
|
> ,"is unknown"
|
|
|
|
> ,"is not unknown"]
|
|
|
|
|
|
|
|
The parsers:
|
2013-12-13 19:01:57 +01:00
|
|
|
|
2013-12-13 20:26:14 +01:00
|
|
|
> prefixUnaryOp :: P ScalarExpr
|
|
|
|
> prefixUnaryOp =
|
2013-12-13 21:25:22 +01:00
|
|
|
> PrefixOp <$> opSymbol <*> scalarExpr'
|
2013-12-13 19:01:57 +01:00
|
|
|
> where
|
2013-12-13 20:26:14 +01:00
|
|
|
> opSymbol = choice (map (try . symbol) prefixUnOpSymbolNames
|
|
|
|
> ++ map (try . keyword) prefixUnOpKeywordNames)
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> postfixOpSuffix :: ScalarExpr -> P ScalarExpr
|
|
|
|
> postfixOpSuffix e =
|
2013-12-13 20:34:59 +01:00
|
|
|
> try $ choice $ map makeOp opPairs
|
|
|
|
> where
|
2013-12-14 09:55:44 +01:00
|
|
|
> opPairs = flip map postfixOpKeywords $ \o -> (o, words o)
|
2013-12-14 00:14:23 +01:00
|
|
|
> makeOp (o,ws) = try $ PostfixOp o e <$ keywords_ ws
|
2013-12-13 21:25:22 +01:00
|
|
|
> keywords_ = try . mapM_ keyword_
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 10:28:45 +01:00
|
|
|
All the binary operators are parsed as same precedence and left
|
|
|
|
associativity.
|
2013-12-13 20:13:36 +01:00
|
|
|
|
2013-12-14 10:28:45 +01:00
|
|
|
> binaryOperatorSuffix :: Bool -> ScalarExpr -> P ScalarExpr
|
|
|
|
> binaryOperatorSuffix bExpr e0 =
|
|
|
|
> BinOp <$> opSymbol <*> return e0 <*> factor
|
2013-12-13 11:39:26 +01:00
|
|
|
> where
|
2013-12-13 20:41:50 +01:00
|
|
|
> opSymbol = choice
|
|
|
|
> (map (try . symbol) binOpSymbolNames
|
|
|
|
> ++ map (try . keywords) binOpMultiKeywordNames
|
|
|
|
> ++ map (try . keyword)
|
|
|
|
> (if bExpr
|
|
|
|
> then binOpKeywordNamesNoAnd
|
|
|
|
> else binOpKeywordNames))
|
2013-12-13 21:25:22 +01:00
|
|
|
> keywords ks = unwords <$> mapM keyword ks
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
TODO: create the fixity adjuster. This should take a list of operators
|
2013-12-14 10:23:58 +01:00
|
|
|
with precedence and associativity and adjust a scalar expr tree to
|
2013-12-14 09:55:44 +01:00
|
|
|
match these. It shouldn't attempt to descend into scalar expressions
|
2013-12-14 10:23:58 +01:00
|
|
|
inside nested query exprs in subqueries. This way we separate out
|
|
|
|
parsing from handling the precedence and associativity. Is it a good
|
|
|
|
idea to separate these? I'm not sure. I think it makes some error
|
|
|
|
messages potentially a little less helpful without some extra work,
|
|
|
|
but apart from that, I think it is a win in terms of code clarity. The
|
|
|
|
errors which are harder to produce nicely I think are limited to
|
|
|
|
chained binary operators with no parens which have no associativity
|
|
|
|
which should be a parse error.
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
> {-sqlFixities :: [HSE.Fixity]
|
2013-12-13 11:39:26 +01:00
|
|
|
> sqlFixities = HSE.infixl_ 9 ["*", "/"]
|
|
|
|
> ++ HSE.infixl_ 8 ["+", "-"]
|
|
|
|
> ++ HSE.infixl_ 6 ["<=",">=","!=","<>","||", "like"]
|
|
|
|
> ++ HSE.infix_ 4 ["<", ">"]
|
|
|
|
> ++ HSE.infixr_ 3 ["="]
|
|
|
|
> ++ HSE.infixr_ 2 ["or"]
|
|
|
|
> ++ HSE.infixl_ 1 ["and"]
|
2013-12-14 09:55:44 +01:00
|
|
|
> ++ HSE.infixl_ 0 ["or"]-}
|
|
|
|
|
|
|
|
> fixFixities :: ScalarExpr -> ScalarExpr
|
|
|
|
> fixFixities = id
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 10:28:45 +01:00
|
|
|
== scalar expressions
|
|
|
|
|
|
|
|
TODO:
|
|
|
|
left factor stuff which starts with identifier
|
|
|
|
|
|
|
|
This parses most of the scalar exprs. I'm not sure if factor is the
|
|
|
|
correct terminology here. The order of the parsers and use of try is
|
|
|
|
carefully done to make everything work. It is a little fragile and
|
|
|
|
could at least do with some heavy explanation.
|
|
|
|
|
|
|
|
> factor :: P ScalarExpr
|
|
|
|
> factor = choice [literal
|
|
|
|
> ,scase
|
|
|
|
> ,cast
|
|
|
|
> ,extract
|
|
|
|
> ,substring
|
|
|
|
> ,subquery
|
|
|
|
> ,prefixUnaryOp
|
|
|
|
> ,try app
|
|
|
|
> ,try dottedIden
|
|
|
|
> ,identifier
|
|
|
|
> ,sparens]
|
|
|
|
|
|
|
|
putting the factor together with the extra bits
|
|
|
|
|
|
|
|
> scalarExpr'' :: Bool -> P ScalarExpr
|
|
|
|
> scalarExpr'' bExpr = factor >>= trysuffix
|
|
|
|
> where
|
|
|
|
> trysuffix e = try (suffix e) <|> return e
|
|
|
|
> suffix e0 = choice
|
|
|
|
> [binaryOperatorSuffix bExpr e0
|
|
|
|
> ,inSuffix e0
|
|
|
|
> ,betweenSuffix e0
|
|
|
|
> ,postfixOpSuffix e0
|
|
|
|
> ] >>= trysuffix
|
|
|
|
|
|
|
|
Wrapper for non 'bExpr' parsing. See the between parser for
|
|
|
|
explanation.
|
|
|
|
|
|
|
|
> scalarExpr' :: P ScalarExpr
|
|
|
|
> scalarExpr' = scalarExpr'' False
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
The scalarExpr wrapper. The idea is that directly nested scalar
|
|
|
|
expressions use the scalarExpr' parser, then other code uses the
|
|
|
|
scalarExpr parser and then everyone gets the fixity fixes and it's
|
|
|
|
easy to ensure that this fix is only applied once to each scalar
|
|
|
|
expression tree (for efficiency and code clarity).
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> scalarExpr :: P ScalarExpr
|
|
|
|
> scalarExpr =
|
|
|
|
> choice [try star
|
2013-12-14 09:55:44 +01:00
|
|
|
> ,fixFixities <$> scalarExpr']
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
-------------------------------------------------
|
|
|
|
|
|
|
|
= query expressions
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
TODO: maybe refactor all the parsers. A parser wouldn't usually be
|
|
|
|
optional or use try itself. The caller could do this.
|
|
|
|
|
|
|
|
== select lists
|
2013-12-13 16:27:02 +01:00
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> selectItem :: P (Maybe String, ScalarExpr)
|
|
|
|
> selectItem = flip (,) <$> scalarExpr <*> optionMaybe (try alias)
|
|
|
|
> where alias = optional (try (keyword_ "as")) *> identifierString
|
|
|
|
|
|
|
|
> selectList :: P [(Maybe String,ScalarExpr)]
|
2013-12-13 16:27:02 +01:00
|
|
|
> selectList = commaSep1 selectItem
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== from
|
|
|
|
|
|
|
|
this parser should be refactored, it is very unclear. Here is the
|
|
|
|
rough grammar
|
|
|
|
|
|
|
|
tref
|
|
|
|
(cross | [natural]
|
|
|
|
([inner]
|
|
|
|
| left [outer]
|
|
|
|
| right [outer]
|
|
|
|
| full [outer]
|
|
|
|
)
|
|
|
|
join tref
|
|
|
|
[on expr | using (...)]
|
|
|
|
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> from :: P [TableRef]
|
|
|
|
> from = option [] (try (keyword_ "from") *> commaSep1 tref)
|
|
|
|
> where
|
2013-12-13 13:08:33 +01:00
|
|
|
> tref = choice [try (JoinQueryExpr <$> parens queryExpr)
|
|
|
|
> ,JoinParens <$> parens tref
|
2013-12-13 11:39:26 +01:00
|
|
|
> ,SimpleTableRef <$> identifierString]
|
2013-12-13 14:05:32 +01:00
|
|
|
> >>= optionSuffix pjoin
|
2013-12-13 11:39:26 +01:00
|
|
|
> >>= optionSuffix alias
|
2013-12-13 14:05:32 +01:00
|
|
|
> pjoin tref0 =
|
2013-12-13 11:39:26 +01:00
|
|
|
> choice
|
|
|
|
> [try (keyword_ "natural") *> keyword_ "inner"
|
|
|
|
> *> conditionlessSuffix tref0 Inner (Just JoinNatural)
|
|
|
|
> ,try (keyword_ "join")
|
|
|
|
> *> (JoinTableRef Inner tref0 <$> tref <*> joinExpr)
|
|
|
|
> ,try (keyword_ "inner")
|
|
|
|
> *> conditionSuffix tref0 Inner
|
|
|
|
> ,try (choice [JLeft <$ keyword_ "left"
|
|
|
|
> ,JRight <$ keyword_ "right"
|
|
|
|
> ,Full <$ keyword_ "full"])
|
|
|
|
> >>= outerJoinSuffix tref0
|
|
|
|
> ,try (keyword_ "cross")
|
|
|
|
> *> conditionlessSuffix tref0 Cross Nothing
|
|
|
|
> ]
|
2013-12-13 14:05:32 +01:00
|
|
|
> >>= optionSuffix pjoin
|
2013-12-13 11:39:26 +01:00
|
|
|
> outerJoinSuffix tref0 jt =
|
|
|
|
> optional (keyword_ "outer") *> conditionSuffix tref0 jt
|
|
|
|
> conditionSuffix tref0 jt =
|
|
|
|
> keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> joinExpr)
|
|
|
|
> conditionlessSuffix tref0 jt jc =
|
|
|
|
> keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> return jc)
|
|
|
|
> joinExpr = choice
|
|
|
|
> [(Just . JoinUsing)
|
|
|
|
> <$> (try (keyword_ "using")
|
2013-12-13 13:08:33 +01:00
|
|
|
> *> parens (commaSep1 identifierString))
|
2013-12-13 11:39:26 +01:00
|
|
|
> ,(Just . JoinOn) <$> (try (keyword_ "on") *> scalarExpr)
|
|
|
|
> ,return Nothing
|
|
|
|
> ]
|
|
|
|
> alias j = let a1 = optional (try (keyword_ "as")) *> identifierString
|
2013-12-13 23:37:34 +01:00
|
|
|
> a2 = optionMaybe (try $ parens (commaSep1 identifierString))
|
|
|
|
> in option j (JoinAlias j <$> try a1 <*> try a2)
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== simple other parts
|
|
|
|
|
|
|
|
Parsers for where, group by, having, order by and limit, which are
|
|
|
|
pretty trivial.
|
|
|
|
|
|
|
|
Here is a helper for parsing a few parts of the query expr (currently
|
|
|
|
where, having, limit, offset).
|
|
|
|
|
2013-12-13 16:27:02 +01:00
|
|
|
> optionalScalarExpr :: String -> P (Maybe ScalarExpr)
|
|
|
|
> optionalScalarExpr k = optionMaybe (try (keyword_ k) *> scalarExpr)
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> swhere :: P (Maybe ScalarExpr)
|
2013-12-13 16:27:02 +01:00
|
|
|
> swhere = optionalScalarExpr "where"
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> sgroupBy :: P [ScalarExpr]
|
|
|
|
> sgroupBy = option [] (try (keyword_ "group")
|
|
|
|
> *> keyword_ "by"
|
|
|
|
> *> commaSep1 scalarExpr)
|
|
|
|
|
|
|
|
> having :: P (Maybe ScalarExpr)
|
2013-12-13 16:27:02 +01:00
|
|
|
> having = optionalScalarExpr "having"
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-13 16:08:10 +01:00
|
|
|
> orderBy :: P [(ScalarExpr,Direction)]
|
2013-12-14 00:14:23 +01:00
|
|
|
> orderBy = try (keyword_ "order") *> keyword_ "by" *> commaSep1 ob
|
2013-12-13 16:08:10 +01:00
|
|
|
> where
|
|
|
|
> ob = (,) <$> scalarExpr
|
|
|
|
> <*> option Asc (choice [Asc <$ keyword_ "asc"
|
|
|
|
> ,Desc <$ keyword_ "desc"])
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-13 16:27:02 +01:00
|
|
|
> limit :: P (Maybe ScalarExpr)
|
|
|
|
> limit = optionalScalarExpr "limit"
|
|
|
|
|
|
|
|
> offset :: P (Maybe ScalarExpr)
|
|
|
|
> offset = optionalScalarExpr "offset"
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== common table expressions
|
|
|
|
|
2013-12-13 23:58:12 +01:00
|
|
|
> with :: P QueryExpr
|
|
|
|
> with = try (keyword_ "with") >>
|
2013-12-14 00:14:23 +01:00
|
|
|
> With <$> commaSep1 withQuery <*> queryExpr
|
2013-12-13 23:58:12 +01:00
|
|
|
> where
|
2013-12-14 00:14:23 +01:00
|
|
|
> withQuery =
|
|
|
|
> (,) <$> (identifierString <* optional (try $ keyword_ "as"))
|
|
|
|
> <*> parens queryExpr
|
2013-12-13 16:27:02 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
== query expression
|
|
|
|
|
|
|
|
This parser parses any query expression variant: normal select, cte,
|
|
|
|
and union, etc..
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> queryExpr :: P QueryExpr
|
|
|
|
> queryExpr =
|
2013-12-13 23:58:12 +01:00
|
|
|
> choice [select >>= queryExprSuffix, with]
|
|
|
|
> where
|
|
|
|
> select = try (keyword_ "select") >>
|
|
|
|
> Select
|
|
|
|
> <$> (fromMaybe All <$> duplicates)
|
|
|
|
> <*> selectList
|
|
|
|
> <*> from
|
|
|
|
> <*> swhere
|
|
|
|
> <*> sgroupBy
|
|
|
|
> <*> having
|
|
|
|
> <*> option [] orderBy
|
|
|
|
> <*> limit
|
|
|
|
> <*> offset
|
2013-12-13 22:41:12 +01:00
|
|
|
|
|
|
|
> queryExprSuffix :: QueryExpr -> P QueryExpr
|
|
|
|
> queryExprSuffix qe =
|
2013-12-13 23:58:12 +01:00
|
|
|
> choice [(CombineQueryExpr qe
|
|
|
|
> <$> try (choice
|
|
|
|
> [Union <$ keyword_ "union"
|
|
|
|
> ,Intersect <$ keyword_ "intersect"
|
|
|
|
> ,Except <$ keyword_ "except"])
|
|
|
|
> <*> (fromMaybe All <$> duplicates)
|
2013-12-14 00:14:23 +01:00
|
|
|
> <*> option Respectively
|
|
|
|
> (try (Corresponding
|
|
|
|
> <$ keyword_ "corresponding"))
|
2013-12-13 23:58:12 +01:00
|
|
|
> <*> queryExpr)
|
|
|
|
> >>= queryExprSuffix
|
2013-12-13 22:41:12 +01:00
|
|
|
> ,return qe]
|
2013-12-13 11:39:26 +01:00
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
wrapper for query expr which ignores optional trailing semicolon.
|
|
|
|
|
|
|
|
> topLevelQueryExpr :: P QueryExpr
|
|
|
|
> topLevelQueryExpr =
|
|
|
|
> queryExpr <* (choice [try $ symbol_ ";", return()])
|
|
|
|
|
|
|
|
wrapper to parse a series of query exprs from a single source. They
|
|
|
|
must be separated by semicolon, but for the last expression, the
|
|
|
|
trailing semicolon is optional.
|
|
|
|
|
2013-12-13 23:34:05 +01:00
|
|
|
> queryExprs :: P [QueryExpr]
|
|
|
|
> queryExprs = do
|
|
|
|
> qe <- queryExpr
|
|
|
|
> choice [[qe] <$ eof
|
2013-12-14 09:55:44 +01:00
|
|
|
> ,symbol_ ";" *>
|
2013-12-13 23:34:05 +01:00
|
|
|
> choice [[qe] <$ eof
|
|
|
|
> ,(:) qe <$> queryExprs]]
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
------------------------------------------------
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
= lexing parsers
|
|
|
|
|
|
|
|
The lexing is a bit 'virtual', in the usual parsec style.
|
|
|
|
|
|
|
|
> symbol :: String -> P String
|
|
|
|
> symbol s = string s
|
|
|
|
> -- <* notFollowedBy (oneOf "+-/*<>=!|")
|
|
|
|
> <* whiteSpace
|
|
|
|
|
|
|
|
> symbol_ :: String -> P ()
|
|
|
|
> symbol_ s = symbol s *> return ()
|
|
|
|
|
|
|
|
> keyword :: String -> P String
|
|
|
|
> keyword s = (map toLower <$> string s)
|
|
|
|
> <* notFollowedBy (char '_' <|> alphaNum)
|
|
|
|
> <* whiteSpace
|
|
|
|
|
|
|
|
> keyword_ :: String -> P ()
|
|
|
|
> keyword_ s = keyword s *> return ()
|
|
|
|
|
|
|
|
Identifiers are very simple at the moment: start with a letter or
|
|
|
|
underscore, and continue with letter, underscore or digit. It doesn't
|
|
|
|
support quoting other other sorts of identifiers yet. There is a
|
|
|
|
blacklist of keywords which aren't supported as identifiers.
|
|
|
|
|
|
|
|
> identifierString :: P String
|
|
|
|
> identifierString = do
|
|
|
|
> s <- (:) <$> letterOrUnderscore
|
|
|
|
> <*> many letterDigitOrUnderscore <* whiteSpace
|
|
|
|
> guard (s `notElem` blacklist)
|
|
|
|
> return s
|
|
|
|
> where
|
|
|
|
> letterOrUnderscore = char '_' <|> letter
|
|
|
|
> letterDigitOrUnderscore = char '_' <|> alphaNum
|
|
|
|
|
|
|
|
> blacklist :: [String]
|
|
|
|
> blacklist = ["as", "from", "where", "having", "group", "order"
|
|
|
|
> ,"inner", "left", "right", "full", "natural", "join"
|
|
|
|
> ,"on", "using", "when", "then", "case", "end", "order"
|
|
|
|
> ,"limit", "offset", "in"
|
|
|
|
> ,"except", "intersect", "union"]
|
|
|
|
|
|
|
|
TODO: talk about what must be in the blacklist, and what doesn't need
|
|
|
|
to be.
|
|
|
|
|
|
|
|
String literals: limited at the moment, no escaping \' or other
|
|
|
|
variations.
|
|
|
|
|
|
|
|
> stringLiteral :: P String
|
|
|
|
> stringLiteral = char '\'' *> manyTill anyChar (symbol_ "'")
|
|
|
|
|
|
|
|
number literals
|
|
|
|
|
|
|
|
here is the rough grammar target:
|
|
|
|
|
|
|
|
digits
|
|
|
|
digits.[digits][e[+-]digits]
|
|
|
|
[digits].digits[e[+-]digits]
|
|
|
|
digitse[+-]digits
|
|
|
|
|
|
|
|
numbers are parsed to strings, not to a numeric type. This is to aoivd
|
|
|
|
making a decision on how to represent numbers, the client code can
|
|
|
|
make this choice.
|
|
|
|
|
|
|
|
> numberLiteral :: P String
|
|
|
|
> numberLiteral =
|
|
|
|
> choice [int
|
|
|
|
> >>= optionSuffix dot
|
|
|
|
> >>= optionSuffix fracts
|
|
|
|
> >>= optionSuffix expon
|
|
|
|
> ,fract "" >>= optionSuffix expon]
|
|
|
|
> <* whiteSpace
|
|
|
|
> where
|
|
|
|
> int = many1 digit
|
|
|
|
> fract p = dot p >>= fracts
|
|
|
|
> -- todo: use some helper functions to improve the
|
|
|
|
> -- unclear pointfree code here
|
|
|
|
> dot p = ((p++) . (:[])) <$> char '.'
|
|
|
|
> fracts p = (p++) <$> int
|
|
|
|
> expon p = do
|
|
|
|
> void $ char 'e'
|
|
|
|
> s <- option "" ((:[]) <$> (char '+' <|> char '-'))
|
|
|
|
> i <- int
|
|
|
|
> return (p ++ "e" ++ s ++ i)
|
|
|
|
|
2013-12-14 10:23:58 +01:00
|
|
|
lexer for integer literals which appear in some places in sql
|
|
|
|
|
|
|
|
> integerLiteral :: P Int
|
|
|
|
> integerLiteral = read <$> many1 digit <* whiteSpace
|
2013-12-14 09:55:44 +01:00
|
|
|
|
|
|
|
whitespace parser which skips comments also
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> whiteSpace :: P ()
|
|
|
|
> whiteSpace =
|
|
|
|
> choice [simpleWhiteSpace *> whiteSpace
|
|
|
|
> ,lineComment *> whiteSpace
|
|
|
|
> ,blockComment *> whiteSpace
|
|
|
|
> ,return ()]
|
|
|
|
> where
|
|
|
|
> lineComment = try (string "--")
|
|
|
|
> *> manyTill anyChar (void (char '\n') <|> eof)
|
|
|
|
> blockComment = -- no nesting of block comments in SQL
|
|
|
|
> try (string "/*")
|
|
|
|
> -- TODO: why is try used herex
|
|
|
|
> *> manyTill anyChar (try $ string "*/")
|
|
|
|
> -- use many1 so we can more easily avoid non terminating loops
|
|
|
|
> simpleWhiteSpace = void $ many1 (oneOf " \t\n")
|
|
|
|
|
2013-12-14 09:55:44 +01:00
|
|
|
= generic parser helpers
|
|
|
|
|
2013-12-13 11:39:26 +01:00
|
|
|
> optionSuffix :: (a -> P a) -> a -> P a
|
|
|
|
> optionSuffix p a = option a (p a)
|
|
|
|
|
2013-12-13 13:08:33 +01:00
|
|
|
> parens :: P a -> P a
|
|
|
|
> parens = between (symbol_ "(") (symbol_ ")")
|
2013-12-13 11:39:26 +01:00
|
|
|
|
|
|
|
> commaSep :: P a -> P [a]
|
|
|
|
> commaSep = (`sepBy` symbol_ ",")
|
|
|
|
|
|
|
|
|
|
|
|
> commaSep1 :: P a -> P [a]
|
|
|
|
> commaSep1 = (`sepBy1` symbol_ ",")
|
2013-12-14 00:14:23 +01:00
|
|
|
|
|
|
|
--------------------------------------------
|
|
|
|
|
|
|
|
= helper functions
|
|
|
|
|
|
|
|
> setPos :: Maybe (Int,Int) -> P ()
|
|
|
|
> setPos Nothing = return ()
|
|
|
|
> setPos (Just (l,c)) = fmap f getPosition >>= setPosition
|
|
|
|
> where f = flip setSourceColumn c
|
|
|
|
> . flip setSourceLine l
|
|
|
|
|
|
|
|
> convParseError :: String -> P.ParseError -> ParseError
|
|
|
|
> convParseError src e =
|
|
|
|
> ParseError
|
|
|
|
> {peErrorString = show e
|
|
|
|
> ,peFilename = sourceName p
|
|
|
|
> ,pePosition = (sourceLine p, sourceColumn p)
|
|
|
|
> ,peFormattedError = formatError src e
|
|
|
|
> }
|
|
|
|
> where
|
|
|
|
> p = errorPos e
|
|
|
|
|
|
|
|
format the error more nicely: emacs format for positioning, plus
|
|
|
|
context
|
|
|
|
|
|
|
|
> formatError :: String -> P.ParseError -> String
|
|
|
|
> formatError src e =
|
|
|
|
> sourceName p ++ ":" ++ show (sourceLine p)
|
|
|
|
> ++ ":" ++ show (sourceColumn p) ++ ":"
|
|
|
|
> ++ context
|
|
|
|
> ++ show e
|
|
|
|
> where
|
|
|
|
> context =
|
|
|
|
> let lns = take 1 $ drop (sourceLine p - 1) $ lines src
|
|
|
|
> in case lns of
|
|
|
|
> [x] -> "\n" ++ x ++ "\n"
|
|
|
|
> ++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
|
|
|
|
> _ -> ""
|
|
|
|
> p = errorPos e
|