1
Fork 0

rearrange part of the parser file

This commit is contained in:
Jake Wheat 2014-04-19 13:10:46 +03:00
parent fdb90c0440
commit 445c10a01d

View file

@ -37,6 +37,22 @@ Both the left factoring and error message work are greatly complicated
by the large number of shared prefixes of the various elements in SQL by the large number of shared prefixes of the various elements in SQL
syntax. syntax.
TOC:
Names - parsing identifiers
Typenames
Value expressions
Simple literals
star, param
parens expression, row constructor and scalar subquery
case, cast, exists, unique, array/ multiset constructor
typed literal, app, special function, aggregate, window function
suffixes: in, between, quantified comparison, match, array subscript,
escape, collate
operators
value expression top level
helpers
> {-# LANGUAGE TupleSections #-} > {-# LANGUAGE TupleSections #-}
> -- | This is the module with the parser functions. > -- | This is the module with the parser functions.
> module Language.SQL.SimpleSQL.Parser > module Language.SQL.SimpleSQL.Parser
@ -401,7 +417,7 @@ TODO: this code needs heavy refactoring
= Value expressions = Value expressions
== literals == simple literals
See the stringToken lexer below for notes on string literal syntax. See the stringToken lexer below for notes on string literal syntax.
@ -411,27 +427,6 @@ See the stringToken lexer below for notes on string literal syntax.
> number :: Parser ValueExpr > number :: Parser ValueExpr
> number = NumLit <$> numberLiteral > number = NumLit <$> numberLiteral
parse SQL interval literals, something like
interval '5' day (3)
or
interval '5' month
wrap the whole lot in try, in case we get something like this:
interval '3 days'
which parses as a typed literal
> interval :: Parser ValueExpr
> interval = keyword_ "interval" >> do
> s <- optionMaybe $ choice [True <$ symbol_ "+"
> ,False <$ symbol_ "-"]
> lit <- stringToken
> q <- optionMaybe intervalQualifier
> mkIt s lit q
> where
> mkIt Nothing val Nothing = return $ TypedLit (TypeName [Name "interval"]) val
> mkIt s val (Just (a,b)) = return $ IntervalLit s val a b
> mkIt (Just {}) _val Nothing = fail "cannot use sign without interval qualifier"
> characterSetLiteral :: Parser ValueExpr > characterSetLiteral :: Parser ValueExpr
> characterSetLiteral = > characterSetLiteral =
> CSStringLit <$> shortCSPrefix <*> stringToken > CSStringLit <$> shortCSPrefix <*> stringToken
@ -443,10 +438,14 @@ which parses as a typed literal
> ,string "U&" > ,string "U&"
> ] <* lookAhead quote > ] <* lookAhead quote
TODO: remove try and relocate some
> literal :: Parser ValueExpr > literal :: Parser ValueExpr
> literal = number <|> stringValue <|> try interval <|> try characterSetLiteral > literal = number <|> stringValue <|> try interval <|> try characterSetLiteral
== star == star, param, host param
=== star
used in select *, select x.*, and agg(*) variations, and some other used in select *, select x.*, and agg(*) variations, and some other
places as well. Because it is quite general, the parser doesn't places as well. Because it is quite general, the parser doesn't
@ -458,6 +457,7 @@ in any value expression context.
== parameter == parameter
unnamed parameter
use in e.g. select * from t where a = ? use in e.g. select * from t where a = ?
> parameter :: Parser ValueExpr > parameter :: Parser ValueExpr
@ -473,81 +473,117 @@ select x from t where x > :param
> <$> hostParameterToken > <$> hostParameterToken
> <*> optionMaybe (keyword "indicator" *> hostParameterToken) > <*> optionMaybe (keyword "indicator" *> hostParameterToken)
== function application, aggregates and windows == parens
this represents anything which syntactically looks like regular C value expression parens, row ctor and scalar subquery
function application: an identifier, parens with comma sep value
expression arguments.
The parsing for the aggregate extensions is here as well: > parensTerm :: Parser ValueExpr
> parensTerm = parens $ choice
aggregate([all|distinct] args [order by orderitems]) > [SubQueryExpr SqSq <$> queryExpr
> ,ctor <$> commaSep1 valueExpr]
TODO: try to refactor the parser to not allow distinct/all or order by
if there are no value exprs
> aggOrApp :: [Name] -> Parser ValueExpr
> aggOrApp n =
> makeApp n
> <$> parens ((,,) <$> (fromMaybe SQDefault <$> duplicates)
> <*> choice [commaSep valueExpr]
> <*> (optionMaybe orderBy))
> where > where
> makeApp i (SQDefault,es,Nothing) = App i es > ctor [a] = Parens a
> makeApp i (d,es,od) = AggregateApp i d es (fromMaybe [] od) > ctor as = SpecialOp [Name "rowctor"] as
parse a window call as a suffix of a regular function call == case, cast, exists, unique, array/multiset constructor, interval
this looks like this:
functionname(args) over ([partition by ids] [order by orderitems])
No support for explicit frames yet. All of these start with a fixed keyword which is reserved.
The convention in this file is that the 'Suffix', erm, suffix on === case expression
parser names means that they have been left factored. These are almost
always used with the optionSuffix combinator.
> windowSuffix :: ValueExpr -> Parser ValueExpr > caseValue :: Parser ValueExpr
> windowSuffix (App f es) = > caseValue =
> keyword_ "over" > Case <$> (keyword_ "case" *> optionMaybe valueExpr)
> *> parens (WindowApp f es > <*> many1 whenClause
> <$> option [] partitionBy > <*> optionMaybe elseClause
> <*> option [] orderBy > <* keyword_ "end"
> <*> optionMaybe frameClause)
> where > where
> partitionBy = keywords_ ["partition","by"] >> commaSep1 valueExpr > whenClause = (,) <$> (keyword_ "when" *> commaSep1 valueExpr)
> frameClause = > <*> (keyword_ "then" *> valueExpr)
> mkFrame <$> choice [FrameRows <$ keyword_ "rows" > elseClause = keyword_ "else" *> valueExpr
> ,FrameRange <$ keyword_ "range"]
> <*> frameStartEnd
> frameStartEnd =
> choice
> [keyword_ "between" >>
> mkFrameBetween <$> frameLimit True
> <*> (keyword_ "and" *> frameLimit True)
> ,mkFrameFrom <$> frameLimit False]
> -- use the bexpression style from the between parsing for frame between
> frameLimit useB =
> choice
> [Current <$ keywords_ ["current", "row"]
> -- todo: create an automatic left factor for stuff like
> -- this
> ,keyword_ "unbounded" >>
> choice [UnboundedPreceding <$ keyword_ "preceding"
> ,UnboundedFollowing <$ keyword_ "following"]
> ,do
> e <- if useB then valueExprB else valueExpr
> choice [Preceding e <$ keyword_ "preceding"
> ,Following e <$ keyword_ "following"]
> ]
> mkFrameBetween s e rs = FrameBetween rs s e
> mkFrameFrom s rs = FrameFrom rs s
> mkFrame rs c = c rs
> windowSuffix _ = fail ""
> app :: [Name] -> Parser ValueExpr === cast
> app n = aggOrApp n >>= optionSuffix windowSuffix
== iden prefix term cast: cast(expr as type)
> cast :: Parser ValueExpr
> cast = keyword_ "cast" >>
> parens (Cast <$> valueExpr
> <*> (keyword_ "as" *> typeName))
=== exists, unique
subquery expression:
[exists|unique] (queryexpr)
> subquery :: Parser ValueExpr
> subquery = SubQueryExpr <$> sqkw <*> parens queryExpr
> where
> sqkw = choice
> [SqExists <$ keyword_ "exists"
> ,SqUnique <$ keyword_ "unique"]
=== array/multiset constructor
> arrayCtor :: Parser ValueExpr
> arrayCtor = keyword_ "array" >>
> choice
> [ArrayCtor <$> parens queryExpr
> ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
As far as I can tell, table(query expr) is just syntax sugar for
multiset(query expr). It must be there for compatibility or something.
> multisetCtor :: Parser ValueExpr
> multisetCtor =
> choice
> [keyword_ "multiset" >>
> choice
> [MultisetQueryCtor <$> parens queryExpr
> ,MultisetCtor <$> brackets (commaSep valueExpr)]
> ,keyword_ "table" >>
> MultisetQueryCtor <$> parens queryExpr]
=== interval
interval literals are a special case and we follow the grammar less
permissively here
parse SQL interval literals, something like
interval '5' day (3)
or
interval '5' month
if the literal looks like this:
interval 'something'
then it is parsed as a regular typed literal. It must have a
interval-datetime-field suffix to parse as an intervallit
> interval :: Parser ValueExpr
> interval = keyword_ "interval" >> do
> s <- optionMaybe $ choice [True <$ symbol_ "+"
> ,False <$ symbol_ "-"]
> lit <- stringToken
> q <- optionMaybe intervalQualifier
> mkIt s lit q
> where
> mkIt Nothing val Nothing = return $ TypedLit (TypeName [Name "interval"]) val
> mkIt s val (Just (a,b)) = return $ IntervalLit s val a b
> mkIt (Just {}) _val Nothing = fail "cannot use sign without interval qualifier"
== typed literal, app, special, aggregate, window, iden
All of these start with identifiers (some of the special functions
start with reserved keywords).
they are all variations on suffixes on the basic identifier parser
The windows is a suffix on the app parser
=== iden prefix term
all the value expressions which start with an identifier all the value expressions which start with an identifier
@ -561,33 +597,15 @@ all the value expressions which start with an identifier
> where > where
> iden n = app n <|> return (Iden n) > iden n = app n <|> return (Iden n)
== case expression typed literal
> caseValue :: Parser ValueExpr === special
> caseValue =
> Case <$> (keyword_ "case" *> optionMaybe valueExpr)
> <*> many1 whenClause
> <*> optionMaybe elseClause
> <* keyword_ "end"
> where
> whenClause = (,) <$> (keyword_ "when" *> commaSep1 valueExpr)
> <*> (keyword_ "then" *> valueExpr)
> elseClause = keyword_ "else" *> valueExpr
== miscellaneous keyword operators
These are keyword operators which don't look like normal prefix, These are keyword operators which don't look like normal prefix,
postfix or infix binary operators. They mostly look like function postfix or infix binary operators. They mostly look like function
application but with keywords in the argument list instead of commas application but with keywords in the argument list instead of commas
to separate the arguments. to separate the arguments.
cast: cast(expr as type)
> cast :: Parser ValueExpr
> cast = keyword_ "cast" >>
> parens (Cast <$> valueExpr
> <*> (keyword_ "as" *> typeName))
the special op keywords the special op keywords
parse an operator which is parse an operator which is
operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.) operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
@ -694,6 +712,91 @@ in the source
> $ catMaybes [Just (fa,StringLit ch) > $ catMaybes [Just (fa,StringLit ch)
> ,Just ("from", fr)] > ,Just ("from", fr)]
=== app, aggregate, window
this represents anything which syntactically looks like regular C
function application: an identifier, parens with comma sep value
expression arguments.
The parsing for the aggregate extensions is here as well:
aggregate([all|distinct] args [order by orderitems])
TODO: try to refactor the parser to not allow distinct/all or order by
if there are no value exprs
> aggOrApp :: [Name] -> Parser ValueExpr
> aggOrApp n =
> makeApp n
> <$> parens ((,,) <$> (fromMaybe SQDefault <$> duplicates)
> <*> choice [commaSep valueExpr]
> <*> (optionMaybe orderBy))
> where
> makeApp i (SQDefault,es,Nothing) = App i es
> makeApp i (d,es,od) = AggregateApp i d es (fromMaybe [] od)
> app :: [Name] -> Parser ValueExpr
> app n = aggOrApp n >>= optionSuffix windowSuffix
==== window suffix
parse a window call as a suffix of a regular function call
this looks like this:
functionname(args) over ([partition by ids] [order by orderitems])
No support for explicit frames yet.
The convention in this file is that the 'Suffix', erm, suffix on
parser names means that they have been left factored. These are almost
always used with the optionSuffix combinator.
> windowSuffix :: ValueExpr -> Parser ValueExpr
> windowSuffix (App f es) =
> keyword_ "over"
> *> parens (WindowApp f es
> <$> option [] partitionBy
> <*> option [] orderBy
> <*> optionMaybe frameClause)
> where
> partitionBy = keywords_ ["partition","by"] >> commaSep1 valueExpr
> frameClause =
> mkFrame <$> choice [FrameRows <$ keyword_ "rows"
> ,FrameRange <$ keyword_ "range"]
> <*> frameStartEnd
> frameStartEnd =
> choice
> [keyword_ "between" >>
> mkFrameBetween <$> frameLimit True
> <*> (keyword_ "and" *> frameLimit True)
> ,mkFrameFrom <$> frameLimit False]
> -- use the bexpression style from the between parsing for frame between
> frameLimit useB =
> choice
> [Current <$ keywords_ ["current", "row"]
> -- todo: create an automatic left factor for stuff like
> -- this
> ,keyword_ "unbounded" >>
> choice [UnboundedPreceding <$ keyword_ "preceding"
> ,UnboundedFollowing <$ keyword_ "following"]
> ,do
> e <- if useB then valueExprB else valueExpr
> choice [Preceding e <$ keyword_ "preceding"
> ,Following e <$ keyword_ "following"]
> ]
> mkFrameBetween s e rs = FrameBetween rs s e
> mkFrameFrom s rs = FrameFrom rs s
> mkFrame rs c = c rs
> windowSuffix _ = fail ""
== suffixes
These are all generic suffixes on any value expr
=== in
in: two variations: in: two variations:
a in (expr0, expr1, ...) a in (expr0, expr1, ...)
a in (queryexpr) a in (queryexpr)
@ -711,6 +814,7 @@ this is parsed as a postfix operator which is why it is in this form
> ,False <$ keywords_ ["not","in"]] > ,False <$ keywords_ ["not","in"]]
> mkIn i v = \e -> In i e v > mkIn i v = \e -> In i e v
=== between
between: between:
expr between expr and expr expr between expr and expr
@ -736,16 +840,7 @@ and operator. This is the call to valueExprB.
> ,"not between" <$ try (keywords_ ["not","between"])] > ,"not between" <$ try (keywords_ ["not","between"])]
> makeOp n b c = \a -> SpecialOp [n] [a,b,c] > makeOp n b c = \a -> SpecialOp [n] [a,b,c]
subquery expression: === quantified comparison
[exists|unique] (queryexpr)
> subquery :: Parser ValueExpr
> subquery = SubQueryExpr <$> sqkw <*> parens queryExpr
> where
> sqkw = choice
> [SqExists <$ keyword_ "exists"
> ,SqUnique <$ keyword_ "unique"]
a = any (select * from t) a = any (select * from t)
@ -763,6 +858,8 @@ a = any (select * from t)
> ,CPSome <$ keyword_ "some" > ,CPSome <$ keyword_ "some"
> ,CPAll <$ keyword_ "all"] > ,CPAll <$ keyword_ "all"]
=== match
a match (select a from t) a match (select a from t)
> matchPredicate :: Parser (ValueExpr -> ValueExpr) > matchPredicate :: Parser (ValueExpr -> ValueExpr)
@ -772,28 +869,14 @@ a match (select a from t)
> q <- parens queryExpr > q <- parens queryExpr
> return $ \v -> Match v u q > return $ \v -> Match v u q
=== array subscript
> arrayPostfix :: Parser (ValueExpr -> ValueExpr) > arrayPostfix :: Parser (ValueExpr -> ValueExpr)
> arrayPostfix = do > arrayPostfix = do
> es <- brackets (commaSep valueExpr) > es <- brackets (commaSep valueExpr)
> return $ \v -> Array v es > return $ \v -> Array v es
> arrayCtor :: Parser ValueExpr === escape
> arrayCtor = keyword_ "array" >>
> choice
> [ArrayCtor <$> parens queryExpr
> ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
> multisetCtor :: Parser ValueExpr
> multisetCtor =
> choice
> [keyword_ "multiset" >>
> choice
> [MultisetQueryCtor <$> parens queryExpr
> ,MultisetCtor <$> brackets (commaSep valueExpr)]
> ,keyword_ "table" >>
> MultisetQueryCtor <$> parens queryExpr]
> escape :: Parser (ValueExpr -> ValueExpr) > escape :: Parser (ValueExpr -> ValueExpr)
> escape = do > escape = do
@ -803,23 +886,16 @@ a match (select a from t)
> c <- anyChar > c <- anyChar
> return $ \v -> ctor v c > return $ \v -> ctor v c
=== collate
> collate :: Parser (ValueExpr -> ValueExpr) > collate :: Parser (ValueExpr -> ValueExpr)
> collate = do > collate = do
> keyword_ "collate" > keyword_ "collate"
> i <- names > i <- names
> return $ \v -> Collate v i > return $ \v -> Collate v i
== value expression parens, row ctor and scalar subquery
> parensTerm :: Parser ValueExpr == operators
> parensTerm = parens $ choice
> [SubQueryExpr SqSq <$> queryExpr
> ,ctor <$> commaSep1 valueExpr]
> where
> ctor [a] = Parens a
> ctor as = SpecialOp [Name "rowctor"] as
== operator parsing
The 'regular' operators in this parsing and in the abstract syntax are The 'regular' operators in this parsing and in the abstract syntax are
unary prefix, unary postfix and binary infix operators. The operators unary prefix, unary postfix and binary infix operators. The operators
@ -832,7 +908,7 @@ TODO: to fix the parsing completely, I think will need to parse
without precedence and associativity and fix up afterwards, since SQL without precedence and associativity and fix up afterwards, since SQL
syntax is way too messy. It might be possible to avoid this if we syntax is way too messy. It might be possible to avoid this if we
wanted to avoid extensibility and to not be concerned with parse error wanted to avoid extensibility and to not be concerned with parse error
messages, but both of these are considered too important. messages, but both of these are too important.
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]] > opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
> opTable bExpr = > opTable bExpr =
@ -930,10 +1006,7 @@ messages, but both of these are considered too important.
> prefix' p = E.Prefix . chainl1 p $ return (.) > prefix' p = E.Prefix . chainl1 p $ return (.)
> postfix' p = E.Postfix . chainl1 p $ return (flip (.)) > postfix' p = E.Postfix . chainl1 p $ return (flip (.))
== value expressions == value expression top level
TODO:
left factor stuff which starts with identifier
This parses most of the value exprs.The order of the parsers and use This parses most of the value exprs.The order of the parsers and use
of try is carefully done to make everything work. It is a little of try is carefully done to make everything work. It is a little
@ -946,14 +1019,14 @@ fragile and could at least do with some heavy explanation.
> term = choice [literal > term = choice [literal
> ,parameter > ,parameter
> ,hostParameter > ,hostParameter
> ,star
> ,parensTerm
> ,caseValue > ,caseValue
> ,cast > ,cast
> ,arrayCtor > ,arrayCtor
> ,multisetCtor > ,multisetCtor
> ,specialOpKs
> ,parensTerm
> ,subquery > ,subquery
> ,star > ,specialOpKs
> ,idenPrefixTerm] > ,idenPrefixTerm]
> <?> "value expression" > <?> "value expression"
@ -962,8 +1035,7 @@ expose the b expression for window frame clause range between
> valueExprB :: Parser ValueExpr > valueExprB :: Parser ValueExpr
> valueExprB = E.buildExpressionParser (opTable True) term > valueExprB = E.buildExpressionParser (opTable True) term
== helper parsers
== helpers for value exprs
> intervalQualifier :: Parser (IntervalTypeField,Maybe IntervalTypeField) > intervalQualifier :: Parser (IntervalTypeField,Maybe IntervalTypeField)
> intervalQualifier = > intervalQualifier =
@ -990,7 +1062,6 @@ use a data type for the datetime field?
> choice [All <$ keyword_ "all" > choice [All <$ keyword_ "all"
> ,Distinct <$ keyword "distinct"] > ,Distinct <$ keyword "distinct"]
------------------------------------------------- -------------------------------------------------
= query expressions = query expressions