From 7f90318647f8af0acaa13784446e624a1139137c Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Fri, 9 May 2014 23:26:18 +0300 Subject: [PATCH] refactor the app/agg/window parsing --- Language/SQL/SimpleSQL/Parser.lhs | 162 +++++++++++++++++------------- 1 file changed, 90 insertions(+), 72 deletions(-) diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 81e9f6f..6ab38d4 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -185,7 +185,7 @@ fixing them in the syntax but leaving them till the semantic checking > import Control.Monad.Identity (Identity) > import Control.Monad (guard, void, when) > import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>), (<**>), pure) -> import Data.Maybe (fromMaybe,catMaybes) +> import Data.Maybe (catMaybes) > import Data.Char (toLower) > import Text.Parsec (setPosition,setSourceColumn,setSourceLine,getPosition > ,option,between,sepBy,sepBy1,string,manyTill,anyChar @@ -476,9 +476,9 @@ with some work to improve the readability still. > charSet = keywords_ ["character", "set"] *> names > tcollate = keyword_ "collate" *> names > ---------------------------- -> tnSuffix = multisetSuffix <|> arrayTNSuffix -> multisetSuffix = MultisetTypeName <$ keyword_ "multiset" -> arrayTNSuffix = keyword_ "array" *> +> tnSuffix = multiset <|> array +> multiset = MultisetTypeName <$ keyword_ "multiset" +> array = keyword_ "array" *> > (ArrayTypeName <$$> optionMaybe (brackets unsignedInteger)) > ---------------------------- > -- this parser handles the fixed set of multi word @@ -702,9 +702,7 @@ all the value expressions which start with an identifier > idenExpr = > -- todo: work out how to left factor this > try (TypedLit <$> typeName <*> stringToken) -> <|> (names >>= iden) -> where -> iden n = app n <|> pure (Iden n) +> <|> (names <**> option Iden app) === special @@ -819,62 +817,87 @@ in the source > $ catMaybes [Just (fa,StringLit ch) > ,Just ("from", fr)] - === app, aggregate, window -this represents anything which syntactically looks like regular C -function application: an identifier, parens with comma sep value -expression arguments. +This parses all these variations: +normal function application with just a csv of value exprs +aggregate variations (distinct, order by in parens, filter and where + suffixes) +window apps (fn/agg followed by over) -The parsing for the aggregate extensions is here as well: +This code still needs some tidying, and eventually has to be left +factored with the typename 'literal' parser. -aggregate([all|distinct] args [order by orderitems]) +> app :: Parser ([Name] -> ValueExpr) +> app = +> openParen *> choice +> [((,,) <$> duplicates +> <*> commaSep1 valueExpr +> <*> (option [] orderBy <* closeParen)) +> <**> (afilterz +> <|> pure (\(d,es,ob) f -> AggregateApp f d es ob Nothing)) +> -- separate cases with no all or distinct which have at least one +> -- value expr +> ,commaSep1 valueExpr +> <**> choice +> [closeParen *> choice [window +> ,withinGroup +> ,afiltery +> ,pure (flip App)] +> ,(orderBy <* closeParen) +> <**> +> choice [afilterx +> ,pure (\ob es f -> AggregateApp f SQDefault es ob Nothing)]] +> ,([] <$ closeParen) +> <**> choice [window +> ,withinGroup +> ,pure (flip App)] +> ] -TODO: try to refactor the parser to not allow distinct/all or order by -if there are no value exprs +todo: brain no work - fix this mess. Should be able to convert these +to simple applicative functions then inline them -> aggOrApp :: [Name] -> Parser ValueExpr -> aggOrApp n = -> makeApp n -> <$> parens ((,,) <$> (fromMaybe SQDefault <$> duplicates) -> <*> choice [commaSep valueExpr] -> <*> (optionMaybe orderBy)) +> afilterx :: Parser ([SortSpec] +> -> [ValueExpr] +> -> [Name] +> -> ValueExpr) +> afilterx = do +> f <- afilter +> pure $ \ob es nm -> f SQDefault es ob nm + +> afiltery :: Parser ([ValueExpr] +> -> [Name] +> -> ValueExpr) +> afiltery = do +> f <- afilter +> pure $ \es nm -> f SQDefault es [] nm + + +> afilterz :: Parser ((SetQuantifier +> ,[ValueExpr] +> ,[SortSpec]) +> -> [Name] +> -> ValueExpr) +> afilterz = do +> f <- afilter +> pure $ \(sq,es,ob) nm -> f sq es ob nm + +> afilter :: Parser (SetQuantifier +> -> [ValueExpr] +> -> [SortSpec] +> -> [Name] +> -> ValueExpr) +> afilter = +> keyword_ "filter" *> (ctor <$> parens (keyword_ "where" *> valueExpr)) > where -> makeApp i (SQDefault,es,Nothing) = App i es -> makeApp i (d,es,od) = AggregateApp i d es (fromMaybe [] od) Nothing +> ctor ve sq es ob f = AggregateApp f sq es ob (Just ve) -TODO: change all these suffix functions to use type -Parser (ValueExpr -> ValueExpr) +> withinGroup :: Parser ([ValueExpr] -> [Name] -> ValueExpr) +> withinGroup = +> keywords_ ["within", "group"] *> +> ((\ob es nm -> AggregateAppGroup nm es ob) <$> parens orderBy) -> app :: [Name] -> Parser ValueExpr -> app n = `aggOrApp n >>= \a -> choice -> [windowSuffix a -> ,filterSuffix a -> ,withinGroupSuffix a -> ,pure a] - -> filterSuffix :: ValueExpr -> Parser ValueExpr -> filterSuffix (App nm es) = -> filterSuffix (AggregateApp nm SQDefault es [] Nothing) -> filterSuffix agg@(AggregateApp {}) = -> filterSuffix' agg -> filterSuffix _ = fail "" - -> filterSuffix' :: ValueExpr -> Parser ValueExpr -> filterSuffix' agg = -> keyword_ "filter" >> -> rep <$> parens(keyword_ "where" *> (Just <$> valueExpr)) -> where -> rep f = agg {aggFilter = f} - - - -> withinGroupSuffix :: ValueExpr -> Parser ValueExpr -> withinGroupSuffix (App nm es) = keywords_ ["within", "group"] >> -> AggregateAppGroup nm es <$> parens orderBy -> withinGroupSuffix _ = fail "" - -==== window suffix +==== window parse a window call as a suffix of a regular function call this looks like this: @@ -882,18 +905,16 @@ functionname(args) over ([partition by ids] [order by orderitems]) No support for explicit frames yet. -The convention in this file is that the 'Suffix', erm, suffix on -parser names means that they have been left factored. These are almost -always used with the optionSuffix combinator. +TODO: add window support for other aggregate variations, needs some +changes to the syntax also -> windowSuffix :: ValueExpr -> Parser ValueExpr -> windowSuffix (App f es) = -> keyword_ "over" -> *> parens (WindowApp f es -> <$> option [] partitionBy -> <*> option [] orderBy -> <*> optionMaybe frameClause) +> window :: Parser ([ValueExpr] -> [Name] -> ValueExpr) +> window = keyword_ "over" *> parens (ctorWrap +> <$> option [] partitionBy +> <*> option [] orderBy +> <*> optionMaybe frameClause) > where +> ctorWrap pb ob fc es f = WindowApp f es pb ob fc > partitionBy = keywords_ ["partition","by"] >> commaSep1 valueExpr > frameClause = > mkFrame <$> choice [FrameRows <$ keyword_ "rows" @@ -922,9 +943,6 @@ always used with the optionSuffix combinator. > mkFrameBetween s e rs = FrameBetween rs s e > mkFrameFrom s rs = FrameFrom rs s > mkFrame rs c = c rs -> windowSuffix _ = fail "" - - == suffixes @@ -1123,7 +1141,7 @@ messages, but both of these are too important. > o <- choice [Union <$ keyword_ "union" > ,Intersect <$ keyword_ "intersect" > ,Except <$ keyword_ "except"] -> d <- fromMaybe SQDefault <$> duplicates +> d <- option SQDefault duplicates > pure (\a b -> MultisetBinOp a o d b)) > E.AssocLeft > prefixKeyword nm = prefix (keyword_ nm) nm @@ -1198,8 +1216,8 @@ use a data type for the datetime field? This is used in multiset operations (value expr), selects (query expr) and set operations (query expr). -> duplicates :: Parser (Maybe SetQuantifier) -> duplicates = optionMaybe $ +> duplicates :: Parser SetQuantifier +> duplicates = > choice [All <$ keyword_ "all" > ,Distinct <$ keyword "distinct"] @@ -1357,7 +1375,7 @@ and union, etc.. > where > select = keyword_ "select" >> > mkSelect -> <$> (fromMaybe SQDefault <$> duplicates) +> <$> option SQDefault duplicates > <*> selectList > <*> optionMaybe tableExpression > mkSelect d sl Nothing = @@ -1396,7 +1414,7 @@ be in the public syntax? > setOp :: Parser (QueryExpr -> QueryExpr -> QueryExpr) > setOp = cq > <$> setOpK -> <*> (fromMaybe SQDefault <$> duplicates) +> <*> option SQDefault duplicates > <*> corr > where > cq o d c q0 q1 = CombineQueryExpr q0 o d c q1