From b703e04af3f045b109f1dfbc4b0ae1849c36c772 Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Tue, 17 Dec 2013 19:17:03 +0200 Subject: [PATCH] add syntax for new grouping expressions --- Language/SQL/SimpleSQL/Parser.lhs | 4 +- Language/SQL/SimpleSQL/Pretty.lhs | 10 +- Language/SQL/SimpleSQL/Syntax.lhs | 11 +- TODO | 14 ++ simple-sql-parser.cabal | 1 + tools/Language/SQL/SimpleSQL/FullQueries.lhs | 2 +- tools/Language/SQL/SimpleSQL/GroupBy.lhs | 226 ++++++++++++++++++ .../SQL/SimpleSQL/QueryExprComponents.lhs | 21 +- tools/Language/SQL/SimpleSQL/Tests.lhs | 5 +- 9 files changed, 266 insertions(+), 28 deletions(-) create mode 100644 tools/Language/SQL/SimpleSQL/GroupBy.lhs diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 817357c..8638bc8 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -598,10 +598,10 @@ where, having, limit, offset). > swhere :: P ScalarExpr > swhere = keywordScalarExpr "where" -> sgroupBy :: P [ScalarExpr] +> sgroupBy :: P [GroupingExpr] > sgroupBy = try (keyword_ "group") > *> keyword_ "by" -> *> commaSep1 scalarExpr +> *> commaSep1 (SimpleGroup <$> scalarExpr) > having :: P ScalarExpr > having = keywordScalarExpr "having" diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index 3b20f62..56b9a24 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -255,10 +255,16 @@ > (\e -> sep [text k > ,nest (length k + 1) $ scalarExpr e]) -> grpBy :: [ScalarExpr] -> Doc +> grpBy :: [GroupingExpr] -> Doc > grpBy [] = empty > grpBy gs = sep [text "group by" -> ,nest 9 $ commaSep $ map scalarExpr gs] +> ,nest 9 $ commaSep $ map ge gs] +> where +> ge (SimpleGroup e) = scalarExpr e +> ge (GroupingParens g) = parens (commaSep $ map ge g) +> ge (Cube es) = text "cube" <> parens (commaSep $ map ge es) +> ge (Rollup es) = text "rollup" <> parens (commaSep $ map ge es) +> ge (GroupingSets es) = text "grouping sets" <> parens (commaSep $ map ge es) > orderBy :: [OrderField] -> Doc > orderBy [] = empty diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index 82669e4..ae11d3a 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -20,6 +20,7 @@ > ,CombineOp(..) > ,Corresponding(..) > ,Alias(..) +> ,GroupingExpr(..) > -- ** From > ,TableRef(..) > ,JoinType(..) @@ -189,7 +190,7 @@ > -- ^ the column aliases and the expressions > ,qeFrom :: [TableRef] > ,qeWhere :: Maybe ScalarExpr -> ,qeGroupBy :: [ScalarExpr] +> ,qeGroupBy :: [GroupingExpr] > ,qeHaving :: Maybe ScalarExpr > ,qeOrderBy :: [OrderField] > ,qeOffset :: Maybe ScalarExpr @@ -240,6 +241,14 @@ I'm not sure if this is valid syntax or not. > -- | Corresponding, an option for the set operators > data Corresponding = Corresponding | Respectively deriving (Eq,Show,Read) +> data GroupingExpr +> = GroupingParens [GroupingExpr] +> | Cube [GroupingExpr] +> | Rollup [GroupingExpr] +> | GroupingSets [GroupingExpr] +> | SimpleGroup ScalarExpr +> deriving (Eq,Show,Read) + > -- | Represents a entry in the csv of tables in the from clause. > data TableRef = -- | from t > TRSimple Name diff --git a/TODO b/TODO index 23f641b..623b2b4 100644 --- a/TODO +++ b/TODO @@ -7,6 +7,19 @@ group by extensions. Question: some of the syntax can be represented by app and row ctor, should this be reused or new syntax created (the standard has special grammar for cube and rollup). +SELECT EmpId, Yr, SUM(Sales) AS Sales +FROM Sales +GROUP BY GROUPING SETS((EmpId, Yr), (EmpId), ()) + +SELECT EmpId, Yr, SUM(Sales) AS Sales +FROM Sales +GROUP BY GROUPING SETS((EmpId, Yr), (EmpId)) + +{ (grouping_column[, ...]) | ROLLUP (grouping_column[, ...]) | +CUBE (grouping_column[, ...]) | GROUPING SETS (grouping_set_list) | +( ) | grouping_set, grouping_set_list } + + collate? -> postfix operator which binds very tightly: a < 'foo' collate 'C' -> @@ -15,6 +28,7 @@ Op "<" [Iden "a", SpecialOp "collate" [StringLit 'foo', StringLit also postfix in order by: select a from t order by a collate 'C': add to order by syntax, one collation per column +have to do fixity for this to work much more table reference tests, for joins and aliases etc. diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal index 02d2f42..a5d01ae 100644 --- a/simple-sql-parser.cabal +++ b/simple-sql-parser.cabal @@ -54,6 +54,7 @@ Test-Suite Tests Language.SQL.SimpleSQL.Fixity, Language.SQL.SimpleSQL.FullQueries, + Language.SQL.SimpleSQL.GroupBy, Language.SQL.SimpleSQL.Postgres, Language.SQL.SimpleSQL.QueryExprComponents, Language.SQL.SimpleSQL.QueryExprs, diff --git a/tools/Language/SQL/SimpleSQL/FullQueries.lhs b/tools/Language/SQL/SimpleSQL/FullQueries.lhs index b4e3ecb..81c88ff 100644 --- a/tools/Language/SQL/SimpleSQL/FullQueries.lhs +++ b/tools/Language/SQL/SimpleSQL/FullQueries.lhs @@ -30,7 +30,7 @@ Some tests for parsing full queries. > "+" (Iden "d")])] > ,qeFrom = [TRSimple "t", TRSimple "u"] > ,qeWhere = Just $ BinOp (Iden "a") ">" (NumLit "5") -> ,qeGroupBy = [Iden "a"] +> ,qeGroupBy = [SimpleGroup $ Iden "a"] > ,qeHaving = Just $ BinOp (App "count" [NumLit "1"]) > ">" (NumLit "5") > ,qeOrderBy = [OrderField (Iden "s") Asc NullsOrderDefault] diff --git a/tools/Language/SQL/SimpleSQL/GroupBy.lhs b/tools/Language/SQL/SimpleSQL/GroupBy.lhs new file mode 100644 index 0000000..98ff090 --- /dev/null +++ b/tools/Language/SQL/SimpleSQL/GroupBy.lhs @@ -0,0 +1,226 @@ + +Here are the tests for the group by component of query exprs + +> {-# LANGUAGE OverloadedStrings #-} +> module Language.SQL.SimpleSQL.GroupBy (groupByTests) where + +> import Language.SQL.SimpleSQL.TestTypes +> import Language.SQL.SimpleSQL.Syntax + + +> groupByTests :: TestItem +> groupByTests = Group "groupByTests" +> [simpleGroupBy +> ,newGroupBy +> ] + +> simpleGroupBy :: TestItem +> simpleGroupBy = Group "simpleGroupBy" $ map (uncurry TestQueryExpr) +> [("select a,sum(b) from t group by a" +> ,makeSelect {qeSelectList = [(Nothing, Iden "a") +> ,(Nothing, App "sum" [Iden "b"])] +> ,qeFrom = [TRSimple "t"] +> ,qeGroupBy = [SimpleGroup $ Iden "a"] +> }) + +> ,("select a,b,sum(c) from t group by a,b" +> ,makeSelect {qeSelectList = [(Nothing, Iden "a") +> ,(Nothing, Iden "b") +> ,(Nothing, App "sum" [Iden "c"])] +> ,qeFrom = [TRSimple "t"] +> ,qeGroupBy = [SimpleGroup $ Iden "a" +> ,SimpleGroup $ Iden "b"] +> }) +> ] + +test the new group by (), grouping sets, cube and rollup syntax (not +sure which sql version they were introduced, 1999 or 2003 I think). + +> newGroupBy :: TestItem +> newGroupBy = Group "newGroupBy" $ map (uncurry TestQueryExpr) +> [ + +group by () + + +GROUP BY a +GROUP BY GROUPING SETS((a)) +GROUP BY a,b,c +GROUP BY GROUPING SETS((a,b,c)) +GROUP BY ROLLUP(a,b) +GROUP BY GROUPING SETS((a,b) +(a) +() ) +GROUP BY ROLLUP(b,a) +GROUP BY GROUPING SETS((b,a) +(b) +() ) +GROUP BY CUBE(a,b,c) +GROUP BY GROUPING SETS((a,b,c) +(a,b) +(a,c) +(b,c) +(a) +(b) +(c) +() ) +GROUP BY ROLLUP(Province, County, City) +GROUP BY ROLLUP(Province, (County, City)) +GROUP BY ROLLUP(Province, (County, City)) +GROUP BY GROUPING SETS((Province, County, City) +(Province) +() ) +GROUP BY GROUPING SETS((Province, County, City) +(Province, County) +(Province) +() ) +GROUP BY a, ROLLUP(b,c) +GROUP BY GROUPING SETS((a,b,c) +(a,b) +(a) ) +GROUP BY a, b, ROLLUP(c,d) +GROUP BY GROUPING SETS((a,b,c,d) +(a,b,c) +(a,b) ) +GROUP BY ROLLUP(a), ROLLUP(b,c) +GROUP BY GROUPING SETS((a,b,c) +(a,b) +(a) +(b,c) +(b) +() ) +GROUP BY ROLLUP(a), CUBE(b,c) +GROUP BY GROUPING SETS((a,b,c) +(a,b) +(a,c) +(a) +(b,c) +(b) +(c) +() ) +GROUP BY CUBE(a,b), ROLLUP(c,d) +GROUP BY GROUPING SETS((a,b,c,d) +(a,b,c) +(a,b) +(a,c,d) +(a,c) +(a) +(b,c,d) +(b,c) +(b) +(c,d) +(c) +() ) +GROUP BY a, ROLLUP(a,b) +GROUP BY GROUPING SETS((a,b) +(a) ) +GROUP BY Region, +ROLLUP(Sales_Person, WEEK(Sales_Date)), +CUBE(YEAR(Sales_Date), MONTH (Sales_Date)) +GROUP BY ROLLUP (Region, Sales_Person, WEEK(Sales_Date), +YEAR(Sales_Date), MONTH(Sales_Date) ) + + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +SALES_PERSON, SUM(SALES) AS UNITS_SOLD +FROM SALES +WHERE WEEK(SALES_DATE) = 13 +GROUP BY WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON +ORDER BY WEEK, DAY_WEEK, SALES_PERSON + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +SALES_PERSON, SUM(SALES) AS UNITS_SOLD +FROM SALES +WHERE WEEK(SALES_DATE) = 13 +GROUP BY GROUPING SETS ( (WEEK(SALES_DATE), SALES_PERSON), +(DAYOFWEEK(SALES_DATE), SALES_PERSON)) +ORDER BY WEEK, DAY_WEEK, SALES_PERSON + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +SALES_PERSON, SUM(SALES) AS UNITS_SOLD +FROM SALES +WHERE WEEK(SALES_DATE) = 13 +GROUP BY ROLLUP ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON ) +ORDER BY WEEK, DAY_WEEK, SALES_PERSON + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +SALES_PERSON, SUM(SALES) AS UNITS_SOLD +FROM SALES +WHERE WEEK(SALES_DATE) = 13 +GROUP BY CUBE ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON ) +ORDER BY WEEK, DAY_WEEK, SALES_PERSON + +SELECT SALES_PERSON, +MONTH(SALES_DATE) AS MONTH, +SUM(SALES) AS UNITS_SOLD +FROM SALES +GROUP BY GROUPING SETS ( (SALES_PERSON, MONTH(SALES_DATE)), +() +) +ORDER BY SALES_PERSON, MONTH + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +SUM(SALES) AS UNITS_SOLD +FROM SALES +GROUP BY ROLLUP ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE) ) +ORDER BY WEEK, DAY_WEEK + +SELECT MONTH(SALES_DATE) AS MONTH, +REGION, +SUM(SALES) AS UNITS_SOLD +FROM SALES +GROUP BY ROLLUP ( MONTH(SALES_DATE), REGION ); +ORDER BY MONTH, REGION + +SELECT WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +MONTH(SALES_DATE) AS MONTH, +REGION, +SUM(SALES) AS UNITS_SOLD +FROM SALES +GROUP BY GROUPING SETS ( ROLLUP( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE) ), +ROLLUP( MONTH(SALES_DATE), REGION ) ) +ORDER BY WEEK, DAY_WEEK, MONTH, REGION + +SELECT R1, R2, +WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +MONTH(SALES_DATE) AS MONTH, +REGION, SUM(SALES) AS UNITS_SOLD +FROM SALES,(VALUES(’GROUP 1’,’GROUP 2’)) AS X(R1,R2) +GROUP BY GROUPING SETS ((R1, ROLLUP(WEEK(SALES_DATE), +DAYOFWEEK(SALES_DATE))), +(R2,ROLLUP( MONTH(SALES_DATE), REGION ) ) +ORDER BY WEEK, DAY_WEEK, MONTH, REGION +) + +SELECT COALESCE(R1,R2) AS GROUP, +WEEK(SALES_DATE) AS WEEK, +DAYOFWEEK(SALES_DATE) AS DAY_WEEK, +MONTH(SALES_DATE) AS MONTH, +REGION, SUM(SALES) AS UNITS_SOLD +FROM SALES,(VALUES(’GROUP 1’,’GROUP 2’)) AS X(R1,R2) +GROUP BY GROUPING SETS ((R1, ROLLUP(WEEK(SALES_DATE), +DAYOFWEEK(SALES_DATE))), +(R2,ROLLUP( MONTH(SALES_DATE), REGION ) ) +ORDER BY GROUP, WEEK, DAY_WEEK, MONTH, REGION; +) + +SELECT MONTH(SALES_DATE) AS MONTH, +REGION, +SUM(SALES) AS UNITS_SOLD, +MAX(SALES) AS BEST_SALE, +824 +SQL Reference Volume 1 +Examples of grouping sets, cube, and rollup queries +CAST(ROUND(AVG(DECIMAL(SALES)),2) AS DECIMAL(5,2)) AS AVG_UNITS_SOLD +FROM SALES +GROUP BY CUBE(MONTH(SALES_DATE),REGION) +ORDER BY MONTH, REGION + +> ] diff --git a/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs b/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs index abc2427..521bc8f 100644 --- a/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs +++ b/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs @@ -17,7 +17,6 @@ These are a few misc tests which don't fit anywhere else. > [duplicates > ,selectLists > ,whereClause -> ,groupByClause > ,having > ,orderBy > ,limit @@ -75,31 +74,13 @@ These are a few misc tests which don't fit anywhere else. > ,qeWhere = Just $ BinOp (Iden "a") "=" (NumLit "5")}) > ] -> groupByClause :: TestItem -> groupByClause = Group "groupByClause" $ map (uncurry TestQueryExpr) -> [("select a,sum(b) from t group by a" -> ,makeSelect {qeSelectList = [(Nothing, Iden "a") -> ,(Nothing, App "sum" [Iden "b"])] -> ,qeFrom = [TRSimple "t"] -> ,qeGroupBy = [Iden "a"] -> }) - -> ,("select a,b,sum(c) from t group by a,b" -> ,makeSelect {qeSelectList = [(Nothing, Iden "a") -> ,(Nothing, Iden "b") -> ,(Nothing, App "sum" [Iden "c"])] -> ,qeFrom = [TRSimple "t"] -> ,qeGroupBy = [Iden "a",Iden "b"] -> }) -> ] - > having :: TestItem > having = Group "having" $ map (uncurry TestQueryExpr) > [("select a,sum(b) from t group by a having sum(b) > 5" > ,makeSelect {qeSelectList = [(Nothing, Iden "a") > ,(Nothing, App "sum" [Iden "b"])] > ,qeFrom = [TRSimple "t"] -> ,qeGroupBy = [Iden "a"] +> ,qeGroupBy = [SimpleGroup $ Iden "a"] > ,qeHaving = Just $ BinOp (App "sum" [Iden "b"]) > ">" (NumLit "5") > }) diff --git a/tools/Language/SQL/SimpleSQL/Tests.lhs b/tools/Language/SQL/SimpleSQL/Tests.lhs index 8d148b6..54349a1 100644 --- a/tools/Language/SQL/SimpleSQL/Tests.lhs +++ b/tools/Language/SQL/SimpleSQL/Tests.lhs @@ -26,10 +26,10 @@ tpch tests > import Language.SQL.SimpleSQL.TestTypes > import Language.SQL.SimpleSQL.FullQueries +> import Language.SQL.SimpleSQL.GroupBy > import Language.SQL.SimpleSQL.Postgres > import Language.SQL.SimpleSQL.QueryExprComponents > import Language.SQL.SimpleSQL.QueryExprs - > import Language.SQL.SimpleSQL.TableRefs > import Language.SQL.SimpleSQL.ScalarExprs > import Language.SQL.SimpleSQL.Tpch @@ -44,8 +44,9 @@ order on the generated documentation. > Group "parserTest" > [scalarExprTests > ,queryExprComponentTests -> ,tableRefTests > ,queryExprsTests +> ,tableRefTests +> ,groupByTests > ,fullQueriesTests > ,postgresTests > ,tpchTests