From b703e04af3f045b109f1dfbc4b0ae1849c36c772 Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Tue, 17 Dec 2013 19:17:03 +0200
Subject: [PATCH] add syntax for new grouping expressions

---
 Language/SQL/SimpleSQL/Parser.lhs             |   4 +-
 Language/SQL/SimpleSQL/Pretty.lhs             |  10 +-
 Language/SQL/SimpleSQL/Syntax.lhs             |  11 +-
 TODO                                          |  14 ++
 simple-sql-parser.cabal                       |   1 +
 tools/Language/SQL/SimpleSQL/FullQueries.lhs  |   2 +-
 tools/Language/SQL/SimpleSQL/GroupBy.lhs      | 226 ++++++++++++++++++
 .../SQL/SimpleSQL/QueryExprComponents.lhs     |  21 +-
 tools/Language/SQL/SimpleSQL/Tests.lhs        |   5 +-
 9 files changed, 266 insertions(+), 28 deletions(-)
 create mode 100644 tools/Language/SQL/SimpleSQL/GroupBy.lhs

diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs
index 817357c..8638bc8 100644
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@@ -598,10 +598,10 @@ where, having, limit, offset).
 > swhere :: P ScalarExpr
 > swhere = keywordScalarExpr "where"
 
-> sgroupBy :: P [ScalarExpr]
+> sgroupBy :: P [GroupingExpr]
 > sgroupBy = try (keyword_ "group")
 >            *> keyword_ "by"
->            *> commaSep1 scalarExpr
+>            *> commaSep1 (SimpleGroup <$> scalarExpr)
 
 > having :: P ScalarExpr
 > having = keywordScalarExpr "having"
diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs
index 3b20f62..56b9a24 100644
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@@ -255,10 +255,16 @@
 >       (\e -> sep [text k
 >                  ,nest (length k + 1) $ scalarExpr e])
 
-> grpBy :: [ScalarExpr] -> Doc
+> grpBy :: [GroupingExpr] -> Doc
 > grpBy [] = empty
 > grpBy gs = sep [text "group by"
->                ,nest 9 $ commaSep $ map scalarExpr gs]
+>                ,nest 9 $ commaSep $ map ge gs]
+>   where
+>     ge (SimpleGroup e) = scalarExpr e
+>     ge (GroupingParens g) = parens (commaSep $ map ge g)
+>     ge (Cube es) = text "cube" <> parens (commaSep $ map ge es)
+>     ge (Rollup es) = text "rollup" <> parens (commaSep $ map ge es)
+>     ge (GroupingSets es) = text "grouping sets" <> parens (commaSep $ map ge es)
 
 > orderBy :: [OrderField] -> Doc
 > orderBy [] = empty
diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs
index 82669e4..ae11d3a 100644
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@@ -20,6 +20,7 @@
 >     ,CombineOp(..)
 >     ,Corresponding(..)
 >     ,Alias(..)
+>     ,GroupingExpr(..)
 >      -- ** From
 >     ,TableRef(..)
 >     ,JoinType(..)
@@ -189,7 +190,7 @@
 >        -- ^ the column aliases and the expressions
 >       ,qeFrom :: [TableRef]
 >       ,qeWhere :: Maybe ScalarExpr
->       ,qeGroupBy :: [ScalarExpr]
+>       ,qeGroupBy :: [GroupingExpr]
 >       ,qeHaving :: Maybe ScalarExpr
 >       ,qeOrderBy :: [OrderField]
 >       ,qeOffset :: Maybe ScalarExpr
@@ -240,6 +241,14 @@ I'm not sure if this is valid syntax or not.
 > -- | Corresponding, an option for the set operators
 > data Corresponding = Corresponding | Respectively deriving (Eq,Show,Read)
 
+> data GroupingExpr
+>     = GroupingParens [GroupingExpr]
+>     | Cube [GroupingExpr]
+>     | Rollup [GroupingExpr]
+>     | GroupingSets [GroupingExpr]
+>     | SimpleGroup ScalarExpr
+>       deriving (Eq,Show,Read)
+
 > -- | Represents a entry in the csv of tables in the from clause.
 > data TableRef = -- | from t
 >                 TRSimple Name
diff --git a/TODO b/TODO
index 23f641b..623b2b4 100644
--- a/TODO
+++ b/TODO
@@ -7,6 +7,19 @@ group by extensions. Question: some of the syntax can be represented
    by app and row ctor, should this be reused or new syntax created
    (the standard has special grammar for cube and rollup).
 
+SELECT EmpId, Yr, SUM(Sales) AS Sales
+FROM Sales
+GROUP BY GROUPING SETS((EmpId, Yr), (EmpId), ())
+
+SELECT EmpId, Yr, SUM(Sales) AS Sales
+FROM Sales
+GROUP BY GROUPING SETS((EmpId, Yr), (EmpId))
+
+{ (grouping_column[, ...]) | ROLLUP (grouping_column[, ...]) |
+CUBE (grouping_column[, ...]) | GROUPING SETS (grouping_set_list) |
+( ) | grouping_set, grouping_set_list }
+
+
 collate? -> postfix operator which binds very tightly:
 a < 'foo' collate 'C'
 ->
@@ -15,6 +28,7 @@ Op "<" [Iden "a", SpecialOp "collate" [StringLit 'foo', StringLit
   also postfix in order by:
 select a from t order by a collate 'C': add to order by syntax, one
    collation per column
+have to do fixity for this to work
 
 much more table reference tests, for joins and aliases etc.
 
diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal
index 02d2f42..a5d01ae 100644
--- a/simple-sql-parser.cabal
+++ b/simple-sql-parser.cabal
@@ -54,6 +54,7 @@ Test-Suite Tests
                        Language.SQL.SimpleSQL.Fixity,
 
                        Language.SQL.SimpleSQL.FullQueries,
+                       Language.SQL.SimpleSQL.GroupBy,
                        Language.SQL.SimpleSQL.Postgres,
                        Language.SQL.SimpleSQL.QueryExprComponents,
                        Language.SQL.SimpleSQL.QueryExprs,
diff --git a/tools/Language/SQL/SimpleSQL/FullQueries.lhs b/tools/Language/SQL/SimpleSQL/FullQueries.lhs
index b4e3ecb..81c88ff 100644
--- a/tools/Language/SQL/SimpleSQL/FullQueries.lhs
+++ b/tools/Language/SQL/SimpleSQL/FullQueries.lhs
@@ -30,7 +30,7 @@ Some tests for parsing full queries.
 >                                          "+" (Iden "d")])]
 >       ,qeFrom = [TRSimple "t", TRSimple "u"]
 >       ,qeWhere = Just $ BinOp (Iden "a") ">" (NumLit "5")
->       ,qeGroupBy = [Iden "a"]
+>       ,qeGroupBy = [SimpleGroup $ Iden "a"]
 >       ,qeHaving = Just $ BinOp (App "count" [NumLit "1"])
 >                                ">" (NumLit "5")
 >       ,qeOrderBy = [OrderField (Iden "s") Asc NullsOrderDefault]
diff --git a/tools/Language/SQL/SimpleSQL/GroupBy.lhs b/tools/Language/SQL/SimpleSQL/GroupBy.lhs
new file mode 100644
index 0000000..98ff090
--- /dev/null
+++ b/tools/Language/SQL/SimpleSQL/GroupBy.lhs
@@ -0,0 +1,226 @@
+
+Here are the tests for the group by component of query exprs
+
+> {-# LANGUAGE OverloadedStrings #-}
+> module Language.SQL.SimpleSQL.GroupBy (groupByTests) where
+
+> import Language.SQL.SimpleSQL.TestTypes
+> import Language.SQL.SimpleSQL.Syntax
+
+
+> groupByTests :: TestItem
+> groupByTests = Group "groupByTests"
+>     [simpleGroupBy
+>     ,newGroupBy
+>     ]
+
+> simpleGroupBy :: TestItem
+> simpleGroupBy = Group "simpleGroupBy" $ map (uncurry TestQueryExpr)
+>     [("select a,sum(b) from t group by a"
+>      ,makeSelect {qeSelectList = [(Nothing, Iden "a")
+>                                  ,(Nothing, App "sum" [Iden "b"])]
+>                  ,qeFrom = [TRSimple "t"]
+>                  ,qeGroupBy = [SimpleGroup $ Iden "a"]
+>                  })
+
+>     ,("select a,b,sum(c) from t group by a,b"
+>      ,makeSelect {qeSelectList = [(Nothing, Iden "a")
+>                                  ,(Nothing, Iden "b")
+>                                  ,(Nothing, App "sum" [Iden "c"])]
+>                  ,qeFrom = [TRSimple "t"]
+>                  ,qeGroupBy = [SimpleGroup $ Iden "a"
+>                               ,SimpleGroup $ Iden "b"]
+>                  })
+>     ]
+
+test the new group by (), grouping sets, cube and rollup syntax (not
+sure which sql version they were introduced, 1999 or 2003 I think).
+
+> newGroupBy :: TestItem
+> newGroupBy = Group "newGroupBy" $ map (uncurry TestQueryExpr)
+>     [
+
+group by ()
+
+
+GROUP BY a
+GROUP BY GROUPING SETS((a))
+GROUP BY a,b,c
+GROUP BY GROUPING SETS((a,b,c))
+GROUP BY ROLLUP(a,b)
+GROUP BY GROUPING SETS((a,b)
+(a)
+() )
+GROUP BY ROLLUP(b,a)
+GROUP BY GROUPING SETS((b,a)
+(b)
+() )
+GROUP BY CUBE(a,b,c)
+GROUP BY GROUPING SETS((a,b,c)
+(a,b)
+(a,c)
+(b,c)
+(a)
+(b)
+(c)
+() )
+GROUP BY ROLLUP(Province, County, City)
+GROUP BY ROLLUP(Province, (County, City))
+GROUP BY ROLLUP(Province, (County, City))
+GROUP BY GROUPING SETS((Province, County, City)
+(Province)
+() )
+GROUP BY GROUPING SETS((Province, County, City)
+(Province, County)
+(Province)
+() )
+GROUP BY a, ROLLUP(b,c)
+GROUP BY GROUPING SETS((a,b,c)
+(a,b)
+(a) )
+GROUP BY a, b, ROLLUP(c,d)
+GROUP BY GROUPING SETS((a,b,c,d)
+(a,b,c)
+(a,b) )
+GROUP BY ROLLUP(a), ROLLUP(b,c)
+GROUP BY GROUPING SETS((a,b,c)
+(a,b)
+(a)
+(b,c)
+(b)
+() )
+GROUP BY ROLLUP(a), CUBE(b,c)
+GROUP BY GROUPING SETS((a,b,c)
+(a,b)
+(a,c)
+(a)
+(b,c)
+(b)
+(c)
+() )
+GROUP BY CUBE(a,b), ROLLUP(c,d)
+GROUP BY GROUPING SETS((a,b,c,d)
+(a,b,c)
+(a,b)
+(a,c,d)
+(a,c)
+(a)
+(b,c,d)
+(b,c)
+(b)
+(c,d)
+(c)
+() )
+GROUP BY a, ROLLUP(a,b)
+GROUP BY GROUPING SETS((a,b)
+(a) )
+GROUP BY Region,
+ROLLUP(Sales_Person, WEEK(Sales_Date)),
+CUBE(YEAR(Sales_Date), MONTH (Sales_Date))
+GROUP BY ROLLUP (Region, Sales_Person, WEEK(Sales_Date),
+YEAR(Sales_Date), MONTH(Sales_Date) )
+
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+SALES_PERSON, SUM(SALES) AS UNITS_SOLD
+FROM SALES
+WHERE WEEK(SALES_DATE) = 13
+GROUP BY WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON
+ORDER BY WEEK, DAY_WEEK, SALES_PERSON
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+SALES_PERSON, SUM(SALES) AS UNITS_SOLD
+FROM SALES
+WHERE WEEK(SALES_DATE) = 13
+GROUP BY GROUPING SETS ( (WEEK(SALES_DATE), SALES_PERSON),
+(DAYOFWEEK(SALES_DATE), SALES_PERSON))
+ORDER BY WEEK, DAY_WEEK, SALES_PERSON
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+SALES_PERSON, SUM(SALES) AS UNITS_SOLD
+FROM SALES
+WHERE WEEK(SALES_DATE) = 13
+GROUP BY ROLLUP ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON )
+ORDER BY WEEK, DAY_WEEK, SALES_PERSON
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+SALES_PERSON, SUM(SALES) AS UNITS_SOLD
+FROM SALES
+WHERE WEEK(SALES_DATE) = 13
+GROUP BY CUBE ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE), SALES_PERSON )
+ORDER BY WEEK, DAY_WEEK, SALES_PERSON
+
+SELECT SALES_PERSON,
+MONTH(SALES_DATE) AS MONTH,
+SUM(SALES) AS UNITS_SOLD
+FROM SALES
+GROUP BY GROUPING SETS ( (SALES_PERSON, MONTH(SALES_DATE)),
+()
+)
+ORDER BY SALES_PERSON, MONTH
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+SUM(SALES) AS UNITS_SOLD
+FROM SALES
+GROUP BY ROLLUP ( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE) )
+ORDER BY WEEK, DAY_WEEK
+
+SELECT MONTH(SALES_DATE) AS MONTH,
+REGION,
+SUM(SALES) AS UNITS_SOLD
+FROM SALES
+GROUP BY ROLLUP ( MONTH(SALES_DATE), REGION );
+ORDER BY MONTH, REGION
+
+SELECT WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+MONTH(SALES_DATE) AS MONTH,
+REGION,
+SUM(SALES) AS UNITS_SOLD
+FROM SALES
+GROUP BY GROUPING SETS ( ROLLUP( WEEK(SALES_DATE), DAYOFWEEK(SALES_DATE) ),
+ROLLUP( MONTH(SALES_DATE), REGION ) )
+ORDER BY WEEK, DAY_WEEK, MONTH, REGION
+
+SELECT R1, R2,
+WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+MONTH(SALES_DATE) AS MONTH,
+REGION, SUM(SALES) AS UNITS_SOLD
+FROM SALES,(VALUES(’GROUP 1’,’GROUP 2’)) AS X(R1,R2)
+GROUP BY GROUPING SETS ((R1, ROLLUP(WEEK(SALES_DATE),
+DAYOFWEEK(SALES_DATE))),
+(R2,ROLLUP( MONTH(SALES_DATE), REGION ) )
+ORDER BY WEEK, DAY_WEEK, MONTH, REGION
+)
+
+SELECT COALESCE(R1,R2) AS GROUP,
+WEEK(SALES_DATE) AS WEEK,
+DAYOFWEEK(SALES_DATE) AS DAY_WEEK,
+MONTH(SALES_DATE) AS MONTH,
+REGION, SUM(SALES) AS UNITS_SOLD
+FROM SALES,(VALUES(’GROUP 1’,’GROUP 2’)) AS X(R1,R2)
+GROUP BY GROUPING SETS ((R1, ROLLUP(WEEK(SALES_DATE),
+DAYOFWEEK(SALES_DATE))),
+(R2,ROLLUP( MONTH(SALES_DATE), REGION ) )
+ORDER BY GROUP, WEEK, DAY_WEEK, MONTH, REGION;
+)
+
+SELECT MONTH(SALES_DATE) AS MONTH,
+REGION,
+SUM(SALES) AS UNITS_SOLD,
+MAX(SALES) AS BEST_SALE,
+824
+SQL Reference Volume 1
+Examples of grouping sets, cube, and rollup queries
+CAST(ROUND(AVG(DECIMAL(SALES)),2) AS DECIMAL(5,2)) AS AVG_UNITS_SOLD
+FROM SALES
+GROUP BY CUBE(MONTH(SALES_DATE),REGION)
+ORDER BY MONTH, REGION
+
+>     ]
diff --git a/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs b/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs
index abc2427..521bc8f 100644
--- a/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs
+++ b/tools/Language/SQL/SimpleSQL/QueryExprComponents.lhs
@@ -17,7 +17,6 @@ These are a few misc tests which don't fit anywhere else.
 >     [duplicates
 >     ,selectLists
 >     ,whereClause
->     ,groupByClause
 >     ,having
 >     ,orderBy
 >     ,limit
@@ -75,31 +74,13 @@ These are a few misc tests which don't fit anywhere else.
 >                  ,qeWhere = Just $ BinOp (Iden "a") "=" (NumLit "5")})
 >     ]
 
-> groupByClause :: TestItem
-> groupByClause = Group "groupByClause" $ map (uncurry TestQueryExpr)
->     [("select a,sum(b) from t group by a"
->      ,makeSelect {qeSelectList = [(Nothing, Iden "a")
->                                  ,(Nothing, App "sum" [Iden "b"])]
->                  ,qeFrom = [TRSimple "t"]
->                  ,qeGroupBy = [Iden "a"]
->                  })
-
->     ,("select a,b,sum(c) from t group by a,b"
->      ,makeSelect {qeSelectList = [(Nothing, Iden "a")
->                                  ,(Nothing, Iden "b")
->                                  ,(Nothing, App "sum" [Iden "c"])]
->                  ,qeFrom = [TRSimple "t"]
->                  ,qeGroupBy = [Iden "a",Iden "b"]
->                  })
->     ]
-
 > having :: TestItem
 > having = Group "having" $ map (uncurry TestQueryExpr)
 >     [("select a,sum(b) from t group by a having sum(b) > 5"
 >      ,makeSelect {qeSelectList = [(Nothing, Iden "a")
 >                                  ,(Nothing, App "sum" [Iden "b"])]
 >                  ,qeFrom = [TRSimple "t"]
->                  ,qeGroupBy = [Iden "a"]
+>                  ,qeGroupBy = [SimpleGroup $ Iden "a"]
 >                  ,qeHaving = Just $ BinOp (App "sum" [Iden "b"])
 >                                           ">" (NumLit "5")
 >                  })
diff --git a/tools/Language/SQL/SimpleSQL/Tests.lhs b/tools/Language/SQL/SimpleSQL/Tests.lhs
index 8d148b6..54349a1 100644
--- a/tools/Language/SQL/SimpleSQL/Tests.lhs
+++ b/tools/Language/SQL/SimpleSQL/Tests.lhs
@@ -26,10 +26,10 @@ tpch tests
 > import Language.SQL.SimpleSQL.TestTypes
 
 > import Language.SQL.SimpleSQL.FullQueries
+> import Language.SQL.SimpleSQL.GroupBy
 > import Language.SQL.SimpleSQL.Postgres
 > import Language.SQL.SimpleSQL.QueryExprComponents
 > import Language.SQL.SimpleSQL.QueryExprs
-
 > import Language.SQL.SimpleSQL.TableRefs
 > import Language.SQL.SimpleSQL.ScalarExprs
 > import Language.SQL.SimpleSQL.Tpch
@@ -44,8 +44,9 @@ order on the generated documentation.
 >     Group "parserTest"
 >     [scalarExprTests
 >     ,queryExprComponentTests
->     ,tableRefTests
 >     ,queryExprsTests
+>     ,tableRefTests
+>     ,groupByTests
 >     ,fullQueriesTests
 >     ,postgresTests
 >     ,tpchTests