basic multiset support

2014-04-18 20:50:24 +03:00 · 2014-04-18 20:50:24 +03:00 · 4e1a1da820
parent 2ff8580dbf
commit 4e1a1da820
6 changed files with 128 additions and 86 deletions
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@ -494,6 +494,17 @@ a match (select a from t)
 >     [ArrayCtor <$> parens queryExpr
 >     ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
 > multisetCtor :: Parser ValueExpr
 > multisetCtor =
 >     choice
 >     [keyword_ "multiset" >>
 >      choice
 >      [MultisetQueryCtor <$> parens queryExpr
 >      ,MultisetCtor <$> brackets (commaSep valueExpr)]
 >     ,keyword_ "table" >>
 >      MultisetQueryCtor <$> parens queryExpr]
 > escape :: Parser (ValueExpr -> ValueExpr)
 > escape = do
 >     ctor <- choice
@ -726,6 +737,7 @@ messages, but both of these are considered too important.
 >              ,"is unknown"
 >              ,"is not unknown"]
 >             ]
 >          ++ [multisetBinOp]
 >          -- have to use try with inSuffix because of a conflict
 >          -- with 'in' in position function, and not between
 >          -- between also has a try in it to deal with 'not'
@ -755,6 +767,14 @@ messages, but both of these are considered too important.
 >           return $ PostfixOp [Name $ unwords o]
 >     binary p nm assoc =
 >       E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
 >     multisetBinOp = E.Infix (do
 >         keyword_ "multiset"
 >         o <- choice [Union <$ keyword_ "union"
 >                     ,Intersect <$ keyword_ "intersect"
 >                     ,Except <$ keyword_ "except"]
 >         d <- fromMaybe SQDefault <$> duplicates
 >         return (\a b -> MultisetBinOp a o d b))
 >           E.AssocLeft
 >     prefixKeyword nm = prefix (keyword_ nm) nm
 >     prefixSym nm = prefix (symbol_ nm) nm
 >     prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
@ -787,6 +807,7 @@ fragile and could at least do with some heavy explanation.
 >               ,caseValue
 >               ,cast
 >               ,arrayCtor
 >               ,multisetCtor
 >               ,specialOpKs
 >               ,parensTerm
 >               ,subquery
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@ -173,6 +173,28 @@ which have been changed to try to improve the layout of the output.
 > valueExpr (ArrayCtor q) =
 >     text "array" <> parens (queryExpr q)
 > valueExpr (MultisetCtor es) =
 >     text "multiset" <> brackets (commaSep $ map valueExpr es)
 > valueExpr (MultisetQueryCtor q) =
 >     text "multiset" <> parens (queryExpr q)
 > valueExpr (MultisetBinOp a c q b) =
 >     sep
 >     [valueExpr a
 >     ,text "multiset"
 >     ,text $ case c of
 >                 Union -> "union"
 >                 Intersect -> "intersect"
 >                 Except -> "except"
 >     ,case q of
 >          SQDefault -> empty
 >          All -> text "all"
 >          Distinct -> text "distinct"
 >     ,valueExpr b]
 > valueExpr (CSStringLit cs st) =
 >   text cs <> quotes (text $ doubleUpQuotes st)
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@ -147,6 +147,9 @@
 >     | Escape ValueExpr Char
 >     | UEscape ValueExpr Char
 >     | Collate ValueExpr String
 >     | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr
 >     | MultisetCtor [ValueExpr]
 >     | MultisetQueryCtor QueryExpr
 >       deriving (Eq,Show,Read,Data,Typeable)
 > -- | Represents an identifier name, which can be quoted or unquoted.
--- a/28
+++ b/28
@ -6,7 +6,6 @@ table expression in syntax:
  QueryExpr = Select SelectList (Maybe TableExpr)
  and the TableExpr contains all the other bits?
 finish off ansi 2003 support or specific subset
  summarize todos
 start looking at error messages
 change the booleans in the ast to better types for less ambiguity
 represent missing optional bits in the ast as nothing instead of the
@ -18,6 +17,10 @@ review haddock in the syntax and update
 review syntax names and representation
 careful review of token parses wrt trailing delimiters/junk
 decide how to handle character set literals and identifiers: don't
   have any intention of actually supporting switching character sets
   in the middle of parsing so maybe this would be better disabled?
 review places in the parse which should allow only a fixed set of
   identifiers (e.g. in interval literals)
@ -25,14 +28,23 @@ decide whether to represent numeric literals better, instead of a
   single string - break up into parts, or parse to a Decimal or
   something
 refactor the typename parsing
 remove the IsString for Name and [Name]
 fixes:
 keyword tree, add explicit result then can use for joins also
 keyword tree support prefix mode so can start from already parsed
   token
 do the final big left factor: typenames, interval lits, iden +
   suffixes
 rough SQL 2003 todo, including tests to write:
-switch TypedLit to CSStringLit based on first char being underscore?
+idens: "", unicode
 idens: "", unicode, charset?, check dotted idens and contexts
 add missing type name support: lots of missing ones here, including
   simple stuff like lob variations, and new things like interval,
   row, ref, scope, array, multiset type names.
  add tests for all the typenames cast and typed literal
 date and time literals
 multisets
 review window functions, window clause
@ -45,6 +57,8 @@ unnest
 filter in aggs
 within group in aggs
 rows review
 matching simple partial full
 LNR: maybe leave until after next release
--- a/tools/Language/SQL/SimpleSQL/Postgres.lhs
+++ b/tools/Language/SQL/SimpleSQL/Postgres.lhs
@ -29,7 +29,9 @@ TODO: get all the commented out tests working
 >     ,"SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same');"
->     ,"SELECT ROW(table.*) IS NULL FROM table;"
+>     -- table is a reservered keyword?
 >     --,"SELECT ROW(table.*) IS NULL FROM table;"
 >     ,"SELECT ROW(tablex.*) IS NULL FROM table;"
 >     ,"SELECT true OR somefunc();"
--- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs
@ -21,19 +21,22 @@ large amount of the SQL.
 >     ,unicodeStringLiterals
 >     ,binaryStringLiterals
 >     ,numericLiterals
->     --,dateAndTimeLiterals
+>     ,intervalLiterals
 >     ,booleanLiterals
->     --,identifiers
+>     ,identifiers
 >     ,typeNameTests
->     --,parenthesizedValueExpression
+>     ,parenthesizedValueExpression
 >     ,targetSpecification
 >     ,contextuallyTypeValueSpec
 >     --,nextValueExpression
 >     ,arrayElementReference
->     --,multisetElementReference
+>     ,multisetElementReference
 >     --,numericValueExpression
 >     --,booleanValueExpression
 >     ,arrayValueConstructor
 >     ,multisetValueExpression
 >     ,multisetValueFunction
 >     ,multisetValueConstructor
 >     --,tableValueConstructor
 >     --,fromClause
 >     --,whereClause
@ -486,9 +489,7 @@ The <quote symbol> rule consists of two immediately adjacent <quote> marks with
 >      ,TypedLit (TypeName "_francais") "français")
 >     ]
-TODO: all the stuff with character set representations.
+TODO: all the stuff with character set representations. (?)
 == other string literals
@ -662,15 +663,10 @@ TODO: interval literals
 (e.g. parse the literal string), at the moment they are treated as
 normal typed literals
-> {-dateAndTimeLiterals :: TestItem
+> intervalLiterals :: TestItem
-> dateAndTimeLiterals = Group "date and time literals" $ map (uncurry TestValueExpr)
+> intervalLiterals = Group "intervalLiterals literals" $ map (uncurry TestValueExpr)
->     [("date 'date literal'"
+>     [
->      ,TypedLit (TypeName "date") "date literal")
+>     ]
 >     ,("time 'time literal'"
 >      ,TypedLit (TypeName "time") "time literal")
 >     ,("timestamp 'timestamp literal'"
 >      ,TypedLit (TypeName "timestamp") "timestamp literal")
 >     ]-}
 TODO: intervals + more date and time literals
@ -804,11 +800,14 @@ TODO: language identifiers have different rules to generic identifiers
 > identifiers :: TestItem
 > identifiers = Group "identifiers" $ map (uncurry TestValueExpr)
->     [("test",undefined)
+>     [("test",Iden [Name "test"])
->     ,("_test",undefined)
+>     ,("_test",Iden [Name "_test"])
->     ,("t1",undefined)
+>     ,("t1",Iden [Name "t1"])
->     ,("a.b",undefined)
+>     ,("a.b",Iden [Name "a", Name "b"])
->     ,("a.b.c",undefined)
+>     ,("a.b.c",Iden [Name "a", Name "b", Name "c"])
 >      -- TODO: quoted idens
 >      -- double double quotes in quoted idens
 >      -- unicode idens syntax (needs escape?)
 >     ]
 TODO: module stuff
@ -1093,58 +1092,11 @@ expression
 >         ,(ctn ++ " 'test'", TypedLit stn "test")
 >         ]
 > {-typeNames :: TestItem
 > typeNames = Group "type names" $ map (uncurry TestValueExpr)
 >     [("cast('test' as character(5))", undefined)
 >     ,("cast('test' as character)", undefined)
 >     ,("cast('test' as char(5))", undefined)
 >     ,("cast('test' as character varying(5))", undefined)
 >     ,("cast('test' as char varying(5))", undefined)
 >     ,("cast('test' as varchar(5))", undefined)
 >     ,("cast('test' as char(5) character set _xxx)", undefined)
 >     ,("cast('test' as national character(5))", undefined)
 >     ,("cast('test' as national char)", undefined)
 >     ,("cast('test' as nchar)", undefined)
 >     ,("cast('test' as national character varying)", undefined)
 >     ,("cast('test' as national char varying)", undefined)
 >     ,("cast('test' as nchar varying)", undefined)
 >     ,("cast('test' as bit(4))", undefined)
 >     ,("cast('test' as bit varying(4))", undefined)
 >     ,("cast('test' as bit varying)", undefined)
 >     ,("cast(5 as numeric)", undefined)
 >     ,("cast(5 as numeric(3))", undefined)
 >     ,("cast(5 as numeric(5,3))", undefined)
 >     ,("cast(5 as decimal(5,3))", undefined)
 >     ,("cast(5 as dec(5,3))", undefined)
 >     ,("cast(5 as integer)", undefined)
 >     ,("cast(5 as int)", undefined)
 >     ,("cast(5 as smallint)", undefined)
 >     ,("cast(5 as float(23))", undefined)
 >     ,("cast(5 as float)", undefined)
 >     ,("cast(5 as real)", undefined)
 >     ,("cast(5 as double precision)", undefined)
 >     ,("cast('01-01-99' as date)", undefined)
 >     ,("cast('01-01-99' as time)", undefined)
 >     ,("cast('01-01-99' as time(3))", undefined)
 >     ,("cast('01-01-99' as timestamp(3))", undefined)
 >     ,("cast('01-01-99' as timestamp with time zone)", undefined)
 >     ,("cast('01-01-99' as time(3) with time zone)", undefined)
 >     ]-}
 == 6.2 <field definition> (p173)
 <field definition>    ::=   <field name> <data type> [ <reference scope check> ]
-This is used when e.g. casting to a row type.
+This is used in row type names.
 == 6.3 <value expression primary> (p174)
@ -1156,8 +1108,8 @@ This is used when e.g. casting to a row type.
 > parenthesizedValueExpression :: TestItem
 > parenthesizedValueExpression = Group "parenthesized value expression" $ map (uncurry TestValueExpr)
->     [("(3)", undefined)
+>     [("(3)", Parens (NumLit "3"))
->     ,("((3))", undefined)
+>     ,("((3))", Parens $ Parens (NumLit "3"))
 >     ]
 <nonparenthesized value expression primary>    ::= 
@ -1278,7 +1230,7 @@ for or how it works
 > contextuallyTypeValueSpec = Group "contextually typed value specification" $ map (uncurry TestValueExpr)
 >     [("null", Iden "null")
 >     ,("array[]", Array (Iden "array") [])
->     --,("multiset[]", undefined)
+>     ,("multiset[]", MultisetCtor [])
 >     ,("default", Iden "default")
 >     ]
@ -1547,10 +1499,11 @@ TODO: work out the precendence of the array element reference suffix
 > multisetElementReference :: TestItem
 > multisetElementReference = Group "multiset element reference" $ map (uncurry TestValueExpr)
->     [("element(something)", undefined)
+>     [("element(something)", App "element" [Iden "something"])
 >     ]
-TODO: work out how to parse this
+Not sure why this is called element reference, since it appears to be
 like unnest, and entirely unlike array element reference.
 6.25 <value expression> (p236)
@ -1977,7 +1930,19 @@ operator is ||, same as the string concatenation operator.
 <multiset primary>    ::=   <multiset value function> | <value expression primary>
-TODO: multiset value expression
+> multisetValueExpression :: TestItem
 > multisetValueExpression = Group "multiset value expression" $ map (uncurry TestValueExpr)
 >    [("a multiset union b"
 >     ,MultisetBinOp (Iden "a") Union SQDefault (Iden "b"))
 >    ,("a multiset union all b"
 >     ,MultisetBinOp (Iden "a") Union All (Iden "b"))
 >    ,("a multiset union distinct b"
 >     ,MultisetBinOp (Iden "a") Union Distinct (Iden "b"))
 >    ,("a multiset except b"
 >     ,MultisetBinOp (Iden "a") Except SQDefault (Iden "b"))
 >    ,("a multiset intersect b"
 >     ,MultisetBinOp (Iden "a") Intersect SQDefault (Iden "b"))
 >    ]
 == 6.38 <multiset value function> (p289)
@ -1985,7 +1950,10 @@ TODO: multiset value expression
 <multiset set function>    ::=   SET <left paren> <multiset value expression> <right paren>
-TODO: multiset value function
+> multisetValueFunction :: TestItem
 > multisetValueFunction = Group "multiset value function" $ map (uncurry TestValueExpr)
 >    [("set(a)", App "set" [Iden "a"])
 >    ]
 == 6.39 <multiset value constructor> (p290)
@ -2004,7 +1972,18 @@ TODO: multiset value function
 <table value constructor by query>    ::=   TABLE <left paren> <query expression> <right paren>
-TODO: multiset value constructor
+table is just syntactic sugar for multiset (why does it exist?)
 > multisetValueConstructor :: TestItem
 > multisetValueConstructor = Group "multiset value constructor" $ map (uncurry TestValueExpr)
 >    [("multiset[a,b,c]", MultisetCtor[Iden "a", Iden "b", Iden "c"])
 >    ,("multiset(select * from t)", MultisetQueryCtor qe)
 >    ,("table(select * from t)", MultisetQueryCtor qe)
 >    ]
 >   where
 >     qe = makeSelect {qeSelectList = [(Star,Nothing)]
 >                     ,qeFrom = [TRSimple [Name "t"]]}
 = 7 Query expressions
@ -2900,7 +2879,8 @@ Specify the precision of an interval data type.
 <interval leading field precision>    ::=   <unsigned integer>
-TODO: interval qualifier
+interval qualifier is covered in the typenames and the interval
 literals where it is used
 == 10.2 <language clause> (p469)