1
Fork 0

basic multiset support

This commit is contained in:
Jake Wheat 2014-04-18 20:50:24 +03:00
parent 2ff8580dbf
commit 4e1a1da820
6 changed files with 128 additions and 86 deletions

View file

@ -494,6 +494,17 @@ a match (select a from t)
> [ArrayCtor <$> parens queryExpr
> ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
> multisetCtor :: Parser ValueExpr
> multisetCtor =
> choice
> [keyword_ "multiset" >>
> choice
> [MultisetQueryCtor <$> parens queryExpr
> ,MultisetCtor <$> brackets (commaSep valueExpr)]
> ,keyword_ "table" >>
> MultisetQueryCtor <$> parens queryExpr]
> escape :: Parser (ValueExpr -> ValueExpr)
> escape = do
> ctor <- choice
@ -726,6 +737,7 @@ messages, but both of these are considered too important.
> ,"is unknown"
> ,"is not unknown"]
> ]
> ++ [multisetBinOp]
> -- have to use try with inSuffix because of a conflict
> -- with 'in' in position function, and not between
> -- between also has a try in it to deal with 'not'
@ -755,6 +767,14 @@ messages, but both of these are considered too important.
> return $ PostfixOp [Name $ unwords o]
> binary p nm assoc =
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
> multisetBinOp = E.Infix (do
> keyword_ "multiset"
> o <- choice [Union <$ keyword_ "union"
> ,Intersect <$ keyword_ "intersect"
> ,Except <$ keyword_ "except"]
> d <- fromMaybe SQDefault <$> duplicates
> return (\a b -> MultisetBinOp a o d b))
> E.AssocLeft
> prefixKeyword nm = prefix (keyword_ nm) nm
> prefixSym nm = prefix (symbol_ nm) nm
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
@ -787,6 +807,7 @@ fragile and could at least do with some heavy explanation.
> ,caseValue
> ,cast
> ,arrayCtor
> ,multisetCtor
> ,specialOpKs
> ,parensTerm
> ,subquery

View file

@ -173,6 +173,28 @@ which have been changed to try to improve the layout of the output.
> valueExpr (ArrayCtor q) =
> text "array" <> parens (queryExpr q)
> valueExpr (MultisetCtor es) =
> text "multiset" <> brackets (commaSep $ map valueExpr es)
> valueExpr (MultisetQueryCtor q) =
> text "multiset" <> parens (queryExpr q)
> valueExpr (MultisetBinOp a c q b) =
> sep
> [valueExpr a
> ,text "multiset"
> ,text $ case c of
> Union -> "union"
> Intersect -> "intersect"
> Except -> "except"
> ,case q of
> SQDefault -> empty
> All -> text "all"
> Distinct -> text "distinct"
> ,valueExpr b]
> valueExpr (CSStringLit cs st) =
> text cs <> quotes (text $ doubleUpQuotes st)

View file

@ -147,6 +147,9 @@
> | Escape ValueExpr Char
> | UEscape ValueExpr Char
> | Collate ValueExpr String
> | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr
> | MultisetCtor [ValueExpr]
> | MultisetQueryCtor QueryExpr
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents an identifier name, which can be quoted or unquoted.

28
TODO
View file

@ -6,7 +6,6 @@ table expression in syntax:
QueryExpr = Select SelectList (Maybe TableExpr)
and the TableExpr contains all the other bits?
finish off ansi 2003 support or specific subset
summarize todos
start looking at error messages
change the booleans in the ast to better types for less ambiguity
represent missing optional bits in the ast as nothing instead of the
@ -18,6 +17,10 @@ review haddock in the syntax and update
review syntax names and representation
careful review of token parses wrt trailing delimiters/junk
decide how to handle character set literals and identifiers: don't
have any intention of actually supporting switching character sets
in the middle of parsing so maybe this would be better disabled?
review places in the parse which should allow only a fixed set of
identifiers (e.g. in interval literals)
@ -25,14 +28,23 @@ decide whether to represent numeric literals better, instead of a
single string - break up into parts, or parse to a Decimal or
something
refactor the typename parsing
remove the IsString for Name and [Name]
fixes:
keyword tree, add explicit result then can use for joins also
keyword tree support prefix mode so can start from already parsed
token
do the final big left factor: typenames, interval lits, iden +
suffixes
rough SQL 2003 todo, including tests to write:
switch TypedLit to CSStringLit based on first char being underscore?
idens: "", unicode, charset?, check dotted idens and contexts
add missing type name support: lots of missing ones here, including
simple stuff like lob variations, and new things like interval,
row, ref, scope, array, multiset type names.
add tests for all the typenames cast and typed literal
idens: "", unicode
date and time literals
multisets
review window functions, window clause
@ -45,6 +57,8 @@ unnest
filter in aggs
within group in aggs
rows review
matching simple partial full
LNR: maybe leave until after next release

View file

@ -29,7 +29,9 @@ TODO: get all the commented out tests working
> ,"SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same');"
> ,"SELECT ROW(table.*) IS NULL FROM table;"
> -- table is a reservered keyword?
> --,"SELECT ROW(table.*) IS NULL FROM table;"
> ,"SELECT ROW(tablex.*) IS NULL FROM table;"
> ,"SELECT true OR somefunc();"

View file

@ -21,19 +21,22 @@ large amount of the SQL.
> ,unicodeStringLiterals
> ,binaryStringLiterals
> ,numericLiterals
> --,dateAndTimeLiterals
> ,intervalLiterals
> ,booleanLiterals
> --,identifiers
> ,identifiers
> ,typeNameTests
> --,parenthesizedValueExpression
> ,parenthesizedValueExpression
> ,targetSpecification
> ,contextuallyTypeValueSpec
> --,nextValueExpression
> ,arrayElementReference
> --,multisetElementReference
> ,multisetElementReference
> --,numericValueExpression
> --,booleanValueExpression
> ,arrayValueConstructor
> ,multisetValueExpression
> ,multisetValueFunction
> ,multisetValueConstructor
> --,tableValueConstructor
> --,fromClause
> --,whereClause
@ -486,9 +489,7 @@ The <quote symbol> rule consists of two immediately adjacent <quote> marks with
> ,TypedLit (TypeName "_francais") "français")
> ]
TODO: all the stuff with character set representations.
TODO: all the stuff with character set representations. (?)
== other string literals
@ -662,15 +663,10 @@ TODO: interval literals
(e.g. parse the literal string), at the moment they are treated as
normal typed literals
> {-dateAndTimeLiterals :: TestItem
> dateAndTimeLiterals = Group "date and time literals" $ map (uncurry TestValueExpr)
> [("date 'date literal'"
> ,TypedLit (TypeName "date") "date literal")
> ,("time 'time literal'"
> ,TypedLit (TypeName "time") "time literal")
> ,("timestamp 'timestamp literal'"
> ,TypedLit (TypeName "timestamp") "timestamp literal")
> ]-}
> intervalLiterals :: TestItem
> intervalLiterals = Group "intervalLiterals literals" $ map (uncurry TestValueExpr)
> [
> ]
TODO: intervals + more date and time literals
@ -804,11 +800,14 @@ TODO: language identifiers have different rules to generic identifiers
> identifiers :: TestItem
> identifiers = Group "identifiers" $ map (uncurry TestValueExpr)
> [("test",undefined)
> ,("_test",undefined)
> ,("t1",undefined)
> ,("a.b",undefined)
> ,("a.b.c",undefined)
> [("test",Iden [Name "test"])
> ,("_test",Iden [Name "_test"])
> ,("t1",Iden [Name "t1"])
> ,("a.b",Iden [Name "a", Name "b"])
> ,("a.b.c",Iden [Name "a", Name "b", Name "c"])
> -- TODO: quoted idens
> -- double double quotes in quoted idens
> -- unicode idens syntax (needs escape?)
> ]
TODO: module stuff
@ -1093,58 +1092,11 @@ expression
> ,(ctn ++ " 'test'", TypedLit stn "test")
> ]
> {-typeNames :: TestItem
> typeNames = Group "type names" $ map (uncurry TestValueExpr)
> [("cast('test' as character(5))", undefined)
> ,("cast('test' as character)", undefined)
> ,("cast('test' as char(5))", undefined)
> ,("cast('test' as character varying(5))", undefined)
> ,("cast('test' as char varying(5))", undefined)
> ,("cast('test' as varchar(5))", undefined)
> ,("cast('test' as char(5) character set _xxx)", undefined)
> ,("cast('test' as national character(5))", undefined)
> ,("cast('test' as national char)", undefined)
> ,("cast('test' as nchar)", undefined)
> ,("cast('test' as national character varying)", undefined)
> ,("cast('test' as national char varying)", undefined)
> ,("cast('test' as nchar varying)", undefined)
> ,("cast('test' as bit(4))", undefined)
> ,("cast('test' as bit varying(4))", undefined)
> ,("cast('test' as bit varying)", undefined)
> ,("cast(5 as numeric)", undefined)
> ,("cast(5 as numeric(3))", undefined)
> ,("cast(5 as numeric(5,3))", undefined)
> ,("cast(5 as decimal(5,3))", undefined)
> ,("cast(5 as dec(5,3))", undefined)
> ,("cast(5 as integer)", undefined)
> ,("cast(5 as int)", undefined)
> ,("cast(5 as smallint)", undefined)
> ,("cast(5 as float(23))", undefined)
> ,("cast(5 as float)", undefined)
> ,("cast(5 as real)", undefined)
> ,("cast(5 as double precision)", undefined)
> ,("cast('01-01-99' as date)", undefined)
> ,("cast('01-01-99' as time)", undefined)
> ,("cast('01-01-99' as time(3))", undefined)
> ,("cast('01-01-99' as timestamp(3))", undefined)
> ,("cast('01-01-99' as timestamp with time zone)", undefined)
> ,("cast('01-01-99' as time(3) with time zone)", undefined)
> ]-}
== 6.2 <field definition> (p173)
<field definition> ::= <field name> <data type> [ <reference scope check> ]
This is used when e.g. casting to a row type.
This is used in row type names.
== 6.3 <value expression primary> (p174)
@ -1156,8 +1108,8 @@ This is used when e.g. casting to a row type.
> parenthesizedValueExpression :: TestItem
> parenthesizedValueExpression = Group "parenthesized value expression" $ map (uncurry TestValueExpr)
> [("(3)", undefined)
> ,("((3))", undefined)
> [("(3)", Parens (NumLit "3"))
> ,("((3))", Parens $ Parens (NumLit "3"))
> ]
<nonparenthesized value expression primary> ::=
@ -1278,7 +1230,7 @@ for or how it works
> contextuallyTypeValueSpec = Group "contextually typed value specification" $ map (uncurry TestValueExpr)
> [("null", Iden "null")
> ,("array[]", Array (Iden "array") [])
> --,("multiset[]", undefined)
> ,("multiset[]", MultisetCtor [])
> ,("default", Iden "default")
> ]
@ -1547,10 +1499,11 @@ TODO: work out the precendence of the array element reference suffix
> multisetElementReference :: TestItem
> multisetElementReference = Group "multiset element reference" $ map (uncurry TestValueExpr)
> [("element(something)", undefined)
> [("element(something)", App "element" [Iden "something"])
> ]
TODO: work out how to parse this
Not sure why this is called element reference, since it appears to be
like unnest, and entirely unlike array element reference.
6.25 <value expression> (p236)
@ -1977,7 +1930,19 @@ operator is ||, same as the string concatenation operator.
<multiset primary> ::= <multiset value function> | <value expression primary>
TODO: multiset value expression
> multisetValueExpression :: TestItem
> multisetValueExpression = Group "multiset value expression" $ map (uncurry TestValueExpr)
> [("a multiset union b"
> ,MultisetBinOp (Iden "a") Union SQDefault (Iden "b"))
> ,("a multiset union all b"
> ,MultisetBinOp (Iden "a") Union All (Iden "b"))
> ,("a multiset union distinct b"
> ,MultisetBinOp (Iden "a") Union Distinct (Iden "b"))
> ,("a multiset except b"
> ,MultisetBinOp (Iden "a") Except SQDefault (Iden "b"))
> ,("a multiset intersect b"
> ,MultisetBinOp (Iden "a") Intersect SQDefault (Iden "b"))
> ]
== 6.38 <multiset value function> (p289)
@ -1985,7 +1950,10 @@ TODO: multiset value expression
<multiset set function> ::= SET <left paren> <multiset value expression> <right paren>
TODO: multiset value function
> multisetValueFunction :: TestItem
> multisetValueFunction = Group "multiset value function" $ map (uncurry TestValueExpr)
> [("set(a)", App "set" [Iden "a"])
> ]
== 6.39 <multiset value constructor> (p290)
@ -2004,7 +1972,18 @@ TODO: multiset value function
<table value constructor by query> ::= TABLE <left paren> <query expression> <right paren>
TODO: multiset value constructor
table is just syntactic sugar for multiset (why does it exist?)
> multisetValueConstructor :: TestItem
> multisetValueConstructor = Group "multiset value constructor" $ map (uncurry TestValueExpr)
> [("multiset[a,b,c]", MultisetCtor[Iden "a", Iden "b", Iden "c"])
> ,("multiset(select * from t)", MultisetQueryCtor qe)
> ,("table(select * from t)", MultisetQueryCtor qe)
> ]
> where
> qe = makeSelect {qeSelectList = [(Star,Nothing)]
> ,qeFrom = [TRSimple [Name "t"]]}
= 7 Query expressions
@ -2900,7 +2879,8 @@ Specify the precision of an interval data type.
<interval leading field precision> ::= <unsigned integer>
TODO: interval qualifier
interval qualifier is covered in the typenames and the interval
literals where it is used
== 10.2 <language clause> (p469)