1
Fork 0

basic multiset support

This commit is contained in:
Jake Wheat 2014-04-18 20:50:24 +03:00
parent 2ff8580dbf
commit 4e1a1da820
6 changed files with 128 additions and 86 deletions

View file

@ -494,6 +494,17 @@ a match (select a from t)
> [ArrayCtor <$> parens queryExpr > [ArrayCtor <$> parens queryExpr
> ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)] > ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
> multisetCtor :: Parser ValueExpr
> multisetCtor =
> choice
> [keyword_ "multiset" >>
> choice
> [MultisetQueryCtor <$> parens queryExpr
> ,MultisetCtor <$> brackets (commaSep valueExpr)]
> ,keyword_ "table" >>
> MultisetQueryCtor <$> parens queryExpr]
> escape :: Parser (ValueExpr -> ValueExpr) > escape :: Parser (ValueExpr -> ValueExpr)
> escape = do > escape = do
> ctor <- choice > ctor <- choice
@ -726,6 +737,7 @@ messages, but both of these are considered too important.
> ,"is unknown" > ,"is unknown"
> ,"is not unknown"] > ,"is not unknown"]
> ] > ]
> ++ [multisetBinOp]
> -- have to use try with inSuffix because of a conflict > -- have to use try with inSuffix because of a conflict
> -- with 'in' in position function, and not between > -- with 'in' in position function, and not between
> -- between also has a try in it to deal with 'not' > -- between also has a try in it to deal with 'not'
@ -755,6 +767,14 @@ messages, but both of these are considered too important.
> return $ PostfixOp [Name $ unwords o] > return $ PostfixOp [Name $ unwords o]
> binary p nm assoc = > binary p nm assoc =
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc > E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
> multisetBinOp = E.Infix (do
> keyword_ "multiset"
> o <- choice [Union <$ keyword_ "union"
> ,Intersect <$ keyword_ "intersect"
> ,Except <$ keyword_ "except"]
> d <- fromMaybe SQDefault <$> duplicates
> return (\a b -> MultisetBinOp a o d b))
> E.AssocLeft
> prefixKeyword nm = prefix (keyword_ nm) nm > prefixKeyword nm = prefix (keyword_ nm) nm
> prefixSym nm = prefix (symbol_ nm) nm > prefixSym nm = prefix (symbol_ nm) nm
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm])) > prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
@ -787,6 +807,7 @@ fragile and could at least do with some heavy explanation.
> ,caseValue > ,caseValue
> ,cast > ,cast
> ,arrayCtor > ,arrayCtor
> ,multisetCtor
> ,specialOpKs > ,specialOpKs
> ,parensTerm > ,parensTerm
> ,subquery > ,subquery

View file

@ -173,6 +173,28 @@ which have been changed to try to improve the layout of the output.
> valueExpr (ArrayCtor q) = > valueExpr (ArrayCtor q) =
> text "array" <> parens (queryExpr q) > text "array" <> parens (queryExpr q)
> valueExpr (MultisetCtor es) =
> text "multiset" <> brackets (commaSep $ map valueExpr es)
> valueExpr (MultisetQueryCtor q) =
> text "multiset" <> parens (queryExpr q)
> valueExpr (MultisetBinOp a c q b) =
> sep
> [valueExpr a
> ,text "multiset"
> ,text $ case c of
> Union -> "union"
> Intersect -> "intersect"
> Except -> "except"
> ,case q of
> SQDefault -> empty
> All -> text "all"
> Distinct -> text "distinct"
> ,valueExpr b]
> valueExpr (CSStringLit cs st) = > valueExpr (CSStringLit cs st) =
> text cs <> quotes (text $ doubleUpQuotes st) > text cs <> quotes (text $ doubleUpQuotes st)

View file

@ -147,6 +147,9 @@
> | Escape ValueExpr Char > | Escape ValueExpr Char
> | UEscape ValueExpr Char > | UEscape ValueExpr Char
> | Collate ValueExpr String > | Collate ValueExpr String
> | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr
> | MultisetCtor [ValueExpr]
> | MultisetQueryCtor QueryExpr
> deriving (Eq,Show,Read,Data,Typeable) > deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents an identifier name, which can be quoted or unquoted. > -- | Represents an identifier name, which can be quoted or unquoted.

28
TODO
View file

@ -6,7 +6,6 @@ table expression in syntax:
QueryExpr = Select SelectList (Maybe TableExpr) QueryExpr = Select SelectList (Maybe TableExpr)
and the TableExpr contains all the other bits? and the TableExpr contains all the other bits?
finish off ansi 2003 support or specific subset finish off ansi 2003 support or specific subset
summarize todos
start looking at error messages start looking at error messages
change the booleans in the ast to better types for less ambiguity change the booleans in the ast to better types for less ambiguity
represent missing optional bits in the ast as nothing instead of the represent missing optional bits in the ast as nothing instead of the
@ -18,6 +17,10 @@ review haddock in the syntax and update
review syntax names and representation review syntax names and representation
careful review of token parses wrt trailing delimiters/junk careful review of token parses wrt trailing delimiters/junk
decide how to handle character set literals and identifiers: don't
have any intention of actually supporting switching character sets
in the middle of parsing so maybe this would be better disabled?
review places in the parse which should allow only a fixed set of review places in the parse which should allow only a fixed set of
identifiers (e.g. in interval literals) identifiers (e.g. in interval literals)
@ -25,14 +28,23 @@ decide whether to represent numeric literals better, instead of a
single string - break up into parts, or parse to a Decimal or single string - break up into parts, or parse to a Decimal or
something something
refactor the typename parsing
remove the IsString for Name and [Name]
fixes:
keyword tree, add explicit result then can use for joins also
keyword tree support prefix mode so can start from already parsed
token
do the final big left factor: typenames, interval lits, iden +
suffixes
rough SQL 2003 todo, including tests to write: rough SQL 2003 todo, including tests to write:
switch TypedLit to CSStringLit based on first char being underscore? idens: "", unicode
idens: "", unicode, charset?, check dotted idens and contexts
add missing type name support: lots of missing ones here, including
simple stuff like lob variations, and new things like interval,
row, ref, scope, array, multiset type names.
add tests for all the typenames cast and typed literal
date and time literals date and time literals
multisets multisets
review window functions, window clause review window functions, window clause
@ -45,6 +57,8 @@ unnest
filter in aggs filter in aggs
within group in aggs within group in aggs
rows review rows review
matching simple partial full
LNR: maybe leave until after next release LNR: maybe leave until after next release

View file

@ -29,7 +29,9 @@ TODO: get all the commented out tests working
> ,"SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same');" > ,"SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same');"
> ,"SELECT ROW(table.*) IS NULL FROM table;" > -- table is a reservered keyword?
> --,"SELECT ROW(table.*) IS NULL FROM table;"
> ,"SELECT ROW(tablex.*) IS NULL FROM table;"
> ,"SELECT true OR somefunc();" > ,"SELECT true OR somefunc();"

View file

@ -21,19 +21,22 @@ large amount of the SQL.
> ,unicodeStringLiterals > ,unicodeStringLiterals
> ,binaryStringLiterals > ,binaryStringLiterals
> ,numericLiterals > ,numericLiterals
> --,dateAndTimeLiterals > ,intervalLiterals
> ,booleanLiterals > ,booleanLiterals
> --,identifiers > ,identifiers
> ,typeNameTests > ,typeNameTests
> --,parenthesizedValueExpression > ,parenthesizedValueExpression
> ,targetSpecification > ,targetSpecification
> ,contextuallyTypeValueSpec > ,contextuallyTypeValueSpec
> --,nextValueExpression > --,nextValueExpression
> ,arrayElementReference > ,arrayElementReference
> --,multisetElementReference > ,multisetElementReference
> --,numericValueExpression > --,numericValueExpression
> --,booleanValueExpression > --,booleanValueExpression
> ,arrayValueConstructor > ,arrayValueConstructor
> ,multisetValueExpression
> ,multisetValueFunction
> ,multisetValueConstructor
> --,tableValueConstructor > --,tableValueConstructor
> --,fromClause > --,fromClause
> --,whereClause > --,whereClause
@ -486,9 +489,7 @@ The <quote symbol> rule consists of two immediately adjacent <quote> marks with
> ,TypedLit (TypeName "_francais") "français") > ,TypedLit (TypeName "_francais") "français")
> ] > ]
TODO: all the stuff with character set representations. TODO: all the stuff with character set representations. (?)
== other string literals == other string literals
@ -662,15 +663,10 @@ TODO: interval literals
(e.g. parse the literal string), at the moment they are treated as (e.g. parse the literal string), at the moment they are treated as
normal typed literals normal typed literals
> {-dateAndTimeLiterals :: TestItem > intervalLiterals :: TestItem
> dateAndTimeLiterals = Group "date and time literals" $ map (uncurry TestValueExpr) > intervalLiterals = Group "intervalLiterals literals" $ map (uncurry TestValueExpr)
> [("date 'date literal'" > [
> ,TypedLit (TypeName "date") "date literal") > ]
> ,("time 'time literal'"
> ,TypedLit (TypeName "time") "time literal")
> ,("timestamp 'timestamp literal'"
> ,TypedLit (TypeName "timestamp") "timestamp literal")
> ]-}
TODO: intervals + more date and time literals TODO: intervals + more date and time literals
@ -804,11 +800,14 @@ TODO: language identifiers have different rules to generic identifiers
> identifiers :: TestItem > identifiers :: TestItem
> identifiers = Group "identifiers" $ map (uncurry TestValueExpr) > identifiers = Group "identifiers" $ map (uncurry TestValueExpr)
> [("test",undefined) > [("test",Iden [Name "test"])
> ,("_test",undefined) > ,("_test",Iden [Name "_test"])
> ,("t1",undefined) > ,("t1",Iden [Name "t1"])
> ,("a.b",undefined) > ,("a.b",Iden [Name "a", Name "b"])
> ,("a.b.c",undefined) > ,("a.b.c",Iden [Name "a", Name "b", Name "c"])
> -- TODO: quoted idens
> -- double double quotes in quoted idens
> -- unicode idens syntax (needs escape?)
> ] > ]
TODO: module stuff TODO: module stuff
@ -1093,58 +1092,11 @@ expression
> ,(ctn ++ " 'test'", TypedLit stn "test") > ,(ctn ++ " 'test'", TypedLit stn "test")
> ] > ]
> {-typeNames :: TestItem
> typeNames = Group "type names" $ map (uncurry TestValueExpr)
> [("cast('test' as character(5))", undefined)
> ,("cast('test' as character)", undefined)
> ,("cast('test' as char(5))", undefined)
> ,("cast('test' as character varying(5))", undefined)
> ,("cast('test' as char varying(5))", undefined)
> ,("cast('test' as varchar(5))", undefined)
> ,("cast('test' as char(5) character set _xxx)", undefined)
> ,("cast('test' as national character(5))", undefined)
> ,("cast('test' as national char)", undefined)
> ,("cast('test' as nchar)", undefined)
> ,("cast('test' as national character varying)", undefined)
> ,("cast('test' as national char varying)", undefined)
> ,("cast('test' as nchar varying)", undefined)
> ,("cast('test' as bit(4))", undefined)
> ,("cast('test' as bit varying(4))", undefined)
> ,("cast('test' as bit varying)", undefined)
> ,("cast(5 as numeric)", undefined)
> ,("cast(5 as numeric(3))", undefined)
> ,("cast(5 as numeric(5,3))", undefined)
> ,("cast(5 as decimal(5,3))", undefined)
> ,("cast(5 as dec(5,3))", undefined)
> ,("cast(5 as integer)", undefined)
> ,("cast(5 as int)", undefined)
> ,("cast(5 as smallint)", undefined)
> ,("cast(5 as float(23))", undefined)
> ,("cast(5 as float)", undefined)
> ,("cast(5 as real)", undefined)
> ,("cast(5 as double precision)", undefined)
> ,("cast('01-01-99' as date)", undefined)
> ,("cast('01-01-99' as time)", undefined)
> ,("cast('01-01-99' as time(3))", undefined)
> ,("cast('01-01-99' as timestamp(3))", undefined)
> ,("cast('01-01-99' as timestamp with time zone)", undefined)
> ,("cast('01-01-99' as time(3) with time zone)", undefined)
> ]-}
== 6.2 <field definition> (p173) == 6.2 <field definition> (p173)
<field definition> ::= <field name> <data type> [ <reference scope check> ] <field definition> ::= <field name> <data type> [ <reference scope check> ]
This is used when e.g. casting to a row type. This is used in row type names.
== 6.3 <value expression primary> (p174) == 6.3 <value expression primary> (p174)
@ -1156,8 +1108,8 @@ This is used when e.g. casting to a row type.
> parenthesizedValueExpression :: TestItem > parenthesizedValueExpression :: TestItem
> parenthesizedValueExpression = Group "parenthesized value expression" $ map (uncurry TestValueExpr) > parenthesizedValueExpression = Group "parenthesized value expression" $ map (uncurry TestValueExpr)
> [("(3)", undefined) > [("(3)", Parens (NumLit "3"))
> ,("((3))", undefined) > ,("((3))", Parens $ Parens (NumLit "3"))
> ] > ]
<nonparenthesized value expression primary> ::= <nonparenthesized value expression primary> ::=
@ -1278,7 +1230,7 @@ for or how it works
> contextuallyTypeValueSpec = Group "contextually typed value specification" $ map (uncurry TestValueExpr) > contextuallyTypeValueSpec = Group "contextually typed value specification" $ map (uncurry TestValueExpr)
> [("null", Iden "null") > [("null", Iden "null")
> ,("array[]", Array (Iden "array") []) > ,("array[]", Array (Iden "array") [])
> --,("multiset[]", undefined) > ,("multiset[]", MultisetCtor [])
> ,("default", Iden "default") > ,("default", Iden "default")
> ] > ]
@ -1547,10 +1499,11 @@ TODO: work out the precendence of the array element reference suffix
> multisetElementReference :: TestItem > multisetElementReference :: TestItem
> multisetElementReference = Group "multiset element reference" $ map (uncurry TestValueExpr) > multisetElementReference = Group "multiset element reference" $ map (uncurry TestValueExpr)
> [("element(something)", undefined) > [("element(something)", App "element" [Iden "something"])
> ] > ]
TODO: work out how to parse this Not sure why this is called element reference, since it appears to be
like unnest, and entirely unlike array element reference.
6.25 <value expression> (p236) 6.25 <value expression> (p236)
@ -1977,7 +1930,19 @@ operator is ||, same as the string concatenation operator.
<multiset primary> ::= <multiset value function> | <value expression primary> <multiset primary> ::= <multiset value function> | <value expression primary>
TODO: multiset value expression > multisetValueExpression :: TestItem
> multisetValueExpression = Group "multiset value expression" $ map (uncurry TestValueExpr)
> [("a multiset union b"
> ,MultisetBinOp (Iden "a") Union SQDefault (Iden "b"))
> ,("a multiset union all b"
> ,MultisetBinOp (Iden "a") Union All (Iden "b"))
> ,("a multiset union distinct b"
> ,MultisetBinOp (Iden "a") Union Distinct (Iden "b"))
> ,("a multiset except b"
> ,MultisetBinOp (Iden "a") Except SQDefault (Iden "b"))
> ,("a multiset intersect b"
> ,MultisetBinOp (Iden "a") Intersect SQDefault (Iden "b"))
> ]
== 6.38 <multiset value function> (p289) == 6.38 <multiset value function> (p289)
@ -1985,7 +1950,10 @@ TODO: multiset value expression
<multiset set function> ::= SET <left paren> <multiset value expression> <right paren> <multiset set function> ::= SET <left paren> <multiset value expression> <right paren>
TODO: multiset value function > multisetValueFunction :: TestItem
> multisetValueFunction = Group "multiset value function" $ map (uncurry TestValueExpr)
> [("set(a)", App "set" [Iden "a"])
> ]
== 6.39 <multiset value constructor> (p290) == 6.39 <multiset value constructor> (p290)
@ -2004,7 +1972,18 @@ TODO: multiset value function
<table value constructor by query> ::= TABLE <left paren> <query expression> <right paren> <table value constructor by query> ::= TABLE <left paren> <query expression> <right paren>
TODO: multiset value constructor table is just syntactic sugar for multiset (why does it exist?)
> multisetValueConstructor :: TestItem
> multisetValueConstructor = Group "multiset value constructor" $ map (uncurry TestValueExpr)
> [("multiset[a,b,c]", MultisetCtor[Iden "a", Iden "b", Iden "c"])
> ,("multiset(select * from t)", MultisetQueryCtor qe)
> ,("table(select * from t)", MultisetQueryCtor qe)
> ]
> where
> qe = makeSelect {qeSelectList = [(Star,Nothing)]
> ,qeFrom = [TRSimple [Name "t"]]}
= 7 Query expressions = 7 Query expressions
@ -2900,7 +2879,8 @@ Specify the precision of an interval data type.
<interval leading field precision> ::= <unsigned integer> <interval leading field precision> ::= <unsigned integer>
TODO: interval qualifier interval qualifier is covered in the typenames and the interval
literals where it is used
== 10.2 <language clause> (p469) == 10.2 <language clause> (p469)