diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index a44eb52..a7fbf99 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -494,6 +494,17 @@ a match (select a from t) > [ArrayCtor <$> parens queryExpr > ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)] +> multisetCtor :: Parser ValueExpr +> multisetCtor = +> choice +> [keyword_ "multiset" >> +> choice +> [MultisetQueryCtor <$> parens queryExpr +> ,MultisetCtor <$> brackets (commaSep valueExpr)] +> ,keyword_ "table" >> +> MultisetQueryCtor <$> parens queryExpr] + + > escape :: Parser (ValueExpr -> ValueExpr) > escape = do > ctor <- choice @@ -726,6 +737,7 @@ messages, but both of these are considered too important. > ,"is unknown" > ,"is not unknown"] > ] +> ++ [multisetBinOp] > -- have to use try with inSuffix because of a conflict > -- with 'in' in position function, and not between > -- between also has a try in it to deal with 'not' @@ -755,6 +767,14 @@ messages, but both of these are considered too important. > return $ PostfixOp [Name $ unwords o] > binary p nm assoc = > E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc +> multisetBinOp = E.Infix (do +> keyword_ "multiset" +> o <- choice [Union <$ keyword_ "union" +> ,Intersect <$ keyword_ "intersect" +> ,Except <$ keyword_ "except"] +> d <- fromMaybe SQDefault <$> duplicates +> return (\a b -> MultisetBinOp a o d b)) +> E.AssocLeft > prefixKeyword nm = prefix (keyword_ nm) nm > prefixSym nm = prefix (symbol_ nm) nm > prefix p nm = prefix' (p >> return (PrefixOp [Name nm])) @@ -787,6 +807,7 @@ fragile and could at least do with some heavy explanation. > ,caseValue > ,cast > ,arrayCtor +> ,multisetCtor > ,specialOpKs > ,parensTerm > ,subquery diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index e2c9c0d..6bfabb0 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -173,6 +173,28 @@ which have been changed to try to improve the layout of the output. > valueExpr (ArrayCtor q) = > text "array" <> parens (queryExpr q) +> valueExpr (MultisetCtor es) = +> text "multiset" <> brackets (commaSep $ map valueExpr es) + +> valueExpr (MultisetQueryCtor q) = +> text "multiset" <> parens (queryExpr q) + +> valueExpr (MultisetBinOp a c q b) = +> sep +> [valueExpr a +> ,text "multiset" +> ,text $ case c of +> Union -> "union" +> Intersect -> "intersect" +> Except -> "except" +> ,case q of +> SQDefault -> empty +> All -> text "all" +> Distinct -> text "distinct" +> ,valueExpr b] + + + > valueExpr (CSStringLit cs st) = > text cs <> quotes (text $ doubleUpQuotes st) diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index 620b472..a56a29a 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -147,6 +147,9 @@ > | Escape ValueExpr Char > | UEscape ValueExpr Char > | Collate ValueExpr String +> | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr +> | MultisetCtor [ValueExpr] +> | MultisetQueryCtor QueryExpr > deriving (Eq,Show,Read,Data,Typeable) > -- | Represents an identifier name, which can be quoted or unquoted. diff --git a/TODO b/TODO index 6f5e86d..405b935 100644 --- a/TODO +++ b/TODO @@ -6,7 +6,6 @@ table expression in syntax: QueryExpr = Select SelectList (Maybe TableExpr) and the TableExpr contains all the other bits? finish off ansi 2003 support or specific subset - summarize todos start looking at error messages change the booleans in the ast to better types for less ambiguity represent missing optional bits in the ast as nothing instead of the @@ -18,6 +17,10 @@ review haddock in the syntax and update review syntax names and representation careful review of token parses wrt trailing delimiters/junk +decide how to handle character set literals and identifiers: don't + have any intention of actually supporting switching character sets + in the middle of parsing so maybe this would be better disabled? + review places in the parse which should allow only a fixed set of identifiers (e.g. in interval literals) @@ -25,14 +28,23 @@ decide whether to represent numeric literals better, instead of a single string - break up into parts, or parse to a Decimal or something +refactor the typename parsing + +remove the IsString for Name and [Name] + +fixes: + +keyword tree, add explicit result then can use for joins also + +keyword tree support prefix mode so can start from already parsed + token + +do the final big left factor: typenames, interval lits, iden + + suffixes + rough SQL 2003 todo, including tests to write: -switch TypedLit to CSStringLit based on first char being underscore? -idens: "", unicode, charset?, check dotted idens and contexts -add missing type name support: lots of missing ones here, including - simple stuff like lob variations, and new things like interval, - row, ref, scope, array, multiset type names. - add tests for all the typenames cast and typed literal +idens: "", unicode date and time literals multisets review window functions, window clause @@ -45,6 +57,8 @@ unnest filter in aggs within group in aggs rows review +matching simple partial full + LNR: maybe leave until after next release diff --git a/tools/Language/SQL/SimpleSQL/Postgres.lhs b/tools/Language/SQL/SimpleSQL/Postgres.lhs index ae66ef2..804db43 100644 --- a/tools/Language/SQL/SimpleSQL/Postgres.lhs +++ b/tools/Language/SQL/SimpleSQL/Postgres.lhs @@ -29,7 +29,9 @@ TODO: get all the commented out tests working > ,"SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same');" -> ,"SELECT ROW(table.*) IS NULL FROM table;" +> -- table is a reservered keyword? +> --,"SELECT ROW(table.*) IS NULL FROM table;" +> ,"SELECT ROW(tablex.*) IS NULL FROM table;" > ,"SELECT true OR somefunc();" diff --git a/tools/Language/SQL/SimpleSQL/SQL2003.lhs b/tools/Language/SQL/SimpleSQL/SQL2003.lhs index c720c76..9505965 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs @@ -21,19 +21,22 @@ large amount of the SQL. > ,unicodeStringLiterals > ,binaryStringLiterals > ,numericLiterals -> --,dateAndTimeLiterals +> ,intervalLiterals > ,booleanLiterals -> --,identifiers +> ,identifiers > ,typeNameTests -> --,parenthesizedValueExpression +> ,parenthesizedValueExpression > ,targetSpecification > ,contextuallyTypeValueSpec > --,nextValueExpression > ,arrayElementReference -> --,multisetElementReference +> ,multisetElementReference > --,numericValueExpression > --,booleanValueExpression > ,arrayValueConstructor +> ,multisetValueExpression +> ,multisetValueFunction +> ,multisetValueConstructor > --,tableValueConstructor > --,fromClause > --,whereClause @@ -486,9 +489,7 @@ The rule consists of two immediately adjacent marks with > ,TypedLit (TypeName "_francais") "français") > ] -TODO: all the stuff with character set representations. - - +TODO: all the stuff with character set representations. (?) == other string literals @@ -662,15 +663,10 @@ TODO: interval literals (e.g. parse the literal string), at the moment they are treated as normal typed literals -> {-dateAndTimeLiterals :: TestItem -> dateAndTimeLiterals = Group "date and time literals" $ map (uncurry TestValueExpr) -> [("date 'date literal'" -> ,TypedLit (TypeName "date") "date literal") -> ,("time 'time literal'" -> ,TypedLit (TypeName "time") "time literal") -> ,("timestamp 'timestamp literal'" -> ,TypedLit (TypeName "timestamp") "timestamp literal") -> ]-} +> intervalLiterals :: TestItem +> intervalLiterals = Group "intervalLiterals literals" $ map (uncurry TestValueExpr) +> [ +> ] TODO: intervals + more date and time literals @@ -804,11 +800,14 @@ TODO: language identifiers have different rules to generic identifiers > identifiers :: TestItem > identifiers = Group "identifiers" $ map (uncurry TestValueExpr) -> [("test",undefined) -> ,("_test",undefined) -> ,("t1",undefined) -> ,("a.b",undefined) -> ,("a.b.c",undefined) +> [("test",Iden [Name "test"]) +> ,("_test",Iden [Name "_test"]) +> ,("t1",Iden [Name "t1"]) +> ,("a.b",Iden [Name "a", Name "b"]) +> ,("a.b.c",Iden [Name "a", Name "b", Name "c"]) +> -- TODO: quoted idens +> -- double double quotes in quoted idens +> -- unicode idens syntax (needs escape?) > ] TODO: module stuff @@ -1093,58 +1092,11 @@ expression > ,(ctn ++ " 'test'", TypedLit stn "test") > ] -> {-typeNames :: TestItem -> typeNames = Group "type names" $ map (uncurry TestValueExpr) -> [("cast('test' as character(5))", undefined) -> ,("cast('test' as character)", undefined) -> ,("cast('test' as char(5))", undefined) -> ,("cast('test' as character varying(5))", undefined) -> ,("cast('test' as char varying(5))", undefined) -> ,("cast('test' as varchar(5))", undefined) -> ,("cast('test' as char(5) character set _xxx)", undefined) - -> ,("cast('test' as national character(5))", undefined) -> ,("cast('test' as national char)", undefined) -> ,("cast('test' as nchar)", undefined) -> ,("cast('test' as national character varying)", undefined) -> ,("cast('test' as national char varying)", undefined) -> ,("cast('test' as nchar varying)", undefined) - -> ,("cast('test' as bit(4))", undefined) -> ,("cast('test' as bit varying(4))", undefined) -> ,("cast('test' as bit varying)", undefined) - -> ,("cast(5 as numeric)", undefined) -> ,("cast(5 as numeric(3))", undefined) -> ,("cast(5 as numeric(5,3))", undefined) -> ,("cast(5 as decimal(5,3))", undefined) -> ,("cast(5 as dec(5,3))", undefined) -> ,("cast(5 as integer)", undefined) -> ,("cast(5 as int)", undefined) -> ,("cast(5 as smallint)", undefined) -> ,("cast(5 as float(23))", undefined) -> ,("cast(5 as float)", undefined) -> ,("cast(5 as real)", undefined) -> ,("cast(5 as double precision)", undefined) - -> ,("cast('01-01-99' as date)", undefined) -> ,("cast('01-01-99' as time)", undefined) -> ,("cast('01-01-99' as time(3))", undefined) -> ,("cast('01-01-99' as timestamp(3))", undefined) -> ,("cast('01-01-99' as timestamp with time zone)", undefined) -> ,("cast('01-01-99' as time(3) with time zone)", undefined) -> ]-} - - - - == 6.2 (p173) ::= [ ] -This is used when e.g. casting to a row type. - - +This is used in row type names. == 6.3 (p174) @@ -1156,8 +1108,8 @@ This is used when e.g. casting to a row type. > parenthesizedValueExpression :: TestItem > parenthesizedValueExpression = Group "parenthesized value expression" $ map (uncurry TestValueExpr) -> [("(3)", undefined) -> ,("((3))", undefined) +> [("(3)", Parens (NumLit "3")) +> ,("((3))", Parens $ Parens (NumLit "3")) > ] ::= @@ -1278,7 +1230,7 @@ for or how it works > contextuallyTypeValueSpec = Group "contextually typed value specification" $ map (uncurry TestValueExpr) > [("null", Iden "null") > ,("array[]", Array (Iden "array") []) -> --,("multiset[]", undefined) +> ,("multiset[]", MultisetCtor []) > ,("default", Iden "default") > ] @@ -1547,10 +1499,11 @@ TODO: work out the precendence of the array element reference suffix > multisetElementReference :: TestItem > multisetElementReference = Group "multiset element reference" $ map (uncurry TestValueExpr) -> [("element(something)", undefined) +> [("element(something)", App "element" [Iden "something"]) > ] -TODO: work out how to parse this +Not sure why this is called element reference, since it appears to be +like unnest, and entirely unlike array element reference. 6.25 (p236) @@ -1977,7 +1930,19 @@ operator is ||, same as the string concatenation operator. ::= | -TODO: multiset value expression +> multisetValueExpression :: TestItem +> multisetValueExpression = Group "multiset value expression" $ map (uncurry TestValueExpr) +> [("a multiset union b" +> ,MultisetBinOp (Iden "a") Union SQDefault (Iden "b")) +> ,("a multiset union all b" +> ,MultisetBinOp (Iden "a") Union All (Iden "b")) +> ,("a multiset union distinct b" +> ,MultisetBinOp (Iden "a") Union Distinct (Iden "b")) +> ,("a multiset except b" +> ,MultisetBinOp (Iden "a") Except SQDefault (Iden "b")) +> ,("a multiset intersect b" +> ,MultisetBinOp (Iden "a") Intersect SQDefault (Iden "b")) +> ] == 6.38 (p289) @@ -1985,7 +1950,10 @@ TODO: multiset value expression ::= SET -TODO: multiset value function +> multisetValueFunction :: TestItem +> multisetValueFunction = Group "multiset value function" $ map (uncurry TestValueExpr) +> [("set(a)", App "set" [Iden "a"]) +> ] == 6.39 (p290) @@ -2004,7 +1972,18 @@ TODO: multiset value function ::= TABLE -TODO: multiset value constructor +table is just syntactic sugar for multiset (why does it exist?) + +> multisetValueConstructor :: TestItem +> multisetValueConstructor = Group "multiset value constructor" $ map (uncurry TestValueExpr) +> [("multiset[a,b,c]", MultisetCtor[Iden "a", Iden "b", Iden "c"]) +> ,("multiset(select * from t)", MultisetQueryCtor qe) +> ,("table(select * from t)", MultisetQueryCtor qe) +> ] +> where +> qe = makeSelect {qeSelectList = [(Star,Nothing)] +> ,qeFrom = [TRSimple [Name "t"]]} + = 7 Query expressions @@ -2900,7 +2879,8 @@ Specify the precision of an interval data type. ::= -TODO: interval qualifier +interval qualifier is covered in the typenames and the interval +literals where it is used == 10.2 (p469)