diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 09ccff4..6d255cd 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -38,9 +38,9 @@ lexers, this isn't 100% complete at the moment and needs fixing. The parsing code is aggressively left factored, and try is avoided as much as possible. Try is avoided because: -* when it is overused it makes the code hard to follow -* when it is overused it makes the parsing code harder to debug -* it makes the parser error messages much worse + * when it is overused it makes the code hard to follow + * when it is overused it makes the parsing code harder to debug + * it makes the parser error messages much worse The code could be made a bit simpler with a few extra 'trys', but this isn't done because of the impact on the parser error @@ -74,9 +74,9 @@ syntax. There are three big areas which are tricky to left factor: -* typenames -* value expressions which can start with an identifier -* infix and suffix operators + * typenames + * value expressions which can start with an identifier + * infix and suffix operators === typenames @@ -97,12 +97,12 @@ error messages really bad. Here is a list of these nodes: -* identifiers -* function application -* aggregate application -* window application -* typed literal: typename 'literal string' -* interval literal which is like the typed literal with some extras + * identifiers + * function application + * aggregate application + * window application + * typed literal: typename 'literal string' + * interval literal which is like the typed literal with some extras There is further ambiguity e.g. with typed literals with precision, functions, aggregates, etc. - these are an identifier, followed by @@ -113,12 +113,12 @@ is. 
There is also a set of nodes which start with an identifier/keyword but can commit since no other syntax can start the same way: -* case -* cast -* exists, unique subquery -* array constructor -* multiset constructor -* all the special syntax functions: extract, position, substring, + * case + * cast + * exists, unique subquery + * array constructor + * multiset constructor + * all the special syntax functions: extract, position, substring, convert, translate, overlay, trim, etc. The interval literal mentioned above is treated in this group at the @@ -143,10 +143,10 @@ standard which is able to eliminate a number of possibilities just in the grammar, which this parser allows. This is done for a number of reasons: -* it makes the parser simple - less variations -* it should allow for dialects and extensibility more easily in the + * it makes the parser simple - less variations + * it should allow for dialects and extensibility more easily in the future (e.g. new infix binary operators with custom precedence) -* many things which are effectively checked in the grammar in the + * many things which are effectively checked in the grammar in the standard, can be checked using a typechecker or other simple static analysis @@ -481,7 +481,6 @@ TODO: this code needs heavy refactoring > ,return Nothing] > return (p,x) > lobUnits = choice [LobCharacters <$ keyword_ "characters" -> ,LobCodeUnits <$ keyword_ "code_units" > ,LobOctets <$ keyword_ "octets"] > -- deal with multiset and array suffixes > tnSuffix x = @@ -657,6 +656,10 @@ multiset(query expr). It must be there for compatibility or something. > ,keyword_ "table" >> > MultisetQueryCtor <$> parens queryExpr] +> nextValueFor :: Parser ValueExpr +> nextValueFor = keywords_ ["next","value","for"] >> +> NextValueFor <$> names + === interval interval literals are a special case and we follow the grammar less @@ -1161,6 +1164,7 @@ fragile and could at least do with some heavy explanation. 
> ,cast > ,arrayCtor > ,multisetCtor +> ,nextValueFor > ,subquery > ,intervalLit > ,specialOpKs @@ -1740,7 +1744,7 @@ means). > ,"class_origin" > ,"coalesce" > ,"cobol" -> ,"code_units" +> --,"code_units" > ,"collation" > ,"collation_catalog" > ,"collation_name" @@ -2002,13 +2006,13 @@ means). > ,"cube" > ,"current" > --,"current_date" -> ,"current_default_transform_group" -> ,"current_path" -> ,"current_role" +> --,"current_default_transform_group" +> --,"current_path" +> --,"current_role" > ,"current_time" > ,"current_timestamp" > ,"current_transform_group_for_type" -> ,"current_user" +> --,"current_user" > ,"cursor" > ,"cycle" > ,"date" @@ -2052,7 +2056,7 @@ means). > ,"global" > ,"grant" > ,"group" -> ,"grouping" +> --,"grouping" > ,"having" > ,"hold" > --,"hour" @@ -2088,7 +2092,7 @@ means). > ,"method" > --,"minute" > ,"modifies" -> ,"module" +> --,"module" > --,"month" > ,"multiset" > ,"national" @@ -2151,7 +2155,7 @@ means). > --,"second" > ,"select" > ,"sensitive" -> ,"session_user" +> --,"session_user" > --,"set" > ,"similar" > ,"smallint" @@ -2167,7 +2171,7 @@ means). > ,"submultiset" > ,"symmetric" > ,"system" -> ,"system_user" +> --,"system_user" > ,"table" > ,"then" > ,"time" @@ -2187,7 +2191,7 @@ means). > ,"unnest" > ,"update" > ,"upper" -> ,"user" +> --,"user" > ,"using" > --,"value" > ,"values" diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index 363a239..52a7f22 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -218,6 +218,9 @@ which have been changed to try to improve the layout of the output. > valueExpr (Collate v c) = > valueExpr v <+> text "collate" <+> names c +> valueExpr (NextValueFor ns) = +> text "next value for" <+> names ns + > doubleUpQuotes :: String -> String > doubleUpQuotes [] = [] @@ -263,7 +266,6 @@ which have been changed to try to improve the layout of the output. 
> LobG -> text "G") m > <+> me (\x -> case x of > LobCharacters -> text "CHARACTERS" -> LobCodeUnits -> text "CODE_UNITS" > LobOctets -> text "OCTETS") u) > typeName (CharTypeName t i cs col) = > names t diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index c92aab0..20c60d6 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -158,6 +158,7 @@ > | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr > | MultisetCtor [ValueExpr] > | MultisetQueryCtor QueryExpr +> | NextValueFor [Name] > deriving (Eq,Show,Read,Data,Typeable) > -- | Represents an identifier name, which can be quoted or unquoted. @@ -190,7 +191,6 @@ TODO: add ref and scope, any others? > data LobMultiplier = LobK | LobM | LobG > deriving (Eq,Show,Read,Data,Typeable) > data LobUnits = LobCharacters -> | LobCodeUnits > | LobOctets > deriving (Eq,Show,Read,Data,Typeable) diff --git a/TODO b/TODO index 9b5c891..4b520f2 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,11 @@ continue 2003 review and tests +1. start replacing the 2003 stuff with 2011 +2. create an error message document for the website + - base of error messages but add some more variations +3. start thinking about tests for invalid syntax + + touch up the expr hack as best as can careful review of token parses wrt trailing delimiters/junk @@ -56,6 +62,8 @@ rules for changing the multi keyword parsing: rough SQL 2003 todo, including tests to write: +switch to SQL 2011 + now: review the commented out reserved keyword entries and work out how to fix @@ -64,7 +72,6 @@ go through almost all the predicates window functions missing bits, window clauses from: more tests, review missing tablesample, unnest, etc. 
-aggregates: where, filter + review rows review match missing bit between symmetric diff --git a/changelog b/changelog index 3b5bc7a..27abad6 100644 --- a/changelog +++ b/changelog @@ -1,4 +1,4 @@ -0.4.0-dev (updated to 7a847045163feb2339ab40ebe93afe2f1c9ad813) +0.4.0-dev (updated to 705724197463cd19dd8749dfd51e2eb8f1d02b8e) completely remove dependency on haskell-src-exts improve the error messages a great deal fix some trailing whitespace issues in the keyword style functions, @@ -52,6 +52,11 @@ quote character in the identifier implement complete interval literals (fixed the handling of the interval qualifier) + make most of the standard reserved words actually reserved (still + some gaps) + change the natural in join abstract syntax to match the concrete + syntax instead of combining natural, on and using into one field + support filter and within group for aggregates 0.3.1 (commit 5cba9a1cac19d66166aed2876d809aef892ff59f) update to work with ghc 7.8.1 0.3.0 (commit 9e75fa93650b4f1a08d94f4225a243bcc50445ae) diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal index ddec911..03b3eb3 100644 --- a/simple-sql-parser.cabal +++ b/simple-sql-parser.cabal @@ -28,7 +28,7 @@ library Language.SQL.SimpleSQL.Parser, Language.SQL.SimpleSQL.Syntax other-extensions: TupleSections - build-depends: base >=4.6 && <4.7, + build-depends: base >=4.6 && <4.8, parsec >=3.1 && <3.2, mtl >=2.1 && <2.2, pretty >= 1.1 && < 1.2 @@ -40,7 +40,7 @@ Test-Suite Tests type: exitcode-stdio-1.0 main-is: RunTests.lhs hs-source-dirs: .,tools - Build-Depends: base >=4.6 && <4.7, + Build-Depends: base >=4.6 && <4.8, parsec >=3.1 && <3.2, mtl >=2.1 && <2.2, pretty >= 1.1 && < 1.2, @@ -71,7 +71,7 @@ Test-Suite Tests executable SQLIndent main-is: SQLIndent.lhs hs-source-dirs: .,tools - Build-Depends: base >=4.6 && <4.7, + Build-Depends: base >=4.6 && <4.8, parsec >=3.1 && <3.2, mtl >=2.1 && <2.2, pretty >= 1.1 && < 1.2 diff --git a/tools/Language/SQL/SimpleSQL/SQL2003.lhs 
b/tools/Language/SQL/SimpleSQL/SQL2003.lhs index f140287..6031114 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs @@ -15,7 +15,8 @@ large amount of the SQL. > sql2003Tests :: TestItem > sql2003Tests = Group "sql2003Tests" -> [stringLiterals +> [Group "literals" [ +> stringLiterals > ,nationalCharacterStringLiterals > ,unicodeStringLiterals > ,binaryStringLiterals @@ -23,32 +24,72 @@ large amount of the SQL. > ,intervalLiterals > ,booleanLiterals > ,identifiers -> ,typeNameTests +> ],Group "value expressions" +> [typeNameTests > ,parenthesizedValueExpression +> ,someGeneralValues > ,targetSpecification > ,contextuallyTypeValueSpec -> --,nextValueExpression +> ,moduleColumnRef +> ,groupingOperation +> --,windowFunctions +> --,caseExpression +> --,castSpecification +> ,nextValueExpression +> -- subtype treatment, method invoc, static m i, new spec, attrib/method ref, deref, method ref, ref res > ,arrayElementReference > ,multisetElementReference -> --,numericValueExpression +> ,numericValueExpression +> --,numericValueFunction +> --,stringValueExpression +> --,stringValueFunction +> --,datetimeValueExpression +> --,datetimeValueFunction +> --,intervalValueExpression +> --,intervalValueFunction > --,booleanValueExpression +> --,arrayValueExpression > ,arrayValueConstructor > ,multisetValueExpression > ,multisetValueFunction > ,multisetValueConstructor +> ],Group "query expressions" +> [ +> -- rowValueConstructor +> --,rowValueExpression > --,tableValueConstructor > --,fromClause +> --,joinedTable > --,whereClause -> ,groupbyClause +> groupbyClause +> --,havingClause +> --,windowClause > --,querySpecification -> --,queryExpressions -> ,quantifiedComparisonPredicate +> --,querySpecifications +> --,setOperations +> --,withExpressions +> ],Group "predicates" +> [--comparisonPredicate +> --,betweenPredicate +> --,inPredicate +> --,likePredicate +> --,similarPredicate +> --,nullPredicate +> quantifiedComparisonPredicate +> 
--,existsPredicate > ,uniquePredicate +> --,normalizedPredicate > ,matchPredicate +> --,overlapsPredicate +> --,distinctPredicate +> --,memberPredicate +> --,submultisetPredicate +> --,setPredicate > ,collateClause > ,aggregateFunctions > ,sortSpecificationList > ] +> ] = 5 Lexical Elements @@ -1001,8 +1042,6 @@ create a list of type name variations: > ,("blob(3M)", LobTypeName [Name "blob"] 3 (Just LobM) Nothing) > ,("blob(4M characters) " > ,LobTypeName [Name "blob"] 4 (Just LobM) (Just LobCharacters)) -> ,("blob(5 code_units) " -> ,LobTypeName [Name "blob"] 5 Nothing (Just LobCodeUnits)) > ,("blob(6G octets) " > ,LobTypeName [Name "blob"] 6 (Just LobG) (Just LobOctets)) > ,("national character large object(7K) " @@ -1172,7 +1211,19 @@ This is used in row type names. | USER | VALUE -TODO: review how the special keywords are parsed and add tests for these + +> someGeneralValues :: TestItem +> someGeneralValues = Group "some general values" $ map (uncurry TestValueExpr) $ +> map mkIden ["CURRENT_DEFAULT_TRANSFORM_GROUP" +> ,"CURRENT_PATH" +> ,"CURRENT_ROLE" +> ,"CURRENT_USER" +> ,"SESSION_USER" +> ,"SYSTEM_USER" +> ,"USER" +> ,"VALUE"] +> where +> mkIden nm = (nm,Iden [Name nm]) ::= @@ -1269,8 +1320,11 @@ already covered above in the identifiers and names section | MODULE -TODO: work out the exact syntax and add - +> moduleColumnRef :: TestItem +> moduleColumnRef = Group "module column ref" $ map (uncurry TestValueExpr) +> [("MODULE.something.something", Iden [Name "MODULE" +> ,Name "something" +> ,Name "something"])] == 6.8 (p190) @@ -1304,7 +1358,19 @@ ORDER BY department, job, "Total Empl", "Average Sal"; TODO: de-oracle the syntax and add as test case - +> groupingOperation :: TestItem +> groupingOperation = Group "grouping operation" $ map (uncurry TestQueryExpr) +> [("SELECT SalesQuota, SUM(SalesYTD) TotalSalesYTD,\n\ +> \ GROUPING(SalesQuota) AS Grouping\n\ +> \FROM Sales.SalesPerson\n\ +> \GROUP BY ROLLUP(SalesQuota);" +> ,makeSelect +> {qeSelectList = 
[(Iden [Name "SalesQuota"],Nothing) +> ,(App [Name "SUM"] [Iden [Name "SalesYTD"]],Just (Name "TotalSalesYTD")) +> ,(App [Name "GROUPING"] [Iden [Name "SalesQuota"]],Just (Name "Grouping"))] +> ,qeFrom = [TRSimple [Name "Sales",Name "SalesPerson"]] +> ,qeGroupBy = [Rollup [SimpleGroup (Iden [Name "SalesQuota"])]]}) +> ] == 6.10 (p193) @@ -1323,6 +1389,10 @@ TODO: de-oracle the syntax and add as test case TODO: window functions +> windowFunctions :: TestItem +> windowFunctions = Group "window functions" $ map (uncurry TestValueExpr) +> [ +> ] == 6.11 (p197) @@ -1371,7 +1441,10 @@ TODO: window functions TODO: case expressions plus the 'abbreviations' - +> caseExpression :: TestItem +> caseExpression = Group "case expression" $ map (uncurry TestValueExpr) +> [ +> ] == 6.12 (p200) @@ -1391,7 +1464,7 @@ This is already covered above > nextValueExpression :: TestItem > nextValueExpression = Group "next value expression" $ map (uncurry TestValueExpr) -> [("next value for a.b", undefined) +> [("next value for a.b", NextValueFor [Name "a", Name "b"]) > ] @@ -1569,13 +1642,16 @@ Specify a numeric value. > numericValueExpression :: TestItem > numericValueExpression = Group "numeric value expression" $ map (uncurry TestValueExpr) -> [("a + b", undefined) -> ,("a - b", undefined) -> ,("a * b", undefined) -> ,("a / b", undefined) -> ,("+a", undefined) -> ,("-a", undefined) +> [("a + b", binOp "+") +> ,("a - b", binOp "-") +> ,("a * b", binOp "*") +> ,("a / b", binOp "/") +> ,("+a", prefOp "+") +> ,("-a", prefOp "-") > ] +> where +> binOp o = BinOp (Iden [Name "a"]) [Name o] (Iden [Name "b"]) +> prefOp o = PrefixOp [Name o] (Iden [Name "a"]) == 6.27 (p242) @@ -1600,12 +1676,22 @@ Specify a function yielding a value of type numeric. 
| +> numericValueFunction :: TestItem +> numericValueFunction = Group "numeric value function" $ map (uncurry TestValueExpr) +> [ + + ::= POSITION IN [ USING ] ::= POSITION IN +> ("position (a in b)",undefined) +> ,("position (a in b using characters)",undefined) +> ,("position (a in b using octets)",undefined) + + TODO: position expressions ::= @@ -1663,6 +1749,9 @@ TODO: extract expression TODO: lots more expressions above +> ] + + == 6.28 (p251) Specify a character string value or a binary string value. @@ -3160,9 +3249,3 @@ TODO: review sort specifications > qe = makeSelect > {qeSelectList = [(Star,Nothing)] > ,qeFrom = [TRSimple [Name "t"]]} - -TODO: what happened to the collation in order by? -Answer: sort used to be a column reference with an optional -collate. Since it is now a value expression, the collate doesn't need -to be mentioned here. -