From c814cc94378563f0750787ca207d906f4c2665ea Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Fri, 18 Apr 2014 10:47:39 +0300 Subject: [PATCH] tweak to error message for reserved keyword as identifier add in some group by and order by tests --- Language/SQL/SimpleSQL/Parser.lhs | 5 +- TODO | 118 +++++++---------------- tools/Language/SQL/SimpleSQL/SQL2003.lhs | 53 +++++++--- 3 files changed, 77 insertions(+), 99 deletions(-) diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 518fbfc..7781d71 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -8,7 +8,7 @@ > ,ParseError(..)) where > import Control.Monad.Identity (Identity) -> import Control.Monad (guard, void) +> import Control.Monad (guard, void, when) > import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>)) > import Data.Maybe (fromMaybe,catMaybes) > import Data.Char (toLower) @@ -1079,7 +1079,8 @@ instead, and create an alternative suffix parser > identifierBlacklist :: [String] -> Parser String > identifierBlacklist bl = try (do > i <- identifier -> guard (map toLower i `notElem` bl) +> when (map toLower i `elem` bl) $ +> fail $ "keyword not allowed here: " ++ i > return i) > "identifier" diff --git a/TODO b/TODO index 6036167..03db888 100644 --- a/TODO +++ b/TODO @@ -15,31 +15,7 @@ look at fixing the expression parsing completely represent natural and using/on in the syntax more close to the concrete syntax - don't combine in the ast -review the token parsers, and make sure they have trailing delimiters - or consume bad trailing characters and fail (e.g. 1e2e3 in a select - list parses as '1e2 e3' i.e. '1e2 as e3' - split the general symbol and operator parsing, and make it tighter - in terms of when the symbol or operator ends (don't allow to end - early) -approach: review the lexical syntax, create complete list of - tokens/token generators. Divide into tokens which must be followed - by some particular other token or at least one whitespace, and ones - which can be immediately followed by another token. Then fix the - lexing parsers to work this way -whitespace/comments -integers -numbers -string literals -keywords -operator symbols <>=+=^%/*!|~& -non operator symbols ()?,;"' -identifiers -quoted identifiers - -identifiers and keywords are ok for now -there are issues with integers, numbers, operators and non operator - symbols - +careful review of token parses wrt trailing delimiters/junk review places in the parse which should allow only a fixed set of identifiers (e.g. in interval literals) @@ -49,25 +25,30 @@ decide whether to represent numeric literals better, instead of a something rough SQL 2003 todo, including tests to write: -can multipart identifiers have whitespace around the '.'? -more work on date and time literals -support "" in delimited identifier -unicode identifier -support needed MODULE syntax in identifiers - already covered? -review qualification names in identifiers support in various contexts - (e.g. function app, table refs) + +idens: "", unicode, charset?, check dotted idens and contexts add missing type name support: lots of missing ones here, including simple stuff like lob variations, and new things like interval, row, ref, scope, array, multiset type names. +date and time literals +multisets +review window functions, window clause +review cases +search/cycle, exclusions +special operators +from clause review +table sample +unnest +filter in aggs +within group in aggs +rows review + +support needed MODULE syntax in identifiers - already covered? decide how to represent special identifiers including the session variables or whatever they are called like current_user -multiset[] grouping - needs special syntax? -review window function support and missing bits -review case expressions next value for probably leave for now: subtypes, methods, new /routine, dereference -multiset element reference - maybe nothing to do double check associativity, precedence (value exprs, joins, set ops) position expressions length expressions @@ -83,7 +64,6 @@ overlay specifictype datetime value expressions intervals -multiset value expressions, constructors row value constructors, expressions review review table value constructor exactly what is allowed lots more tests for from clause variations @@ -101,7 +81,6 @@ in predicate review escape for like escape for [not] similar to regular expression syntax? -quantified comparison predicate: represent different from current normalized predicate overlaps predicate distinct from predicate @@ -123,36 +102,23 @@ review areas where this parser is too permissive, e.g. value warnings or something), unqualified asterisk in select list - - - - - left factor/try removal: -try in the interval literal - have to left factor with the typed literal "interval 'xxx'" syntax - + with identifier -try in the prefix cast: LF with identifier -few tries in the specialopks: need review - + left factor the start of these (e.g. for function style substring - and for keyword style substring) -not between: needs left factoring with a bunch of suffix operators -subqueries: need left factoring with all the stuff which starts with - open parens. The subquery ast needs rethink as well -typename: left factor with identifier -inSuffix in expr table: conflicts with 'in' keyword in precision - - left factor -the binary and postfix multi keyword ops need left factoring since - several share prefixes -app needs lf with parens, identifier, etc. -parens lf in nonJoinTref -name start lf in nonJoinTref -all of the above should help the error messages a lot +character set literal: leading identifier +typed literal: leading identifier +special operators: needs some rewriting to remove try + + left factor with iden( patterns +conflict with in suffix and in in position +conflict with not prefix op and not between?? +multi word type names: left factor +quantified comparison: left factor with normal comparison +multi word operator names in expressions +hardcode all the symbols in the symbol parser/split? -big feature summary: + +future big feature summary: all ansi sql queries -better expression tree parsing +completely working expression tree parsing error messages, left factor dml, ddl, procedural sql position annotation @@ -165,7 +131,7 @@ extensibility performance analysis -= next release += stuff try and use the proper css theme create a header like in the haddock with simple-sql-parser + @@ -186,9 +152,6 @@ regenerate the examples on the index.txt docs -add to website: pretty printed tpch, maybe other queries as - demonstration - add preamble to the rendered test page add links from the supported sql page to the rendered test page for @@ -206,23 +169,10 @@ review internal sql collection for more syntax/tests other -review syntax to replace maybe and bool with better ctors - ---- demo program: convert tpch to sql server syntax exe processor -review abstract syntax (e.g. combine App with SpecialOp?) - -more operators - -sql server top syntax - -named windows - -extended string literals, escapes and other flavours (like pg and - oracle custom delimiters) - run through other manuals for example queries and features: sql in a nutshell, sql guide, sql reference guide, sql standard, sql server manual, oracle manual, teradata manual + re-through postgresql @@ -239,6 +189,8 @@ ast checker: checks the ast represents valid syntax, the parser = other sql support +top +string literals full number literals -> other bases? apply, pivot @@ -254,7 +206,3 @@ sqlite sap dbmss (can't work out what are separate products or what are the dialects) -maybe later: other dml - insert, update, delete, truncate, merge + set, show? - copy, execute?, explain?, begin/end/rollback? - diff --git a/tools/Language/SQL/SimpleSQL/SQL2003.lhs b/tools/Language/SQL/SimpleSQL/SQL2003.lhs index 57dee83..755194d 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs @@ -37,14 +37,14 @@ large amount of the SQL. > --,tableValueConstructor > --,fromClause > --,whereClause -> --,groupbyClause +> ,groupbyClause > --,querySpecification > --,queryExpressions > ,quantifiedComparisonPredicate > ,uniquePredicate > ,matchPredicate > ,collateClause -> --,sortSpecificationList +> ,sortSpecificationList > ] = 5 Lexical Elements @@ -2113,11 +2113,25 @@ groups, and not general value expressions. > groupbyClause :: TestItem > groupbyClause = Group "group by clause" $ map (uncurry TestQueryExpr) -> [("select a, sum(b) from t group by a", undefined) -> ,("select a, c,sum(b) from t group by a,c", undefined) -> ,("select a, c,sum(b) from t group by a,c collate x", undefined) -> ,("select a, c,sum(b) from t group by a,c collate x having sum(b) > 100", undefined) +> [("select a, sum(b) from t group by a",q) +> ,("select a, sum(b),c from t group by a,c" +> ,q1 {qeGroupBy = qeGroupBy q1 ++ [SimpleGroup $ Iden "c"]}) +> ,("select a, sum(b),c from t group by a,c collate x" +> ,q1 {qeGroupBy = qeGroupBy q1 +> ++ [SimpleGroup $ Collate (Iden "c") "x"]}) +> ,("select a, sum(b),c from t group by a,c collate x having sum(b) > 100" +> ,q1 {qeGroupBy = qeGroupBy q1 +> ++ [SimpleGroup $ Collate (Iden "c") "x"] +> ,qeHaving = Just (BinOp (App "sum" [Iden "b"]) +> ">" (NumLit "100"))}) > ] +> where +> q = makeSelect +> {qeSelectList = [(Iden "a",Nothing), (App "sum" [Iden "b"],Nothing)] +> ,qeFrom = [TRSimple "t"] +> ,qeGroupBy = [SimpleGroup $ Iden "a"] +> } +> q1 = q {qeSelectList = qeSelectList q ++ [(Iden "c", Nothing)]} 7.10 (p329) @@ -2896,13 +2910,28 @@ TODO: review sort specifications > sortSpecificationList :: TestItem > sortSpecificationList = Group "sort specification list" $ map (uncurry TestQueryExpr) -> [("select * from t order by a", undefined) -> ,("select * from t order by a,b", undefined) -> ,("select * from t order by a asc,b", undefined) -> ,("select * from t order by a desc,b", undefined) -> ,("select * from t order by a collate x desc,b", undefined) -> ,("select * from t order by 1,2", undefined) +> [("select * from t order by a" +> ,qe {qeOrderBy = [SortSpec (Iden "a") Asc NullsOrderDefault]}) +> ,("select * from t order by a,b" +> ,qe {qeOrderBy = [SortSpec (Iden "a") Asc NullsOrderDefault +> ,SortSpec (Iden "b") Asc NullsOrderDefault]}) +> ,("select * from t order by a asc,b" +> ,qe {qeOrderBy = [SortSpec (Iden "a") Asc NullsOrderDefault +> ,SortSpec (Iden "b") Asc NullsOrderDefault]}) +> ,("select * from t order by a desc,b" +> ,qe {qeOrderBy = [SortSpec (Iden "a") Desc NullsOrderDefault +> ,SortSpec (Iden "b") Asc NullsOrderDefault]}) +> ,("select * from t order by a collate x desc,b" +> ,qe {qeOrderBy = [SortSpec (Collate (Iden "a") "x") Desc NullsOrderDefault +> ,SortSpec (Iden "b") Asc NullsOrderDefault]}) +> ,("select * from t order by 1,2" +> ,qe {qeOrderBy = [SortSpec (NumLit "1") Asc NullsOrderDefault +> ,SortSpec (NumLit "2") Asc NullsOrderDefault]}) > ] +> where +> qe = makeSelect +> {qeSelectList = [(Star,Nothing)] +> ,qeFrom = [TRSimple "t"]} TODO: what happened to the collation in order by? Answer: sort used to be a column reference with an optional