diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 6d255cd..7f34703 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -420,7 +420,7 @@ TODO: this code needs heavy refactoring > typeName :: Parser TypeName > typeName = -> (rowTypeName <|> intervalTypeName <|> ref <|> otherTypeName) +> (rowTypeName <|> intervalTypeName <|> otherTypeName) > >>= tnSuffix > "typename" > where @@ -432,11 +432,6 @@ TODO: this code needs heavy refactoring > intervalTypeName = > keyword_ "interval" >> > uncurry IntervalTypeName <$> intervalQualifier -> ref = -> keyword_ "ref" >> -> RefTypeName -> <$> parens (names) -> <*> optionMaybe (keyword_ "scope" *> names) > -- other type names, which includes: > -- precision, scale, lob scale and units, timezone, character > -- set and collations @@ -478,6 +473,8 @@ TODO: this code needs heavy refactoring > x <- choice [Just LobK <$ keyword_ "k" > ,Just LobM <$ keyword_ "m" > ,Just LobG <$ keyword_ "g" +> ,Just LobT <$ keyword_ "t" +> ,Just LobP <$ keyword_ "p" > ,return Nothing] > return (p,x) > lobUnits = choice [LobCharacters <$ keyword_ "characters" diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index 52a7f22..1752d77 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -263,7 +263,9 @@ which have been changed to try to improve the layout of the output. > <> me (\x -> case x of > LobK -> text "K" > LobM -> text "M" -> LobG -> text "G") m +> LobG -> text "G" +> LobT -> text "T" +> LobP -> text "P") m > <+> me (\x -> case x of > LobCharacters -> text "CHARACTERS" > LobOctets -> text "OCTETS") u) @@ -297,11 +299,6 @@ which have been changed to try to improve the layout of the output. 
> typeName (MultisetTypeName tn) = > typeName tn <+> text "multiset" -> typeName (RefTypeName rt sc) = -> text "ref" -> <> parens (names rt) -> <+> me (\x -> text "scope" <+> names x) sc - > intervalTypeField :: IntervalTypeField -> Doc > intervalTypeField (Itf n p) = > text n diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index 20c60d6..4dc7900 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -167,8 +167,6 @@ > | UQName String > deriving (Eq,Show,Read,Data,Typeable) -TODO: add ref and scope, any others? - > -- | Represents a type name, used in casts. > data TypeName > = TypeName [Name] @@ -182,13 +180,12 @@ TODO: add ref and scope, any others? > | IntervalTypeName IntervalTypeField (Maybe IntervalTypeField) > | ArrayTypeName TypeName (Maybe Integer) > | MultisetTypeName TypeName -> | RefTypeName [Name] (Maybe [Name]) > deriving (Eq,Show,Read,Data,Typeable) > data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer)) > deriving (Eq,Show,Read,Data,Typeable) -> data LobMultiplier = LobK | LobM | LobG +> data LobMultiplier = LobK | LobM | LobG | LobT | LobP > deriving (Eq,Show,Read,Data,Typeable) > data LobUnits = LobCharacters > | LobOctets diff --git a/TODO b/TODO index 468201e..dd1fb8b 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,4 @@ -continue 2003 review and tests +continue 2011 review and tests 1. start replacing the 2003 stuff with 2011 2. create an error message document for the website @@ -62,76 +62,58 @@ rules for changing the multi keyword parsing: recursive, then don't do this. -rough SQL 2003 todo, including tests to write: +rough SQL 2011 todo, including tests to write: -switch to SQL 2011 - -now: review the commented out reserved keyword entries and work out how to fix -go through all? the functions -go through almost all the predicates -window functions missing bits, window clauses -from: more tests, review missing - tablesample, unnest, etc. 
-rows review -match missing bit -between symmetric -case review -detail list from the grammar, LNR = maybe leave until after next - release, otherwise planned for next release +big areas: +window functions +nested window functions +case +table ref +joined table +group by +window clause -LNR support needed MODULE syntax in identifiers - already covered? -LNR decide how to represent special identifiers including the session - variables or whatever they are called like current_user -LNR grouping - needs special syntax? -LNR next value for -LNR subtypes, methods, new /routine, dereference -LNR double check associativity, precedence (value exprs, joins, set ops) -position expressions -length expressions -extract expression -cardinality expression? -check concatenations -substring expressions -regular expression substring function -convert -translate -trim -overlay -LNR specifictype -row value constructors, expressions review -review table value constructor exactly what is allowed -lots more tests for from clause variations -tablesamples -unnest -table function derived table -only spec -join variations, including union join -review group by -window clauses -LNR all fields reference with alias 'select * as (a,b,c) ... ' -search or cycle clause -between symmetric/asymmetric -in predicate review -escape for like -escape for [not] similar to -LNR regular expression syntax? -normalized predicate -overlaps predicate -distinct from predicate -member predicate -submultiset predicate -set predicate -LNR type predicate -additional stuff review: -complete the list of keywords/reserved keywords and check everything - still works ok. The parser will reject all unquoted identifiers - which are the same as reserved or unreserved keywords. -LNR select into -LNR other language format identifiers for host params? 
+other areas: +unicode escape, strings and idens +character set behaviour review +datetime literals +mixed quoting identifier chains +names/identifiers careful review +typenames: lengths, binary +general value bits + collate for +numeric val fn +string exp fn +datetime exp fn +interval exp fn +rows +table value constructor +interval qualifier +query spec +with +setop +explicit table +order/offset/fetch +search/cycle +preds: +between +in +like +similar +regex like? +null +normalize +match +overlaps +distinct +member +submultiset +period +create list of unsupported syntax: xml, ref, subtypes, modules? --- @@ -248,6 +230,10 @@ string literals full number literals -> other bases? apply, pivot +maybe add dml and ddl, source poses, quasi quotes + +leave: type check, dialects, procedural, separate lexing? + other dialect targets: postgres oracle diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal index b13095b..fa8ea32 100644 --- a/simple-sql-parser.cabal +++ b/simple-sql-parser.cabal @@ -1,7 +1,10 @@ name: simple-sql-parser version: 0.4.0 synopsis: A parser for SQL queries -description: A parser for SQL queries. Please see the homepage for more information . + +description: A parser for SQL queries. Parses most SQL:2011 + queries. Please see the homepage for more information + . 
homepage: http://jakewheat.github.io/simple-sql-parser/ license: BSD3 @@ -36,6 +39,7 @@ library default-language: Haskell2010 ghc-options: -Wall other-extensions: TupleSections,DeriveDataTypeable + Test-Suite Tests type: exitcode-stdio-1.0 main-is: RunTests.lhs @@ -67,7 +71,7 @@ Test-Suite Tests Language.SQL.SimpleSQL.Tpch, Language.SQL.SimpleSQL.ValueExprs - other-extensions: TupleSections,OverloadedStrings,DeriveDataTypeable + other-extensions: TupleSections,DeriveDataTypeable default-language: Haskell2010 ghc-options: -Wall @@ -78,6 +82,7 @@ executable SQLIndent parsec >=3.1 && <3.2, mtl >=2.1 && <2.2, pretty >= 1.1 && < 1.2 + other-extensions: TupleSections,DeriveDataTypeable default-language: Haskell2010 ghc-options: -Wall if flag(sqlindent) diff --git a/tools/Language/SQL/SimpleSQL/SQL2003.lhs b/tools/Language/SQL/SimpleSQL/SQL2003.lhs index 6031114..5b6e016 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs @@ -1130,8 +1130,6 @@ create a list of type name variations: > ,("interval year(4) to second(2,3)" > ,IntervalTypeName (Itf "year" $ Just (4,Nothing)) > (Just $ Itf "second" $ Just (2, Just 3))) -> ,("ref (t)", RefTypeName [Name "t"] Nothing) -> ,("ref (t) scope q", RefTypeName [Name "t"] (Just [Name "q"])) > ] Now test each variation in both cast expression and typed literal diff --git a/tools/Language/SQL/SimpleSQL/SQL2011.lhs b/tools/Language/SQL/SimpleSQL/SQL2011.lhs index d4d288e..d88ea48 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2011.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2011.lhs @@ -16,6 +16,7 @@ which parts aren't currently supported. > [literals > ,identifiers > ,typeNameTests +> ,fieldDefinition > ,valueExpressions > ,queryExpressions > ,scalarSubquery @@ -494,6 +495,11 @@ Specify a non-null value. > ,StringLit "a quote: ', stuff") > ,("''" > ,StringLit "") + +I'm not sure how this should work. 
Maybe the parser should reject non +ascii characters in strings and identifiers unless the current SQL +character set allows them. + > ,("_francais 'français'" > ,TypedLit (TypeName [Name "_francais"]) "français") > ] @@ -1055,6 +1061,8 @@ create a list of type name variations: > ,("decimal(15,2)", PrecScaleTypeName [Name "decimal"] 15 2) > -- lob prec + with multiname > ,("blob(3M)", LobTypeName [Name "blob"] 3 (Just LobM) Nothing) +> ,("blob(3T)", LobTypeName [Name "blob"] 3 (Just LobT) Nothing) +> ,("blob(3P)", LobTypeName [Name "blob"] 3 (Just LobP) Nothing) > ,("blob(4M characters) " > ,LobTypeName [Name "blob"] 4 (Just LobM) (Just LobCharacters)) > ,("blob(6G octets) " @@ -1145,8 +1153,6 @@ create a list of type name variations: > ,("interval year(4) to second(2,3)" > ,IntervalTypeName (Itf "year" $ Just (4,Nothing)) > (Just $ Itf "second" $ Just (2, Just 3))) -> ,("ref (t)", RefTypeName [Name "t"] Nothing) -> ,("ref (t) scope q", RefTypeName [Name "t"] (Just [Name "q"])) > ] Now test each variation in both cast expression and typed literal @@ -1169,7 +1175,13 @@ Define a field of a row type. ::= -Tested in the row type above. +> fieldDefinition :: TestItem +> fieldDefinition = Group "field definition" +> $ map (uncurry TestValueExpr) +> [("cast('(1,2)' as row(a int,b char))" +> ,Cast (StringLit "(1,2)") +> $ RowTypeName [(Name "a", TypeName [Name "int"]) +> ,(Name "b", TypeName [Name "char"])])] == 6.3 @@ -1214,6 +1226,8 @@ Specify a value that is syntactically self-delimited. > [generalValueSpecification > ,parameterSpecification > ,contextuallyTypedValueSpecification +> ,identifierChain +> ,columnReference > ,setFunctionSpecification > ,windowFunction > ,nestedWindowFunction @@ -1384,7 +1398,10 @@ Disambiguate a -separated chain of identifiers. 
::= -tested with the identifier tests above +> identifierChain :: TestItem +> identifierChain = Group "identifier chain" +> $ map (uncurry TestValueExpr) +> [("a.b", Iden [Name "a",Name "b"])] == 6.7 @@ -1395,7 +1412,10 @@ Reference a column. | MODULE -tested with the identifier tests above +> columnReference :: TestItem +> columnReference = Group "column reference" +> $ map (uncurry TestValueExpr) +> [("module.a.b", Iden [Name "module",Name "a",Name "b"])] == 6.8 @@ -1404,8 +1424,6 @@ Reference an SQL parameter. ::= -tested with the identifier tests above - == 6.9 Function @@ -1590,6 +1608,11 @@ Specify a conditional value. | | +I haven't seen these part 2 style when operands in the wild. It +doesn't even allow all the binary operators here. We will allow them +all, and parse and represent these expressions by considering all the +binary ops as unary prefix ops. + ::= | NULL ::= @@ -1611,8 +1634,6 @@ Specify a data conversion. ::= | -This is already tested with the type name tests - > castSpecification :: TestItem > castSpecification = Group "cast specification" > $ map (uncurry TestValueExpr) @@ -2230,7 +2251,6 @@ Specify an interval value. > intervalValueExpression :: TestItem > intervalValueExpression = Group "interval value expression" > [-- todo: interval value expression -> intervalValueFunction > ] @@ -2334,7 +2354,6 @@ Specify an array value. > arrayValueExpression :: TestItem > arrayValueExpression = Group "array value expression" > [-- todo: array value expression -> arrayValueFunction > ] == 6.37 @@ -3080,7 +3099,9 @@ the result of recursive query expressions. == 7.15 Function -Specify a scalar value, a row, or a table derived from a . + +Specify a scalar value, a row, or a table derived from a . ::= @@ -3169,9 +3190,24 @@ Specify a comparison of two row values. 
> comparisonPredicates :: TestItem > comparisonPredicates = Group "comparison predicates" -> [-- todo: comparison predicates +> $ map (uncurry TestValueExpr) +> $ map mkOp ["=", "<>", "<", ">", "<=", ">="] +> ++ [("ROW(a) = ROW(b)" +> ,BinOp (App [Name "ROW"] [a]) +> [Name "="] +> (App [Name "ROW"] [b])) +> ,("(a,b) = (c,d)" +> ,BinOp (SpecialOp [Name "rowctor"] [a,b]) +> [Name "="] +> (SpecialOp [Name "rowctor"] [Iden [Name "c"], Iden [Name "d"]])) > ] +> where +> mkOp nm = ("a " ++ nm ++ " b" +> ,BinOp a [Name nm] b) +> a = Iden [Name "a"] +> b = Iden [Name "b"] +TODO: what other tests, more complex expressions with comparisons? == 8.3