preparation for non-queries

some docs in the lexer; bump the version to 0.5.0; add placeholder files for the planned initial non-query support
2015-08-01 18:08:54 +03:00 · 2015-08-01 18:08:54 +03:00 · 6fc8869f73
parent 0f1f000ee5
commit 6fc8869f73
10 changed files with 3943 additions and 22 deletions
--- a/Language/SQL/SimpleSQL/Lexer.lhs
+++ b/Language/SQL/SimpleSQL/Lexer.lhs
@ -111,6 +111,13 @@ parsec
 > prettyTokens :: Dialect -> [Token] -> String
 > prettyTokens d ts = concat $ map (prettyToken d) ts

+When parsing a quoted identifier, you can have a double quote
+character in the identifier like this: "quotes""identifier" ->
+quotes"identifier. The doubled double quote character is changed to a
+single character in the lexer and expanded back to two characters in
+the pretty printer. This also applies to strings, which can embed a
+single quote like this: 'string''with quote'.
+
 > doubleChars :: Char -> String -> String
 > doubleChars _ [] = []
 > doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
@ -142,6 +149,14 @@ TODO: try to make all parsers applicative only
 > sqlToken d = do
 >     p' <- getPosition
 >     let p = ("",sourceLine p', sourceColumn p')
+
+The order of parsers is important: strings and quoted identifiers can
+start out looking like normal identifiers, so we try to parse these
+first and use a little bit of try. Line and block comments start like
+symbols, so we try these before symbol. Numbers can start with a . so
+this is also tried before symbol (a .1 will be parsed as a number, but
+. otherwise will be parsed as a symbol).
+
 >     (p,) <$> choice [sqlString d
 >                     ,identifier d
 >                     ,hostParam d
@ -151,10 +166,20 @@ TODO: try to make all parsers applicative only
 >                     ,symbol d
 >                     ,sqlWhitespace d]

+Parses identifiers:
+
+simple_identifier_23
+u&"unicode quoted identifier"
+"quoted identifier"
+"quoted identifier "" with double quote char"
+`mysql quoted identifier`
+
 > identifier :: Dialect -> Parser Token
 > identifier d =
 >     choice
 >     [QIdentifier <$> qiden
+>      -- try is used here to avoid a conflict with identifiers
+>      -- and quoted strings which also start with a 'u'
 >     ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
 >     ,Identifier <$> identifierString
 >     ,DQIdentifier "`" "`" <$> mySqlQIden
@ -174,12 +199,23 @@ TODO: try to make all parsers applicative only
 >         guard (d == MySQL)
 >         char '`' *> takeWhile1 (/='`') <* char '`'

+This parses a valid identifier without quotes.
+
 > identifierString :: Parser String
 > identifierString =
 >     startsWith (\c -> c == '_' || isAlpha c)
 >                (\c -> c == '_' || isAlphaNum c)


+Parse a SQL string. Examples:
+
+'basic string'
+'string with '' a quote'
+n'international text'
+b'binary string'
+x'hexadecimal string'
+
+
 > sqlString :: Dialect -> Parser Token
 > sqlString _ =
 >     choice [csString
@ -195,6 +231,10 @@ TODO: try to make all parsers applicative only
 >                 void $ char '\''
 >                 normalStringSuffix $ concat [t,s,"'"]
 >                ,return $ concat [t,s]]
+>     -- try is used to avoid conflicts with
+>     -- identifiers which can start with n,b,x,u
+>     -- once we read the quote type and the starting '
+>     -- then we commit to a string
 >     csString = CSSqlString <$> try (cs  <* char '\'') <*> normalStringSuffix ""
 >     cs = choice [(:[]) <$> oneOf "nNbBxX"
 >                 ,string "u&"
@ -204,17 +244,6 @@ TODO: try to make all parsers applicative only
 > hostParam _ = HostParam <$> (char ':' *> identifierString)


-> sqlNumber :: Dialect -> Parser Token
-> sqlNumber _ = SqlNumber <$>
->     (int <??> (pp dot <??.> pp int)
->      <|> try ((++) <$> dot <*> int))
->     <??> pp expon
->   where
->     int = many1 digit
->     dot = string "."
->     expon = (:) <$> oneOf "eE" <*> sInt
->     sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
->     pp = (<$$> (++))

 digits
 digits.[digits][e[+-]digits]
@ -229,12 +258,32 @@ the constant. Note that any leading plus or minus sign is not actually
 considered part of the constant; it is an operator applied to the
 constant.

+> sqlNumber :: Dialect -> Parser Token
+> sqlNumber _ = SqlNumber <$>
+>     (int <??> (pp dot <??.> pp int)
+>      -- try is used in case we read a dot
+>      -- and it isn't part of a number
+>      -- if there are any following digits, then we commit
+>      -- to it being a number and not something else
+>      <|> try ((++) <$> dot <*> int))
+>     <??> pp expon
+>   where
+>     int = many1 digit
+>     dot = string "."
+>     expon = (:) <$> oneOf "eE" <*> sInt
+>     sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
+>     pp = (<$$> (++))
+

 A symbol is one of the two character symbols, or one of the single
 character symbols in the two lists below.

 > symbol :: Dialect -> Parser Token
 > symbol _ = Symbol <$> choice (many1 (char '.') :
+>                  -- try is used because most of the first
+>                  -- characters of the two character symbols
+>                  -- can also be part of a single character symbol
+>                  -- maybe this would be better with left factoring?
 >                  map (try . string) [">=","<=","!=","<>","||"]
 >                  ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")

@ -244,9 +293,15 @@ character symbols in the two lists below.
 > lineComment :: Dialect -> Parser Token
 > lineComment _ =
 >     (\s -> LineComment $ concat ["--",s]) <$>
+>     -- try is used here in case we see a - symbol
+>     -- once we read two -- then we commit to the comment token
 >     (try (string "--") *>
 >      manyTill anyChar (void (char '\n') <|> eof))

+Try is used in the block comment for the two symbol bits because we
+want to backtrack if we read the first symbol but the second symbol
+isn't there.
+
 > blockComment :: Dialect -> Parser Token
 > blockComment _ =
 >     (\s -> BlockComment $ concat ["/*",s]) <$>
@ -268,6 +323,8 @@ character symbols in the two lists below.
 >              ,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]


+Some helper combinators
+
 > startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
 > startsWith p ps = do
 >   c <- satisfy p
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@ -1984,3 +1984,6 @@ dialect (for instance, string and identifier parsing rules vary from
 dialect to dialect and version to version, and most or all SQL DBMSs
 appear to have a set of flags to further enable or disable variations
 for quoting and escaping strings and identifiers).
+
+The dialect stuff can also be used for custom options, e.g. to only
+parse DML.
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@ -1,5 +1,5 @@

-> -- | The AST for SQL queries.
+> -- | The AST for SQL.
 > {-# LANGUAGE DeriveDataTypeable #-}
 > module Language.SQL.SimpleSQL.Syntax
 >     (-- * Value expressions
--- a/simple-sql-parser.cabal
+++ b/simple-sql-parser.cabal
@ -1,9 +1,12 @@
 name:                simple-sql-parser
-version:             0.4.1
-synopsis:            A parser for SQL queries
+version:             0.5.0
+synopsis:            A parser for SQL.

-description:         A parser for SQL queries. Parses most SQL:2011
-                     queries. Please see the homepage for more information
+description:         A parser for SQL. Parses most SQL:2011
+                     queries, DML, schema/DDL, transaction control,
+                     session and connection management, access
+                     control. Please see the homepage for more
+                     information
                     <http://jakewheat.github.io/simple-sql-parser/>.

 homepage:            http://jakewheat.github.io/simple-sql-parser/
@ -68,7 +71,11 @@ Test-Suite Tests
                       Language.SQL.SimpleSQL.Postgres,
                       Language.SQL.SimpleSQL.QueryExprComponents,
                       Language.SQL.SimpleSQL.QueryExprs,
-                       Language.SQL.SimpleSQL.SQL2011,
+                       Language.SQL.SimpleSQL.SQL2011Queries,
+                       Language.SQL.SimpleSQL.SQL2011AccessControl,
+                       Language.SQL.SimpleSQL.SQL2011Bits,
+                       Language.SQL.SimpleSQL.SQL2011DataManipulation,
+                       Language.SQL.SimpleSQL.SQL2011Schema,
                       Language.SQL.SimpleSQL.TableRefs,
                       Language.SQL.SimpleSQL.TestTypes,
                       Language.SQL.SimpleSQL.Tests,
--- a/tools/Language/SQL/SimpleSQL/SQL2011AccessControl.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011AccessControl.lhs
@ -0,0 +1,12 @@
+
+Section 12 in Foundation
+
+grant, etc
+
+
+> module Language.SQL.SimpleSQL.SQL2011AccessControl (sql2011AccessControlTests) where
+
+> import Language.SQL.SimpleSQL.TestTypes
+
+> sql2011AccessControlTests :: TestItem
+> sql2011AccessControlTests = Group "sql 2011 access control tests" []
--- a/tools/Language/SQL/SimpleSQL/SQL2011Bits.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011Bits.lhs
@ -0,0 +1,15 @@
+
+Sections 16, 17, 18 and 19 in Foundation
+
+This module covers the tests for control statements (call and return),
+transaction management (begin, commit, savepoint, etc.), connection
+management, and session management (set).
+
+
+> module Language.SQL.SimpleSQL.SQL2011Bits (sql2011BitsTests) where
+
+> import Language.SQL.SimpleSQL.TestTypes
+
+> sql2011BitsTests :: TestItem
+> sql2011BitsTests = Group "sql 2011 bits tests" []
+
--- a/tools/Language/SQL/SimpleSQL/SQL2011DataManipulation.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011DataManipulation.lhs
--- a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
@ -3,16 +3,26 @@ This file goes through the grammar for SQL 2011 (using the draft standard).

 We are only looking at the query syntax, and no other parts.

+There are other files which cover some of the other sections.
+Possible sections not covered yet:
+13 modules
+16 control statements
+20 dynamic
+22 direct
+23 diagnostics
+
+
+
 The goal is to create some example tests for each bit of grammar, with
 some areas getting more comprehensive coverage tests, and also to note
 which parts aren't currently supported.

-> module Language.SQL.SimpleSQL.SQL2011 (sql2011Tests) where
+> module Language.SQL.SimpleSQL.SQL2011Queries (sql2011QueryTests) where
 > import Language.SQL.SimpleSQL.TestTypes
 > import Language.SQL.SimpleSQL.Syntax

-> sql2011Tests :: TestItem
-> sql2011Tests = Group "sql 2011 tests"
+> sql2011QueryTests :: TestItem
+> sql2011QueryTests = Group "sql 2011 query tests"
 >     [literals
 >     ,identifiers
 >     ,typeNameTests
--- a/tools/Language/SQL/SimpleSQL/SQL2011Schema.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011Schema.lhs
--- a/tools/Language/SQL/SimpleSQL/Tests.lhs
+++ b/tools/Language/SQL/SimpleSQL/Tests.lhs
@ -29,7 +29,11 @@ test data to the Test.Framework tests.
 > import Language.SQL.SimpleSQL.Tpch
 > import Language.SQL.SimpleSQL.LexerTests

-> import Language.SQL.SimpleSQL.SQL2011
+> import Language.SQL.SimpleSQL.SQL2011Queries
+> import Language.SQL.SimpleSQL.SQL2011AccessControl
+> import Language.SQL.SimpleSQL.SQL2011Bits
+> import Language.SQL.SimpleSQL.SQL2011DataManipulation
+> import Language.SQL.SimpleSQL.SQL2011Schema

 > import Language.SQL.SimpleSQL.MySQL

@ -48,7 +52,11 @@ order on the generated documentation.
 >     ,fullQueriesTests
 >     ,postgresTests
 >     ,tpchTests
->     ,sql2011Tests
+>     ,sql2011QueryTests
+>     ,sql2011DataManipulationTests
+>     ,sql2011SchemaTests
+>     ,sql2011AccessControlTests
+>     ,sql2011BitsTests
 >     ,mySQLTests
 >     ]