preparation for non-queries
some docs in the lexer; bump the version to 0.5.0; add placeholder files for the planned initial non-query support
This commit is contained in:
parent
0f1f000ee5
commit
6fc8869f73
|
@ -111,6 +111,13 @@ parsec
|
|||
> prettyTokens :: Dialect -> [Token] -> String
|
||||
> prettyTokens d ts = concat $ map (prettyToken d) ts
|
||||
|
||||
When parsing a quoted identifier, you can have a double quote
|
||||
character in the identifier like this: "quotes""identifier" ->
|
||||
quotes"identifier. The doubled double quote character is changed to a
|
||||
single character in the lexer and expanded back to two characters in
|
||||
the pretty printer. This also applies to strings, which can embed a
|
||||
single quote like this: 'string''with quote'.
|
||||
|
||||
> doubleChars :: Char -> String -> String
|
||||
> doubleChars _ [] = []
|
||||
> doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
|
||||
|
@ -142,6 +149,14 @@ TODO: try to make all parsers applicative only
|
|||
> sqlToken d = do
|
||||
> p' <- getPosition
|
||||
> let p = ("",sourceLine p', sourceColumn p')
|
||||
|
||||
The order of parsers is important: strings and quoted identifiers can
|
||||
start out looking like normal identifiers, so we try to parse these
|
||||
first and use a little bit of try. Line and block comments start like
|
||||
symbols, so we try these before symbol. Numbers can start with a . so
|
||||
this is also tried before symbol (a .1 will be parsed as a number, but
|
||||
. otherwise will be parsed as a symbol).
|
||||
|
||||
> (p,) <$> choice [sqlString d
|
||||
> ,identifier d
|
||||
> ,hostParam d
|
||||
|
@ -151,10 +166,20 @@ TODO: try to make all parsers applicative only
|
|||
> ,symbol d
|
||||
> ,sqlWhitespace d]
|
||||
|
||||
Parses identifiers:
|
||||
|
||||
simple_identifier_23
|
||||
u&"unicode quoted identifier"
|
||||
"quoted identifier"
|
||||
"quoted identifier "" with double quote char"
|
||||
`mysql quoted identifier`
|
||||
|
||||
> identifier :: Dialect -> Parser Token
|
||||
> identifier d =
|
||||
> choice
|
||||
> [QIdentifier <$> qiden
|
||||
> -- try is used here to avoid a conflict with identifiers
|
||||
> -- and quoted strings which also start with a 'u'
|
||||
> ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
|
||||
> ,Identifier <$> identifierString
|
||||
> ,DQIdentifier "`" "`" <$> mySqlQIden
|
||||
|
@ -174,12 +199,23 @@ TODO: try to make all parsers applicative only
|
|||
> guard (d == MySQL)
|
||||
> char '`' *> takeWhile1 (/='`') <* char '`'
|
||||
|
||||
This parses a valid identifier without quotes.
|
||||
|
||||
> identifierString :: Parser String
|
||||
> identifierString =
|
||||
> startsWith (\c -> c == '_' || isAlpha c)
|
||||
> (\c -> c == '_' || isAlphaNum c)
|
||||
|
||||
|
||||
Parse a SQL string. Examples:
|
||||
|
||||
'basic string'
|
||||
'string with '' a quote'
|
||||
n'international text'
|
||||
b'binary string'
|
||||
x'hexadecimal string'
|
||||
|
||||
|
||||
> sqlString :: Dialect -> Parser Token
|
||||
> sqlString _ =
|
||||
> choice [csString
|
||||
|
@ -195,6 +231,10 @@ TODO: try to make all parsers applicative only
|
|||
> void $ char '\''
|
||||
> normalStringSuffix $ concat [t,s,"'"]
|
||||
> ,return $ concat [t,s]]
|
||||
>                -- try is used to avoid conflicts with
|
||||
> -- identifiers which can start with n,b,x,u
|
||||
> -- once we read the quote type and the starting '
|
||||
> -- then we commit to a string
|
||||
> csString = CSSqlString <$> try (cs <* char '\'') <*> normalStringSuffix ""
|
||||
> cs = choice [(:[]) <$> oneOf "nNbBxX"
|
||||
> ,string "u&"
|
||||
|
@ -204,17 +244,6 @@ TODO: try to make all parsers applicative only
|
|||
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
||||
|
||||
|
||||
> sqlNumber :: Dialect -> Parser Token
|
||||
> sqlNumber _ = SqlNumber <$>
|
||||
> (int <??> (pp dot <??.> pp int)
|
||||
> <|> try ((++) <$> dot <*> int))
|
||||
> <??> pp expon
|
||||
> where
|
||||
> int = many1 digit
|
||||
> dot = string "."
|
||||
> expon = (:) <$> oneOf "eE" <*> sInt
|
||||
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
||||
> pp = (<$$> (++))
|
||||
|
||||
digits
|
||||
digits.[digits][e[+-]digits]
|
||||
|
@ -229,12 +258,32 @@ the constant. Note that any leading plus or minus sign is not actually
|
|||
considered part of the constant; it is an operator applied to the
|
||||
constant.
|
||||
|
||||
> sqlNumber :: Dialect -> Parser Token
|
||||
> sqlNumber _ = SqlNumber <$>
|
||||
> (int <??> (pp dot <??.> pp int)
|
||||
> -- try is used in case we read a dot
|
||||
> -- and it isn't part of a number
|
||||
> -- if there are any following digits, then we commit
|
||||
> -- to it being a number and not something else
|
||||
> <|> try ((++) <$> dot <*> int))
|
||||
> <??> pp expon
|
||||
> where
|
||||
> int = many1 digit
|
||||
> dot = string "."
|
||||
> expon = (:) <$> oneOf "eE" <*> sInt
|
||||
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
||||
> pp = (<$$> (++))
|
||||
|
||||
|
||||
A symbol is one of the two character symbols, or one of the single
|
||||
character symbols in the two lists below.
|
||||
|
||||
> symbol :: Dialect -> Parser Token
|
||||
> symbol _ = Symbol <$> choice (many1 (char '.') :
|
||||
> -- try is used because most of the first
|
||||
> -- characters of the two character symbols
|
||||
> -- can also be part of a single character symbol
|
||||
> -- maybe this would be better with left factoring?
|
||||
> map (try . string) [">=","<=","!=","<>","||"]
|
||||
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
|
||||
|
||||
|
@ -244,9 +293,15 @@ character symbols in the two lists below.
|
|||
> lineComment :: Dialect -> Parser Token
|
||||
> lineComment _ =
|
||||
> (\s -> LineComment $ concat ["--",s]) <$>
|
||||
> -- try is used here in case we see a - symbol
|
||||
> -- once we read two -- then we commit to the comment token
|
||||
> (try (string "--") *>
|
||||
> manyTill anyChar (void (char '\n') <|> eof))
|
||||
|
||||
Try is used in the block comment for the two symbol bits because we
|
||||
want to backtrack if we read the first symbol but the second symbol
|
||||
isn't there.
|
||||
|
||||
> blockComment :: Dialect -> Parser Token
|
||||
> blockComment _ =
|
||||
> (\s -> BlockComment $ concat ["/*",s]) <$>
|
||||
|
@ -268,6 +323,8 @@ character symbols in the two lists below.
|
|||
> ,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]
|
||||
|
||||
|
||||
Some helper combinators
|
||||
|
||||
> startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
|
||||
> startsWith p ps = do
|
||||
> c <- satisfy p
|
||||
|
|
|
@ -1984,3 +1984,6 @@ dialect (for instance, string and identifier parsing rules vary from
|
|||
dialect to dialect and version to version, and most or all SQL DBMSs
|
||||
appear to have a set of flags to further enable or disable variations
|
||||
for quoting and escaping strings and identifiers).
|
||||
|
||||
The dialect stuff can also be used for custom options: e.g. to only
|
||||
parse DML.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
> -- | The AST for SQL queries.
|
||||
> -- | The AST for SQL.
|
||||
> {-# LANGUAGE DeriveDataTypeable #-}
|
||||
> module Language.SQL.SimpleSQL.Syntax
|
||||
> (-- * Value expressions
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
name: simple-sql-parser
|
||||
version: 0.4.1
|
||||
synopsis: A parser for SQL queries
|
||||
version: 0.5.0
|
||||
synopsis: A parser for SQL.
|
||||
|
||||
description: A parser for SQL queries. Parses most SQL:2011
|
||||
queries. Please see the homepage for more information
|
||||
description: A parser for SQL. Parses most SQL:2011
|
||||
queries, DML, schema/DDL, transaction control,
|
||||
session and connection management, access
|
||||
control. Please see the homepage for more
|
||||
information
|
||||
<http://jakewheat.github.io/simple-sql-parser/>.
|
||||
|
||||
homepage: http://jakewheat.github.io/simple-sql-parser/
|
||||
|
@ -68,7 +71,11 @@ Test-Suite Tests
|
|||
Language.SQL.SimpleSQL.Postgres,
|
||||
Language.SQL.SimpleSQL.QueryExprComponents,
|
||||
Language.SQL.SimpleSQL.QueryExprs,
|
||||
Language.SQL.SimpleSQL.SQL2011,
|
||||
Language.SQL.SimpleSQL.SQL2011Queries,
|
||||
Language.SQL.SimpleSQL.SQL2011AccessControl,
|
||||
Language.SQL.SimpleSQL.SQL2011Bits,
|
||||
Language.SQL.SimpleSQL.SQL2011DataManipulation,
|
||||
Language.SQL.SimpleSQL.SQL2011Schema,
|
||||
Language.SQL.SimpleSQL.TableRefs,
|
||||
Language.SQL.SimpleSQL.TestTypes,
|
||||
Language.SQL.SimpleSQL.Tests,
|
||||
|
|
12
tools/Language/SQL/SimpleSQL/SQL2011AccessControl.lhs
Normal file
12
tools/Language/SQL/SimpleSQL/SQL2011AccessControl.lhs
Normal file
|
@ -0,0 +1,12 @@
|
|||
|
||||
Section 12 in Foundation
|
||||
|
||||
grant, etc
|
||||
|
||||
|
||||
> module Language.SQL.SimpleSQL.SQL2011AccessControl (sql2011AccessControlTests) where
|
||||
|
||||
> import Language.SQL.SimpleSQL.TestTypes
|
||||
|
||||
> sql2011AccessControlTests :: TestItem
|
||||
> sql2011AccessControlTests = Group "sql 2011 access control tests" []
|
15
tools/Language/SQL/SimpleSQL/SQL2011Bits.lhs
Normal file
15
tools/Language/SQL/SimpleSQL/SQL2011Bits.lhs
Normal file
|
@ -0,0 +1,15 @@
|
|||
|
||||
Sections 16, 17, 18 and 19 in Foundation
|
||||
|
||||
This module covers the tests for control statements (call and return),
|
||||
transaction management (begin, commit, savepoint, etc.), connection
|
||||
management, and session management (set).
|
||||
|
||||
|
||||
> module Language.SQL.SimpleSQL.SQL2011Bits (sql2011BitsTests) where
|
||||
|
||||
> import Language.SQL.SimpleSQL.TestTypes
|
||||
|
||||
> sql2011BitsTests :: TestItem
|
||||
> sql2011BitsTests = Group "sql 2011 bits tests" []
|
||||
|
2497
tools/Language/SQL/SimpleSQL/SQL2011DataManipulation.lhs
Normal file
2497
tools/Language/SQL/SimpleSQL/SQL2011DataManipulation.lhs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -3,16 +3,26 @@ This file goes through the grammar for SQL 2011 (using the draft standard).
|
|||
|
||||
We are only looking at the query syntax, and no other parts.
|
||||
|
||||
There are other files which cover some of the other sections.
|
||||
Possible sections not covered yet:
|
||||
13 modules
|
||||
16 control statements
|
||||
20 dynamic
|
||||
22 direct
|
||||
23 diagnostics
|
||||
|
||||
|
||||
|
||||
The goal is to create some example tests for each bit of grammar, with
|
||||
some areas getting more comprehensive coverage tests, and also to note
|
||||
which parts aren't currently supported.
|
||||
|
||||
> module Language.SQL.SimpleSQL.SQL2011 (sql2011Tests) where
|
||||
> module Language.SQL.SimpleSQL.SQL2011Queries (sql2011QueryTests) where
|
||||
> import Language.SQL.SimpleSQL.TestTypes
|
||||
> import Language.SQL.SimpleSQL.Syntax
|
||||
|
||||
> sql2011Tests :: TestItem
|
||||
> sql2011Tests = Group "sql 2011 tests"
|
||||
> sql2011QueryTests :: TestItem
|
||||
> sql2011QueryTests = Group "sql 2011 query tests"
|
||||
> [literals
|
||||
> ,identifiers
|
||||
> ,typeNameTests
|
1312
tools/Language/SQL/SimpleSQL/SQL2011Schema.lhs
Normal file
1312
tools/Language/SQL/SimpleSQL/SQL2011Schema.lhs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -29,7 +29,11 @@ test data to the Test.Framework tests.
|
|||
> import Language.SQL.SimpleSQL.Tpch
|
||||
> import Language.SQL.SimpleSQL.LexerTests
|
||||
|
||||
> import Language.SQL.SimpleSQL.SQL2011
|
||||
> import Language.SQL.SimpleSQL.SQL2011Queries
|
||||
> import Language.SQL.SimpleSQL.SQL2011AccessControl
|
||||
> import Language.SQL.SimpleSQL.SQL2011Bits
|
||||
> import Language.SQL.SimpleSQL.SQL2011DataManipulation
|
||||
> import Language.SQL.SimpleSQL.SQL2011Schema
|
||||
|
||||
> import Language.SQL.SimpleSQL.MySQL
|
||||
|
||||
|
@ -48,7 +52,11 @@ order on the generated documentation.
|
|||
> ,fullQueriesTests
|
||||
> ,postgresTests
|
||||
> ,tpchTests
|
||||
> ,sql2011Tests
|
||||
> ,sql2011QueryTests
|
||||
> ,sql2011DataManipulationTests
|
||||
> ,sql2011SchemaTests
|
||||
> ,sql2011AccessControlTests
|
||||
> ,sql2011BitsTests
|
||||
> ,mySQLTests
|
||||
> ]
|
||||
|
||||
|
|
Loading…
Reference in a new issue