1
Fork 0

add iden keywords and app keywords to the dialect

This commit is contained in:
Jake Wheat 2019-09-01 09:34:10 +01:00
parent 9f2ff37c54
commit 86f5e203af
5 changed files with 77 additions and 22 deletions

View file

@ -20,9 +20,17 @@ Data types to represent different dialect options
> data Dialect = Dialect
> { -- | The list of reserved keywords
> diKeywords :: [String]
> -- | The list of reserved keywords, which can also be used as
> -- | an identifier
> ,diIdentifierKeywords :: [String]
> -- | The list of reserved keywords, which can also be used as
> -- | a function name (including aggregates and window
> -- | functions)
> ,diAppKeywords :: [String]
> -- | does the dialect support ansi fetch first syntax
> ,diFetchFirst :: Bool
> -- | does the dialect support limit keyword (mysql, postgres, ...)
> -- | does the dialect support limit keyword (mysql, postgres,
> -- | ...)
> ,diLimit :: Bool
> -- | allow parsing ODBC syntax
> ,diOdbc :: Bool
@ -50,6 +58,8 @@ Data types to represent different dialect options
> -- | ansi sql 2011 dialect
> ansi2011 :: Dialect
> ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords
> ,diIdentifierKeywords = []
> ,diAppKeywords = ["set"]
> ,diFetchFirst = True
> ,diLimit = False
> ,diOdbc = False
@ -104,6 +114,28 @@ mostly, things are keywords to avoid them mistakenly being parsed as
aliases or as identifiers/functions/function-like things (aggs,
windows, etc.)
some rationale for having quite string reserved keywords:
1. sql has the unusual (these days) feature of quoting identifiers
which allows you to use any keyword in any context
2. the user already has to deal with a very long list of keywords in
sql. this is not very user friendly
3. if the user has to remember which situations which keyword needs
quoting, and which it doesn't need quoting, this is also not very
user friendly, even if it means less quoting sometimes. E.g. if
you only need to quote 'from' in places where it is ambiguous, and
you want to take advantage of this, this list of good/not-good
places is based on the weirdness of SQL grammar and the
implementation details of the parser - and it's especially bad if
you are using from as an iden without quotes, and you edit the sql
statement, and now from is in a position where it does need
quotes, and you get a obscure error message
4. there is a lot more potential for nice clear error messages
keywords are never allowed without quoting
> ansi2011ReservedKeywords :: [String]
> ansi2011ReservedKeywords =
> [--"abs" -- function

View file

@ -719,22 +719,20 @@ all the scalar expressions which start with an identifier
> idenExpr =
> -- todo: work out how to left factor this
> try (TypedLit <$> typeName <*> singleQuotesOnlyStringTok)
> <|> (try keywordFunction <**> app)
> <|> (names <**> option Iden app)
> <|> keywordFunctionOrIden
> where
> -- this is a special case because 'set' is a reserved keyword
> -- and the names parser won't parse it
> -- can't remove it from the reserved keyword list, because
> -- it is used in a lot of places which are ambiguous as a keyword
> -- this approach might be needed with other keywords which look
> -- like identifiers or functions
> keywordFunction =
> let makeKeywordFunction x = if map toLower x `elem` keywordFunctionNames
> then return [Name Nothing x]
> else fail ""
> in unquotedIdentifierTok [] Nothing >>= makeKeywordFunction
> keywordFunctionNames = ["set"
> ]
> -- special cases for keywords that can be parsed as an iden or app
> keywordFunctionOrIden = try $ do
> x <- unquotedIdentifierTok [] Nothing
> d <- getState
> let i = map toLower x `elem` diIdentifierKeywords d
> a = map toLower x `elem` diAppKeywords d
> case () of
> _ | i && a -> pure [Name Nothing x] <**> option Iden app
> | i -> pure (Iden [Name Nothing x])
> | a -> pure [Name Nothing x] <**> app
> | otherwise -> fail ""
=== special

9
TODO
View file

@ -4,6 +4,15 @@ review alters, and think about adding rename versions
which are really common and useful, but not in ansi
https://github.com/JakeWheat/simple-sql-parser/issues/20
try to get some control over the pretty printing and the error
messages by creating some dumps of pretty printing and error messages,
then can rerun these every so often to see how they've changed
-> expose in the dialect:
keywords which can (also?) appear as identifiers in scalar expressions
keywords which can (also?) appear as app-likes - this is already implemented
finish off going through the keyword list
do more examples

View file

@ -4,8 +4,24 @@
> import Language.SQL.SimpleSQL.TestTypes
> customDialectTests :: TestItem
> customDialectTests = Group "custom dialect tests" (map (ParseQueryExpr myDialect) sometests
> ++ [ParseScalarExprFails ansi2011 "SELECT DATE('2000-01-01')"])
> customDialectTests = Group "custom dialect tests" (map (uncurry ParseQueryExpr) passTests
> ++ map (uncurry ParseScalarExprFails) failTests )
> where
> myDialect = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)}
> sometests = ["SELECT DATE('2000-01-01')"]
> failTests = [(ansi2011,"SELECT DATE('2000-01-01')")
> ,(ansi2011,"SELECT DATE")
> ,(dateApp,"SELECT DATE")
> ,(dateIden,"SELECT DATE('2000-01-01')")
> -- show this never being allowed as an alias
> ,(ansi2011,"SELECT a date")
> ,(dateApp,"SELECT a date")
> ,(dateIden,"SELECT a date")
> ]
> passTests = [(ansi2011,"SELECT a b")
> ,(noDateKeyword,"SELECT DATE('2000-01-01')")
> ,(noDateKeyword,"SELECT DATE")
> ,(dateApp,"SELECT DATE('2000-01-01')")
> ,(dateIden,"SELECT DATE")
> ]
> noDateKeyword = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)}
> dateIden = ansi2011 {diIdentifierKeywords = "date" : diIdentifierKeywords ansi2011}
> dateApp = ansi2011 {diAppKeywords = "date" : diAppKeywords ansi2011}

View file

@ -14,15 +14,15 @@ cp website/main.css build
cp website/ocean.css build
# index
asciidoctor website/index.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/index.html
asciidoctor website/index.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/index.html
asciidoctor website/supported_sql.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/supported_sql.html
asciidoctor website/supported_sql.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/supported_sql.html
# tpch sql file
# pandoc src/tpch.sql -s --highlight-style kate -o tpch.sql.html
# rendered test cases
# build the parserexe target first to fix the package database
cabal exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc
cabal -v0 exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc
asciidoctor build/test_cases.asciidoc -o - | \
sed -e "s/max-width:62\.5em//g" \