From 86f5e203af378cf8697afd06fa595bce9f20a8e5 Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Sun, 1 Sep 2019 09:34:10 +0100 Subject: [PATCH] add iden keywords and app keywords to the dialect --- Language/SQL/SimpleSQL/Dialect.lhs | 34 ++++++++++++++++++- Language/SQL/SimpleSQL/Parse.lhs | 26 +++++++------- TODO | 9 +++++ .../Language/SQL/SimpleSQL/CustomDialect.lhs | 24 ++++++++++--- website/make_website.sh | 6 ++-- 5 files changed, 77 insertions(+), 22 deletions(-) diff --git a/Language/SQL/SimpleSQL/Dialect.lhs b/Language/SQL/SimpleSQL/Dialect.lhs index 0fdfcf9..0eeb540 100644 --- a/Language/SQL/SimpleSQL/Dialect.lhs +++ b/Language/SQL/SimpleSQL/Dialect.lhs @@ -20,9 +20,17 @@ Data types to represent different dialect options > data Dialect = Dialect > { -- | The list of reserved keywords > diKeywords :: [String] +> -- | The list of reserved keywords, which can also be used as +> -- | an identifier +> ,diIdentifierKeywords :: [String] +> -- | The list of reserved keywords, which can also be used as +> -- | a function name (including aggregates and window +> -- | functions) +> ,diAppKeywords :: [String] > -- | does the dialect support ansi fetch first syntax > ,diFetchFirst :: Bool -> -- | does the dialect support limit keyword (mysql, postgres, ...) +> -- | does the dialect support limit keyword (mysql, postgres, +> -- | ...) > ,diLimit :: Bool > -- | allow parsing ODBC syntax > ,diOdbc :: Bool @@ -50,6 +58,8 @@ Data types to represent different dialect options > -- | ansi sql 2011 dialect > ansi2011 :: Dialect > ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords +> ,diIdentifierKeywords = [] +> ,diAppKeywords = ["set"] > ,diFetchFirst = True > ,diLimit = False > ,diOdbc = False @@ -104,6 +114,28 @@ mostly, things are keywords to avoid them mistakenly being parsed as aliases or as identifiers/functions/function-like things (aggs, windows, etc.) +some rationale for having quite string reserved keywords: + +1. sql has the unusual (these days) feature of quoting identifiers + which allows you to use any keyword in any context + +2. the user already has to deal with a very long list of keywords in + sql. this is not very user friendly + +3. if the user has to remember which situations which keyword needs + quoting, and which it doesn't need quoting, this is also not very + user friendly, even if it means less quoting sometimes. E.g. if + you only need to quote 'from' in places where it is ambiguous, and + you want to take advantage of this, this list of good/not-good + places is based on the weirdness of SQL grammar and the + implementation details of the parser - and it's especially bad if + you are using from as an iden without quotes, and you edit the sql + statement, and now from is in a position where it does need + quotes, and you get a obscure error message + +4. there is a lot more potential for nice clear error messages + keywords are never allowed without quoting + > ansi2011ReservedKeywords :: [String] > ansi2011ReservedKeywords = > [--"abs" -- function diff --git a/Language/SQL/SimpleSQL/Parse.lhs b/Language/SQL/SimpleSQL/Parse.lhs index 39d755d..8cd0908 100644 --- a/Language/SQL/SimpleSQL/Parse.lhs +++ b/Language/SQL/SimpleSQL/Parse.lhs @@ -719,22 +719,20 @@ all the scalar expressions which start with an identifier > idenExpr = > -- todo: work out how to left factor this > try (TypedLit <$> typeName <*> singleQuotesOnlyStringTok) -> <|> (try keywordFunction <**> app) > <|> (names <**> option Iden app) +> <|> keywordFunctionOrIden > where -> -- this is a special case because 'set' is a reserved keyword -> -- and the names parser won't parse it -> -- can't remove it from the reserved keyword list, because -> -- it is used in a lot of places which are ambiguous as a keyword -> -- this approach might be needed with other keywords which look -> -- like identifiers or functions -> keywordFunction = -> let makeKeywordFunction x = if map toLower x `elem` keywordFunctionNames -> then return [Name Nothing x] -> else fail "" -> in unquotedIdentifierTok [] Nothing >>= makeKeywordFunction -> keywordFunctionNames = ["set" -> ] +> -- special cases for keywords that can be parsed as an iden or app +> keywordFunctionOrIden = try $ do +> x <- unquotedIdentifierTok [] Nothing +> d <- getState +> let i = map toLower x `elem` diIdentifierKeywords d +> a = map toLower x `elem` diAppKeywords d +> case () of +> _ | i && a -> pure [Name Nothing x] <**> option Iden app +> | i -> pure (Iden [Name Nothing x]) +> | a -> pure [Name Nothing x] <**> app +> | otherwise -> fail "" === special diff --git a/TODO b/TODO index 3b204ac..a7e2da1 100644 --- a/TODO +++ b/TODO @@ -4,6 +4,15 @@ review alters, and think about adding rename versions which are really common and useful, but not in ansi https://github.com/JakeWheat/simple-sql-parser/issues/20 +try to get some control over the pretty printing and the error +messages by creating some dumps of pretty printing and error messages, +then can rerun these every so often to see how they've changed + +-> expose in the dialect: +keywords which can (also?) appear as identifiers in scalar expressions +keywords which can (also?) appear as app-likes - this is already implemented + + finish off going through the keyword list do more examples diff --git a/tools/Language/SQL/SimpleSQL/CustomDialect.lhs b/tools/Language/SQL/SimpleSQL/CustomDialect.lhs index ab04d5b..daa22a9 100644 --- a/tools/Language/SQL/SimpleSQL/CustomDialect.lhs +++ b/tools/Language/SQL/SimpleSQL/CustomDialect.lhs @@ -4,8 +4,24 @@ > import Language.SQL.SimpleSQL.TestTypes > customDialectTests :: TestItem -> customDialectTests = Group "custom dialect tests" (map (ParseQueryExpr myDialect) sometests -> ++ [ParseScalarExprFails ansi2011 "SELECT DATE('2000-01-01')"]) +> customDialectTests = Group "custom dialect tests" (map (uncurry ParseQueryExpr) passTests +> ++ map (uncurry ParseScalarExprFails) failTests ) > where -> myDialect = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)} -> sometests = ["SELECT DATE('2000-01-01')"] +> failTests = [(ansi2011,"SELECT DATE('2000-01-01')") +> ,(ansi2011,"SELECT DATE") +> ,(dateApp,"SELECT DATE") +> ,(dateIden,"SELECT DATE('2000-01-01')") +> -- show this never being allowed as an alias +> ,(ansi2011,"SELECT a date") +> ,(dateApp,"SELECT a date") +> ,(dateIden,"SELECT a date") +> ] +> passTests = [(ansi2011,"SELECT a b") +> ,(noDateKeyword,"SELECT DATE('2000-01-01')") +> ,(noDateKeyword,"SELECT DATE") +> ,(dateApp,"SELECT DATE('2000-01-01')") +> ,(dateIden,"SELECT DATE") +> ] +> noDateKeyword = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)} +> dateIden = ansi2011 {diIdentifierKeywords = "date" : diIdentifierKeywords ansi2011} +> dateApp = ansi2011 {diAppKeywords = "date" : diAppKeywords ansi2011} diff --git a/website/make_website.sh b/website/make_website.sh index 90c6dd4..b550e36 100755 --- a/website/make_website.sh +++ b/website/make_website.sh @@ -14,15 +14,15 @@ cp website/main.css build cp website/ocean.css build # index -asciidoctor website/index.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/index.html +asciidoctor website/index.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/index.html -asciidoctor website/supported_sql.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/supported_sql.html +asciidoctor website/supported_sql.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/supported_sql.html # tpch sql file # pandoc src/tpch.sql -s --highlight-style kate -o tpch.sql.html # rendered test cases # build the parserexe target first to fix the package database -cabal exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc +cabal -v0 exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc asciidoctor build/test_cases.asciidoc -o - | \ sed -e "s/max-width:62\.5em//g" \