From 86f5e203af378cf8697afd06fa595bce9f20a8e5 Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Sun, 1 Sep 2019 09:34:10 +0100
Subject: [PATCH] add iden keywords and app keywords to the dialect

---
 Language/SQL/SimpleSQL/Dialect.lhs            | 34 ++++++++++++++++++-
 Language/SQL/SimpleSQL/Parse.lhs              | 26 +++++++-------
 TODO                                          |  9 +++++
 .../Language/SQL/SimpleSQL/CustomDialect.lhs  | 24 ++++++++++---
 website/make_website.sh                       |  6 ++--
 5 files changed, 77 insertions(+), 22 deletions(-)

diff --git a/Language/SQL/SimpleSQL/Dialect.lhs b/Language/SQL/SimpleSQL/Dialect.lhs
index 0fdfcf9..0eeb540 100644
--- a/Language/SQL/SimpleSQL/Dialect.lhs
+++ b/Language/SQL/SimpleSQL/Dialect.lhs
@@ -20,9 +20,17 @@ Data types to represent different dialect options
 > data Dialect = Dialect
 >     { -- | The list of reserved keywords
 >      diKeywords :: [String]
+>       -- | The list of reserved keywords, which can also be used as
+>       -- |  an identifier
+>     ,diIdentifierKeywords :: [String]
+>       -- | The list of reserved keywords, which can also be used as
+>       -- |  a function name (including aggregates and window
+>       -- |  functions)
+>     ,diAppKeywords :: [String]
 >      -- | does the dialect support ansi fetch first syntax
 >     ,diFetchFirst :: Bool
->      -- | does the dialect support limit keyword (mysql, postgres, ...)
+>      -- | does the dialect support limit keyword (mysql, postgres,
+>      -- |  ...)
 >     ,diLimit :: Bool
 >      -- | allow parsing ODBC syntax
 >     ,diOdbc :: Bool
@@ -50,6 +58,8 @@ Data types to represent different dialect options
 > -- | ansi sql 2011 dialect
 > ansi2011 :: Dialect
 > ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords
+>                    ,diIdentifierKeywords = []
+>                    ,diAppKeywords = ["set"]
 >                    ,diFetchFirst = True
 >                    ,diLimit = False
 >                    ,diOdbc = False
@@ -104,6 +114,28 @@ mostly, things are keywords to avoid them mistakenly being parsed as
 aliases or as identifiers/functions/function-like things (aggs,
 windows, etc.)
 
+some rationale for having quite string reserved keywords:
+
+1. sql has the unusual (these days) feature of quoting identifiers
+    which allows you to use any keyword in any context
+
+2. the user already has to deal with a very long list of keywords in
+   sql. this is not very user friendly
+
+3. if the user has to remember which situations which keyword needs
+   quoting, and which it doesn't need quoting, this is also not very
+   user friendly, even if it means less quoting sometimes. E.g. if
+   you only need to quote 'from' in places where it is ambiguous, and
+   you want to take advantage of this, this list of good/not-good
+   places is based on the weirdness of SQL grammar and the
+   implementation details of the parser - and it's especially bad if
+   you are using from as an iden without quotes, and you edit the sql
+   statement, and now from is in a position where it does need
+   quotes, and you get a obscure error message
+
+4. there is a lot more potential for nice clear error messages
+   keywords are never allowed without quoting
+
 > ansi2011ReservedKeywords :: [String]
 > ansi2011ReservedKeywords =
 >     [--"abs" -- function
diff --git a/Language/SQL/SimpleSQL/Parse.lhs b/Language/SQL/SimpleSQL/Parse.lhs
index 39d755d..8cd0908 100644
--- a/Language/SQL/SimpleSQL/Parse.lhs
+++ b/Language/SQL/SimpleSQL/Parse.lhs
@@ -719,22 +719,20 @@ all the scalar expressions which start with an identifier
 > idenExpr =
 >     -- todo: work out how to left factor this
 >     try (TypedLit <$> typeName <*> singleQuotesOnlyStringTok)
->     <|> (try keywordFunction <**> app)
 >     <|> (names <**> option Iden app)
+>     <|> keywordFunctionOrIden
 >   where
->     -- this is a special case because 'set' is a reserved keyword
->     -- and the names parser won't parse it
->     -- can't remove it from the reserved keyword list, because
->     -- it is used in a lot of places which are ambiguous as a keyword
->     -- this approach might be needed with other keywords which look
->     --  like identifiers or functions
->     keywordFunction =
->         let makeKeywordFunction x = if map toLower x `elem` keywordFunctionNames
->                                     then return [Name Nothing x]
->                                     else fail ""
->         in unquotedIdentifierTok [] Nothing >>= makeKeywordFunction
->     keywordFunctionNames = ["set"
->                            ]
+>     -- special cases for keywords that can be parsed as an iden or app
+>     keywordFunctionOrIden = try $ do
+>         x <- unquotedIdentifierTok [] Nothing
+>         d <- getState
+>         let i = map toLower x `elem` diIdentifierKeywords d
+>             a = map toLower x `elem` diAppKeywords d
+>         case () of
+>             _  | i && a -> pure [Name Nothing x] <**> option Iden app
+>                | i -> pure (Iden [Name Nothing x])
+>                | a -> pure [Name Nothing x] <**> app
+>                | otherwise -> fail ""
 
 
 === special
diff --git a/TODO b/TODO
index 3b204ac..a7e2da1 100644
--- a/TODO
+++ b/TODO
@@ -4,6 +4,15 @@ review alters, and think about adding rename versions
   which are really common and useful, but not in ansi
   https://github.com/JakeWheat/simple-sql-parser/issues/20
 
+try to get some control over the pretty printing and the error
+messages by creating some dumps of pretty printing and error messages,
+then can rerun these every so often to see how they've changed
+
+-> expose in the dialect:
+keywords which can (also?) appear as identifiers in scalar expressions
+keywords which can (also?) appear as app-likes - this is already implemented
+
+
 finish off going through the keyword list
 
 do more examples
diff --git a/tools/Language/SQL/SimpleSQL/CustomDialect.lhs b/tools/Language/SQL/SimpleSQL/CustomDialect.lhs
index ab04d5b..daa22a9 100644
--- a/tools/Language/SQL/SimpleSQL/CustomDialect.lhs
+++ b/tools/Language/SQL/SimpleSQL/CustomDialect.lhs
@@ -4,8 +4,24 @@
 > import Language.SQL.SimpleSQL.TestTypes
 
 > customDialectTests :: TestItem
-> customDialectTests = Group "custom dialect tests" (map (ParseQueryExpr myDialect) sometests
->     ++ [ParseScalarExprFails ansi2011 "SELECT DATE('2000-01-01')"])
+> customDialectTests = Group "custom dialect tests" (map (uncurry ParseQueryExpr) passTests
+>     ++ map (uncurry ParseScalarExprFails) failTests )
 >   where
->     myDialect = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)}
->     sometests = ["SELECT DATE('2000-01-01')"]
+>     failTests = [(ansi2011,"SELECT DATE('2000-01-01')")
+>                 ,(ansi2011,"SELECT DATE")
+>                 ,(dateApp,"SELECT DATE")
+>                 ,(dateIden,"SELECT DATE('2000-01-01')")
+>                 -- show this never being allowed as an alias
+>                 ,(ansi2011,"SELECT a date")
+>                 ,(dateApp,"SELECT a date")
+>                 ,(dateIden,"SELECT a date")
+>                 ]
+>     passTests = [(ansi2011,"SELECT a b")
+>                 ,(noDateKeyword,"SELECT DATE('2000-01-01')")
+>                 ,(noDateKeyword,"SELECT DATE")
+>                 ,(dateApp,"SELECT DATE('2000-01-01')")
+>                 ,(dateIden,"SELECT DATE")
+>                 ]
+>     noDateKeyword = ansi2011 {diKeywords = filter (/="date") (diKeywords ansi2011)}
+>     dateIden = ansi2011 {diIdentifierKeywords = "date" : diIdentifierKeywords ansi2011}
+>     dateApp = ansi2011 {diAppKeywords = "date" : diAppKeywords ansi2011}
diff --git a/website/make_website.sh b/website/make_website.sh
index 90c6dd4..b550e36 100755
--- a/website/make_website.sh
+++ b/website/make_website.sh
@@ -14,15 +14,15 @@ cp website/main.css build
 cp website/ocean.css build
 
 # index
-asciidoctor website/index.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/index.html
+asciidoctor website/index.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/index.html
 
-asciidoctor website/supported_sql.asciidoc -o - | cabal exec runhaskell website/AddLinks.lhs > build/supported_sql.html
+asciidoctor website/supported_sql.asciidoc -o - | cabal -v0 exec runhaskell website/AddLinks.lhs > build/supported_sql.html
 
 # tpch sql file
 # pandoc src/tpch.sql -s --highlight-style kate -o tpch.sql.html
 # rendered test cases
 # build the parserexe target first to fix the package database
-cabal exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc
+cabal -v0 exec runhaskell -- --ghc-arg=-package=pretty-show -itools website/RenderTestCases.lhs > build/test_cases.asciidoc
 
 asciidoctor build/test_cases.asciidoc -o - | \
     sed -e "s/max-width:62\.5em//g" \