From 5d9a32a91dcc37621c86787fe4d0ab4b1badee3e Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Sat, 19 Apr 2014 00:18:15 +0300
Subject: [PATCH] reserve most of the reserved keywords in the parser

---
 Language/SQL/SimpleSQL/Parser.lhs         | 527 +++++++++++++++++++++-
 TODO                                      |   3 +-
 tools/Language/SQL/SimpleSQL/Postgres.lhs |   2 +-
 3 files changed, 525 insertions(+), 7 deletions(-)

diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs
index 0a39bee..bc09552 100644
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@@ -519,7 +519,7 @@ a match (select a from t)
 > collate :: Parser (ValueExpr -> ValueExpr)
 > collate = do
 >           keyword_ "collate"
->           i <- identifier
+>           i <- identifierBlacklist blacklist
 >           return $ \v -> Collate v i
 
 
@@ -551,11 +551,13 @@ TODO: this need heavy refactoring
 >     -- precision, scale, lob scale and units, timezone, character
 >     -- set and collations
 >     otherTypeName = do
->         tn <- (try multiWordParsers <|> names)
+>         tn <- (try multiWordParsers <|> names <|> baseTypeName)
 >         choice [try $ timezone tn
 >                ,try (precscale tn) >>= optionSuffix charSuffix
 >                ,try $ lob tn
 >                ,optionSuffix charSuffix $ TypeName tn]
+>     -- TODO: fix this hack; the alternatives above need better left factoring
+>     baseTypeName = (:[]) . Name <$> identifier
 >     timezone tn = do
 >         TimeTypeName tn
 >         <$> optionMaybe prec
@@ -632,11 +634,18 @@ TODO: this need heavy refactoring
 >   where
 >     intervalField =
 >         Itf
->         <$> identifierBlacklist blacklist
+>         <$> datetimeField
 >         <*> optionMaybe
 >             (parens ((,) <$> unsignedInteger
 >                          <*> optionMaybe (comma *> unsignedInteger)))
 
+TODO: use this parser in extract as well.
+Should the datetime field be a data type instead of a String?
+
+> datetimeField :: Parser String
+> datetimeField = choice (keyword <$> fieldNames) <?> "datetime field"
+>   where -- the datetime field names, each parsed as a keyword
+>     fieldNames = ["year","month","day","hour","minute","second"]
 
 == value expression parens, row ctor and scalar subquery
 
@@ -1265,7 +1274,7 @@ instead, and create an alternative suffix parser
 >     <?> "identifier"
 
 > blacklist :: [String]
-> blacklist =
+> blacklist = reservedWord {-
 >     [-- case
 >      "case", "when", "then", "else", "end"
 >     ,--join
@@ -1274,7 +1283,7 @@ instead, and create an alternative suffix parser
 >     ,"from","where","group","having","order","limit", "offset", "fetch"
 >     ,"as","in"
 >     ,"except", "intersect", "union"
->     ]
+>     ] -}
 
 These blacklisted names are mostly needed when we parse something with
 an optional alias, e.g. select a a from t. If we write select a from
@@ -1287,6 +1296,514 @@ The standard has a weird mix of reserved keywords and unreserved
 keywords (I'm not sure what exactly being an unreserved keyword
 means).
 
+> nonReservedWord :: [String] -- unreserved keywords; blacklist does not use this
+> nonReservedWord =
+>     ["a"
+>     ,"abs"
+>     ,"absolute"
+>     ,"action"
+>     ,"ada"
+>     ,"admin"
+>     ,"after"
+>     ,"always"
+>     ,"asc"
+>     ,"assertion"
+>     ,"assignment"
+>     ,"attribute"
+>     ,"attributes"
+>     ,"avg"
+>     ,"before"
+>     ,"bernoulli"
+>     ,"breadth"
+>     ,"c"
+>     ,"cardinality"
+>     ,"cascade"
+>     ,"catalog"
+>     ,"catalog_name"
+>     ,"ceil"
+>     ,"ceiling"
+>     ,"chain"
+>     ,"characteristics"
+>     ,"characters"
+>     ,"character_length"
+>     ,"character_set_catalog"
+>     ,"character_set_name"
+>     ,"character_set_schema"
+>     ,"char_length"
+>     ,"checked"
+>     ,"class_origin"
+>     ,"coalesce"
+>     ,"cobol"
+>     ,"code_units"
+>     ,"collation"
+>     ,"collation_catalog"
+>     ,"collation_name"
+>     ,"collation_schema"
+>     ,"collect"
+>     ,"column_name"
+>     ,"command_function"
+>     ,"command_function_code"
+>     ,"committed"
+>     ,"condition"
+>     ,"condition_number"
+>     ,"connection_name"
+>     ,"constraints"
+>     ,"constraint_catalog"
+>     ,"constraint_name"
+>     ,"constraint_schema"
+>     ,"constructors"
+>     ,"contains"
+>     ,"convert"
+>     ,"corr"
+>     ,"count"
+>     ,"covar_pop"
+>     ,"covar_samp"
+>     ,"cume_dist"
+>     ,"current_collation"
+>     ,"cursor_name"
+>     ,"data"
+>     ,"datetime_interval_code"
+>     ,"datetime_interval_precision"
+>     ,"defaults"
+>     ,"deferrable"
+>     ,"deferred"
+>     ,"defined"
+>     ,"definer"
+>     ,"degree"
+>     ,"dense_rank"
+>     ,"depth"
+>     ,"derived"
+>     ,"desc"
+>     ,"descriptor"
+>     ,"diagnostics"
+>     ,"dispatch"
+>     ,"domain"
+>     ,"dynamic_function"
+>     ,"dynamic_function_code"
+>     ,"equals"
+>     ,"every"
+>     ,"exception"
+>     ,"exclude"
+>     ,"excluding"
+>     ,"exp"
+>     ,"extract"
+>     ,"final"
+>     ,"first"
+>     ,"floor"
+>     ,"following"
+>     ,"fortran"
+>     ,"found"
+>     ,"fusion"
+>     ,"g"
+>     ,"general"
+>     ,"go"
+>     ,"goto"
+>     ,"granted"
+>     ,"hierarchy"
+>     ,"implementation"
+>     ,"including"
+>     ,"increment"
+>     ,"initially"
+>     ,"instance"
+>     ,"instantiable"
+>     ,"intersection"
+>     ,"invoker"
+>     ,"isolation"
+>     ,"k"
+>     ,"key"
+>     ,"key_member"
+>     ,"key_type"
+>     ,"last"
+>     ,"length"
+>     ,"level"
+>     ,"ln"
+>     ,"locator"
+>     ,"lower"
+>     ,"m"
+>     ,"map"
+>     ,"matched"
+>     ,"max"
+>     ,"maxvalue"
+>     ,"message_length"
+>     ,"message_octet_length"
+>     ,"message_text"
+>     ,"min"
+>     ,"minvalue"
+>     ,"mod"
+>     ,"more"
+>     ,"mumps"
+>     ,"name"
+>     ,"names"
+>     ,"nesting"
+>     ,"next"
+>     ,"normalize"
+>     ,"normalized"
+>     ,"nullable"
+>     ,"nullif"
+>     ,"nulls"
+>     ,"number"
+>     ,"object"
+>     ,"octets"
+>     ,"octet_length"
+>     ,"option"
+>     ,"options"
+>     ,"ordering"
+>     ,"ordinality"
+>     ,"others"
+>     ,"overlay"
+>     ,"overriding"
+>     ,"pad"
+>     ,"parameter_mode"
+>     ,"parameter_name"
+>     ,"parameter_ordinal_position"
+>     ,"parameter_specific_catalog"
+>     ,"parameter_specific_name"
+>     ,"parameter_specific_schema"
+>     ,"partial"
+>     ,"pascal"
+>     ,"path"
+>     ,"percentile_cont"
+>     ,"percentile_disc"
+>     ,"percent_rank"
+>     ,"placing"
+>     ,"pli"
+>     ,"position"
+>     ,"power"
+>     ,"preceding"
+>     ,"preserve"
+>     ,"prior"
+>     ,"privileges"
+>     ,"public"
+>     ,"rank"
+>     ,"read"
+>     ,"relative"
+>     ,"repeatable"
+>     ,"restart"
+>     ,"returned_cardinality"
+>     ,"returned_length"
+>     ,"returned_octet_length"
+>     ,"returned_sqlstate"
+>     ,"role"
+>     ,"routine"
+>     ,"routine_catalog"
+>     ,"routine_name"
+>     ,"routine_schema"
+>     ,"row_count"
+>     ,"row_number"
+>     ,"scale"
+>     ,"schema"
+>     ,"schema_name"
+>     ,"scope_catalog"
+>     ,"scope_name"
+>     ,"scope_schema"
+>     ,"section"
+>     ,"security"
+>     ,"self"
+>     ,"sequence"
+>     ,"serializable"
+>     ,"server_name"
+>     ,"session"
+>     ,"sets"
+>     ,"simple"
+>     ,"size"
+>     ,"source"
+>     ,"space"
+>     ,"specific_name"
+>     ,"sqrt"
+>     ,"state"
+>     ,"statement"
+>     ,"stddev_pop"
+>     ,"stddev_samp"
+>     ,"structure"
+>     ,"style"
+>     ,"subclass_origin"
+>     ,"substring"
+>     ,"sum"
+>     ,"tablesample"
+>     ,"table_name"
+>     ,"temporary"
+>     ,"ties"
+>     ,"top_level_count"
+>     ,"transaction"
+>     ,"transactions_committed"
+>     ,"transactions_rolled_back"
+>     ,"transaction_active"
+>     ,"transform"
+>     ,"transforms"
+>     ,"translate"
+>     ,"trigger_catalog"
+>     ,"trigger_name"
+>     ,"trigger_schema"
+>     ,"trim"
+>     ,"type"
+>     ,"unbounded"
+>     ,"uncommitted"
+>     ,"under"
+>     ,"unnamed"
+>     ,"usage"
+>     ,"user_defined_type_catalog"
+>     ,"user_defined_type_code"
+>     ,"user_defined_type_name"
+>     ,"user_defined_type_schema"
+>     ,"view"
+>     ,"work"
+>     ,"write"
+>     ,"zone"]
+
+> reservedWord :: [String] -- reserved keywords; blacklist is defined as this list
+> reservedWord = -- entries commented out below are not reserved yet, see TODO file
+>     ["add"
+>     ,"all"
+>     ,"allocate"
+>     ,"alter"
+>     ,"and"
+>     ,"any"
+>     ,"are"
+>     ,"array"
+>     ,"as"
+>     ,"asensitive"
+>     ,"asymmetric"
+>     ,"at"
+>     ,"atomic"
+>     ,"authorization"
+>     ,"begin"
+>     ,"between"
+>     ,"bigint"
+>     ,"binary"
+>     ,"blob"
+>     ,"boolean"
+>     ,"both"
+>     ,"by"
+>     ,"call"
+>     ,"called"
+>     ,"cascaded"
+>     ,"case"
+>     ,"cast"
+>     ,"char"
+>     ,"character"
+>     ,"check"
+>     ,"clob"
+>     ,"close"
+>     ,"collate"
+>     ,"column"
+>     ,"commit"
+>     ,"connect"
+>     ,"constraint"
+>     ,"continue"
+>     ,"corresponding"
+>     ,"create"
+>     ,"cross"
+>     ,"cube"
+>     ,"current"
+>     --,"current_date"
+>     ,"current_default_transform_group"
+>     ,"current_path"
+>     ,"current_role"
+>     ,"current_time"
+>     ,"current_timestamp"
+>     ,"current_transform_group_for_type"
+>     ,"current_user"
+>     ,"cursor"
+>     ,"cycle"
+>     --,"date"
+>     --,"day"
+>     ,"deallocate"
+>     ,"dec"
+>     --,"decimal"
+>     ,"declare"
+>     --,"default"
+>     ,"delete"
+>     ,"deref"
+>     ,"describe"
+>     ,"deterministic"
+>     ,"disconnect"
+>     ,"distinct"
+>     ,"double"
+>     ,"drop"
+>     ,"dynamic"
+>     ,"each"
+>     --,"element"
+>     ,"else"
+>     ,"end"
+>     ,"end-exec"
+>     ,"escape"
+>     ,"except"
+>     ,"exec"
+>     ,"execute"
+>     ,"exists"
+>     ,"external"
+>     --,"false"
+>     ,"fetch"
+>     ,"filter"
+>     ,"float"
+>     ,"for"
+>     ,"foreign"
+>     ,"free"
+>     ,"from"
+>     ,"full"
+>     ,"function"
+>     ,"get"
+>     ,"global"
+>     ,"grant"
+>     ,"group"
+>     ,"grouping"
+>     ,"having"
+>     ,"hold"
+>     --,"hour"
+>     ,"identity"
+>     ,"immediate"
+>     ,"in"
+>     ,"indicator"
+>     ,"inner"
+>     ,"inout"
+>     ,"input"
+>     ,"insensitive"
+>     ,"insert"
+>     ,"int"
+>     ,"integer"
+>     ,"intersect"
+>     ,"interval"
+>     ,"into"
+>     ,"is"
+>     ,"isolation" -- NOTE(review): also listed in nonReservedWord; confirm which
+>     ,"join"
+>     ,"language"
+>     ,"large"
+>     ,"lateral"
+>     ,"leading"
+>     ,"left"
+>     ,"like"
+>     ,"local"
+>     ,"localtime"
+>     ,"localtimestamp"
+>     ,"match"
+>     ,"member"
+>     ,"merge"
+>     ,"method"
+>     --,"minute"
+>     ,"modifies"
+>     ,"module"
+>     --,"month"
+>     ,"multiset"
+>     ,"national"
+>     ,"natural"
+>     ,"nchar"
+>     ,"nclob"
+>     ,"new"
+>     ,"no"
+>     ,"none"
+>     ,"not"
+>     --,"null"
+>     ,"numeric"
+>     ,"of"
+>     ,"old"
+>     ,"on"
+>     ,"only"
+>     ,"open"
+>     ,"or"
+>     ,"order"
+>     ,"out"
+>     ,"outer"
+>     ,"output"
+>     ,"over"
+>     ,"overlaps"
+>     ,"parameter"
+>     ,"partition"
+>     ,"precision"
+>     ,"prepare"
+>     ,"primary"
+>     ,"procedure"
+>     ,"range"
+>     ,"reads"
+>     ,"real"
+>     ,"recursive"
+>     ,"ref"
+>     ,"references"
+>     ,"referencing"
+>     ,"regr_avgx"
+>     ,"regr_avgy"
+>     ,"regr_count"
+>     ,"regr_intercept"
+>     ,"regr_r2"
+>     ,"regr_slope"
+>     ,"regr_sxx"
+>     ,"regr_sxy"
+>     ,"regr_syy"
+>     ,"release"
+>     ,"result"
+>     ,"return"
+>     ,"returns"
+>     ,"revoke"
+>     ,"right"
+>     ,"rollback"
+>     ,"rollup"
+>     --,"row"
+>     ,"rows"
+>     ,"savepoint"
+>     ,"scroll"
+>     ,"search"
+>     --,"second"
+>     ,"select"
+>     ,"sensitive"
+>     ,"session_user"
+>     --,"set"
+>     ,"similar"
+>     ,"smallint"
+>     ,"some"
+>     ,"specific"
+>     ,"specifictype"
+>     ,"sql"
+>     ,"sqlexception"
+>     ,"sqlstate"
+>     ,"sqlwarning"
+>     --,"start"
+>     ,"static"
+>     ,"submultiset"
+>     ,"symmetric"
+>     ,"system"
+>     ,"system_user"
+>     ,"table"
+>     ,"then"
+>     ,"time"
+>     ,"timestamp"
+>     ,"timezone_hour"
+>     ,"timezone_minute"
+>     ,"to"
+>     ,"trailing"
+>     ,"translation"
+>     ,"treat"
+>     ,"trigger"
+>     --,"true"
+>     ,"uescape"
+>     ,"union"
+>     ,"unique"
+>     --,"unknown"
+>     ,"unnest"
+>     ,"update"
+>     ,"upper"
+>     ,"user"
+>     ,"using"
+>     --,"value"
+>     ,"values"
+>     ,"var_pop"
+>     ,"var_samp"
+>     ,"varchar"
+>     ,"varying"
+>     ,"when"
+>     ,"whenever"
+>     ,"where"
+>     ,"width_bucket"
+>     ,"window"
+>     ,"with"
+>     ,"within"
+>     ,"without"
+>     --,"year"
+
+>     -- extra words added for this parser (both were in the old blacklist)
+>     ,"limit"
+>     ,"offset"
+>     ]
+
 --------------------------------------------
 
 = helper functions
diff --git a/TODO b/TODO
index 03e224d..beddb92 100644
--- a/TODO
+++ b/TODO
@@ -63,7 +63,8 @@ rules for changing the multi keyword parsing:
 rough SQL 2003 todo, including tests to write:
 
 now:
-implement the reservation of all keywords
+review the commented out reserved keyword entries and work out how to
+   fix them
 go through all? the functions
 go through almost all the predicates
 window functions missing bits, window clauses
diff --git a/tools/Language/SQL/SimpleSQL/Postgres.lhs b/tools/Language/SQL/SimpleSQL/Postgres.lhs
index 804db43..ce1f500 100644
--- a/tools/Language/SQL/SimpleSQL/Postgres.lhs
+++ b/tools/Language/SQL/SimpleSQL/Postgres.lhs
@@ -31,7 +31,7 @@ TODO: get all the commented out tests working
 
 >     -- table is a reservered keyword?
 >     --,"SELECT ROW(table.*) IS NULL FROM table;"
->     ,"SELECT ROW(tablex.*) IS NULL FROM table;"
+>     ,"SELECT ROW(tablex.*) IS NULL FROM tablex;"
 
 >     ,"SELECT true OR somefunc();"