From 5d9a32a91dcc37621c86787fe4d0ab4b1badee3e Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Sat, 19 Apr 2014 00:18:15 +0300 Subject: [PATCH] reserve most of the reserved keywords in the parser --- Language/SQL/SimpleSQL/Parser.lhs | 527 +++++++++++++++++++++- TODO | 3 +- tools/Language/SQL/SimpleSQL/Postgres.lhs | 2 +- 3 files changed, 525 insertions(+), 7 deletions(-) diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 0a39bee..bc09552 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -519,7 +519,7 @@ a match (select a from t) > collate :: Parser (ValueExpr -> ValueExpr) > collate = do > keyword_ "collate" -> i <- identifier +> i <- identifierBlacklist blacklist > return $ \v -> Collate v i @@ -551,11 +551,13 @@ TODO: this need heavy refactoring > -- precision, scale, lob scale and units, timezone, character > -- set and collations > otherTypeName = do -> tn <- (try multiWordParsers <|> names) +> tn <- (try multiWordParsers <|> names <|> baseTypeName) > choice [try $ timezone tn > ,try (precscale tn) >>= optionSuffix charSuffix > ,try $ lob tn > ,optionSuffix charSuffix $ TypeName tn] +> -- fix this hack, needs left factoring better or something +> baseTypeName = (:[]) . Name <$> identifier > timezone tn = do > TimeTypeName tn > <$> optionMaybe prec @@ -632,11 +634,18 @@ TODO: this need heavy refactoring > where > intervalField = > Itf -> <$> identifierBlacklist blacklist +> <$> datetimeField > <*> optionMaybe > (parens ((,) <$> unsignedInteger > <*> optionMaybe (comma *> unsignedInteger))) +TODO: use this in extract +use a data type for the datetime field? 
+ +> datetimeField :: Parser String +> datetimeField = choice (map keyword ["year","month","day" +> ,"hour","minute","second"]) +> <?> "datetime field" == value expression parens, row ctor and scalar subquery @@ -1265,7 +1274,7 @@ instead, and create an alternative suffix parser > <?> "identifier" > blacklist :: [String] -> blacklist = +> blacklist = reservedWord {- > [-- case > "case", "when", "then", "else", "end" > ,--join @@ -1274,7 +1283,7 @@ instead, and create an alternative suffix parser > ,"from","where","group","having","order","limit", "offset", "fetch" > ,"as","in" > ,"except", "intersect", "union" -> ] +> ] -} These blacklisted names are mostly needed when we parse something with an optional alias, e.g. select a a from t. If we write select a from @@ -1287,6 +1296,514 @@ The standard has a weird mix of reserved keywords and unreserved keywords (I'm not sure what exactly being an unreserved keyword means). +> nonReservedWord :: [String] +> nonReservedWord = +> ["a" +> ,"abs" +> ,"absolute" +> ,"action" +> ,"ada" +> ,"admin" +> ,"after" +> ,"always" +> ,"asc" +> ,"assertion" +> ,"assignment" +> ,"attribute" +> ,"attributes" +> ,"avg" +> ,"before" +> ,"bernoulli" +> ,"breadth" +> ,"c" +> ,"cardinality" +> ,"cascade" +> ,"catalog" +> ,"catalog_name" +> ,"ceil" +> ,"ceiling" +> ,"chain" +> ,"characteristics" +> ,"characters" +> ,"character_length" +> ,"character_set_catalog" +> ,"character_set_name" +> ,"character_set_schema" +> ,"char_length" +> ,"checked" +> ,"class_origin" +> ,"coalesce" +> ,"cobol" +> ,"code_units" +> ,"collation" +> ,"collation_catalog" +> ,"collation_name" +> ,"collation_schema" +> ,"collect" +> ,"column_name" +> ,"command_function" +> ,"command_function_code" +> ,"committed" +> ,"condition" +> ,"condition_number" +> ,"connection_name" +> ,"constraints" +> ,"constraint_catalog" +> ,"constraint_name" +> ,"constraint_schema" +> ,"constructors" +> ,"contains" +> ,"convert" +> ,"corr" +> ,"count" +> ,"covar_pop" +> ,"covar_samp" +> ,"cume_dist" 
+> ,"current_collation" +> ,"cursor_name" +> ,"data" +> ,"datetime_interval_code" +> ,"datetime_interval_precision" +> ,"defaults" +> ,"deferrable" +> ,"deferred" +> ,"defined" +> ,"definer" +> ,"degree" +> ,"dense_rank" +> ,"depth" +> ,"derived" +> ,"desc" +> ,"descriptor" +> ,"diagnostics" +> ,"dispatch" +> ,"domain" +> ,"dynamic_function" +> ,"dynamic_function_code" +> ,"equals" +> ,"every" +> ,"exception" +> ,"exclude" +> ,"excluding" +> ,"exp" +> ,"extract" +> ,"final" +> ,"first" +> ,"floor" +> ,"following" +> ,"fortran" +> ,"found" +> ,"fusion" +> ,"g" +> ,"general" +> ,"go" +> ,"goto" +> ,"granted" +> ,"hierarchy" +> ,"implementation" +> ,"including" +> ,"increment" +> ,"initially" +> ,"instance" +> ,"instantiable" +> ,"intersection" +> ,"invoker" +> ,"isolation" +> ,"k" +> ,"key" +> ,"key_member" +> ,"key_type" +> ,"last" +> ,"length" +> ,"level" +> ,"ln" +> ,"locator" +> ,"lower" +> ,"m" +> ,"map" +> ,"matched" +> ,"max" +> ,"maxvalue" +> ,"message_length" +> ,"message_octet_length" +> ,"message_text" +> ,"min" +> ,"minvalue" +> ,"mod" +> ,"more" +> ,"mumps" +> ,"name" +> ,"names" +> ,"nesting" +> ,"next" +> ,"normalize" +> ,"normalized" +> ,"nullable" +> ,"nullif" +> ,"nulls" +> ,"number" +> ,"object" +> ,"octets" +> ,"octet_length" +> ,"option" +> ,"options" +> ,"ordering" +> ,"ordinality" +> ,"others" +> ,"overlay" +> ,"overriding" +> ,"pad" +> ,"parameter_mode" +> ,"parameter_name" +> ,"parameter_ordinal_position" +> ,"parameter_specific_catalog" +> ,"parameter_specific_name" +> ,"parameter_specific_schema" +> ,"partial" +> ,"pascal" +> ,"path" +> ,"percentile_cont" +> ,"percentile_disc" +> ,"percent_rank" +> ,"placing" +> ,"pli" +> ,"position" +> ,"power" +> ,"preceding" +> ,"preserve" +> ,"prior" +> ,"privileges" +> ,"public" +> ,"rank" +> ,"read" +> ,"relative" +> ,"repeatable" +> ,"restart" +> ,"returned_cardinality" +> ,"returned_length" +> ,"returned_octet_length" +> ,"returned_sqlstate" +> ,"role" +> ,"routine" +> ,"routine_catalog" +> 
,"routine_name" +> ,"routine_schema" +> ,"row_count" +> ,"row_number" +> ,"scale" +> ,"schema" +> ,"schema_name" +> ,"scope_catalog" +> ,"scope_name" +> ,"scope_schema" +> ,"section" +> ,"security" +> ,"self" +> ,"sequence" +> ,"serializable" +> ,"server_name" +> ,"session" +> ,"sets" +> ,"simple" +> ,"size" +> ,"source" +> ,"space" +> ,"specific_name" +> ,"sqrt" +> ,"state" +> ,"statement" +> ,"stddev_pop" +> ,"stddev_samp" +> ,"structure" +> ,"style" +> ,"subclass_origin" +> ,"substring" +> ,"sum" +> ,"tablesample" +> ,"table_name" +> ,"temporary" +> ,"ties" +> ,"top_level_count" +> ,"transaction" +> ,"transactions_committed" +> ,"transactions_rolled_back" +> ,"transaction_active" +> ,"transform" +> ,"transforms" +> ,"translate" +> ,"trigger_catalog" +> ,"trigger_name" +> ,"trigger_schema" +> ,"trim" +> ,"type" +> ,"unbounded" +> ,"uncommitted" +> ,"under" +> ,"unnamed" +> ,"usage" +> ,"user_defined_type_catalog" +> ,"user_defined_type_code" +> ,"user_defined_type_name" +> ,"user_defined_type_schema" +> ,"view" +> ,"work" +> ,"write" +> ,"zone"] + +> reservedWord :: [String] +> reservedWord = +> ["add" +> ,"all" +> ,"allocate" +> ,"alter" +> ,"and" +> ,"any" +> ,"are" +> ,"array" +> ,"as" +> ,"asensitive" +> ,"asymmetric" +> ,"at" +> ,"atomic" +> ,"authorization" +> ,"begin" +> ,"between" +> ,"bigint" +> ,"binary" +> ,"blob" +> ,"boolean" +> ,"both" +> ,"by" +> ,"call" +> ,"called" +> ,"cascaded" +> ,"case" +> ,"cast" +> ,"char" +> ,"character" +> ,"check" +> ,"clob" +> ,"close" +> ,"collate" +> ,"column" +> ,"commit" +> ,"connect" +> ,"constraint" +> ,"continue" +> ,"corresponding" +> ,"create" +> ,"cross" +> ,"cube" +> ,"current" +> --,"current_date" +> ,"current_default_transform_group" +> ,"current_path" +> ,"current_role" +> ,"current_time" +> ,"current_timestamp" +> ,"current_transform_group_for_type" +> ,"current_user" +> ,"cursor" +> ,"cycle" +> --,"date" +> --,"day" +> ,"deallocate" +> ,"dec" +> --,"decimal" +> ,"declare" +> --,"default" +> ,"delete" +> 
,"deref" +> ,"describe" +> ,"deterministic" +> ,"disconnect" +> ,"distinct" +> ,"double" +> ,"drop" +> ,"dynamic" +> ,"each" +> --,"element" +> ,"else" +> ,"end" +> ,"end-exec" +> ,"escape" +> ,"except" +> ,"exec" +> ,"execute" +> ,"exists" +> ,"external" +> --,"false" +> ,"fetch" +> ,"filter" +> ,"float" +> ,"for" +> ,"foreign" +> ,"free" +> ,"from" +> ,"full" +> ,"function" +> ,"get" +> ,"global" +> ,"grant" +> ,"group" +> ,"grouping" +> ,"having" +> ,"hold" +> --,"hour" +> ,"identity" +> ,"immediate" +> ,"in" +> ,"indicator" +> ,"inner" +> ,"inout" +> ,"input" +> ,"insensitive" +> ,"insert" +> ,"int" +> ,"integer" +> ,"intersect" +> ,"interval" +> ,"into" +> ,"is" +> ,"isolation" +> ,"join" +> ,"language" +> ,"large" +> ,"lateral" +> ,"leading" +> ,"left" +> ,"like" +> ,"local" +> ,"localtime" +> ,"localtimestamp" +> ,"match" +> ,"member" +> ,"merge" +> ,"method" +> --,"minute" +> ,"modifies" +> ,"module" +> --,"month" +> ,"multiset" +> ,"national" +> ,"natural" +> ,"nchar" +> ,"nclob" +> ,"new" +> ,"no" +> ,"none" +> ,"not" +> --,"null" +> ,"numeric" +> ,"of" +> ,"old" +> ,"on" +> ,"only" +> ,"open" +> ,"or" +> ,"order" +> ,"out" +> ,"outer" +> ,"output" +> ,"over" +> ,"overlaps" +> ,"parameter" +> ,"partition" +> ,"precision" +> ,"prepare" +> ,"primary" +> ,"procedure" +> ,"range" +> ,"reads" +> ,"real" +> ,"recursive" +> ,"ref" +> ,"references" +> ,"referencing" +> ,"regr_avgx" +> ,"regr_avgy" +> ,"regr_count" +> ,"regr_intercept" +> ,"regr_r2" +> ,"regr_slope" +> ,"regr_sxx" +> ,"regr_sxy" +> ,"regr_syy" +> ,"release" +> ,"result" +> ,"return" +> ,"returns" +> ,"revoke" +> ,"right" +> ,"rollback" +> ,"rollup" +> --,"row" +> ,"rows" +> ,"savepoint" +> ,"scroll" +> ,"search" +> --,"second" +> ,"select" +> ,"sensitive" +> ,"session_user" +> --,"set" +> ,"similar" +> ,"smallint" +> ,"some" +> ,"specific" +> ,"specifictype" +> ,"sql" +> ,"sqlexception" +> ,"sqlstate" +> ,"sqlwarning" +> --,"start" +> ,"static" +> ,"submultiset" +> ,"symmetric" +> ,"system" +> 
,"system_user" +> ,"table" +> ,"then" +> ,"time" +> ,"timestamp" +> ,"timezone_hour" +> ,"timezone_minute" +> ,"to" +> ,"trailing" +> ,"translation" +> ,"treat" +> ,"trigger" +> --,"true" +> ,"uescape" +> ,"union" +> ,"unique" +> --,"unknown" +> ,"unnest" +> ,"update" +> ,"upper" +> ,"user" +> ,"using" +> --,"value" +> ,"values" +> ,"var_pop" +> ,"var_samp" +> ,"varchar" +> ,"varying" +> ,"when" +> ,"whenever" +> ,"where" +> ,"width_bucket" +> ,"window" +> ,"with" +> ,"within" +> ,"without" +> --,"year" + +> -- added for this parser +> ,"limit" +> ,"offset" +> ] + -------------------------------------------- = helper functions diff --git a/TODO b/TODO index 03e224d..beddb92 100644 --- a/TODO +++ b/TODO @@ -63,7 +63,8 @@ rules for changing the multi keyword parsing: rough SQL 2003 todo, including tests to write: now: -implement the reservation of all keywords +review the commented out reserved keyword entries and work out how to + fix go through all? the functions go through almost all the predicates window functions missing bits, window clauses diff --git a/tools/Language/SQL/SimpleSQL/Postgres.lhs b/tools/Language/SQL/SimpleSQL/Postgres.lhs index 804db43..ce1f500 100644 --- a/tools/Language/SQL/SimpleSQL/Postgres.lhs +++ b/tools/Language/SQL/SimpleSQL/Postgres.lhs @@ -31,7 +31,7 @@ TODO: get all the commented out tests working > -- table is a reservered keyword? > --,"SELECT ROW(table.*) IS NULL FROM table;" -> ,"SELECT ROW(tablex.*) IS NULL FROM table;" +> ,"SELECT ROW(tablex.*) IS NULL FROM tablex;" > ,"SELECT true OR somefunc();"