tweaks to the keyword list and handling
This commit is contained in:
parent
1a1913e7b8
commit
ca910b8e6d
|
@ -92,6 +92,18 @@ Data types to represent different dialect options
|
||||||
> addLimit d = d {diKeywords = "limit": diKeywords d
|
> addLimit d = d {diKeywords = "limit": diKeywords d
|
||||||
> ,diLimit = True}
|
> ,diLimit = True}
|
||||||
|
|
||||||
|
todo: review this list
|
||||||
|
add tests
|
||||||
|
|
||||||
|
think about how to tell whether something can safely be made a non-keyword
|
||||||
|
(assuming something can only be a total keyword or not a keyword at all)
|
||||||
|
-> if something can't appear in a scalar expression or next to one,
|
||||||
|
then I think it's pretty safe
|
||||||
|
|
||||||
|
mostly, things are keywords to avoid them mistakenly being parsed as
|
||||||
|
aliases or as identifiers/functions/function-like things (aggs,
|
||||||
|
windows, etc.)
|
||||||
|
|
||||||
> ansi2011ReservedKeywords :: [String]
|
> ansi2011ReservedKeywords :: [String]
|
||||||
> ansi2011ReservedKeywords =
|
> ansi2011ReservedKeywords =
|
||||||
> [--"abs" -- function
|
> [--"abs" -- function
|
||||||
|
@ -108,78 +120,78 @@ Data types to represent different dialect options
|
||||||
> ,"asensitive" -- keyword
|
> ,"asensitive" -- keyword
|
||||||
> ,"asymmetric" -- keyword
|
> ,"asymmetric" -- keyword
|
||||||
> ,"at" -- keyword
|
> ,"at" -- keyword
|
||||||
> ,"atomic"
|
> ,"atomic" -- keyword
|
||||||
> ,"authorization"
|
> ,"authorization" -- keyword
|
||||||
> --,"avg"
|
> --,"avg" -- function
|
||||||
> ,"begin"
|
> ,"begin" -- keyword
|
||||||
> ,"begin_frame"
|
> --,"begin_frame" -- identifier
|
||||||
> ,"begin_partition"
|
> --,"begin_partition" -- identifier
|
||||||
> ,"between"
|
> ,"between" -- keyword
|
||||||
> ,"bigint"
|
> ,"bigint" -- type
|
||||||
> ,"binary"
|
> ,"binary" -- type
|
||||||
> ,"blob"
|
> ,"blob" -- type
|
||||||
> ,"boolean"
|
> ,"boolean" -- type
|
||||||
> ,"both"
|
> ,"both" -- keyword
|
||||||
> ,"by"
|
> ,"by" -- keyword
|
||||||
> ,"call"
|
> ,"call" -- keyword
|
||||||
> ,"called"
|
> ,"called" -- keyword
|
||||||
> ,"cardinality"
|
> -- ,"cardinality" -- function + identifier?
|
||||||
> ,"cascaded"
|
> ,"cascaded" -- keyword
|
||||||
> ,"case"
|
> ,"case" -- keyword
|
||||||
> ,"cast"
|
> ,"cast" -- special function
|
||||||
> ,"ceil"
|
> -- ,"ceil" -- function
|
||||||
> ,"ceiling"
|
> -- ,"ceiling" -- function
|
||||||
> ,"char"
|
> ,"char" -- type (+ keyword?)
|
||||||
> --,"char_length"
|
> --,"char_length" -- function
|
||||||
> ,"character"
|
> ,"character" -- type
|
||||||
> --,"character_length"
|
> --,"character_length" -- function
|
||||||
> ,"check"
|
> ,"check" -- keyword
|
||||||
> ,"clob"
|
> ,"clob" -- type
|
||||||
> ,"close"
|
> ,"close" -- keyword
|
||||||
> ,"coalesce"
|
> -- ,"coalesce" -- function
|
||||||
> ,"collate"
|
> ,"collate" -- keyword
|
||||||
> --,"collect"
|
> --,"collect" -- function
|
||||||
> ,"column"
|
> ,"column" -- keyword
|
||||||
> ,"commit"
|
> ,"commit" -- keyword
|
||||||
> ,"condition"
|
> ,"condition" -- keyword
|
||||||
> ,"connect"
|
> ,"connect" -- keyword
|
||||||
> ,"constraint"
|
> ,"constraint" --keyword
|
||||||
> ,"contains"
|
> --,"contains" -- keyword?
|
||||||
> --,"convert"
|
> --,"convert" -- function?
|
||||||
> --,"corr"
|
> --,"corr" -- function
|
||||||
> ,"corresponding"
|
> ,"corresponding" --keyword
|
||||||
> --,"count"
|
> --,"count" --function
|
||||||
> --,"covar_pop"
|
> --,"covar_pop" -- function
|
||||||
> --,"covar_samp"
|
> --,"covar_samp" --function
|
||||||
> ,"create"
|
> ,"create" -- keyword
|
||||||
> ,"cross"
|
> ,"cross" -- keyword
|
||||||
> ,"cube"
|
> ,"cube" -- keyword
|
||||||
> --,"cume_dist"
|
> --,"cume_dist" -- function
|
||||||
> ,"current"
|
> ,"current" -- keyword
|
||||||
> ,"current_catalog"
|
> -- ,"current_catalog" --identifier?
|
||||||
> --,"current_date"
|
> --,"current_date" -- identifier
|
||||||
> --,"current_default_transform_group"
|
> --,"current_default_transform_group" -- identifier
|
||||||
> --,"current_path"
|
> --,"current_path" -- identifier
|
||||||
> --,"current_role"
|
> --,"current_role" -- identifier
|
||||||
> ,"current_row"
|
> -- ,"current_row" -- identifier
|
||||||
> ,"current_schema"
|
> -- ,"current_schema" -- identifier
|
||||||
> ,"current_time"
|
> -- ,"current_time" -- identifier
|
||||||
> --,"current_timestamp"
|
> --,"current_timestamp" -- identifier
|
||||||
> ,"current_transform_group_for_type"
|
> --,"current_transform_group_for_type" -- identifier, or keyword?
|
||||||
> --,"current_user"
|
> --,"current_user" -- identifier
|
||||||
> ,"cursor"
|
> ,"cursor" -- keyword
|
||||||
> ,"cycle"
|
> ,"cycle" --keyword
|
||||||
> ,"date"
|
> ,"date" -- type
|
||||||
> --,"day"
|
> ,"day" -- keyword?
|
||||||
> ,"deallocate"
|
> ,"deallocate" -- keyword
|
||||||
> ,"dec"
|
> ,"dec" -- type
|
||||||
> ,"decimal"
|
> ,"decimal" -- type
|
||||||
> ,"declare"
|
> ,"declare" -- keyword
|
||||||
> --,"default"
|
> --,"default" -- identifier + keyword
|
||||||
> ,"delete"
|
> ,"delete" -- keyword
|
||||||
> --,"dense_rank"
|
> --,"dense_rank" -- function
|
||||||
> ,"deref"
|
> ,"deref" -- keyword
|
||||||
> ,"describe"
|
> ,"describe" -- keyword
|
||||||
> ,"deterministic"
|
> ,"deterministic"
|
||||||
> ,"disconnect"
|
> ,"disconnect"
|
||||||
> ,"distinct"
|
> ,"distinct"
|
||||||
|
@ -190,9 +202,9 @@ Data types to represent different dialect options
|
||||||
> --,"element"
|
> --,"element"
|
||||||
> ,"else"
|
> ,"else"
|
||||||
> ,"end"
|
> ,"end"
|
||||||
> ,"end_frame"
|
> -- ,"end_frame" -- identifier
|
||||||
> ,"end_partition"
|
> -- ,"end_partition" -- identifier
|
||||||
> ,"end-exec"
|
> ,"end-exec" -- no idea what this is
|
||||||
> ,"equals"
|
> ,"equals"
|
||||||
> ,"escape"
|
> ,"escape"
|
||||||
> --,"every"
|
> --,"every"
|
||||||
|
@ -206,12 +218,12 @@ Data types to represent different dialect options
|
||||||
> --,"false"
|
> --,"false"
|
||||||
> ,"fetch"
|
> ,"fetch"
|
||||||
> ,"filter"
|
> ,"filter"
|
||||||
> ,"first_value"
|
> -- ,"first_value"
|
||||||
> ,"float"
|
> ,"float"
|
||||||
> ,"floor"
|
> --,"floor"
|
||||||
> ,"for"
|
> ,"for"
|
||||||
> ,"foreign"
|
> ,"foreign"
|
||||||
> ,"frame_row"
|
> -- ,"frame_row" -- identifier
|
||||||
> ,"free"
|
> ,"free"
|
||||||
> ,"from"
|
> ,"from"
|
||||||
> ,"full"
|
> ,"full"
|
||||||
|
@ -225,7 +237,7 @@ Data types to represent different dialect options
|
||||||
> ,"groups"
|
> ,"groups"
|
||||||
> ,"having"
|
> ,"having"
|
||||||
> ,"hold"
|
> ,"hold"
|
||||||
> --,"hour"
|
> ,"hour"
|
||||||
> ,"identity"
|
> ,"identity"
|
||||||
> ,"in"
|
> ,"in"
|
||||||
> ,"indicator"
|
> ,"indicator"
|
||||||
|
@ -241,21 +253,21 @@ Data types to represent different dialect options
|
||||||
> ,"into"
|
> ,"into"
|
||||||
> ,"is"
|
> ,"is"
|
||||||
> ,"join"
|
> ,"join"
|
||||||
> ,"lag"
|
> --,"lag"
|
||||||
> ,"language"
|
> ,"language"
|
||||||
> ,"large"
|
> ,"large"
|
||||||
> ,"last_value"
|
> --,"last_value"
|
||||||
> ,"lateral"
|
> ,"lateral"
|
||||||
> ,"lead"
|
> --,"lead"
|
||||||
> ,"leading"
|
> ,"leading"
|
||||||
> ,"left"
|
> ,"left"
|
||||||
> ,"like"
|
> ,"like"
|
||||||
> ,"like_regex"
|
> ,"like_regex"
|
||||||
> ,"ln"
|
> --,"ln"
|
||||||
> ,"local"
|
> ,"local"
|
||||||
> ,"localtime"
|
> ,"localtime"
|
||||||
> ,"localtimestamp"
|
> ,"localtimestamp"
|
||||||
> ,"lower"
|
> --,"lower"
|
||||||
> ,"match"
|
> ,"match"
|
||||||
> --,"max"
|
> --,"max"
|
||||||
> ,"member"
|
> ,"member"
|
||||||
|
@ -263,7 +275,7 @@ Data types to represent different dialect options
|
||||||
> ,"method"
|
> ,"method"
|
||||||
> --,"min"
|
> --,"min"
|
||||||
> --,"minute"
|
> --,"minute"
|
||||||
> ,"mod"
|
> --,"mod"
|
||||||
> ,"modifies"
|
> ,"modifies"
|
||||||
> --,"module"
|
> --,"module"
|
||||||
> --,"month"
|
> --,"month"
|
||||||
|
@ -277,10 +289,10 @@ Data types to represent different dialect options
|
||||||
> ,"none"
|
> ,"none"
|
||||||
> ,"normalize"
|
> ,"normalize"
|
||||||
> ,"not"
|
> ,"not"
|
||||||
> ,"nth_value"
|
> --,"nth_value"
|
||||||
> ,"ntile"
|
> ,"ntile"
|
||||||
> --,"null"
|
> --,"null"
|
||||||
> ,"nullif"
|
> --,"nullif"
|
||||||
> ,"numeric"
|
> ,"numeric"
|
||||||
> ,"octet_length"
|
> ,"octet_length"
|
||||||
> ,"occurrences_regex"
|
> ,"occurrences_regex"
|
||||||
|
@ -307,7 +319,7 @@ Data types to represent different dialect options
|
||||||
> ,"portion"
|
> ,"portion"
|
||||||
> ,"position"
|
> ,"position"
|
||||||
> ,"position_regex"
|
> ,"position_regex"
|
||||||
> ,"power"
|
> --,"power"
|
||||||
> ,"precedes"
|
> ,"precedes"
|
||||||
> ,"precision"
|
> ,"precision"
|
||||||
> ,"prepare"
|
> ,"prepare"
|
||||||
|
@ -339,7 +351,7 @@ Data types to represent different dialect options
|
||||||
> ,"rollback"
|
> ,"rollback"
|
||||||
> ,"rollup"
|
> ,"rollup"
|
||||||
> --,"row"
|
> --,"row"
|
||||||
> ,"row_number"
|
> --,"row_number"
|
||||||
> ,"rows"
|
> ,"rows"
|
||||||
> ,"savepoint"
|
> ,"savepoint"
|
||||||
> ,"scope"
|
> ,"scope"
|
||||||
|
@ -359,19 +371,19 @@ Data types to represent different dialect options
|
||||||
> ,"sqlexception"
|
> ,"sqlexception"
|
||||||
> ,"sqlstate"
|
> ,"sqlstate"
|
||||||
> ,"sqlwarning"
|
> ,"sqlwarning"
|
||||||
> ,"sqrt"
|
> --,"sqrt"
|
||||||
> --,"start"
|
> --,"start"
|
||||||
> ,"static"
|
> ,"static"
|
||||||
> --,"stddev_pop"
|
> --,"stddev_pop"
|
||||||
> --,"stddev_samp"
|
> --,"stddev_samp"
|
||||||
> ,"submultiset"
|
> ,"submultiset"
|
||||||
> ,"substring"
|
> --,"substring"
|
||||||
> ,"substring_regex"
|
> ,"substring_regex"
|
||||||
> ,"succeeds"
|
> ,"succeeds"
|
||||||
> --,"sum"
|
> --,"sum"
|
||||||
> ,"symmetric"
|
> ,"symmetric"
|
||||||
> ,"system"
|
> ,"system"
|
||||||
> ,"system_time"
|
> --,"system_time"
|
||||||
> --,"system_user"
|
> --,"system_user"
|
||||||
> ,"table"
|
> ,"table"
|
||||||
> ,"tablesample"
|
> ,"tablesample"
|
||||||
|
@ -388,8 +400,8 @@ Data types to represent different dialect options
|
||||||
> ,"treat"
|
> ,"treat"
|
||||||
> ,"trigger"
|
> ,"trigger"
|
||||||
> ,"truncate"
|
> ,"truncate"
|
||||||
> ,"trim"
|
> --,"trim"
|
||||||
> ,"trim_array"
|
> --,"trim_array"
|
||||||
> --,"true"
|
> --,"true"
|
||||||
> ,"uescape"
|
> ,"uescape"
|
||||||
> ,"union"
|
> ,"union"
|
||||||
|
@ -412,7 +424,7 @@ Data types to represent different dialect options
|
||||||
> ,"when"
|
> ,"when"
|
||||||
> ,"whenever"
|
> ,"whenever"
|
||||||
> ,"where"
|
> ,"where"
|
||||||
> ,"width_bucket"
|
> --,"width_bucket"
|
||||||
> ,"window"
|
> ,"window"
|
||||||
> ,"with"
|
> ,"with"
|
||||||
> ,"within"
|
> ,"within"
|
||||||
|
|
|
@ -733,66 +733,7 @@ all the scalar expressions which start with an identifier
|
||||||
> then return [Name Nothing x]
|
> then return [Name Nothing x]
|
||||||
> else fail ""
|
> else fail ""
|
||||||
> in unquotedIdentifierTok [] Nothing >>= makeKeywordFunction
|
> in unquotedIdentifierTok [] Nothing >>= makeKeywordFunction
|
||||||
> keywordFunctionNames = [{-"abs"
|
> keywordFunctionNames = ["set"
|
||||||
> ,"all"
|
|
||||||
> ,"any"
|
|
||||||
> ,"array_agg"
|
|
||||||
> ,"avg"
|
|
||||||
> ,"ceil"
|
|
||||||
> ,"ceiling"
|
|
||||||
> ,"char_length"
|
|
||||||
> ,"character_length"
|
|
||||||
> ,"coalesce"
|
|
||||||
> ,"collect"
|
|
||||||
> ,"contains"
|
|
||||||
> ,"convert"
|
|
||||||
> ,"corr"
|
|
||||||
> ,"covar_pop"
|
|
||||||
> ,"covar_samp"
|
|
||||||
> ,"count"
|
|
||||||
> ,"cume_dist"
|
|
||||||
> ,"grouping"
|
|
||||||
> ,"intersection"
|
|
||||||
> ,"ln"
|
|
||||||
> ,"max"
|
|
||||||
> ,"mod"
|
|
||||||
> ,"percent_rank"
|
|
||||||
> ,"percentile_cont"
|
|
||||||
> ,"percentile_disc"
|
|
||||||
> ,"power"
|
|
||||||
> ,"rank"
|
|
||||||
> ,"regr_avgx"
|
|
||||||
> ,"regr_avgy"
|
|
||||||
> ,"regr_count"
|
|
||||||
> ,"regr_intercept"
|
|
||||||
> ,"regr_r2"
|
|
||||||
> ,"regr_slope"
|
|
||||||
> ,"regr_sxx"
|
|
||||||
> ,"regr_sxy"
|
|
||||||
> ,"regr_syy"
|
|
||||||
> ,"row"
|
|
||||||
> ,"row_number"
|
|
||||||
> ,-}"set"{-
|
|
||||||
> ,"some"
|
|
||||||
> ,"stddev_pop"
|
|
||||||
> ,"stddev_samp"
|
|
||||||
> ,"sum"
|
|
||||||
> ,"upper"
|
|
||||||
> ,"var_pop"
|
|
||||||
> ,"var_samp"
|
|
||||||
> ,"width_bucket"
|
|
||||||
> -- window functions added here too
|
|
||||||
> ,"row_number"
|
|
||||||
> ,"rank"
|
|
||||||
> ,"dense_rank"
|
|
||||||
> ,"percent_rank"
|
|
||||||
> ,"cume_dist"
|
|
||||||
> ,"ntile"
|
|
||||||
> ,"lead"
|
|
||||||
> ,"lag"
|
|
||||||
> ,"first_value"
|
|
||||||
> ,"last_value"
|
|
||||||
> ,"nth_value"-}
|
|
||||||
> ]
|
> ]
|
||||||
|
|
||||||
|
|
||||||
|
@ -2218,6 +2159,10 @@ special case parsing code to handle this (in the case of set), or it
|
||||||
is not treated as a keyword (not perfect, but if it more or less
|
is not treated as a keyword (not perfect, but if it more or less
|
||||||
works, ok for now).
|
works, ok for now).
|
||||||
|
|
||||||
|
An exception to this is that the standard type names are considered as
|
||||||
|
keywords at the moment, with a special case in the type parser to
|
||||||
|
make this work. Maybe this isn't necessary or is a bad idea.
|
||||||
|
|
||||||
It is possible to have a problem if you remove something which is a
|
It is possible to have a problem if you remove something which is a
|
||||||
keyword from this list, and still want to parse statements using it
|
keyword from this list, and still want to parse statements using it
|
||||||
as a keyword - for instance, removing things like 'from' or 'as',
|
as a keyword - for instance, removing things like 'from' or 'as',
|
||||||
|
@ -2227,8 +2172,9 @@ will likely mean many things don't parse anymore.
|
||||||
|
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
bit hacky, used to make the dialect available during parsing so
|
Used to make the dialect available during parsing so different parsers
|
||||||
different parsers can be used for different dialects
|
can be used for different dialects. Not sure if this is the best way
|
||||||
|
to do it, but it's convenient.
|
||||||
|
|
||||||
> type ParseState = Dialect
|
> type ParseState = Dialect
|
||||||
|
|
||||||
|
@ -2241,14 +2187,5 @@ different parsers can be used for different dialects
|
||||||
> d <- getState
|
> d <- getState
|
||||||
> guard (f d)
|
> guard (f d)
|
||||||
|
|
||||||
TODO: the ParseState and the Dialect argument should be turned into a
|
The dialect stuff could also be used for custom options: e.g. to only
|
||||||
flags struct. Part (or all?) of this struct is the dialect
|
|
||||||
information, but each dialect has different versions + a big set of
|
|
||||||
flags to control syntax variations within a version of a product
|
|
||||||
dialect (for instance, string and identifier parsing rules vary from
|
|
||||||
dialect to dialect and version to version, and most or all SQL DBMSs
|
|
||||||
appear to have a set of flags to further enable or disable variations
|
|
||||||
for quoting and escaping strings and identifiers).
|
|
||||||
|
|
||||||
The dialect stuff can also be used for custom options: e.g. to only
|
|
||||||
parse dml for instance.
|
parse dml for instance.
|
||||||
|
|
Loading…
Reference in a new issue