-- Data types to represent different dialect options
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
-- The export list exposes the 'Dialect' record (with all its fields) and
-- the predefined dialect values; helper definitions in this module stay
-- private.
module Language.SQL.SimpleSQL.Dialect
    (Dialect(..)
    ,ansi2011
    ,mysql
    ,postgres
    ,oracle
    ,sqlserver
    ) where
|
|
|
|
import Data.Text (Text)
|
|
import Data.Data (Data,Typeable)
|
|
|
|
-- | Used to set the dialect used for parsing and pretty printing,
-- very unfinished at the moment.
--
-- The keyword handling works as follows:
--
-- There is a list of reserved keywords. These will never parse as
-- anything other than as a keyword, unless they are in one of the
-- other lists.
--
-- There is a list of \'identifier\' keywords. These are reserved
-- keywords, with an exception that they will parse as an
-- identifier in a scalar expression. They won't parse as
-- identifiers in other places, e.g. column names or aliases.
--
-- There is a list of \'app\' keywords. These are reserved keywords,
-- with an exception that they will also parse in an \'app-like\'
-- construct - a regular function call, or any of the aggregate and
-- window variations.
--
-- There is a list of special type names. This list serves two
-- purposes - it is a list of the reserved keywords which are also
-- type names, and it is a list of all the multi word type names.
--
-- Every keyword should appear in the keywords lists, and then you can
-- add them to the other lists if you want exceptions. Most things
-- that refer to functions, types or variables that are keywords in
-- the ansi standard, can be removed from the keywords lists
-- completely with little effect. With most of the actual SQL
-- keywords, removing them from the keyword list will result in
-- lots of valid syntax no longer parsing (and probably bad parse
-- error messages too).
--
-- In the code, all special syntax which looks identical to regular
-- identifiers or function calls (apart from the name), is treated
-- like a regular identifier or function call.
--
-- It's easy to break the parser by removing the wrong words from
-- the keywords list or adding the wrong words to the other lists.
data Dialect = Dialect
    { -- | reserved keywords
      diKeywords :: [Text]
      -- | keywords with identifier exception
    , diIdentifierKeywords :: [Text]
      -- | keywords with app exception
    , diAppKeywords :: [Text]
      -- | keywords with type exception plus all the type names which
      -- are multiple words
    , diSpecialTypeNames :: [Text]
      -- | allow ansi fetch first syntax
    , diFetchFirst :: Bool
      -- | allow limit keyword (mysql, postgres,
      -- ...)
    , diLimit :: Bool
      -- | allow parsing ODBC syntax
    , diOdbc :: Bool
      -- | allow quoting identifiers with \`backquotes\`
    , diBackquotedIden :: Bool
      -- | allow quoting identifiers with [square brackets]
    , diSquareBracketQuotedIden :: Bool
      -- | allow identifiers with a leading at \@example
    , diAtIdentifier :: Bool
      -- | allow identifiers with a leading \# \#example
    , diHashIdentifier :: Bool
      -- | allow positional identifiers like this: $1
    , diPositionalArg :: Bool
      -- | allow postgres style dollar strings
    , diDollarString :: Bool
      -- | allow strings with an e - e\"example\"
    , diEString :: Bool
      -- | allow postgres style symbols
    , diPostgresSymbols :: Bool
      -- | allow sql server style symbols
    , diSqlServerSymbols :: Bool
      -- | allow sql server style for CONVERT function in format
      -- CONVERT(data_type(length), expression, style)
    , diConvertFunction :: Bool
      -- | allow creating autoincrement columns
    , diAutoincrement :: Bool
      -- | allow omitting the comma between constraint clauses
    , diNonCommaSeparatedConstraints :: Bool
    }
    deriving (Eq,Show,Read,Data,Typeable)
|
|
|
|
-- | ansi sql 2011 dialect
--
-- This is the baseline the other dialects in this module are derived
-- from: the ANSI fetch first syntax is enabled, every other syntax
-- flag is switched off, and @set@ is the only keyword with the app
-- exception.
ansi2011 :: Dialect
ansi2011 = Dialect
    { diKeywords = ansi2011ReservedKeywords
    , diIdentifierKeywords = []
    , diAppKeywords = ["set"]
    , diSpecialTypeNames = ansi2011TypeNames
    , diFetchFirst = True
    , diLimit = False
    , diOdbc = False
    , diBackquotedIden = False
    , diSquareBracketQuotedIden = False
    , diAtIdentifier = False
    , diHashIdentifier = False
    , diPositionalArg = False
    , diDollarString = False
    , diEString = False
    , diPostgresSymbols = False
    , diSqlServerSymbols = False
    , diConvertFunction = False
    , diAutoincrement = False
    , diNonCommaSeparatedConstraints = False
    }
|
|
|
|
-- | mysql dialect
--
-- Starts from 'ansi2011', turns off the ANSI fetch first syntax,
-- allows backquoted identifiers, and enables @limit@ via 'addLimit'.
mysql :: Dialect
mysql = addLimit
    -- the record update is applied to ansi2011 first, then addLimit
    -- runs on the result (record update binds tighter than application)
    (ansi2011
        { diFetchFirst = False
        , diBackquotedIden = True
        })
|
|
|
|
-- | postgresql dialect
--
-- Starts from 'ansi2011', enables positional arguments, dollar
-- strings, e-strings and the postgres symbols, and enables @limit@
-- via 'addLimit'.
postgres :: Dialect
postgres = addLimit
    -- the record update is applied to ansi2011 first, then addLimit
    -- runs on the result (record update binds tighter than application)
    (ansi2011
        { diPositionalArg = True
        , diDollarString = True
        , diEString = True
        , diPostgresSymbols = True
        })
|
|
|
|
-- | oracle dialect
--
-- Currently identical to 'ansi2011'.
oracle :: Dialect
oracle = ansi2011 -- no oracle specific overrides are set here yet
|
|
|
|
-- | microsoft sql server dialect
--
-- Starts from 'ansi2011' and enables square bracket quoted
-- identifiers, identifiers with a leading \@ or \#, ODBC syntax, the
-- sql server symbols and the sql server style CONVERT function.
-- Unlike 'mysql' and 'postgres', this does not go through 'addLimit'.
sqlserver :: Dialect
sqlserver = ansi2011
    { diSquareBracketQuotedIden = True
    , diAtIdentifier = True
    , diHashIdentifier = True
    , diOdbc = True
    , diSqlServerSymbols = True
    , diConvertFunction = True
    }
|
|
|
|
-- | Switch on support for the @limit@ keyword in a dialect: sets
-- 'diLimit' and puts @limit@ at the front of the reserved keywords.
--
-- No check is made for @limit@ already being in 'diKeywords', so
-- applying this twice gives a list with the keyword in it twice.
addLimit :: Dialect -> Dialect
addLimit d = d
    { diKeywords = "limit" : diKeywords d
    , diLimit = True
    }
|
|
|
|
|
|
{-
The keyword handling is quite strong - an alternative way to do it
would be to have as few keywords as possible, and only require them
to be quoted when this is needed to resolve a parsing ambiguity.

I don't think this is a good idea for genuine keywords (it probably is
for all the 'fake' keywords in the standard - things which are
essentially function names, or predefined variable names, or type
names, etc.).

1. working out exactly when each keyword would need to be quoted is
quite error prone, and might change as the parser implementation is
maintained - which would be terrible for users

2. it's not user friendly for the user to deal with a whole load of
special cases - either something is a keyword, then you know you must
always quote it, or it isn't, then you know you never need to quote
it

3. I think not having exceptions makes for better error messages for
the user, and a better sql code maintenance experience.

This might not match actual existing SQL products that well, some of
which I think have idiosyncratic rules about when a keyword must be
quoted. If you want to match one of these dialects exactly with this
parser, I think it will be a lot of work.
-}
|
|
|
|
-- Reserved keywords used as 'diKeywords' by the 'ansi2011' dialect.
--
-- Entries that are commented out are deliberately not part of the
-- list; the trailing notes (function, identifier, type, ...) are the
-- original author's classification of each word. The commented-out
-- entries are kept so that they are easy to switch back on.
ansi2011ReservedKeywords :: [Text]
ansi2011ReservedKeywords =
    [ -- "abs" -- function
     "all" -- keyword only?
    ,"allocate" -- keyword
    ,"alter" -- keyword
    ,"and" -- keyword
    -- ,"any" -- keyword? and function
    ,"are" -- keyword
    ,"array" -- keyword, and used in some special places, like array[...], and array(subquery)
    -- ,"array_agg" -- function
    -- ,"array_max_cardinality" -- function
    ,"as" -- keyword
    ,"asensitive" -- keyword
    ,"asymmetric" -- keyword
    ,"at" -- keyword
    ,"atomic" -- keyword
    ,"authorization" -- keyword
    -- ,"avg" -- function
    ,"begin" -- keyword
    -- ,"begin_frame" -- identifier
    -- ,"begin_partition" -- identifier
    ,"between" -- keyword
    ,"bigint" -- type
    ,"binary" -- type
    ,"blob" -- type
    ,"boolean" -- type
    ,"both" -- keyword
    ,"by" -- keyword
    ,"call" -- keyword
    ,"called" -- keyword
    -- ,"cardinality" -- function + identifier?
    ,"cascaded" -- keyword
    ,"case" -- keyword
    ,"cast" -- special function
    -- ,"ceil" -- function
    -- ,"ceiling" -- function
    ,"char" -- type (+ keyword?)
    -- ,"char_length" -- function
    ,"character" -- type
    -- ,"character_length" -- function
    ,"check" -- keyword
    ,"clob" -- type
    ,"close" -- keyword
    -- ,"coalesce" -- function
    ,"collate" -- keyword
    -- ,"collect" -- function
    ,"column" -- keyword
    ,"commit" -- keyword
    ,"condition" -- keyword
    ,"connect" -- keyword
    ,"constraint" -- keyword
    -- ,"contains" -- keyword?
    -- ,"convert" -- function?
    -- ,"corr" -- function
    ,"corresponding" -- keyword
    -- ,"count" -- function
    -- ,"covar_pop" -- function
    -- ,"covar_samp" -- function
    ,"create" -- keyword
    ,"cross" -- keyword
    ,"cube" -- keyword
    -- ,"cume_dist" -- function
    ,"current" -- keyword
    -- ,"current_catalog" -- identifier?
    -- ,"current_date" -- identifier
    -- ,"current_default_transform_group" -- identifier
    -- ,"current_path" -- identifier
    -- ,"current_role" -- identifier
    -- ,"current_row" -- identifier
    -- ,"current_schema" -- identifier
    -- ,"current_time" -- identifier
    -- ,"current_timestamp" -- identifier
    -- ,"current_transform_group_for_type" -- identifier, or keyword?
    -- ,"current_user" -- identifier
    ,"cursor" -- keyword
    ,"cycle" -- keyword
    ,"date" -- type
    -- ,"day" -- keyword? - the parser needs it to not be a keyword to parse extract at the moment
    ,"deallocate" -- keyword
    ,"dec" -- type
    ,"decimal" -- type
    ,"declare" -- keyword
    -- ,"default" -- identifier + keyword
    ,"delete" -- keyword
    -- ,"dense_rank" -- function
    ,"deref" -- keyword
    ,"describe" -- keyword
    ,"deterministic"
    ,"disconnect"
    ,"distinct"
    ,"double"
    ,"drop"
    ,"dynamic"
    ,"each"
    -- ,"element"
    ,"else"
    ,"end"
    -- ,"end_frame" -- identifier
    -- ,"end_partition" -- identifier
    ,"end-exec" -- no idea what this is
    ,"equals"
    ,"escape"
    -- ,"every"
    ,"except"
    ,"exec"
    ,"execute"
    ,"exists"
    ,"exp"
    ,"external"
    ,"extract"
    -- ,"false"
    ,"fetch"
    ,"filter"
    -- ,"first_value"
    ,"float"
    -- ,"floor"
    ,"for"
    ,"foreign"
    -- ,"frame_row" -- identifier
    ,"free"
    ,"from"
    ,"full"
    ,"function"
    -- ,"fusion"
    ,"get"
    ,"global"
    ,"grant"
    ,"group"
    -- ,"grouping"
    ,"groups"
    ,"having"
    ,"hold"
    -- ,"hour"
    ,"identity"
    ,"in"
    ,"indicator"
    ,"inner"
    ,"inout"
    ,"insensitive"
    ,"insert"
    ,"int"
    ,"integer"
    ,"intersect"
    -- ,"intersection"
    ,"interval"
    ,"into"
    ,"is"
    ,"join"
    -- ,"lag"
    ,"language"
    ,"large"
    -- ,"last_value"
    ,"lateral"
    -- ,"lead"
    ,"leading"
    ,"left"
    ,"like"
    ,"like_regex"
    -- ,"ln"
    ,"local"
    ,"localtime"
    ,"localtimestamp"
    -- ,"lower"
    ,"match"
    -- ,"max"
    ,"member"
    ,"merge"
    ,"method"
    -- ,"min"
    -- ,"minute"
    -- ,"mod"
    ,"modifies"
    -- ,"module"
    -- ,"month"
    ,"multiset"
    ,"national"
    ,"natural"
    ,"nchar"
    ,"nclob"
    ,"new"
    ,"no"
    ,"none"
    ,"normalize"
    ,"not"
    -- ,"nth_value"
    ,"ntile"
    -- ,"null"
    -- ,"nullif"
    ,"numeric"
    ,"octet_length"
    ,"occurrences_regex"
    ,"of"
    ,"offset"
    ,"old"
    ,"on"
    ,"only"
    ,"open"
    ,"or"
    ,"order"
    ,"out"
    ,"outer"
    ,"over"
    ,"overlaps"
    ,"overlay"
    ,"parameter"
    ,"partition"
    ,"percent"
    -- ,"percent_rank"
    -- ,"percentile_cont"
    -- ,"percentile_disc"
    ,"period"
    ,"portion"
    ,"position"
    ,"position_regex"
    -- ,"power"
    ,"precedes"
    ,"precision"
    ,"prepare"
    ,"primary"
    ,"procedure"
    ,"range"
    -- ,"rank"
    ,"reads"
    ,"real"
    ,"recursive"
    ,"ref"
    ,"references"
    ,"referencing"
    -- ,"regr_avgx"
    -- ,"regr_avgy"
    -- ,"regr_count"
    -- ,"regr_intercept"
    -- ,"regr_r2"
    -- ,"regr_slope"
    -- ,"regr_sxx"
    -- ,"regr_sxy"
    -- ,"regr_syy"
    ,"release"
    ,"result"
    ,"return"
    ,"returns"
    ,"revoke"
    ,"right"
    ,"rollback"
    ,"rollup"
    -- ,"row"
    -- ,"row_number"
    ,"rows"
    ,"savepoint"
    ,"scope"
    ,"scroll"
    ,"search"
    -- ,"second"
    ,"select"
    ,"sensitive"
    -- ,"session_user"
    ,"set"
    ,"similar"
    ,"smallint"
    -- ,"some"
    ,"specific"
    ,"specifictype"
    ,"sql"
    ,"sqlexception"
    ,"sqlstate"
    ,"sqlwarning"
    -- ,"sqrt"
    -- ,"start"
    ,"static"
    -- ,"stddev_pop"
    -- ,"stddev_samp"
    ,"submultiset"
    -- ,"substring"
    ,"substring_regex"
    ,"succeeds"
    -- ,"sum"
    ,"symmetric"
    ,"system"
    -- ,"system_time"
    -- ,"system_user"
    ,"table"
    ,"tablesample"
    ,"then"
    ,"time"
    ,"timestamp"
    ,"timezone_hour"
    ,"timezone_minute"
    ,"to"
    ,"trailing"
    ,"translate"
    ,"translate_regex"
    ,"translation"
    ,"treat"
    ,"trigger"
    ,"truncate"
    -- ,"trim"
    -- ,"trim_array"
    -- ,"true"
    ,"uescape"
    ,"union"
    ,"unique"
    -- ,"unknown"
    ,"unnest"
    ,"update"
    ,"upper"
    -- ,"user"
    ,"using"
    -- ,"value"
    ,"values"
    ,"value_of"
    -- ,"var_pop"
    -- ,"var_samp"
    ,"varbinary"
    ,"varchar"
    ,"varying"
    ,"versioning"
    ,"when"
    ,"whenever"
    ,"where"
    -- ,"width_bucket"
    ,"window"
    ,"with"
    ,"within"
    ,"without"
    -- ,"year"
    ]
|
|
|
|
|
|
-- Special type names used as 'diSpecialTypeNames' by the 'ansi2011'
-- dialect: first every type name made of several words, then the
-- reserved keywords which are also type names.
ansi2011TypeNames :: [Text]
ansi2011TypeNames =
    [ -- multi word type names:
     "double precision"
    ,"character varying"
    ,"char varying"
    ,"character large object"
    ,"char large object"
    ,"national character"
    ,"national char"
    ,"national character varying"
    ,"national char varying"
    ,"national character large object"
    ,"nchar large object"
    ,"nchar varying"
    ,"bit varying"
    ,"binary large object"
    ,"binary varying"
    -- reserved keyword typenames:
    ,"array"
    ,"bigint"
    ,"binary"
    ,"blob"
    ,"boolean"
    ,"char"
    ,"character"
    ,"clob"
    ,"date"
    ,"dec"
    ,"decimal"
    ,"double"
    ,"float"
    ,"int"
    ,"integer"
    ,"nchar"
    ,"nclob"
    ,"numeric"
    ,"real"
    ,"smallint"
    ,"time"
    ,"timestamp"
    ,"varchar"
    ,"varbinary"
    ]
|