1
Fork 0

switch from literate to regular haskell source

This commit is contained in:
Jake Wheat 2024-01-09 00:07:47 +00:00
parent f51600e0b1
commit ec8ce0243e
74 changed files with 11498 additions and 10996 deletions

View file

@ -0,0 +1,117 @@
-- | This module contains some generic combinators used in the
-- parser. None of the parsing which relies on the local lexers is
-- in this module. Some of these combinators have been taken from
-- other parser combinator libraries other than Parsec.
module Language.SQL.SimpleSQL.Combinators
(optionSuffix
,(<??>)
,(<??.>)
,(<??*>)
,(<$$>)
,(<$$$>)
,(<$$$$>)
,(<$$$$$>)
) where
import Control.Applicative ((<**>))
import Text.Parsec (option,many)
import Text.Parsec.String (GenParser)
{-
a possible issue with the option suffix is that it enforces left
associativity when chaining it recursively. Have to review
all these uses and figure out if any should be right associative
instead, and create an alternative suffix parser
This function style is not good, and should be replaced with chain and
<??> which has a different type
-}
optionSuffix :: (a -> GenParser t s a) -> a -> GenParser t s a
optionSuffix p a = option a (p a)
{-
parses an optional postfix element and applies its result to its left
hand result, taken from uu-parsinglib
TODO: make sure the precedence higher than <|> and lower than the
other operators so it can be used nicely
-}
(<??>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
p <??> q = p <**> option id q
{-
Help with left factored parsers. <$$> is like an analogy with <**>:
f <$> a <*> b
is like
a <**> (b <$$> f)
f <$> a <*> b <*> c
is like
a <**> (b <**> (c <$$$> f))
-}
(<$$>) :: Applicative f =>
f b -> (a -> b -> c) -> f (a -> c)
(<$$>) pa c = pa <**> pure (flip c)
(<$$$>) :: Applicative f =>
f c -> (a -> b -> c -> t) -> f (b -> a -> t)
p <$$$> c = p <**> pure (flip3 c)
(<$$$$>) :: Applicative f =>
f d -> (a -> b -> c -> d -> t) -> f (c -> b -> a -> t)
p <$$$$> c = p <**> pure (flip4 c)
(<$$$$$>) :: Applicative f =>
f e -> (a -> b -> c -> d -> e -> t) -> f (d -> c -> b -> a -> t)
p <$$$$$> c = p <**> pure (flip5 c)
{-
Surely no-one would write code like this seriously?
composing suffix parsers, not sure about the name. This is used to add
a second or more suffix parser contingent on the first suffix parser
succeeding.
-}
(<??.>) :: GenParser t s (a -> a) -> GenParser t s (a -> a) -> GenParser t s (a -> a)
(<??.>) pa pb = (.) `c` pa <*> option id pb
-- todo: fix this mess
where c = (<$>) . flip
-- 0 to many repeated applications of suffix parser
(<??*>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
{-
These are to help with left factored parsers:
a <**> (b <**> (c <**> pure (flip3 ctor)))
Not sure the names are correct, but they follow a pattern with flip
a <**> (b <**> pure (flip ctor))
-}
flip3 :: (a -> b -> c -> t) -> c -> b -> a -> t
flip3 f a b c = f c b a
flip4 :: (a -> b -> c -> d -> t) -> d -> c -> b -> a -> t
flip4 f a b c d = f d c b a
flip5 :: (a -> b -> c -> d -> e -> t) -> e -> d -> c -> b -> a -> t
flip5 f a b c d e = f e d c b a

View file

@ -1,107 +0,0 @@
> -- | This module contains some generic combinators used in the
> -- parser. None of the parsing which relies on the local lexers is
> -- in this module. Some of these combinators have been taken from
> -- other parser combinator libraries other than Parsec.
> module Language.SQL.SimpleSQL.Combinators
> (optionSuffix
> ,(<??>)
> ,(<??.>)
> ,(<??*>)
> ,(<$$>)
> ,(<$$$>)
> ,(<$$$$>)
> ,(<$$$$$>)
> ) where
> import Control.Applicative ((<**>))
> import Text.Parsec (option,many)
> import Text.Parsec.String (GenParser)
a possible issue with the option suffix is that it enforces left
associativity when chaining it recursively. Have to review
all these uses and figure out if any should be right associative
instead, and create an alternative suffix parser
This function style is not good, and should be replaced with chain and
<??> which has a different type
> optionSuffix :: (a -> GenParser t s a) -> a -> GenParser t s a
> optionSuffix p a = option a (p a)
parses an optional postfix element and applies its result to its left
hand result, taken from uu-parsinglib
TODO: make sure the precedence higher than <|> and lower than the
other operators so it can be used nicely
> (<??>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
> p <??> q = p <**> option id q
Help with left factored parsers. <$$> is like an analogy with <**>:
f <$> a <*> b
is like
a <**> (b <$$> f)
f <$> a <*> b <*> c
is like
a <**> (b <**> (c <$$$> f))
> (<$$>) :: Applicative f =>
> f b -> (a -> b -> c) -> f (a -> c)
> (<$$>) pa c = pa <**> pure (flip c)
> (<$$$>) :: Applicative f =>
> f c -> (a -> b -> c -> t) -> f (b -> a -> t)
> p <$$$> c = p <**> pure (flip3 c)
> (<$$$$>) :: Applicative f =>
> f d -> (a -> b -> c -> d -> t) -> f (c -> b -> a -> t)
> p <$$$$> c = p <**> pure (flip4 c)
> (<$$$$$>) :: Applicative f =>
> f e -> (a -> b -> c -> d -> e -> t) -> f (d -> c -> b -> a -> t)
> p <$$$$$> c = p <**> pure (flip5 c)
Surely no-one would write code like this seriously?
composing suffix parsers, not sure about the name. This is used to add
a second or more suffix parser contingent on the first suffix parser
succeeding.
> (<??.>) :: GenParser t s (a -> a) -> GenParser t s (a -> a) -> GenParser t s (a -> a)
> (<??.>) pa pb = (.) `c` pa <*> option id pb
> -- todo: fix this mess
> where c = (<$>) . flip
0 to many repeated applications of suffix parser
> (<??*>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
> p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
These are to help with left factored parsers:
a <**> (b <**> (c <**> pure (flip3 ctor)))
Not sure the names are correct, but they follow a pattern with flip
a <**> (b <**> pure (flip ctor))
> flip3 :: (a -> b -> c -> t) -> c -> b -> a -> t
> flip3 f a b c = f c b a
> flip4 :: (a -> b -> c -> d -> t) -> d -> c -> b -> a -> t
> flip4 f a b c d = f d c b a
> flip5 :: (a -> b -> c -> d -> e -> t) -> e -> d -> c -> b -> a -> t
> flip5 f a b c d e = f e d c b a

View file

@ -0,0 +1,553 @@
-- Data types to represent different dialect options
{-# LANGUAGE DeriveDataTypeable #-}
module Language.SQL.SimpleSQL.Dialect
(Dialect(..)
,ansi2011
,mysql
,postgres
,oracle
,sqlserver
) where
import Data.Data
-- | Used to set the dialect used for parsing and pretty printing,
-- very unfinished at the moment.
--
-- The keyword handling works as follows:
--
-- There is a list of reserved keywords. These will never parse as
-- anything other than as a keyword, unless they are in one of the
-- other lists.
--
-- There is a list of \'identifier\' keywords. These are reserved
-- keywords, with an exception that they will parse as an
-- identifier in a scalar expression. They won't parse as
-- identifiers in other places, e.g. column names or aliases.
--
-- There is a list of \'app\' keywords. These are reserved keywords,
-- with an exception that they will also parse in an \'app-like\'
-- construct - a regular function call, or any of the aggregate and
-- window variations.
--
-- There is a list of special type names. This list serves two
-- purposes - it is a list of the reserved keywords which are also
-- type names, and it is a list of all the multi word type names.
--
-- Every keyword should appear in the keywords lists, and then you can
-- add them to the other lists if you want exceptions. Most things
-- that refer to functions, types or variables that are keywords in
-- the ansi standard, can be removed from the keywords lists
-- completely with little effect. With most of the actual SQL
-- keywords, removing them from the keyword list will result in
-- lots of valid syntax no longer parsing (and probably bad parse
-- error messages too).
--
-- In the code, all special syntax which looks identical to regular
-- identifiers or function calls (apart from the name), is treated
-- like a regular identifier or function call.
--
-- It's easy to break the parser by removing the wrong words from
-- the keywords list or adding the wrong words to the other lists.
data Dialect = Dialect
{ -- | reserved keywords
diKeywords :: [String]
-- | keywords with identifier exception
,diIdentifierKeywords :: [String]
-- | keywords with app exception
,diAppKeywords :: [String]
-- | keywords with type exception plus all the type names which
-- are multiple words
,diSpecialTypeNames :: [String]
-- | allow ansi fetch first syntax
,diFetchFirst :: Bool
-- | allow limit keyword (mysql, postgres,
-- ...)
,diLimit :: Bool
-- | allow parsing ODBC syntax
,diOdbc :: Bool
-- | allow quoting identifiers with \`backquotes\`
,diBackquotedIden :: Bool
-- | allow quoting identifiers with [square brackets]
,diSquareBracketQuotedIden :: Bool
-- | allow identifiers with a leading at @example
,diAtIdentifier :: Bool
-- | allow identifiers with a leading \# \#example
,diHashIdentifier :: Bool
-- | allow positional identifiers like this: $1
,diPositionalArg :: Bool
-- | allow postgres style dollar strings
,diDollarString :: Bool
-- | allow strings with an e - e"example"
,diEString :: Bool
-- | allow postgres style symbols
,diPostgresSymbols :: Bool
-- | allow sql server style symbols
,diSqlServerSymbols :: Bool
-- | allow sql server style for CONVERT function in format CONVERT(data_type(length), expression, style)
,diConvertFunction :: Bool
-- | allow creating autoincrement columns
,diAutoincrement :: Bool
-- | allow omitting the comma between constraint clauses
,diNonCommaSeparatedConstraints :: Bool
}
deriving (Eq,Show,Read,Data,Typeable)
-- | ansi sql 2011 dialect
ansi2011 :: Dialect
ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords
,diIdentifierKeywords = []
,diAppKeywords = ["set"]
,diSpecialTypeNames = ansi2011TypeNames
,diFetchFirst = True
,diLimit = False
,diOdbc = False
,diBackquotedIden = False
,diSquareBracketQuotedIden = False
,diAtIdentifier = False
,diHashIdentifier = False
,diPositionalArg = False
,diDollarString = False
,diEString = False
,diPostgresSymbols = False
,diSqlServerSymbols = False
,diConvertFunction = False
,diAutoincrement = False
,diNonCommaSeparatedConstraints = False
}
-- | mysql dialect
mysql :: Dialect
mysql = addLimit ansi2011 {diFetchFirst = False
,diBackquotedIden = True
}
-- | postgresql dialect
postgres :: Dialect
postgres = addLimit ansi2011 {diPositionalArg = True
,diDollarString = True
,diEString = True
,diPostgresSymbols = True}
-- | oracle dialect
oracle :: Dialect
oracle = ansi2011 -- {}
-- | microsoft sql server dialect
sqlserver :: Dialect
sqlserver = ansi2011 {diSquareBracketQuotedIden = True
,diAtIdentifier = True
,diHashIdentifier = True
,diOdbc = True
,diSqlServerSymbols = True
,diConvertFunction = True}
addLimit :: Dialect -> Dialect
addLimit d = d {diKeywords = "limit": diKeywords d
,diLimit = True}
{-
The keyword handling is quite strong - an alternative way to do it
would be to have as few keywords as possible, and only require them
to be quoted when this is needed to resolve a parsing ambiguity.
I don't think this is a good idea for genuine keywords (it probably is
for all the 'fake' keywords in the standard - things which are
essentially function names, or predefined variable names, or type
names, eetc.).
1. working out exactly when each keyword would need to be quoted is
quite error prone, and might change as the parser implementation is
maintained - which would be terrible for users
2. it's not user friendly for the user to deal with a whole load of
special cases - either something is a keyword, then you know you must
always quote it, or it isn't, then you know you never need to quote
it
3. I think not having exceptions makes for better error messages for
the user, and a better sql code maintenance experience.
This might not match actual existing SQL products that well, some of
which I think have idiosyncratic rules about when a keyword must be
quoted. If you want to match one of these dialects exactly with this
parser, I think it will be a lot of work.
-}
ansi2011ReservedKeywords :: [String]
ansi2011ReservedKeywords =
[--"abs" -- function
"all" -- keyword only?
,"allocate" -- keyword
,"alter" -- keyword
,"and" -- keyword
--,"any" -- keyword? and function
,"are" -- keyword
,"array" -- keyword, and used in some special places, like array[...], and array(subquery)
--,"array_agg" -- function
-- ,"array_max_cardinality" -- function
,"as" -- keyword
,"asensitive" -- keyword
,"asymmetric" -- keyword
,"at" -- keyword
,"atomic" -- keyword
,"authorization" -- keyword
--,"avg" -- function
,"begin" -- keyword
--,"begin_frame" -- identifier
--,"begin_partition" -- identifier
,"between" -- keyword
,"bigint" -- type
,"binary" -- type
,"blob" -- type
,"boolean" -- type
,"both" -- keyword
,"by" -- keyword
,"call" -- keyword
,"called" -- keyword
-- ,"cardinality" -- function + identifier?
,"cascaded" -- keyword
,"case" -- keyword
,"cast" -- special function
-- ,"ceil" -- function
-- ,"ceiling" -- function
,"char" -- type (+ keyword?)
--,"char_length" -- function
,"character" -- type
--,"character_length" -- function
,"check" -- keyword
,"clob" -- type
,"close" -- keyword
-- ,"coalesce" -- function
,"collate" -- keyword
--,"collect" -- function
,"column" -- keyword
,"commit" -- keyword
,"condition" -- keyword
,"connect" -- keyword
,"constraint" --keyword
--,"contains" -- keyword?
--,"convert" -- function?
--,"corr" -- function
,"corresponding" --keyword
--,"count" --function
--,"covar_pop" -- function
--,"covar_samp" --function
,"create" -- keyword
,"cross" -- keyword
,"cube" -- keyword
--,"cume_dist" -- function
,"current" -- keyword
-- ,"current_catalog" --identifier?
--,"current_date" -- identifier
--,"current_default_transform_group" -- identifier
--,"current_path" -- identifier
--,"current_role" -- identifier
-- ,"current_row" -- identifier
-- ,"current_schema" -- identifier
-- ,"current_time" -- identifier
--,"current_timestamp" -- identifier
--,"current_transform_group_for_type" -- identifier, or keyword?
--,"current_user" -- identifier
,"cursor" -- keyword
,"cycle" --keyword
,"date" -- type
--,"day" -- keyword? - the parser needs it to not be a keyword to parse extract at the moment
,"deallocate" -- keyword
,"dec" -- type
,"decimal" -- type
,"declare" -- keyword
--,"default" -- identifier + keyword
,"delete" -- keyword
--,"dense_rank" -- functino
,"deref" -- keyword
,"describe" -- keyword
,"deterministic"
,"disconnect"
,"distinct"
,"double"
,"drop"
,"dynamic"
,"each"
--,"element"
,"else"
,"end"
-- ,"end_frame" -- identifier
-- ,"end_partition" -- identifier
,"end-exec" -- no idea what this is
,"equals"
,"escape"
--,"every"
,"except"
,"exec"
,"execute"
,"exists"
,"exp"
,"external"
,"extract"
--,"false"
,"fetch"
,"filter"
-- ,"first_value"
,"float"
--,"floor"
,"for"
,"foreign"
-- ,"frame_row" -- identifier
,"free"
,"from"
,"full"
,"function"
--,"fusion"
,"get"
,"global"
,"grant"
,"group"
--,"grouping"
,"groups"
,"having"
,"hold"
--,"hour"
,"identity"
,"in"
,"indicator"
,"inner"
,"inout"
,"insensitive"
,"insert"
,"int"
,"integer"
,"intersect"
--,"intersection"
,"interval"
,"into"
,"is"
,"join"
--,"lag"
,"language"
,"large"
--,"last_value"
,"lateral"
--,"lead"
,"leading"
,"left"
,"like"
,"like_regex"
--,"ln"
,"local"
,"localtime"
,"localtimestamp"
--,"lower"
,"match"
--,"max"
,"member"
,"merge"
,"method"
--,"min"
--,"minute"
--,"mod"
,"modifies"
--,"module"
--,"month"
,"multiset"
,"national"
,"natural"
,"nchar"
,"nclob"
,"new"
,"no"
,"none"
,"normalize"
,"not"
--,"nth_value"
,"ntile"
--,"null"
--,"nullif"
,"numeric"
,"octet_length"
,"occurrences_regex"
,"of"
,"offset"
,"old"
,"on"
,"only"
,"open"
,"or"
,"order"
,"out"
,"outer"
,"over"
,"overlaps"
,"overlay"
,"parameter"
,"partition"
,"percent"
--,"percent_rank"
--,"percentile_cont"
--,"percentile_disc"
,"period"
,"portion"
,"position"
,"position_regex"
--,"power"
,"precedes"
,"precision"
,"prepare"
,"primary"
,"procedure"
,"range"
--,"rank"
,"reads"
,"real"
,"recursive"
,"ref"
,"references"
,"referencing"
--,"regr_avgx"
--,"regr_avgy"
--,"regr_count"
--,"regr_intercept"
--,"regr_r2"
--,"regr_slope"
--,"regr_sxx"
--,"regr_sxy"
--,"regr_syy"
,"release"
,"result"
,"return"
,"returns"
,"revoke"
,"right"
,"rollback"
,"rollup"
--,"row"
--,"row_number"
,"rows"
,"savepoint"
,"scope"
,"scroll"
,"search"
--,"second"
,"select"
,"sensitive"
--,"session_user"
,"set"
,"similar"
,"smallint"
--,"some"
,"specific"
,"specifictype"
,"sql"
,"sqlexception"
,"sqlstate"
,"sqlwarning"
--,"sqrt"
--,"start"
,"static"
--,"stddev_pop"
--,"stddev_samp"
,"submultiset"
--,"substring"
,"substring_regex"
,"succeeds"
--,"sum"
,"symmetric"
,"system"
--,"system_time"
--,"system_user"
,"table"
,"tablesample"
,"then"
,"time"
,"timestamp"
,"timezone_hour"
,"timezone_minute"
,"to"
,"trailing"
,"translate"
,"translate_regex"
,"translation"
,"treat"
,"trigger"
,"truncate"
--,"trim"
--,"trim_array"
--,"true"
,"uescape"
,"union"
,"unique"
--,"unknown"
,"unnest"
,"update"
,"upper"
--,"user"
,"using"
--,"value"
,"values"
,"value_of"
--,"var_pop"
--,"var_samp"
,"varbinary"
,"varchar"
,"varying"
,"versioning"
,"when"
,"whenever"
,"where"
--,"width_bucket"
,"window"
,"with"
,"within"
,"without"
--,"year"
]
ansi2011TypeNames :: [String]
ansi2011TypeNames =
["double precision"
,"character varying"
,"char varying"
,"character large object"
,"char large object"
,"national character"
,"national char"
,"national character varying"
,"national char varying"
,"national character large object"
,"nchar large object"
,"nchar varying"
,"bit varying"
,"binary large object"
,"binary varying"
-- reserved keyword typenames:
,"array"
,"bigint"
,"binary"
,"blob"
,"boolean"
,"char"
,"character"
,"clob"
,"date"
,"dec"
,"decimal"
,"double"
,"float"
,"int"
,"integer"
,"nchar"
,"nclob"
,"numeric"
,"real"
,"smallint"
,"time"
,"timestamp"
,"varchar"
,"varbinary"
]

View file

@ -1,551 +0,0 @@
Data types to represent different dialect options
> {-# LANGUAGE DeriveDataTypeable #-}
> module Language.SQL.SimpleSQL.Dialect
> (Dialect(..)
> ,ansi2011
> ,mysql
> ,postgres
> ,oracle
> ,sqlserver
> ) where
> import Data.Data
> -- | Used to set the dialect used for parsing and pretty printing,
> -- very unfinished at the moment.
> --
> -- The keyword handling works as follows:
> --
> -- There is a list of reserved keywords. These will never parse as
> -- anything other than as a keyword, unless they are in one of the
> -- other lists.
> --
> -- There is a list of \'identifier\' keywords. These are reserved
> -- keywords, with an exception that they will parse as an
> -- identifier in a scalar expression. They won't parse as
> -- identifiers in other places, e.g. column names or aliases.
> --
> -- There is a list of \'app\' keywords. These are reserved keywords,
> -- with an exception that they will also parse in an \'app-like\'
> -- construct - a regular function call, or any of the aggregate and
> -- window variations.
> --
> -- There is a list of special type names. This list serves two
> -- purposes - it is a list of the reserved keywords which are also
> -- type names, and it is a list of all the multi word type names.
> --
> -- Every keyword should appear in the keywords lists, and then you can
> -- add them to the other lists if you want exceptions. Most things
> -- that refer to functions, types or variables that are keywords in
> -- the ansi standard, can be removed from the keywords lists
> -- completely with little effect. With most of the actual SQL
> -- keywords, removing them from the keyword list will result in
> -- lots of valid syntax no longer parsing (and probably bad parse
> -- error messages too).
> --
> -- In the code, all special syntax which looks identical to regular
> -- identifiers or function calls (apart from the name), is treated
> -- like a regular identifier or function call.
> --
> -- It's easy to break the parser by removing the wrong words from
> -- the keywords list or adding the wrong words to the other lists.
>
> data Dialect = Dialect
> { -- | reserved keywords
> diKeywords :: [String]
> -- | keywords with identifier exception
> ,diIdentifierKeywords :: [String]
> -- | keywords with app exception
> ,diAppKeywords :: [String]
> -- | keywords with type exception plus all the type names which
> -- are multiple words
> ,diSpecialTypeNames :: [String]
> -- | allow ansi fetch first syntax
> ,diFetchFirst :: Bool
> -- | allow limit keyword (mysql, postgres,
> -- ...)
> ,diLimit :: Bool
> -- | allow parsing ODBC syntax
> ,diOdbc :: Bool
> -- | allow quoting identifiers with \`backquotes\`
> ,diBackquotedIden :: Bool
> -- | allow quoting identifiers with [square brackets]
> ,diSquareBracketQuotedIden :: Bool
> -- | allow identifiers with a leading at @example
> ,diAtIdentifier :: Bool
> -- | allow identifiers with a leading \# \#example
> ,diHashIdentifier :: Bool
> -- | allow positional identifiers like this: $1
> ,diPositionalArg :: Bool
> -- | allow postgres style dollar strings
> ,diDollarString :: Bool
> -- | allow strings with an e - e"example"
> ,diEString :: Bool
> -- | allow postgres style symbols
> ,diPostgresSymbols :: Bool
> -- | allow sql server style symbols
> ,diSqlServerSymbols :: Bool
> -- | allow sql server style for CONVERT function in format CONVERT(data_type(length), expression, style)
> ,diConvertFunction :: Bool
> -- | allow creating autoincrement columns
> ,diAutoincrement :: Bool
> -- | allow omitting the comma between constraint clauses
> ,diNonCommaSeparatedConstraints :: Bool
> }
> deriving (Eq,Show,Read,Data,Typeable)
> -- | ansi sql 2011 dialect
> ansi2011 :: Dialect
> ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords
> ,diIdentifierKeywords = []
> ,diAppKeywords = ["set"]
> ,diSpecialTypeNames = ansi2011TypeNames
> ,diFetchFirst = True
> ,diLimit = False
> ,diOdbc = False
> ,diBackquotedIden = False
> ,diSquareBracketQuotedIden = False
> ,diAtIdentifier = False
> ,diHashIdentifier = False
> ,diPositionalArg = False
> ,diDollarString = False
> ,diEString = False
> ,diPostgresSymbols = False
> ,diSqlServerSymbols = False
> ,diConvertFunction = False
> ,diAutoincrement = False
> ,diNonCommaSeparatedConstraints = False
> }
> -- | mysql dialect
> mysql :: Dialect
> mysql = addLimit ansi2011 {diFetchFirst = False
> ,diBackquotedIden = True
> }
> -- | postgresql dialect
> postgres :: Dialect
> postgres = addLimit ansi2011 {diPositionalArg = True
> ,diDollarString = True
> ,diEString = True
> ,diPostgresSymbols = True}
> -- | oracle dialect
> oracle :: Dialect
> oracle = ansi2011 -- {}
> -- | microsoft sql server dialect
> sqlserver :: Dialect
> sqlserver = ansi2011 {diSquareBracketQuotedIden = True
> ,diAtIdentifier = True
> ,diHashIdentifier = True
> ,diOdbc = True
> ,diSqlServerSymbols = True
> ,diConvertFunction = True}
> addLimit :: Dialect -> Dialect
> addLimit d = d {diKeywords = "limit": diKeywords d
> ,diLimit = True}
The keyword handling is quite strong - an alternative way to do it
would be to have as few keywords as possible, and only require them
to be quoted when this is needed to resolve a parsing ambiguity.
I don't think this is a good idea for genuine keywords (it probably is
for all the 'fake' keywords in the standard - things which are
essentially function names, or predefined variable names, or type
names, eetc.).
1. working out exactly when each keyword would need to be quoted is
quite error prone, and might change as the parser implementation is
maintained - which would be terrible for users
2. it's not user friendly for the user to deal with a whole load of
special cases - either something is a keyword, then you know you must
always quote it, or it isn't, then you know you never need to quote
it
3. I think not having exceptions makes for better error messages for
the user, and a better sql code maintenance experience.
This might not match actual existing SQL products that well, some of
which I think have idiosyncratic rules about when a keyword must be
quoted. If you want to match one of these dialects exactly with this
parser, I think it will be a lot of work.
> ansi2011ReservedKeywords :: [String]
> ansi2011ReservedKeywords =
> [--"abs" -- function
> "all" -- keyword only?
> ,"allocate" -- keyword
> ,"alter" -- keyword
> ,"and" -- keyword
> --,"any" -- keyword? and function
> ,"are" -- keyword
> ,"array" -- keyword, and used in some special places, like array[...], and array(subquery)
> --,"array_agg" -- function
> -- ,"array_max_cardinality" -- function
> ,"as" -- keyword
> ,"asensitive" -- keyword
> ,"asymmetric" -- keyword
> ,"at" -- keyword
> ,"atomic" -- keyword
> ,"authorization" -- keyword
> --,"avg" -- function
> ,"begin" -- keyword
> --,"begin_frame" -- identifier
> --,"begin_partition" -- identifier
> ,"between" -- keyword
> ,"bigint" -- type
> ,"binary" -- type
> ,"blob" -- type
> ,"boolean" -- type
> ,"both" -- keyword
> ,"by" -- keyword
> ,"call" -- keyword
> ,"called" -- keyword
> -- ,"cardinality" -- function + identifier?
> ,"cascaded" -- keyword
> ,"case" -- keyword
> ,"cast" -- special function
> -- ,"ceil" -- function
> -- ,"ceiling" -- function
> ,"char" -- type (+ keyword?)
> --,"char_length" -- function
> ,"character" -- type
> --,"character_length" -- function
> ,"check" -- keyword
> ,"clob" -- type
> ,"close" -- keyword
> -- ,"coalesce" -- function
> ,"collate" -- keyword
> --,"collect" -- function
> ,"column" -- keyword
> ,"commit" -- keyword
> ,"condition" -- keyword
> ,"connect" -- keyword
> ,"constraint" --keyword
> --,"contains" -- keyword?
> --,"convert" -- function?
> --,"corr" -- function
> ,"corresponding" --keyword
> --,"count" --function
> --,"covar_pop" -- function
> --,"covar_samp" --function
> ,"create" -- keyword
> ,"cross" -- keyword
> ,"cube" -- keyword
> --,"cume_dist" -- function
> ,"current" -- keyword
> -- ,"current_catalog" --identifier?
> --,"current_date" -- identifier
> --,"current_default_transform_group" -- identifier
> --,"current_path" -- identifier
> --,"current_role" -- identifier
> -- ,"current_row" -- identifier
> -- ,"current_schema" -- identifier
> -- ,"current_time" -- identifier
> --,"current_timestamp" -- identifier
> --,"current_transform_group_for_type" -- identifier, or keyword?
> --,"current_user" -- identifier
> ,"cursor" -- keyword
> ,"cycle" --keyword
> ,"date" -- type
> --,"day" -- keyword? - the parser needs it to not be a keyword to parse extract at the moment
> ,"deallocate" -- keyword
> ,"dec" -- type
> ,"decimal" -- type
> ,"declare" -- keyword
> --,"default" -- identifier + keyword
> ,"delete" -- keyword
> --,"dense_rank" -- functino
> ,"deref" -- keyword
> ,"describe" -- keyword
> ,"deterministic"
> ,"disconnect"
> ,"distinct"
> ,"double"
> ,"drop"
> ,"dynamic"
> ,"each"
> --,"element"
> ,"else"
> ,"end"
> -- ,"end_frame" -- identifier
> -- ,"end_partition" -- identifier
> ,"end-exec" -- no idea what this is
> ,"equals"
> ,"escape"
> --,"every"
> ,"except"
> ,"exec"
> ,"execute"
> ,"exists"
> ,"exp"
> ,"external"
> ,"extract"
> --,"false"
> ,"fetch"
> ,"filter"
> -- ,"first_value"
> ,"float"
> --,"floor"
> ,"for"
> ,"foreign"
> -- ,"frame_row" -- identifier
> ,"free"
> ,"from"
> ,"full"
> ,"function"
> --,"fusion"
> ,"get"
> ,"global"
> ,"grant"
> ,"group"
> --,"grouping"
> ,"groups"
> ,"having"
> ,"hold"
> --,"hour"
> ,"identity"
> ,"in"
> ,"indicator"
> ,"inner"
> ,"inout"
> ,"insensitive"
> ,"insert"
> ,"int"
> ,"integer"
> ,"intersect"
> --,"intersection"
> ,"interval"
> ,"into"
> ,"is"
> ,"join"
> --,"lag"
> ,"language"
> ,"large"
> --,"last_value"
> ,"lateral"
> --,"lead"
> ,"leading"
> ,"left"
> ,"like"
> ,"like_regex"
> --,"ln"
> ,"local"
> ,"localtime"
> ,"localtimestamp"
> --,"lower"
> ,"match"
> --,"max"
> ,"member"
> ,"merge"
> ,"method"
> --,"min"
> --,"minute"
> --,"mod"
> ,"modifies"
> --,"module"
> --,"month"
> ,"multiset"
> ,"national"
> ,"natural"
> ,"nchar"
> ,"nclob"
> ,"new"
> ,"no"
> ,"none"
> ,"normalize"
> ,"not"
> --,"nth_value"
> ,"ntile"
> --,"null"
> --,"nullif"
> ,"numeric"
> ,"octet_length"
> ,"occurrences_regex"
> ,"of"
> ,"offset"
> ,"old"
> ,"on"
> ,"only"
> ,"open"
> ,"or"
> ,"order"
> ,"out"
> ,"outer"
> ,"over"
> ,"overlaps"
> ,"overlay"
> ,"parameter"
> ,"partition"
> ,"percent"
> --,"percent_rank"
> --,"percentile_cont"
> --,"percentile_disc"
> ,"period"
> ,"portion"
> ,"position"
> ,"position_regex"
> --,"power"
> ,"precedes"
> ,"precision"
> ,"prepare"
> ,"primary"
> ,"procedure"
> ,"range"
> --,"rank"
> ,"reads"
> ,"real"
> ,"recursive"
> ,"ref"
> ,"references"
> ,"referencing"
> --,"regr_avgx"
> --,"regr_avgy"
> --,"regr_count"
> --,"regr_intercept"
> --,"regr_r2"
> --,"regr_slope"
> --,"regr_sxx"
> --,"regr_sxy"
> --,"regr_syy"
> ,"release"
> ,"result"
> ,"return"
> ,"returns"
> ,"revoke"
> ,"right"
> ,"rollback"
> ,"rollup"
> --,"row"
> --,"row_number"
> ,"rows"
> ,"savepoint"
> ,"scope"
> ,"scroll"
> ,"search"
> --,"second"
> ,"select"
> ,"sensitive"
> --,"session_user"
> ,"set"
> ,"similar"
> ,"smallint"
> --,"some"
> ,"specific"
> ,"specifictype"
> ,"sql"
> ,"sqlexception"
> ,"sqlstate"
> ,"sqlwarning"
> --,"sqrt"
> --,"start"
> ,"static"
> --,"stddev_pop"
> --,"stddev_samp"
> ,"submultiset"
> --,"substring"
> ,"substring_regex"
> ,"succeeds"
> --,"sum"
> ,"symmetric"
> ,"system"
> --,"system_time"
> --,"system_user"
> ,"table"
> ,"tablesample"
> ,"then"
> ,"time"
> ,"timestamp"
> ,"timezone_hour"
> ,"timezone_minute"
> ,"to"
> ,"trailing"
> ,"translate"
> ,"translate_regex"
> ,"translation"
> ,"treat"
> ,"trigger"
> ,"truncate"
> --,"trim"
> --,"trim_array"
> --,"true"
> ,"uescape"
> ,"union"
> ,"unique"
> --,"unknown"
> ,"unnest"
> ,"update"
> ,"upper"
> --,"user"
> ,"using"
> --,"value"
> ,"values"
> ,"value_of"
> --,"var_pop"
> --,"var_samp"
> ,"varbinary"
> ,"varchar"
> ,"varying"
> ,"versioning"
> ,"when"
> ,"whenever"
> ,"where"
> --,"width_bucket"
> ,"window"
> ,"with"
> ,"within"
> ,"without"
> --,"year"
> ]
> ansi2011TypeNames :: [String]
> ansi2011TypeNames =
> ["double precision"
> ,"character varying"
> ,"char varying"
> ,"character large object"
> ,"char large object"
> ,"national character"
> ,"national char"
> ,"national character varying"
> ,"national char varying"
> ,"national character large object"
> ,"nchar large object"
> ,"nchar varying"
> ,"bit varying"
> ,"binary large object"
> ,"binary varying"
> -- reserved keyword typenames:
> ,"array"
> ,"bigint"
> ,"binary"
> ,"blob"
> ,"boolean"
> ,"char"
> ,"character"
> ,"clob"
> ,"date"
> ,"dec"
> ,"decimal"
> ,"double"
> ,"float"
> ,"int"
> ,"integer"
> ,"nchar"
> ,"nclob"
> ,"numeric"
> ,"real"
> ,"smallint"
> ,"time"
> ,"timestamp"
> ,"varchar"
> ,"varbinary"
> ]

View file

@ -0,0 +1,53 @@
-- | helpers to work with parsec errors more nicely
module Language.SQL.SimpleSQL.Errors
(ParseError(..)
--,formatError
,convParseError
) where
import Text.Parsec (sourceColumn,sourceLine,sourceName,errorPos)
import qualified Text.Parsec as P (ParseError)
-- | Type to represent parse errors.
data ParseError = ParseError
{peErrorString :: String
-- ^ contains the error message
,peFilename :: FilePath
-- ^ filename location for the error
,pePosition :: (Int,Int)
-- ^ line number and column number location for the error
,peFormattedError :: String
-- ^ formatted error with the position, error
-- message and source context
} deriving (Eq,Show)
convParseError :: String -> P.ParseError -> ParseError
convParseError src e =
ParseError
{peErrorString = show e
,peFilename = sourceName p
,pePosition = (sourceLine p, sourceColumn p)
,peFormattedError = formatError src e}
where
p = errorPos e
{-
format the error more nicely: emacs format for positioning, plus
context
-}
formatError :: String -> P.ParseError -> String
formatError src e =
sourceName p ++ ":" ++ show (sourceLine p)
++ ":" ++ show (sourceColumn p) ++ ":"
++ context
++ show e
where
context =
let lns = take 1 $ drop (sourceLine p - 1) $ lines src
in case lns of
[x] -> "\n" ++ x ++ "\n"
++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
_ -> ""
p = errorPos e

View file

@ -1,51 +0,0 @@
> -- | helpers to work with parsec errors more nicely
> module Language.SQL.SimpleSQL.Errors
> (ParseError(..)
> --,formatError
> ,convParseError
> ) where
> import Text.Parsec (sourceColumn,sourceLine,sourceName,errorPos)
> import qualified Text.Parsec as P (ParseError)
> -- | Type to represent parse errors.
> data ParseError = ParseError
> {peErrorString :: String
> -- ^ contains the error message
> ,peFilename :: FilePath
> -- ^ filename location for the error
> ,pePosition :: (Int,Int)
> -- ^ line number and column number location for the error
> ,peFormattedError :: String
> -- ^ formatted error with the position, error
> -- message and source context
> } deriving (Eq,Show)
> convParseError :: String -> P.ParseError -> ParseError
> convParseError src e =
> ParseError
> {peErrorString = show e
> ,peFilename = sourceName p
> ,pePosition = (sourceLine p, sourceColumn p)
> ,peFormattedError = formatError src e}
> where
> p = errorPos e
format the error more nicely: emacs format for positioning, plus
context
> formatError :: String -> P.ParseError -> String
> formatError src e =
> sourceName p ++ ":" ++ show (sourceLine p)
> ++ ":" ++ show (sourceColumn p) ++ ":"
> ++ context
> ++ show e
> where
> context =
> let lns = take 1 $ drop (sourceLine p - 1) $ lines src
> in case lns of
> [x] -> "\n" ++ x ++ "\n"
> ++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
> _ -> ""
> p = errorPos e

View file

@ -0,0 +1,751 @@
{-
The parser uses a separate lexer for two reasons:
1. sql syntax is very awkward to parse, the separate lexer makes it
easier to handle this in most places (in some places it makes it
harder or impossible, the fix is to switch to something better than
parsec)
2. using a separate lexer gives a huge speed boost because it reduces
backtracking. (We could get this by making the parsing code a lot more
complex also.)
3. we can test the lexer relatively exhaustively, then even when we
don't do nearly as comprehensive testing on the syntax level, we still
have a relatively high assurance of the low level of bugs. This is
much more difficult to get parity with when testing the syntax parser
directly without the separately testing lexing stage.
-}
-- | Lexer for SQL.
{-# LANGUAGE TupleSections #-}
module Language.SQL.SimpleSQL.Lex
(Token(..)
,lexSQL
,prettyToken
,prettyTokens
,ParseError(..)
,tokenListWillPrintAndLex
,ansi2011
) where
import Language.SQL.SimpleSQL.Dialect
import Text.Parsec (option,string,manyTill,anyChar
,try,string,many1,oneOf,digit,(<|>),choice,char,eof
,many,runParser,lookAhead,satisfy
,setPosition,getPosition
,setSourceColumn,setSourceLine
,sourceName, setSourceName
,sourceLine, sourceColumn
,notFollowedBy)
import Language.SQL.SimpleSQL.Combinators
import Language.SQL.SimpleSQL.Errors
import Control.Applicative hiding ((<|>), many)
import Data.Char
import Control.Monad
import Prelude hiding (takeWhile)
import Text.Parsec.String (Parser)
import Data.Maybe
-- | Represents a lexed token
data Token
-- | A symbol (in ansi dialect) is one of the following
--
-- * multi char symbols <> \<= \>= != ||
-- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( )
--
= Symbol String
-- | This is an identifier or keyword. The first field is
-- the quotes used, or nothing if no quotes were used. The quotes
-- can be " or u& or something dialect specific like []
| Identifier (Maybe (String,String)) String
-- | This is a prefixed variable symbol, such as :var, @var or #var
-- (only :var is used in ansi dialect)
| PrefixedVariable Char String
-- | This is a positional arg identifier e.g. $1
| PositionalArg Int
-- | This is a string literal. The first two fields are the --
-- start and end quotes, which are usually both ', but can be
-- the character set (one of nNbBxX, or u&, U&), or a dialect
-- specific string quoting (such as $$ in postgres)
| SqlString String String String
-- | A number literal (integral or otherwise), stored in original format
-- unchanged
| SqlNumber String
-- | Whitespace, one or more of space, tab or newline.
| Whitespace String
-- | A commented line using --, contains every character starting with the
-- \'--\' and including the terminating newline character if there is one
-- - this will be missing if the last line in the source is a line comment
-- with no trailing newline
| LineComment String
-- | A block comment, \/* stuff *\/, includes the comment delimiters
| BlockComment String
deriving (Eq,Show)
-- | Pretty printing, if you lex a bunch of tokens, then pretty
-- print them, should should get back exactly the same string
prettyToken :: Dialect -> Token -> String
prettyToken _ (Symbol s) = s
prettyToken _ (Identifier Nothing t) = t
prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
prettyToken _ (PrefixedVariable c p) = c:p
prettyToken _ (PositionalArg p) = '$':show p
prettyToken _ (SqlString s e t) = s ++ t ++ e
prettyToken _ (SqlNumber r) = r
prettyToken _ (Whitespace t) = t
prettyToken _ (LineComment l) = l
prettyToken _ (BlockComment c) = c
prettyTokens :: Dialect -> [Token] -> String
prettyTokens d ts = concat $ map (prettyToken d) ts
-- TODO: try to make all parsers applicative only
-- | Lex some SQL to a list of tokens.
lexSQL :: Dialect
-- ^ dialect of SQL to use
-> FilePath
-- ^ filename to use in error messages
-> Maybe (Int,Int)
-- ^ line number and column number of the first character
-- in the source to use in error messages
-> String
-- ^ the SQL source to lex
-> Either ParseError [((String,Int,Int),Token)]
lexSQL dialect fn' p src =
let (l',c') = fromMaybe (1,1) p
in either (Left . convParseError src) Right
$ runParser (setPos (fn',l',c') *> many (sqlToken dialect) <* eof) () fn' src
where
setPos (fn,l,c) = do
fmap (flip setSourceName fn
. flip setSourceLine l
. flip setSourceColumn c) getPosition
>>= setPosition
-- | parser for a sql token
sqlToken :: Dialect -> Parser ((String,Int,Int),Token)
sqlToken d = do
p' <- getPosition
let p = (sourceName p',sourceLine p', sourceColumn p')
{-
The order of parsers is important: strings and quoted identifiers can
start out looking like normal identifiers, so we try to parse these
first and use a little bit of try. Line and block comments start like
symbols, so we try these before symbol. Numbers can start with a . so
this is also tried before symbol (a .1 will be parsed as a number, but
. otherwise will be parsed as a symbol).
-}
(p,) <$> choice [sqlString d
,identifier d
,lineComment d
,blockComment d
,sqlNumber d
,positionalArg d
,dontParseEndBlockComment d
,prefixedVariable d
,symbol d
,sqlWhitespace d]
{-
Parses identifiers:
simple_identifier_23
u&"unicode quoted identifier"
"quoted identifier"
"quoted identifier "" with double quote char"
`mysql quoted identifier`
-}
identifier :: Dialect -> Parser Token
identifier d =
choice
[quotedIden
,unicodeQuotedIden
,regularIden
,guard (diBackquotedIden d) >> mySqlQuotedIden
,guard (diSquareBracketQuotedIden d) >> sqlServerQuotedIden
]
where
regularIden = Identifier Nothing <$> identifierString
quotedIden = Identifier (Just ("\"","\"")) <$> qidenPart
mySqlQuotedIden = Identifier (Just ("`","`"))
<$> (char '`' *> takeWhile1 (/='`') <* char '`')
sqlServerQuotedIden = Identifier (Just ("[","]"))
<$> (char '[' *> takeWhile1 (`notElem` "[]") <* char ']')
-- try is used here to avoid a conflict with identifiers
-- and quoted strings which also start with a 'u'
unicodeQuotedIden = Identifier
<$> (f <$> try (oneOf "uU" <* string "&"))
<*> qidenPart
where f x = Just (x: "&\"", "\"")
qidenPart = char '"' *> qidenSuffix ""
qidenSuffix t = do
s <- takeTill (=='"')
void $ char '"'
-- deal with "" as literal double quote character
choice [do
void $ char '"'
qidenSuffix $ concat [t,s,"\"\""]
,return $ concat [t,s]]
-- This parses a valid identifier without quotes.
identifierString :: Parser String
identifierString =
startsWith (\c -> c == '_' || isAlpha c) isIdentifierChar
-- this can be moved to the dialect at some point
isIdentifierChar :: Char -> Bool
isIdentifierChar c = c == '_' || isAlphaNum c
-- use try because : and @ can be part of other things also
prefixedVariable :: Dialect -> Parser Token
prefixedVariable d = try $ choice
[PrefixedVariable <$> char ':' <*> identifierString
,guard (diAtIdentifier d) >>
PrefixedVariable <$> char '@' <*> identifierString
,guard (diHashIdentifier d) >>
PrefixedVariable <$> char '#' <*> identifierString
]
positionalArg :: Dialect -> Parser Token
positionalArg d =
guard (diPositionalArg d) >>
-- use try to avoid ambiguities with other syntax which starts with dollar
PositionalArg <$> try (char '$' *> (read <$> many1 digit))
{-
Parse a SQL string. Examples:
'basic string'
'string with '' a quote'
n'international text'
b'binary string'
x'hexidecimal string'
-}
sqlString :: Dialect -> Parser Token
sqlString d = dollarString <|> csString <|> normalString
where
dollarString = do
guard $ diDollarString d
-- use try because of ambiguity with symbols and with
-- positional arg
delim <- (\x -> concat ["$",x,"$"])
<$> try (char '$' *> option "" identifierString <* char '$')
SqlString delim delim <$> manyTill anyChar (try $ string delim)
normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix False "")
normalStringSuffix allowBackslash t = do
s <- takeTill $ if allowBackslash
then (`elem` "'\\")
else (== '\'')
-- deal with '' or \' as literal quote character
choice [do
ctu <- choice ["''" <$ try (string "''")
,"\\'" <$ string "\\'"
,"\\" <$ char '\\']
normalStringSuffix allowBackslash $ concat [t,s,ctu]
,concat [t,s] <$ char '\'']
-- try is used to to avoid conflicts with
-- identifiers which can start with n,b,x,u
-- once we read the quote type and the starting '
-- then we commit to a string
-- it's possible that this will reject some valid syntax
-- but only pathalogical stuff, and I think the improved
-- error messages and user predictability make it a good
-- pragmatic choice
csString
| diEString d =
choice [SqlString <$> try (string "e'" <|> string "E'")
<*> return "'" <*> normalStringSuffix True ""
,csString']
| otherwise = csString'
csString' = SqlString
<$> try cs
<*> return "'"
<*> normalStringSuffix False ""
csPrefixes = "nNbBxX"
cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
++ [string "u&'"
,string "U&'"]
{-
numbers
digits
digits.[digits][e[+-]digits]
[digits].digits[e[+-]digits]
digitse[+-]digits
where digits is one or more decimal digits (0 through 9). At least one
digit must be before or after the decimal point, if one is used. At
least one digit must follow the exponent marker (e), if one is
present. There cannot be any spaces or other characters embedded in
the constant. Note that any leading plus or minus sign is not actually
considered part of the constant; it is an operator applied to the
constant.
-}
sqlNumber :: Dialect -> Parser Token
sqlNumber d =
SqlNumber <$> completeNumber
-- this is for definitely avoiding possibly ambiguous source
<* choice [-- special case to allow e.g. 1..2
guard (diPostgresSymbols d)
*> (void $ lookAhead $ try $ string "..")
<|> void (notFollowedBy (oneOf "eE."))
,notFollowedBy (oneOf "eE.")
]
where
completeNumber =
(int <??> (pp dot <??.> pp int)
-- try is used in case we read a dot
-- and it isn't part of a number
-- if there are any following digits, then we commit
-- to it being a number and not something else
<|> try ((++) <$> dot <*> int))
<??> pp expon
int = many1 digit
-- make sure we don't parse two adjacent dots in a number
-- special case for postgresql, we backtrack if we see two adjacent dots
-- to parse 1..2, but in other dialects we commit to the failure
dot = let p = string "." <* notFollowedBy (char '.')
in if diPostgresSymbols d
then try p
else p
expon = (:) <$> oneOf "eE" <*> sInt
sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
pp = (<$$> (++))
{-
Symbols
A symbol is an operator, or one of the misc symbols which include:
. .. := : :: ( ) ? ; , { } (for odbc)
The postgresql operator syntax allows a huge range of operators
compared with ansi and other dialects
-}
symbol :: Dialect -> Parser Token
symbol d = Symbol <$> choice (concat
[dots
,if diPostgresSymbols d
then postgresExtraSymbols
else []
,miscSymbol
,if diOdbc d then odbcSymbol else []
,if diPostgresSymbols d
then generalizedPostgresqlOperator
else basicAnsiOps
])
where
dots = [many1 (char '.')]
odbcSymbol = [string "{", string "}"]
postgresExtraSymbols =
[try (string ":=")
-- parse :: and : and avoid allowing ::: or more
,try (string "::" <* notFollowedBy (char ':'))
,try (string ":" <* notFollowedBy (char ':'))]
miscSymbol = map (string . (:[])) $
case () of
_ | diSqlServerSymbols d -> ",;():?"
| diPostgresSymbols d -> "[],;()"
| otherwise -> "[],;():?"
{-
try is used because most of the first characters of the two character
symbols can also be part of a single character symbol
-}
basicAnsiOps = map (try . string) [">=","<=","!=","<>"]
++ map (string . (:[])) "+-^*/%~&<>="
++ pipes
pipes = -- what about using many1 (char '|'), then it will
-- fail in the parser? Not sure exactly how
-- standalone the lexer should be
[char '|' *>
choice ["||" <$ char '|' <* notFollowedBy (char '|')
,return "|"]]
{-
postgresql generalized operators
this includes the custom operators that postgres supports,
plus all the standard operators which could be custom operators
according to their grammar
rules
An operator name is a sequence of up to NAMEDATALEN-1 (63 by default) characters from the following list:
+ - * / < > = ~ ! @ # % ^ & | ` ?
There are a few restrictions on operator names, however:
-- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
A multiple-character operator name cannot end in + or -, unless the name also contains at least one of these characters:
~ ! @ # % ^ & | ` ?
which allows the last character of a multi character symbol to be + or
-
-}
generalizedPostgresqlOperator :: [Parser String]
generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
where
allOpSymbols = "+-*/<>=~!@#%^&|`?"
-- these are the symbols when if part of a multi character
-- operator permit the operator to end with a + or - symbol
exceptionOpSymbols = "~!@#%^&|`?"
-- special case for parsing a single + or - symbol
singlePlusMinus = try $ do
c <- oneOf "+-"
notFollowedBy $ oneOf allOpSymbols
return [c]
-- this is used when we are parsing a potentially multi symbol
-- operator and we have alread seen one of the 'exception chars'
-- and so we can end with a + or -
moreOpCharsException = do
c <- oneOf (filter (`notElem` "-/*") allOpSymbols)
-- make sure we don't parse a comment starting token
-- as part of an operator
<|> try (char '/' <* notFollowedBy (char '*'))
<|> try (char '-' <* notFollowedBy (char '-'))
-- and make sure we don't parse a block comment end
-- as part of another symbol
<|> try (char '*' <* notFollowedBy (char '/'))
(c:) <$> option [] moreOpCharsException
opMoreChars = choice
[-- parse an exception char, now we can finish with a + -
(:)
<$> oneOf exceptionOpSymbols
<*> option [] moreOpCharsException
,(:)
<$> (-- parse +, make sure it isn't the last symbol
try (char '+' <* lookAhead (oneOf allOpSymbols))
<|> -- parse -, make sure it isn't the last symbol
-- or the start of a -- comment
try (char '-'
<* notFollowedBy (char '-')
<* lookAhead (oneOf allOpSymbols))
<|> -- parse / check it isn't the start of a /* comment
try (char '/' <* notFollowedBy (char '*'))
<|> -- make sure we don't parse */ as part of a symbol
try (char '*' <* notFollowedBy (char '/'))
<|> -- any other ansi operator symbol
oneOf "<>=")
<*> option [] opMoreChars
]
sqlWhitespace :: Dialect -> Parser Token
sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
lineComment :: Dialect -> Parser Token
lineComment _ =
(\s -> LineComment $ concat ["--",s]) <$>
-- try is used here in case we see a - symbol
-- once we read two -- then we commit to the comment token
(try (string "--") *> (
-- todo: there must be a better way to do this
conc <$> manyTill anyChar (lookAhead lineCommentEnd) <*> lineCommentEnd))
where
conc a Nothing = a
conc a (Just b) = a ++ b
lineCommentEnd =
Just "\n" <$ char '\n'
<|> Nothing <$ eof
{-
Try is used in the block comment for the two symbol bits because we
want to backtrack if we read the first symbol but the second symbol
isn't there.
-}
blockComment :: Dialect -> Parser Token
blockComment _ =
(\s -> BlockComment $ concat ["/*",s]) <$>
(try (string "/*") *> commentSuffix 0)
where
commentSuffix :: Int -> Parser String
commentSuffix n = do
-- read until a possible end comment or nested comment
x <- takeWhile (\e -> e /= '/' && e /= '*')
choice [-- close comment: if the nesting is 0, done
-- otherwise recurse on commentSuffix
try (string "*/") *> let t = concat [x,"*/"]
in if n == 0
then return t
else (\s -> concat [t,s]) <$> commentSuffix (n - 1)
-- nested comment, recurse
,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
-- not an end comment or nested comment, continue
,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]
{-
This is to improve user experience: provide an error if we see */
outside a comment. This could potentially break postgres ops with */
in them (which is a stupid thing to do). In other cases, the user
should write * / instead (I can't think of any cases when this would
be valid syntax though).
-}
dontParseEndBlockComment :: Dialect -> Parser Token
dontParseEndBlockComment _ =
-- don't use try, then it should commit to the error
try (string "*/") *> fail "comment end without comment start"
-- Some helper combinators
startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
startsWith p ps = do
c <- satisfy p
choice [(:) c <$> (takeWhile1 ps)
,return [c]]
takeWhile1 :: (Char -> Bool) -> Parser String
takeWhile1 p = many1 (satisfy p)
takeWhile :: (Char -> Bool) -> Parser String
takeWhile p = many (satisfy p)
takeTill :: (Char -> Bool) -> Parser String
takeTill p = manyTill anyChar (peekSatisfy p)
peekSatisfy :: (Char -> Bool) -> Parser ()
peekSatisfy p = void $ lookAhead (satisfy p)
{-
This utility function will accurately report if the two tokens are
pretty printed, if they should lex back to the same two tokens. This
function is used in testing (and can be used in other places), and
must not be implemented by actually trying to print both tokens and
then lex them back from a single string (because then we would have
the risk of thinking two tokens cannot be together when there is bug
in the lexer, which the testing is supposed to find).
maybe do some quick checking to make sure this function only gives
true negatives: check pairs which return false actually fail to lex or
give different symbols in return: could use quickcheck for this
a good sanity test for this function is to change it to always return
true, then check that the automated tests return the same number of
successes. I don't think it succeeds this test at the moment
-}
-- | Utility function to tell you if a list of tokens
-- will pretty print then lex back to the same set of tokens.
-- Used internally, might be useful for generating SQL via lexical tokens.
tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool
tokenListWillPrintAndLex _ [] = True
tokenListWillPrintAndLex _ [_] = True
tokenListWillPrintAndLex d (a:b:xs) =
tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs)
tokensWillPrintAndLex :: Dialect -> Token -> Token -> Bool
tokensWillPrintAndLex d a b
{-
a : followed by an identifier character will look like a host param
followed by = or : makes a different symbol
-}
| Symbol ":" <- a
, checkFirstBChar (\x -> isIdentifierChar x || x `elem` ":=") = False
{-
two symbols next to eachother will fail if the symbols can combine and
(possibly just the prefix) look like a different symbol
-}
| diPostgresSymbols d
, Symbol a' <- a
, Symbol b' <- b
, b' `notElem` ["+", "-"] || or (map (`elem` a') "~!@#%^&|`?") = False
{-
check two adjacent symbols in non postgres where the combination
possibilities are much more limited. This is ansi behaviour, it might
be different when the other dialects are done properly
-}
| Symbol a' <- a
, Symbol b' <- b
, (a',b') `elem` [("<",">")
,("<","=")
,(">","=")
,("!","=")
,("|","|")
,("||","|")
,("|","||")
,("||","||")
,("<",">=")
] = False
-- two whitespaces will be combined
| Whitespace {} <- a
, Whitespace {} <- b = False
-- line comment without a newline at the end will eat the next token
| LineComment {} <- a
, checkLastAChar (/='\n') = False
{-
check the last character of the first token and the first character of
the second token forming a comment start or end symbol
-}
| let f '-' '-' = True
f '/' '*' = True
f '*' '/' = True
f _ _ = False
in checkBorderChars f = False
{-
a symbol will absorb a following .
TODO: not 100% on this always being bad
-}
| Symbol {} <- a
, checkFirstBChar (=='.') = False
-- cannot follow a symbol ending in : with another token starting with :
| let f ':' ':' = True
f _ _ = False
in checkBorderChars f = False
-- unquoted identifier followed by an identifier letter
| Identifier Nothing _ <- a
, checkFirstBChar isIdentifierChar = False
-- a quoted identifier using ", followed by a " will fail
| Identifier (Just (_,"\"")) _ <- a
, checkFirstBChar (=='"') = False
-- prefixed variable followed by an identifier char will be absorbed
| PrefixedVariable {} <- a
, checkFirstBChar isIdentifierChar = False
-- a positional arg will absorb a following digit
| PositionalArg {} <- a
, checkFirstBChar isDigit = False
-- a string ending with ' followed by a token starting with ' will be absorbed
| SqlString _ "'" _ <- a
, checkFirstBChar (=='\'') = False
-- a number followed by a . will fail or be absorbed
| SqlNumber {} <- a
, checkFirstBChar (=='.') = False
-- a number followed by an e or E will fail or be absorbed
| SqlNumber {} <- a
, checkFirstBChar (\x -> x =='e' || x == 'E') = False
-- two numbers next to eachother will fail or be absorbed
| SqlNumber {} <- a
, SqlNumber {} <- b = False
| otherwise = True
where
prettya = prettyToken d a
prettyb = prettyToken d b
-- helper function to run a predicate on the
-- last character of the first token and the first
-- character of the second token
checkBorderChars f
| (_:_) <- prettya
, (fb:_) <- prettyb
, la <- last prettya
= f la fb
checkBorderChars _ = False
checkFirstBChar f = case prettyb of
(b':_) -> f b'
_ -> False
checkLastAChar f = case prettya of
(_:_) -> f $ last prettya
_ -> False
{-
TODO:
make the tokenswill print more dialect accurate. Maybe add symbol
chars and identifier chars to the dialect definition and use them from
here
start adding negative / different parse dialect tests
add token tables and tests for oracle, sql server
review existing tables
look for refactoring opportunities, especially the token
generation tables in the tests
do some user documentation on lexing, and lexing/dialects
start thinking about a more separated design for the dialect handling
lexing tests are starting to take a really long time, so split the
tests so it is much easier to run all the tests except the lexing
tests which only need to be run when working on the lexer (which
should be relatively uncommon), or doing a commit or finishing off a
series of commits,
start writing the error message tests:
generate/write a large number of syntax errors
create a table with the source and the error message
try to compare some different versions of code to compare the
quality of the error messages by hand
get this checked in so improvements and regressions in the error
message quality can be tracked a little more easily (although it will
still be manual)
try again to add annotation to the ast
-}

View file

@ -1,717 +0,0 @@
The parser uses a separate lexer for two reasons:
1. sql syntax is very awkward to parse, the separate lexer makes it
easier to handle this in most places (in some places it makes it
harder or impossible, the fix is to switch to something better than
parsec)
2. using a separate lexer gives a huge speed boost because it reduces
backtracking. (We could get this by making the parsing code a lot more
complex also.)
3. we can test the lexer relatively exhaustively, then even when we
don't do nearly as comprehensive testing on the syntax level, we still
have a relatively high assurance of the low level of bugs. This is
much more difficult to get parity with when testing the syntax parser
directly without the separately testing lexing stage.
> -- | Lexer for SQL.
> {-# LANGUAGE TupleSections #-}
> module Language.SQL.SimpleSQL.Lex
> (Token(..)
> ,lexSQL
> ,prettyToken
> ,prettyTokens
> ,ParseError(..)
> ,tokenListWillPrintAndLex
> ,ansi2011
> ) where
> import Language.SQL.SimpleSQL.Dialect
> import Text.Parsec (option,string,manyTill,anyChar
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
> ,many,runParser,lookAhead,satisfy
> ,setPosition,getPosition
> ,setSourceColumn,setSourceLine
> ,sourceName, setSourceName
> ,sourceLine, sourceColumn
> ,notFollowedBy)
> import Language.SQL.SimpleSQL.Combinators
> import Language.SQL.SimpleSQL.Errors
> import Control.Applicative hiding ((<|>), many)
> import Data.Char
> import Control.Monad
> import Prelude hiding (takeWhile)
> import Text.Parsec.String (Parser)
> import Data.Maybe
> -- | Represents a lexed token
> data Token
> -- | A symbol (in ansi dialect) is one of the following
> --
> -- * multi char symbols <> \<= \>= != ||
> -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( )
> --
> = Symbol String
>
> -- | This is an identifier or keyword. The first field is
> -- the quotes used, or nothing if no quotes were used. The quotes
> -- can be " or u& or something dialect specific like []
> | Identifier (Maybe (String,String)) String
>
> -- | This is a prefixed variable symbol, such as :var, @var or #var
> -- (only :var is used in ansi dialect)
> | PrefixedVariable Char String
>
> -- | This is a positional arg identifier e.g. $1
> | PositionalArg Int
>
> -- | This is a string literal. The first two fields are the --
> -- start and end quotes, which are usually both ', but can be
> -- the character set (one of nNbBxX, or u&, U&), or a dialect
> -- specific string quoting (such as $$ in postgres)
> | SqlString String String String
>
> -- | A number literal (integral or otherwise), stored in original format
> -- unchanged
> | SqlNumber String
>
> -- | Whitespace, one or more of space, tab or newline.
> | Whitespace String
>
> -- | A commented line using --, contains every character starting with the
> -- \'--\' and including the terminating newline character if there is one
> -- - this will be missing if the last line in the source is a line comment
> -- with no trailing newline
> | LineComment String
>
> -- | A block comment, \/* stuff *\/, includes the comment delimiters
> | BlockComment String
>
> deriving (Eq,Show)
> -- | Pretty printing, if you lex a bunch of tokens, then pretty
> -- print them, should should get back exactly the same string
> prettyToken :: Dialect -> Token -> String
> prettyToken _ (Symbol s) = s
> prettyToken _ (Identifier Nothing t) = t
> prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
> prettyToken _ (PrefixedVariable c p) = c:p
> prettyToken _ (PositionalArg p) = '$':show p
> prettyToken _ (SqlString s e t) = s ++ t ++ e
> prettyToken _ (SqlNumber r) = r
> prettyToken _ (Whitespace t) = t
> prettyToken _ (LineComment l) = l
> prettyToken _ (BlockComment c) = c
> prettyTokens :: Dialect -> [Token] -> String
> prettyTokens d ts = concat $ map (prettyToken d) ts
TODO: try to make all parsers applicative only
> -- | Lex some SQL to a list of tokens.
> lexSQL :: Dialect
> -- ^ dialect of SQL to use
> -> FilePath
> -- ^ filename to use in error messages
> -> Maybe (Int,Int)
> -- ^ line number and column number of the first character
> -- in the source to use in error messages
> -> String
> -- ^ the SQL source to lex
> -> Either ParseError [((String,Int,Int),Token)]
> lexSQL dialect fn' p src =
> let (l',c') = fromMaybe (1,1) p
> in either (Left . convParseError src) Right
> $ runParser (setPos (fn',l',c') *> many (sqlToken dialect) <* eof) () fn' src
> where
> setPos (fn,l,c) = do
> fmap (flip setSourceName fn
> . flip setSourceLine l
> . flip setSourceColumn c) getPosition
> >>= setPosition
> -- | parser for a sql token
> sqlToken :: Dialect -> Parser ((String,Int,Int),Token)
> sqlToken d = do
> p' <- getPosition
> let p = (sourceName p',sourceLine p', sourceColumn p')
The order of parsers is important: strings and quoted identifiers can
start out looking like normal identifiers, so we try to parse these
first and use a little bit of try. Line and block comments start like
symbols, so we try these before symbol. Numbers can start with a . so
this is also tried before symbol (a .1 will be parsed as a number, but
. otherwise will be parsed as a symbol).
> (p,) <$> choice [sqlString d
> ,identifier d
> ,lineComment d
> ,blockComment d
> ,sqlNumber d
> ,positionalArg d
> ,dontParseEndBlockComment d
> ,prefixedVariable d
> ,symbol d
> ,sqlWhitespace d]
Parses identifiers:
simple_identifier_23
u&"unicode quoted identifier"
"quoted identifier"
"quoted identifier "" with double quote char"
`mysql quoted identifier`
> identifier :: Dialect -> Parser Token
> identifier d =
> choice
> [quotedIden
> ,unicodeQuotedIden
> ,regularIden
> ,guard (diBackquotedIden d) >> mySqlQuotedIden
> ,guard (diSquareBracketQuotedIden d) >> sqlServerQuotedIden
> ]
> where
> regularIden = Identifier Nothing <$> identifierString
> quotedIden = Identifier (Just ("\"","\"")) <$> qidenPart
> mySqlQuotedIden = Identifier (Just ("`","`"))
> <$> (char '`' *> takeWhile1 (/='`') <* char '`')
> sqlServerQuotedIden = Identifier (Just ("[","]"))
> <$> (char '[' *> takeWhile1 (`notElem` "[]") <* char ']')
> -- try is used here to avoid a conflict with identifiers
> -- and quoted strings which also start with a 'u'
> unicodeQuotedIden = Identifier
> <$> (f <$> try (oneOf "uU" <* string "&"))
> <*> qidenPart
> where f x = Just (x: "&\"", "\"")
> qidenPart = char '"' *> qidenSuffix ""
> qidenSuffix t = do
> s <- takeTill (=='"')
> void $ char '"'
> -- deal with "" as literal double quote character
> choice [do
> void $ char '"'
> qidenSuffix $ concat [t,s,"\"\""]
> ,return $ concat [t,s]]
This parses a valid identifier without quotes.
> identifierString :: Parser String
> identifierString =
> startsWith (\c -> c == '_' || isAlpha c) isIdentifierChar
this can be moved to the dialect at some point
> isIdentifierChar :: Char -> Bool
> isIdentifierChar c = c == '_' || isAlphaNum c
use try because : and @ can be part of other things also
> prefixedVariable :: Dialect -> Parser Token
> prefixedVariable d = try $ choice
> [PrefixedVariable <$> char ':' <*> identifierString
> ,guard (diAtIdentifier d) >>
> PrefixedVariable <$> char '@' <*> identifierString
> ,guard (diHashIdentifier d) >>
> PrefixedVariable <$> char '#' <*> identifierString
> ]
> positionalArg :: Dialect -> Parser Token
> positionalArg d =
> guard (diPositionalArg d) >>
> -- use try to avoid ambiguities with other syntax which starts with dollar
> PositionalArg <$> try (char '$' *> (read <$> many1 digit))
Parse a SQL string. Examples:
'basic string'
'string with '' a quote'
n'international text'
b'binary string'
x'hexidecimal string'
> sqlString :: Dialect -> Parser Token
> sqlString d = dollarString <|> csString <|> normalString
> where
> dollarString = do
> guard $ diDollarString d
> -- use try because of ambiguity with symbols and with
> -- positional arg
> delim <- (\x -> concat ["$",x,"$"])
> <$> try (char '$' *> option "" identifierString <* char '$')
> SqlString delim delim <$> manyTill anyChar (try $ string delim)
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix False "")
> normalStringSuffix allowBackslash t = do
> s <- takeTill $ if allowBackslash
> then (`elem` "'\\")
> else (== '\'')
> -- deal with '' or \' as literal quote character
> choice [do
> ctu <- choice ["''" <$ try (string "''")
> ,"\\'" <$ string "\\'"
> ,"\\" <$ char '\\']
> normalStringSuffix allowBackslash $ concat [t,s,ctu]
> ,concat [t,s] <$ char '\'']
> -- try is used to to avoid conflicts with
> -- identifiers which can start with n,b,x,u
> -- once we read the quote type and the starting '
> -- then we commit to a string
> -- it's possible that this will reject some valid syntax
> -- but only pathalogical stuff, and I think the improved
> -- error messages and user predictability make it a good
> -- pragmatic choice
> csString
> | diEString d =
> choice [SqlString <$> try (string "e'" <|> string "E'")
> <*> return "'" <*> normalStringSuffix True ""
> ,csString']
> | otherwise = csString'
> csString' = SqlString
> <$> try cs
> <*> return "'"
> <*> normalStringSuffix False ""
> csPrefixes = "nNbBxX"
> cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
> ++ [string "u&'"
> ,string "U&'"]
numbers
digits
digits.[digits][e[+-]digits]
[digits].digits[e[+-]digits]
digitse[+-]digits
where digits is one or more decimal digits (0 through 9). At least one
digit must be before or after the decimal point, if one is used. At
least one digit must follow the exponent marker (e), if one is
present. There cannot be any spaces or other characters embedded in
the constant. Note that any leading plus or minus sign is not actually
considered part of the constant; it is an operator applied to the
constant.
> sqlNumber :: Dialect -> Parser Token
> sqlNumber d =
> SqlNumber <$> completeNumber
> -- this is for definitely avoiding possibly ambiguous source
> <* choice [-- special case to allow e.g. 1..2
> guard (diPostgresSymbols d)
> *> (void $ lookAhead $ try $ string "..")
> <|> void (notFollowedBy (oneOf "eE."))
> ,notFollowedBy (oneOf "eE.")
> ]
> where
> completeNumber =
> (int <??> (pp dot <??.> pp int)
> -- try is used in case we read a dot
> -- and it isn't part of a number
> -- if there are any following digits, then we commit
> -- to it being a number and not something else
> <|> try ((++) <$> dot <*> int))
> <??> pp expon
> int = many1 digit
> -- make sure we don't parse two adjacent dots in a number
> -- special case for postgresql, we backtrack if we see two adjacent dots
> -- to parse 1..2, but in other dialects we commit to the failure
> dot = let p = string "." <* notFollowedBy (char '.')
> in if diPostgresSymbols d
> then try p
> else p
> expon = (:) <$> oneOf "eE" <*> sInt
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
> pp = (<$$> (++))
Symbols
A symbol is an operator, or one of the misc symbols which include:
. .. := : :: ( ) ? ; , { } (for odbc)
The postgresql operator syntax allows a huge range of operators
compared with ansi and other dialects
> symbol :: Dialect -> Parser Token
> symbol d = Symbol <$> choice (concat
> [dots
> ,if diPostgresSymbols d
> then postgresExtraSymbols
> else []
> ,miscSymbol
> ,if diOdbc d then odbcSymbol else []
> ,if diPostgresSymbols d
> then generalizedPostgresqlOperator
> else basicAnsiOps
> ])
> where
> dots = [many1 (char '.')]
> odbcSymbol = [string "{", string "}"]
> postgresExtraSymbols =
> [try (string ":=")
> -- parse :: and : and avoid allowing ::: or more
> ,try (string "::" <* notFollowedBy (char ':'))
> ,try (string ":" <* notFollowedBy (char ':'))]
> miscSymbol = map (string . (:[])) $
> case () of
> _ | diSqlServerSymbols d -> ",;():?"
> | diPostgresSymbols d -> "[],;()"
> | otherwise -> "[],;():?"
try is used because most of the first characters of the two character
symbols can also be part of a single character symbol
> basicAnsiOps = map (try . string) [">=","<=","!=","<>"]
> ++ map (string . (:[])) "+-^*/%~&<>="
> ++ pipes
> pipes = -- what about using many1 (char '|'), then it will
> -- fail in the parser? Not sure exactly how
> -- standalone the lexer should be
> [char '|' *>
> choice ["||" <$ char '|' <* notFollowedBy (char '|')
> ,return "|"]]
postgresql generalized operators
this includes the custom operators that postgres supports,
plus all the standard operators which could be custom operators
according to their grammar
rules
An operator name is a sequence of up to NAMEDATALEN-1 (63 by default) characters from the following list:
+ - * / < > = ~ ! @ # % ^ & | ` ?
There are a few restrictions on operator names, however:
-- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
A multiple-character operator name cannot end in + or -, unless the name also contains at least one of these characters:
~ ! @ # % ^ & | ` ?
which allows the last character of a multi character symbol to be + or
-
> generalizedPostgresqlOperator :: [Parser String]
> generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
> where
> allOpSymbols = "+-*/<>=~!@#%^&|`?"
> -- these are the symbols when if part of a multi character
> -- operator permit the operator to end with a + or - symbol
> exceptionOpSymbols = "~!@#%^&|`?"
> -- special case for parsing a single + or - symbol
> singlePlusMinus = try $ do
> c <- oneOf "+-"
> notFollowedBy $ oneOf allOpSymbols
> return [c]
> -- this is used when we are parsing a potentially multi symbol
> -- operator and we have alread seen one of the 'exception chars'
> -- and so we can end with a + or -
> moreOpCharsException = do
> c <- oneOf (filter (`notElem` "-/*") allOpSymbols)
> -- make sure we don't parse a comment starting token
> -- as part of an operator
> <|> try (char '/' <* notFollowedBy (char '*'))
> <|> try (char '-' <* notFollowedBy (char '-'))
> -- and make sure we don't parse a block comment end
> -- as part of another symbol
> <|> try (char '*' <* notFollowedBy (char '/'))
> (c:) <$> option [] moreOpCharsException
> opMoreChars = choice
> [-- parse an exception char, now we can finish with a + -
> (:)
> <$> oneOf exceptionOpSymbols
> <*> option [] moreOpCharsException
> ,(:)
> <$> (-- parse +, make sure it isn't the last symbol
> try (char '+' <* lookAhead (oneOf allOpSymbols))
> <|> -- parse -, make sure it isn't the last symbol
> -- or the start of a -- comment
> try (char '-'
> <* notFollowedBy (char '-')
> <* lookAhead (oneOf allOpSymbols))
> <|> -- parse / check it isn't the start of a /* comment
> try (char '/' <* notFollowedBy (char '*'))
> <|> -- make sure we don't parse */ as part of a symbol
> try (char '*' <* notFollowedBy (char '/'))
> <|> -- any other ansi operator symbol
> oneOf "<>=")
> <*> option [] opMoreChars
> ]
> sqlWhitespace :: Dialect -> Parser Token
> sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
> lineComment :: Dialect -> Parser Token
> lineComment _ =
> (\s -> LineComment $ concat ["--",s]) <$>
> -- try is used here in case we see a - symbol
> -- once we read two -- then we commit to the comment token
> (try (string "--") *> (
> -- todo: there must be a better way to do this
> conc <$> manyTill anyChar (lookAhead lineCommentEnd) <*> lineCommentEnd))
> where
> conc a Nothing = a
> conc a (Just b) = a ++ b
> lineCommentEnd =
> Just "\n" <$ char '\n'
> <|> Nothing <$ eof
Try is used in the block comment for the two symbol bits because we
want to backtrack if we read the first symbol but the second symbol
isn't there.
> blockComment :: Dialect -> Parser Token
> blockComment _ =
> (\s -> BlockComment $ concat ["/*",s]) <$>
> (try (string "/*") *> commentSuffix 0)
> where
> commentSuffix :: Int -> Parser String
> commentSuffix n = do
> -- read until a possible end comment or nested comment
> x <- takeWhile (\e -> e /= '/' && e /= '*')
> choice [-- close comment: if the nesting is 0, done
> -- otherwise recurse on commentSuffix
> try (string "*/") *> let t = concat [x,"*/"]
> in if n == 0
> then return t
> else (\s -> concat [t,s]) <$> commentSuffix (n - 1)
> -- nested comment, recurse
> ,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
> -- not an end comment or nested comment, continue
> ,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]
This is to improve user experience: provide an error if we see */
outside a comment. This could potentially break postgres ops with */
in them (which is a stupid thing to do). In other cases, the user
should write * / instead (I can't think of any cases when this would
be valid syntax though).
> dontParseEndBlockComment :: Dialect -> Parser Token
> dontParseEndBlockComment _ =
> -- don't use try, then it should commit to the error
> try (string "*/") *> fail "comment end without comment start"
Some helper combinators
> startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
> startsWith p ps = do
> c <- satisfy p
> choice [(:) c <$> (takeWhile1 ps)
> ,return [c]]
> takeWhile1 :: (Char -> Bool) -> Parser String
> takeWhile1 p = many1 (satisfy p)
> takeWhile :: (Char -> Bool) -> Parser String
> takeWhile p = many (satisfy p)
> takeTill :: (Char -> Bool) -> Parser String
> takeTill p = manyTill anyChar (peekSatisfy p)
> peekSatisfy :: (Char -> Bool) -> Parser ()
> peekSatisfy p = void $ lookAhead (satisfy p)
This utility function will accurately report if the two tokens are
pretty printed, if they should lex back to the same two tokens. This
function is used in testing (and can be used in other places), and
must not be implemented by actually trying to print both tokens and
then lex them back from a single string (because then we would have
the risk of thinking two tokens cannot be together when there is bug
in the lexer, which the testing is supposed to find).
maybe do some quick checking to make sure this function only gives
true negatives: check pairs which return false actually fail to lex or
give different symbols in return: could use quickcheck for this
a good sanity test for this function is to change it to always return
true, then check that the automated tests return the same number of
successes. I don't think it succeeds this test at the moment
> -- | Utility function to tell you if a list of tokens
> -- will pretty print then lex back to the same set of tokens.
> -- Used internally, might be useful for generating SQL via lexical tokens.
> tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool
> tokenListWillPrintAndLex _ [] = True
> tokenListWillPrintAndLex _ [_] = True
> tokenListWillPrintAndLex d (a:b:xs) =
> tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs)
> tokensWillPrintAndLex :: Dialect -> Token -> Token -> Bool
> tokensWillPrintAndLex d a b
a : followed by an identifier character will look like a host param
followed by = or : makes a different symbol
> | Symbol ":" <- a
> , checkFirstBChar (\x -> isIdentifierChar x || x `elem` ":=") = False
two symbols next to eachother will fail if the symbols can combine and
(possibly just the prefix) look like a different symbol
> | diPostgresSymbols d
> , Symbol a' <- a
> , Symbol b' <- b
> , b' `notElem` ["+", "-"] || or (map (`elem` a') "~!@#%^&|`?") = False
check two adjacent symbols in non postgres where the combination
possibilities are much more limited. This is ansi behaviour, it might
be different when the other dialects are done properly
> | Symbol a' <- a
> , Symbol b' <- b
> , (a',b') `elem` [("<",">")
> ,("<","=")
> ,(">","=")
> ,("!","=")
> ,("|","|")
> ,("||","|")
> ,("|","||")
> ,("||","||")
> ,("<",">=")
> ] = False
two whitespaces will be combined
> | Whitespace {} <- a
> , Whitespace {} <- b = False
line comment without a newline at the end will eat the next token
> | LineComment {} <- a
> , checkLastAChar (/='\n') = False
check the last character of the first token and the first character of
the second token forming a comment start or end symbol
> | let f '-' '-' = True
> f '/' '*' = True
> f '*' '/' = True
> f _ _ = False
> in checkBorderChars f = False
a symbol will absorb a following .
TODO: not 100% on this always being bad
> | Symbol {} <- a
> , checkFirstBChar (=='.') = False
cannot follow a symbol ending in : with another token starting with :
> | let f ':' ':' = True
> f _ _ = False
> in checkBorderChars f = False
unquoted identifier followed by an identifier letter
> | Identifier Nothing _ <- a
> , checkFirstBChar isIdentifierChar = False
a quoted identifier using ", followed by a " will fail
> | Identifier (Just (_,"\"")) _ <- a
> , checkFirstBChar (=='"') = False
prefixed variable followed by an identifier char will be absorbed
> | PrefixedVariable {} <- a
> , checkFirstBChar isIdentifierChar = False
a positional arg will absorb a following digit
> | PositionalArg {} <- a
> , checkFirstBChar isDigit = False
a string ending with ' followed by a token starting with ' will be absorbed
> | SqlString _ "'" _ <- a
> , checkFirstBChar (=='\'') = False
a number followed by a . will fail or be absorbed
> | SqlNumber {} <- a
> , checkFirstBChar (=='.') = False
a number followed by an e or E will fail or be absorbed
> | SqlNumber {} <- a
> , checkFirstBChar (\x -> x =='e' || x == 'E') = False
two numbers next to eachother will fail or be absorbed
> | SqlNumber {} <- a
> , SqlNumber {} <- b = False
> | otherwise = True
> where
> prettya = prettyToken d a
> prettyb = prettyToken d b
> -- helper function to run a predicate on the
> -- last character of the first token and the first
> -- character of the second token
> checkBorderChars f
> | (_:_) <- prettya
> , (fb:_) <- prettyb
> , la <- last prettya
> = f la fb
> checkBorderChars _ = False
> checkFirstBChar f = case prettyb of
> (b':_) -> f b'
> _ -> False
> checkLastAChar f = case prettya of
> (_:_) -> f $ last prettya
> _ -> False
TODO:
make the tokenswill print more dialect accurate. Maybe add symbol
chars and identifier chars to the dialect definition and use them from
here
start adding negative / different parse dialect tests
add token tables and tests for oracle, sql server
review existing tables
look for refactoring opportunities, especially the token
generation tables in the tests
do some user documentation on lexing, and lexing/dialects
start thinking about a more separated design for the dialect handling
lexing tests are starting to take a really long time, so split the
tests so it is much easier to run all the tests except the lexing
tests which only need to be run when working on the lexer (which
should be relatively uncommon), or doing a commit or finishing off a
series of commits,
start writing the error message tests:
generate/write a large number of syntax errors
create a table with the source and the error message
try to compare some different versions of code to compare the
quality of the error messages by hand
get this checked in so improvements and regressions in the error
message quality can be tracked a little more easily (although it will
still be manual)
try again to add annotation to the ast

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,848 @@
-- | These is the pretty printing functions, which produce SQL
-- source from ASTs. The code attempts to format the output in a
-- readable way.
module Language.SQL.SimpleSQL.Pretty
(prettyQueryExpr
,prettyScalarExpr
,prettyStatement
,prettyStatements
) where
import Prelude hiding ((<>))
{-
TODO: there should be more comments in this file, especially the bits
which have been changed to try to improve the layout of the output.
Try to do this when this code is ported to a modern pretty printing lib.
-}
--import Language.SQL.SimpleSQL.Dialect
import Text.PrettyPrint (render, vcat, text, (<>), (<+>), empty, parens,
nest, Doc, punctuate, comma, sep, quotes,
brackets,hcat)
import Data.Maybe (maybeToList, catMaybes)
import Data.List (intercalate)
import Language.SQL.SimpleSQL.Syntax
import Language.SQL.SimpleSQL.Dialect
-- | Convert a query expr ast to concrete syntax.
prettyQueryExpr :: Dialect -> QueryExpr -> String
prettyQueryExpr d = render . queryExpr d
-- | Convert a value expr ast to concrete syntax.
prettyScalarExpr :: Dialect -> ScalarExpr -> String
prettyScalarExpr d = render . scalarExpr d
-- | A terminating semicolon.
terminator :: Doc
terminator = text ";\n"
-- | Convert a statement ast to concrete syntax.
prettyStatement :: Dialect -> Statement -> String
prettyStatement _ EmptyStatement = render terminator
prettyStatement d s = render (statement d s)
-- | Convert a list of statements to concrete syntax. A semicolon
-- is inserted after each statement.
prettyStatements :: Dialect -> [Statement] -> String
prettyStatements d = render . vcat . map prettyStatementWithSemicolon
where
prettyStatementWithSemicolon :: Statement -> Doc
prettyStatementWithSemicolon s = statement d s <> terminator
-- = scalar expressions
scalarExpr :: Dialect -> ScalarExpr -> Doc
scalarExpr _ (StringLit s e t) = text s <> text t <> text e
scalarExpr _ (NumLit s) = text s
scalarExpr _ (IntervalLit s v f t) =
text "interval"
<+> me (\x -> text $ case x of
Plus -> "+"
Minus -> "-") s
<+> quotes (text v)
<+> intervalTypeField f
<+> me (\x -> text "to" <+> intervalTypeField x) t
scalarExpr _ (Iden i) = names i
scalarExpr _ Star = text "*"
scalarExpr _ Parameter = text "?"
scalarExpr _ (PositionalArg n) = text $ "$" ++ show n
scalarExpr _ (HostParameter p i) =
text p
<+> me (\i' -> text "indicator" <+> text i') i
scalarExpr d (App f es) = names f <> parens (commaSep (map (scalarExpr d) es))
scalarExpr dia (AggregateApp f d es od fil) =
names f
<> parens ((case d of
Distinct -> text "distinct"
All -> text "all"
SQDefault -> empty)
<+> commaSep (map (scalarExpr dia) es)
<+> orderBy dia od)
<+> me (\x -> text "filter"
<+> parens (text "where" <+> scalarExpr dia x)) fil
scalarExpr d (AggregateAppGroup f es od) =
names f
<> parens (commaSep (map (scalarExpr d) es))
<+> if null od
then empty
else text "within group" <+> parens (orderBy d od)
scalarExpr d (WindowApp f es pb od fr) =
names f <> parens (commaSep $ map (scalarExpr d) es)
<+> text "over"
<+> parens ((case pb of
[] -> empty
_ -> text "partition by"
<+> nest 13 (commaSep $ map (scalarExpr d) pb))
<+> orderBy d od
<+> me frd fr)
where
frd (FrameFrom rs fp) = rsd rs <+> fpd fp
frd (FrameBetween rs fps fpe) =
rsd rs <+> text "between" <+> fpd fps
<+> text "and" <+> fpd fpe
rsd rs = case rs of
FrameRows -> text "rows"
FrameRange -> text "range"
fpd UnboundedPreceding = text "unbounded preceding"
fpd UnboundedFollowing = text "unbounded following"
fpd Current = text "current row"
fpd (Preceding e) = scalarExpr d e <+> text "preceding"
fpd (Following e) = scalarExpr d e <+> text "following"
scalarExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name Nothing "between"]
,[Name Nothing "not between"]] =
sep [scalarExpr dia a
,names nm <+> scalarExpr dia b
,nest (length (unnames nm) + 1) $ text "and" <+> scalarExpr dia c]
scalarExpr d (SpecialOp [Name Nothing "rowctor"] as) =
parens $ commaSep $ map (scalarExpr d) as
scalarExpr d (SpecialOp nm es) =
names nm <+> parens (commaSep $ map (scalarExpr d) es)
scalarExpr d (SpecialOpK nm fs as) =
names nm <> parens (sep $ catMaybes
(fmap (scalarExpr d) fs
: map (\(n,e) -> Just (text n <+> scalarExpr d e)) as))
scalarExpr d (PrefixOp f e) = names f <+> scalarExpr d e
scalarExpr d (PostfixOp f e) = scalarExpr d e <+> names f
scalarExpr d e@(BinOp _ op _) | op `elem` [[Name Nothing "and"]
,[Name Nothing "or"]] =
-- special case for and, or, get all the ands so we can vcat them
-- nicely
case ands e of
(e':es) -> vcat (scalarExpr d e'
: map ((names op <+>) . scalarExpr d) es)
[] -> empty -- shouldn't be possible
where
ands (BinOp a op' b) | op == op' = ands a ++ ands b
ands x = [x]
-- special case for . we don't use whitespace
scalarExpr d (BinOp e0 [Name Nothing "."] e1) =
scalarExpr d e0 <> text "." <> scalarExpr d e1
scalarExpr d (BinOp e0 f e1) =
scalarExpr d e0 <+> names f <+> scalarExpr d e1
scalarExpr dia (Case t ws els) =
sep $ [text "case" <+> me (scalarExpr dia) t]
++ map w ws
++ maybeToList (fmap e els)
++ [text "end"]
where
w (t0,t1) =
text "when" <+> nest 5 (commaSep $ map (scalarExpr dia) t0)
<+> text "then" <+> nest 5 (scalarExpr dia t1)
e el = text "else" <+> nest 5 (scalarExpr dia el)
scalarExpr d (Parens e) = parens $ scalarExpr d e
scalarExpr d (Cast e tn) =
text "cast" <> parens (sep [scalarExpr d e
,text "as"
,typeName tn])
scalarExpr _ (TypedLit tn s) =
typeName tn <+> quotes (text s)
scalarExpr d (SubQueryExpr ty qe) =
(case ty of
SqSq -> empty
SqExists -> text "exists"
SqUnique -> text "unique"
) <+> parens (queryExpr d qe)
scalarExpr d (QuantifiedComparison v c cp sq) =
scalarExpr d v
<+> names c
<+> (text $ case cp of
CPAny -> "any"
CPSome -> "some"
CPAll -> "all")
<+> parens (queryExpr d sq)
scalarExpr d (Match v u sq) =
scalarExpr d v
<+> text "match"
<+> (if u then text "unique" else empty)
<+> parens (queryExpr d sq)
scalarExpr d (In b se x) =
scalarExpr d se <+>
(if b then empty else text "not")
<+> text "in"
<+> parens (nest (if b then 3 else 7) $
case x of
InList es -> commaSep $ map (scalarExpr d) es
InQueryExpr qe -> queryExpr d qe)
scalarExpr d (Array v es) =
scalarExpr d v <> brackets (commaSep $ map (scalarExpr d) es)
scalarExpr d (ArrayCtor q) =
text "array" <> parens (queryExpr d q)
scalarExpr d (MultisetCtor es) =
text "multiset" <> brackets (commaSep $ map (scalarExpr d) es)
scalarExpr d (MultisetQueryCtor q) =
text "multiset" <> parens (queryExpr d q)
scalarExpr d (MultisetBinOp a c q b) =
sep
[scalarExpr d a
,text "multiset"
,text $ case c of
Union -> "union"
Intersect -> "intersect"
Except -> "except"
,case q of
SQDefault -> empty
All -> text "all"
Distinct -> text "distinct"
,scalarExpr d b]
{-scalarExpr d (Escape v e) =
scalarExpr d v <+> text "escape" <+> text [e]
scalarExpr d (UEscape v e) =
scalarExpr d v <+> text "uescape" <+> text [e]-}
scalarExpr d (Collate v c) =
scalarExpr d v <+> text "collate" <+> names c
scalarExpr _ (NextValueFor ns) =
text "next value for" <+> names ns
scalarExpr d (VEComment cmt v) =
vcat $ map comment cmt ++ [scalarExpr d v]
scalarExpr _ (OdbcLiteral t s) =
text "{" <> lt t <+> quotes (text s) <> text "}"
where
lt OLDate = text "d"
lt OLTime = text "t"
lt OLTimestamp = text "ts"
scalarExpr d (OdbcFunc e) =
text "{fn" <+> scalarExpr d e <> text "}"
scalarExpr d (Convert t e Nothing) =
text "convert(" <> typeName t <> text "," <+> scalarExpr d e <> text ")"
scalarExpr d (Convert t e (Just i)) =
text "convert(" <> typeName t <> text "," <+> scalarExpr d e <> text "," <+> text (show i) <> text ")"
unname :: Name -> String
unname (Name Nothing n) = n
unname (Name (Just (s,e)) n) =
s ++ n ++ e
unnames :: [Name] -> String
unnames ns = intercalate "." $ map unname ns
name :: Name -> Doc
name (Name Nothing n) = text n
name (Name (Just (s,e)) n) = text s <> text n <> text e
names :: [Name] -> Doc
names ns = hcat $ punctuate (text ".") $ map name ns
typeName :: TypeName -> Doc
typeName (TypeName t) = names t
typeName (PrecTypeName t a) = names t <+> parens (text $ show a)
typeName (PrecScaleTypeName t a b) =
names t <+> parens (text (show a) <+> comma <+> text (show b))
typeName (PrecLengthTypeName t i m u) =
names t
<> parens (text (show i)
<> me (\x -> case x of
PrecK -> text "K"
PrecM -> text "M"
PrecG -> text "G"
PrecT -> text "T"
PrecP -> text "P") m
<+> me (\x -> case x of
PrecCharacters -> text "CHARACTERS"
PrecOctets -> text "OCTETS") u)
typeName (CharTypeName t i cs col) =
names t
<> me (\x -> parens (text $ show x)) i
<+> (if null cs
then empty
else text "character set" <+> names cs)
<+> (if null col
then empty
else text "collate" <+> names col)
typeName (TimeTypeName t i tz) =
names t
<> me (\x -> parens (text $ show x)) i
<+> text (if tz
then "with time zone"
else "without time zone")
typeName (RowTypeName cs) =
text "row" <> parens (commaSep $ map f cs)
where
f (n,t) = name n <+> typeName t
typeName (IntervalTypeName f t) =
text "interval"
<+> intervalTypeField f
<+> me (\x -> text "to" <+> intervalTypeField x) t
typeName (ArrayTypeName tn sz) =
typeName tn <+> text "array" <+> me (brackets . text . show) sz
typeName (MultisetTypeName tn) =
typeName tn <+> text "multiset"
intervalTypeField :: IntervalTypeField -> Doc
intervalTypeField (Itf n p) =
text n
<+> me (\(x,x1) ->
parens (text (show x)
<+> me (\y -> (sep [comma,text (show y)])) x1)) p
-- = query expressions
queryExpr :: Dialect -> QueryExpr -> Doc
queryExpr dia (Select d sl fr wh gb hv od off fe) =
sep [text "select"
,case d of
SQDefault -> empty
All -> text "all"
Distinct -> text "distinct"
,nest 7 $ sep [selectList dia sl]
,from dia fr
,maybeScalarExpr dia "where" wh
,grpBy dia gb
,maybeScalarExpr dia "having" hv
,orderBy dia od
,me (\e -> text "offset" <+> scalarExpr dia e <+> text "rows") off
,fetchFirst
]
where
fetchFirst =
me (\e -> if diLimit dia
then text "limit" <+> scalarExpr dia e
else text "fetch first" <+> scalarExpr dia e
<+> text "rows only") fe
queryExpr dia (QueryExprSetOp q1 ct d c q2) =
sep [queryExpr dia q1
,text (case ct of
Union -> "union"
Intersect -> "intersect"
Except -> "except")
<+> case d of
SQDefault -> empty
All -> text "all"
Distinct -> text "distinct"
<+> case c of
Corresponding -> text "corresponding"
Respectively -> empty
,queryExpr dia q2]
queryExpr d (With rc withs qe) =
text "with" <+> (if rc then text "recursive" else empty)
<+> vcat [nest 5
(vcat $ punctuate comma $ flip map withs $ \(n,q) ->
withAlias n <+> text "as" <+> parens (queryExpr d q))
,queryExpr d qe]
where
withAlias (Alias nm cols) = name nm
<+> me (parens . commaSep . map name) cols
queryExpr d (Values vs) =
text "values"
<+> nest 7 (commaSep (map (parens . commaSep . map (scalarExpr d)) vs))
queryExpr _ (Table t) = text "table" <+> names t
queryExpr d (QEComment cmt v) =
vcat $ map comment cmt ++ [queryExpr d v]
alias :: Alias -> Doc
alias (Alias nm cols) =
text "as" <+> name nm
<+> me (parens . commaSep . map name) cols
selectList :: Dialect -> [(ScalarExpr,Maybe Name)] -> Doc
selectList d is = commaSep $ map si is
where
si (e,al) = scalarExpr d e <+> me als al
als al = text "as" <+> name al
from :: Dialect -> [TableRef] -> Doc
from _ [] = empty
from d ts =
sep [text "from"
,nest 5 $ vcat $ punctuate comma $ map tr ts]
where
tr (TRSimple t) = names t
tr (TRLateral t) = text "lateral" <+> tr t
tr (TRFunction f as) =
names f <> parens (commaSep $ map (scalarExpr d) as)
tr (TRAlias t a) = sep [tr t, alias a]
tr (TRParens t) = parens $ tr t
tr (TRQueryExpr q) = parens $ queryExpr d q
tr (TRJoin t0 b jt t1 jc) =
sep [tr t0
,if b then text "natural" else empty
,joinText jt <+> tr t1
,joinCond jc]
tr (TROdbc t) = text "{oj" <+> tr t <+> text "}"
joinText jt =
sep [case jt of
JInner -> text "inner"
JLeft -> text "left"
JRight -> text "right"
JFull -> text "full"
JCross -> text "cross"
,text "join"]
joinCond (Just (JoinOn e)) = text "on" <+> scalarExpr d e
joinCond (Just (JoinUsing es)) =
text "using" <+> parens (commaSep $ map name es)
joinCond Nothing = empty
maybeScalarExpr :: Dialect -> String -> Maybe ScalarExpr -> Doc
maybeScalarExpr d k = me
(\e -> sep [text k
,nest (length k + 1) $ scalarExpr d e])
grpBy :: Dialect -> [GroupingExpr] -> Doc
grpBy _ [] = empty
grpBy d gs = sep [text "group by"
,nest 9 $ commaSep $ map ge gs]
where
ge (SimpleGroup e) = scalarExpr d e
ge (GroupingParens g) = parens (commaSep $ map ge g)
ge (Cube es) = text "cube" <> parens (commaSep $ map ge es)
ge (Rollup es) = text "rollup" <> parens (commaSep $ map ge es)
ge (GroupingSets es) = text "grouping sets" <> parens (commaSep $ map ge es)
orderBy :: Dialect -> [SortSpec] -> Doc
orderBy _ [] = empty
orderBy dia os = sep [text "order by"
,nest 9 $ commaSep $ map f os]
where
f (SortSpec e d n) =
scalarExpr dia e
<+> (case d of
Asc -> text "asc"
Desc -> text "desc"
DirDefault -> empty)
<+> (case n of
NullsOrderDefault -> empty
NullsFirst -> text "nulls" <+> text "first"
NullsLast -> text "nulls" <+> text "last")
-- = statements
statement :: Dialect -> Statement -> Doc
-- == ddl
statement _ (CreateSchema nm) =
text "create" <+> text "schema" <+> names nm
statement d (CreateTable nm cds) =
text "create" <+> text "table" <+> names nm
<+> parens (commaSep $ map cd cds)
where
cd (TableConstraintDef n con) =
maybe empty (\s -> text "constraint" <+> names s) n
<+> tableConstraint d con
cd (TableColumnDef cd') = columnDef d cd'
statement d (AlterTable t act) =
texts ["alter","table"] <+> names t
<+> alterTableAction d act
statement _ (DropSchema nm db) =
text "drop" <+> text "schema" <+> names nm <+> dropBehav db
statement d (CreateDomain nm ty def cs) =
text "create" <+> text "domain" <+> names nm
<+> typeName ty
<+> maybe empty (\def' -> text "default" <+> scalarExpr d def') def
<+> sep (map con cs)
where
con (cn, e) =
maybe empty (\cn' -> text "constraint" <+> names cn') cn
<+> text "check" <> parens (scalarExpr d e)
statement d (AlterDomain nm act) =
texts ["alter","domain"]
<+> names nm
<+> a act
where
a (ADSetDefault v) = texts ["set","default"] <+> scalarExpr d v
a (ADDropDefault) = texts ["drop","default"]
a (ADAddConstraint cnm e) =
text "add"
<+> maybe empty (\cnm' -> text "constraint" <+> names cnm') cnm
<+> text "check" <> parens (scalarExpr d e)
a (ADDropConstraint cnm) = texts ["drop", "constraint"]
<+> names cnm
statement _ (DropDomain nm db) =
text "drop" <+> text "domain" <+> names nm <+> dropBehav db
statement _ (CreateSequence nm sgos) =
texts ["create","sequence"] <+> names nm
<+> sep (map sequenceGeneratorOption sgos)
statement _ (AlterSequence nm sgos) =
texts ["alter","sequence"] <+> names nm
<+> sep (map sequenceGeneratorOption sgos)
statement _ (DropSequence nm db) =
text "drop" <+> text "sequence" <+> names nm <+> dropBehav db
statement d (CreateAssertion nm ex) =
texts ["create","assertion"] <+> names nm
<+> text "check" <+> parens (scalarExpr d ex)
statement _ (DropAssertion nm db) =
text "drop" <+> text "assertion" <+> names nm <+> dropBehav db
statement _ (CreateIndex un nm tbl cols) =
texts (if un
then ["create","unique","index"]
else ["create","index"])
<+> names nm
<+> text "on"
<+> names tbl
<+> parens (commaSep $ map name cols)
-- == dml
statement d (SelectStatement q) = queryExpr d q
statement d (Delete t a w) =
text "delete" <+> text "from"
<+> names t <+> maybe empty (\x -> text "as" <+> name x) a
<+> maybeScalarExpr d "where" w
statement _ (Truncate t ir) =
text "truncate" <+> text "table" <+> names t
<+> case ir of
DefaultIdentityRestart -> empty
ContinueIdentity -> text "continue" <+> text "identity"
RestartIdentity -> text "restart" <+> text "identity"
statement d (Insert t cs s) =
text "insert" <+> text "into" <+> names t
<+> maybe empty (\cs' -> parens (commaSep $ map name cs')) cs
<+> case s of
DefaultInsertValues -> text "default" <+> text "values"
InsertQuery q -> queryExpr d q
statement d (Update t a sts whr) =
text "update" <+> names t
<+> maybe empty (\x -> text "as" <+> name x) a
<+> text "set" <+> commaSep (map sc sts)
<+> maybeScalarExpr d "where" whr
where
sc (Set tg v) = names tg <+> text "=" <+> scalarExpr d v
sc (SetMultiple ts vs) = parens (commaSep $ map names ts) <+> text "="
<+> parens (commaSep $ map (scalarExpr d) vs)
statement _ (DropTable n b) =
text "drop" <+> text "table" <+> names n <+> dropBehav b
statement d (CreateView r nm al q co) =
text "create" <+> (if r then text "recursive" else empty)
<+> text "view" <+> names nm
<+> (maybe empty (\al' -> parens $ commaSep $ map name al')) al
<+> text "as"
<+> queryExpr d q
<+> case co of
Nothing -> empty
Just DefaultCheckOption -> texts ["with", "check", "option"]
Just CascadedCheckOption -> texts ["with", "cascaded", "check", "option"]
Just LocalCheckOption -> texts ["with", "local", "check", "option"]
statement _ (DropView n b) =
text "drop" <+> text "view" <+> names n <+> dropBehav b
-- == transactions
statement _ StartTransaction =
texts ["start", "transaction"]
statement _ (Savepoint nm) =
text "savepoint" <+> name nm
statement _ (ReleaseSavepoint nm) =
texts ["release", "savepoint"] <+> name nm
statement _ Commit =
text "commit"
statement _ (Rollback mn) =
text "rollback"
<+> maybe empty (\n -> texts ["to","savepoint"] <+> name n) mn
-- == access control
statement _ (GrantPrivilege pas po rs go) =
text "grant" <+> commaSep (map privAct pas)
<+> text "on" <+> privObj po
<+> text "to" <+> commaSep (map name rs)
<+> grantOpt go
where
grantOpt WithGrantOption = texts ["with","grant","option"]
grantOpt WithoutGrantOption = empty
statement _ (GrantRole rs trs ao) =
text "grant" <+> commaSep (map name rs)
<+> text "to" <+> commaSep (map name trs)
<+> adminOpt ao
where
adminOpt WithAdminOption = texts ["with","admin","option"]
adminOpt WithoutAdminOption = empty
statement _ (CreateRole nm) =
texts ["create","role"] <+> name nm
statement _ (DropRole nm) =
texts ["drop","role"] <+> name nm
statement _ (RevokePrivilege go pas po rs db) =
text "revoke"
<+> grantOptFor go
<+> commaSep (map privAct pas)
<+> text "on" <+> privObj po
<+> text "from" <+> commaSep (map name rs)
<+> dropBehav db
where
grantOptFor GrantOptionFor = texts ["grant","option","for"]
grantOptFor NoGrantOptionFor = empty
statement _ (RevokeRole ao rs trs db) =
text "revoke"
<+> adminOptFor ao
<+> commaSep (map name rs)
<+> text "from" <+> commaSep (map name trs)
<+> dropBehav db
where
adminOptFor AdminOptionFor = texts ["admin","option","for"]
adminOptFor NoAdminOptionFor = empty
statement _ (StatementComment cs) = vcat $ map comment cs
statement _ EmptyStatement = empty
{-
== sessions
== extras
-}
dropBehav :: DropBehaviour -> Doc
dropBehav DefaultDropBehaviour = empty
dropBehav Cascade = text "cascade"
dropBehav Restrict = text "restrict"
columnDef :: Dialect -> ColumnDef -> Doc
columnDef d (ColumnDef n t mdef cons) =
name n <+> typeName t
<+> case mdef of
Nothing -> empty
Just (DefaultClause def) ->
text "default" <+> scalarExpr d def
Just (GenerationClause e) ->
texts ["generated","always","as"] <+> parens (scalarExpr d e)
Just (IdentityColumnSpec w o) ->
text "generated"
<+> (case w of
GeneratedAlways -> text "always"
GeneratedByDefault -> text "by" <+> text "default")
<+> text "as" <+> text "identity"
<+> (case o of
[] -> empty
os -> parens (sep $ map sequenceGeneratorOption os))
<+> sep (map cdef cons)
where
cdef (ColConstraintDef cnm con) =
maybe empty (\s -> text "constraint" <+> names s) cnm
<+> pcon con
pcon ColNotNullConstraint = texts ["not","null"]
pcon ColUniqueConstraint = text "unique"
pcon (ColPrimaryKeyConstraint autoincrement) =
texts $ ["primary","key"] ++ ["autoincrement"|autoincrement]
pcon (ColCheckConstraint v) = text "check" <+> parens (scalarExpr d v)
pcon (ColReferencesConstraint tb c m u del) =
text "references"
<+> names tb
<+> maybe empty (\c' -> parens (name c')) c
<+> refMatch m
<+> refAct "update" u
<+> refAct "delete" del
sequenceGeneratorOption :: SequenceGeneratorOption -> Doc
sequenceGeneratorOption (SGODataType t) =
text "as" <+> typeName t
sequenceGeneratorOption (SGORestart mi) =
text "restart" <+> maybe empty (\mi' -> texts ["with", show mi']) mi
sequenceGeneratorOption (SGOStartWith i) = texts ["start", "with", show i]
sequenceGeneratorOption (SGOIncrementBy i) = texts ["increment", "by", show i]
sequenceGeneratorOption (SGOMaxValue i) = texts ["maxvalue", show i]
sequenceGeneratorOption SGONoMaxValue = texts ["no", "maxvalue"]
sequenceGeneratorOption (SGOMinValue i) = texts ["minvalue", show i]
sequenceGeneratorOption SGONoMinValue = texts ["no", "minvalue"]
sequenceGeneratorOption SGOCycle = text "cycle"
sequenceGeneratorOption SGONoCycle = text "no cycle"
refMatch :: ReferenceMatch -> Doc
refMatch m = case m of
DefaultReferenceMatch -> empty
MatchFull -> texts ["match", "full"]
MatchPartial -> texts ["match","partial"]
MatchSimple -> texts ["match", "simple"]
refAct :: String -> ReferentialAction -> Doc
refAct t a = case a of
DefaultReferentialAction -> empty
RefCascade -> texts ["on", t, "cascade"]
RefSetNull -> texts ["on", t, "set", "null"]
RefSetDefault -> texts ["on", t, "set", "default"]
RefRestrict -> texts ["on", t, "restrict"]
RefNoAction -> texts ["on", t, "no", "action"]
alterTableAction :: Dialect -> AlterTableAction -> Doc
alterTableAction d (AddColumnDef cd) =
texts ["add", "column"] <+> columnDef d cd
alterTableAction d (AlterColumnSetDefault n v) =
texts ["alter", "column"]
<+> name n
<+> texts ["set","default"] <+> scalarExpr d v
alterTableAction _ (AlterColumnDropDefault n) =
texts ["alter", "column"]
<+> name n
<+> texts ["drop","default"]
alterTableAction _ (AlterColumnSetNotNull n) =
texts ["alter", "column"]
<+> name n
<+> texts ["set","not","null"]
alterTableAction _ (AlterColumnDropNotNull n) =
texts ["alter", "column"]
<+> name n
<+> texts ["drop","not","null"]
alterTableAction _ (AlterColumnSetDataType n t) =
texts ["alter", "column"]
<+> name n
<+> texts ["set","data","Type"]
<+> typeName t
alterTableAction _ (DropColumn n b) =
texts ["drop", "column"]
<+> name n
<+> dropBehav b
alterTableAction d (AddTableConstraintDef n con) =
text "add"
<+> maybe empty (\s -> text "constraint" <+> names s) n
<+> tableConstraint d con
alterTableAction _ (DropTableConstraintDef n b) =
texts ["drop", "constraint"]
<+> names n
<+> dropBehav b
tableConstraint :: Dialect -> TableConstraint -> Doc
tableConstraint _ (TableUniqueConstraint ns) =
text "unique" <+> parens (commaSep $ map name ns)
tableConstraint _ (TablePrimaryKeyConstraint ns) =
texts ["primary","key"] <+> parens (commaSep $ map name ns)
tableConstraint _ (TableReferencesConstraint cs t tcs m u del) =
texts ["foreign", "key"]
<+> parens (commaSep $ map name cs)
<+> text "references"
<+> names t
<+> maybe empty (\c' -> parens (commaSep $ map name c')) tcs
<+> refMatch m
<+> refAct "update" u
<+> refAct "delete" del
tableConstraint d (TableCheckConstraint v) = text "check" <+> parens (scalarExpr d v)
privAct :: PrivilegeAction -> Doc
privAct PrivAll = texts ["all","privileges"]
privAct (PrivSelect cs) = text "select" <+> maybeColList cs
privAct (PrivInsert cs) = text "insert" <+> maybeColList cs
privAct (PrivUpdate cs) = text "update" <+> maybeColList cs
privAct (PrivReferences cs) = text "references" <+> maybeColList cs
privAct PrivDelete = text "delete"
privAct PrivUsage = text "usage"
privAct PrivTrigger = text "trigger"
privAct PrivExecute = text "execute"
maybeColList :: [Name] -> Doc
maybeColList cs =
if null cs
then empty
else parens (commaSep $ map name cs)
privObj :: PrivilegeObject -> Doc
privObj (PrivTable nm) = names nm
privObj (PrivDomain nm) = text "domain" <+> names nm
privObj (PrivType nm) = text "type" <+> names nm
privObj (PrivSequence nm) = text "sequence" <+> names nm
privObj (PrivFunction nm) = texts ["specific", "function"] <+> names nm
-- = utils
commaSep :: [Doc] -> Doc
commaSep ds = sep $ punctuate comma ds
me :: (a -> Doc) -> Maybe a -> Doc
me = maybe empty
comment :: Comment -> Doc
comment (BlockComment str) = text "/*" <+> text str <+> text "*/"
texts :: [String] -> Doc
texts ts = sep $ map text ts

View file

@ -1,844 +0,0 @@
> -- | These is the pretty printing functions, which produce SQL
> -- source from ASTs. The code attempts to format the output in a
> -- readable way.
> module Language.SQL.SimpleSQL.Pretty
> (prettyQueryExpr
> ,prettyScalarExpr
> ,prettyStatement
> ,prettyStatements
> ) where
> import Prelude hiding ((<>))
TODO: there should be more comments in this file, especially the bits
which have been changed to try to improve the layout of the output.
Try to do this when this code is ported to a modern pretty printing lib.
> --import Language.SQL.SimpleSQL.Dialect
> import Text.PrettyPrint (render, vcat, text, (<>), (<+>), empty, parens,
> nest, Doc, punctuate, comma, sep, quotes,
> brackets,hcat)
> import Data.Maybe (maybeToList, catMaybes)
> import Data.List (intercalate)
> import Language.SQL.SimpleSQL.Syntax
> import Language.SQL.SimpleSQL.Dialect
> -- | Convert a query expr ast to concrete syntax.
> prettyQueryExpr :: Dialect -> QueryExpr -> String
> prettyQueryExpr d = render . queryExpr d
> -- | Convert a value expr ast to concrete syntax.
> prettyScalarExpr :: Dialect -> ScalarExpr -> String
> prettyScalarExpr d = render . scalarExpr d
> -- | A terminating semicolon.
> terminator :: Doc
> terminator = text ";\n"
> -- | Convert a statement ast to concrete syntax.
> prettyStatement :: Dialect -> Statement -> String
> prettyStatement _ EmptyStatement = render terminator
> prettyStatement d s = render (statement d s)
> -- | Convert a list of statements to concrete syntax. A semicolon
> -- is inserted after each statement.
> prettyStatements :: Dialect -> [Statement] -> String
> prettyStatements d = render . vcat . map prettyStatementWithSemicolon
> where
> prettyStatementWithSemicolon :: Statement -> Doc
> prettyStatementWithSemicolon s = statement d s <> terminator
= scalar expressions
> scalarExpr :: Dialect -> ScalarExpr -> Doc
> scalarExpr _ (StringLit s e t) = text s <> text t <> text e
> scalarExpr _ (NumLit s) = text s
> scalarExpr _ (IntervalLit s v f t) =
> text "interval"
> <+> me (\x -> text $ case x of
> Plus -> "+"
> Minus -> "-") s
> <+> quotes (text v)
> <+> intervalTypeField f
> <+> me (\x -> text "to" <+> intervalTypeField x) t
> scalarExpr _ (Iden i) = names i
> scalarExpr _ Star = text "*"
> scalarExpr _ Parameter = text "?"
> scalarExpr _ (PositionalArg n) = text $ "$" ++ show n
> scalarExpr _ (HostParameter p i) =
> text p
> <+> me (\i' -> text "indicator" <+> text i') i
> scalarExpr d (App f es) = names f <> parens (commaSep (map (scalarExpr d) es))
> scalarExpr dia (AggregateApp f d es od fil) =
> names f
> <> parens ((case d of
> Distinct -> text "distinct"
> All -> text "all"
> SQDefault -> empty)
> <+> commaSep (map (scalarExpr dia) es)
> <+> orderBy dia od)
> <+> me (\x -> text "filter"
> <+> parens (text "where" <+> scalarExpr dia x)) fil
> scalarExpr d (AggregateAppGroup f es od) =
> names f
> <> parens (commaSep (map (scalarExpr d) es))
> <+> if null od
> then empty
> else text "within group" <+> parens (orderBy d od)
> scalarExpr d (WindowApp f es pb od fr) =
> names f <> parens (commaSep $ map (scalarExpr d) es)
> <+> text "over"
> <+> parens ((case pb of
> [] -> empty
> _ -> text "partition by"
> <+> nest 13 (commaSep $ map (scalarExpr d) pb))
> <+> orderBy d od
> <+> me frd fr)
> where
> frd (FrameFrom rs fp) = rsd rs <+> fpd fp
> frd (FrameBetween rs fps fpe) =
> rsd rs <+> text "between" <+> fpd fps
> <+> text "and" <+> fpd fpe
> rsd rs = case rs of
> FrameRows -> text "rows"
> FrameRange -> text "range"
> fpd UnboundedPreceding = text "unbounded preceding"
> fpd UnboundedFollowing = text "unbounded following"
> fpd Current = text "current row"
> fpd (Preceding e) = scalarExpr d e <+> text "preceding"
> fpd (Following e) = scalarExpr d e <+> text "following"
> scalarExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name Nothing "between"]
> ,[Name Nothing "not between"]] =
> sep [scalarExpr dia a
> ,names nm <+> scalarExpr dia b
> ,nest (length (unnames nm) + 1) $ text "and" <+> scalarExpr dia c]
> scalarExpr d (SpecialOp [Name Nothing "rowctor"] as) =
> parens $ commaSep $ map (scalarExpr d) as
> scalarExpr d (SpecialOp nm es) =
> names nm <+> parens (commaSep $ map (scalarExpr d) es)
> scalarExpr d (SpecialOpK nm fs as) =
> names nm <> parens (sep $ catMaybes
> (fmap (scalarExpr d) fs
> : map (\(n,e) -> Just (text n <+> scalarExpr d e)) as))
> scalarExpr d (PrefixOp f e) = names f <+> scalarExpr d e
> scalarExpr d (PostfixOp f e) = scalarExpr d e <+> names f
> scalarExpr d e@(BinOp _ op _) | op `elem` [[Name Nothing "and"]
> ,[Name Nothing "or"]] =
> -- special case for and, or, get all the ands so we can vcat them
> -- nicely
> case ands e of
> (e':es) -> vcat (scalarExpr d e'
> : map ((names op <+>) . scalarExpr d) es)
> [] -> empty -- shouldn't be possible
> where
> ands (BinOp a op' b) | op == op' = ands a ++ ands b
> ands x = [x]
> -- special case for . we don't use whitespace
> scalarExpr d (BinOp e0 [Name Nothing "."] e1) =
> scalarExpr d e0 <> text "." <> scalarExpr d e1
> scalarExpr d (BinOp e0 f e1) =
> scalarExpr d e0 <+> names f <+> scalarExpr d e1
> scalarExpr dia (Case t ws els) =
> sep $ [text "case" <+> me (scalarExpr dia) t]
> ++ map w ws
> ++ maybeToList (fmap e els)
> ++ [text "end"]
> where
> w (t0,t1) =
> text "when" <+> nest 5 (commaSep $ map (scalarExpr dia) t0)
> <+> text "then" <+> nest 5 (scalarExpr dia t1)
> e el = text "else" <+> nest 5 (scalarExpr dia el)
> scalarExpr d (Parens e) = parens $ scalarExpr d e
> scalarExpr d (Cast e tn) =
> text "cast" <> parens (sep [scalarExpr d e
> ,text "as"
> ,typeName tn])
> scalarExpr _ (TypedLit tn s) =
> typeName tn <+> quotes (text s)
> scalarExpr d (SubQueryExpr ty qe) =
> (case ty of
> SqSq -> empty
> SqExists -> text "exists"
> SqUnique -> text "unique"
> ) <+> parens (queryExpr d qe)
> scalarExpr d (QuantifiedComparison v c cp sq) =
> scalarExpr d v
> <+> names c
> <+> (text $ case cp of
> CPAny -> "any"
> CPSome -> "some"
> CPAll -> "all")
> <+> parens (queryExpr d sq)
> scalarExpr d (Match v u sq) =
> scalarExpr d v
> <+> text "match"
> <+> (if u then text "unique" else empty)
> <+> parens (queryExpr d sq)
> scalarExpr d (In b se x) =
> scalarExpr d se <+>
> (if b then empty else text "not")
> <+> text "in"
> <+> parens (nest (if b then 3 else 7) $
> case x of
> InList es -> commaSep $ map (scalarExpr d) es
> InQueryExpr qe -> queryExpr d qe)
> scalarExpr d (Array v es) =
> scalarExpr d v <> brackets (commaSep $ map (scalarExpr d) es)
> scalarExpr d (ArrayCtor q) =
> text "array" <> parens (queryExpr d q)
> scalarExpr d (MultisetCtor es) =
> text "multiset" <> brackets (commaSep $ map (scalarExpr d) es)
> scalarExpr d (MultisetQueryCtor q) =
> text "multiset" <> parens (queryExpr d q)
> scalarExpr d (MultisetBinOp a c q b) =
> sep
> [scalarExpr d a
> ,text "multiset"
> ,text $ case c of
> Union -> "union"
> Intersect -> "intersect"
> Except -> "except"
> ,case q of
> SQDefault -> empty
> All -> text "all"
> Distinct -> text "distinct"
> ,scalarExpr d b]
> {-scalarExpr d (Escape v e) =
> scalarExpr d v <+> text "escape" <+> text [e]
> scalarExpr d (UEscape v e) =
> scalarExpr d v <+> text "uescape" <+> text [e]-}
> scalarExpr d (Collate v c) =
> scalarExpr d v <+> text "collate" <+> names c
> scalarExpr _ (NextValueFor ns) =
> text "next value for" <+> names ns
> scalarExpr d (VEComment cmt v) =
> vcat $ map comment cmt ++ [scalarExpr d v]
> scalarExpr _ (OdbcLiteral t s) =
> text "{" <> lt t <+> quotes (text s) <> text "}"
> where
> lt OLDate = text "d"
> lt OLTime = text "t"
> lt OLTimestamp = text "ts"
> scalarExpr d (OdbcFunc e) =
> text "{fn" <+> scalarExpr d e <> text "}"
> scalarExpr d (Convert t e Nothing) =
> text "convert(" <> typeName t <> text "," <+> scalarExpr d e <> text ")"
> scalarExpr d (Convert t e (Just i)) =
> text "convert(" <> typeName t <> text "," <+> scalarExpr d e <> text "," <+> text (show i) <> text ")"
> unname :: Name -> String
> unname (Name Nothing n) = n
> unname (Name (Just (s,e)) n) =
> s ++ n ++ e
> unnames :: [Name] -> String
> unnames ns = intercalate "." $ map unname ns
> name :: Name -> Doc
> name (Name Nothing n) = text n
> name (Name (Just (s,e)) n) = text s <> text n <> text e
> names :: [Name] -> Doc
> names ns = hcat $ punctuate (text ".") $ map name ns
> typeName :: TypeName -> Doc
> typeName (TypeName t) = names t
> typeName (PrecTypeName t a) = names t <+> parens (text $ show a)
> typeName (PrecScaleTypeName t a b) =
> names t <+> parens (text (show a) <+> comma <+> text (show b))
> typeName (PrecLengthTypeName t i m u) =
> names t
> <> parens (text (show i)
> <> me (\x -> case x of
> PrecK -> text "K"
> PrecM -> text "M"
> PrecG -> text "G"
> PrecT -> text "T"
> PrecP -> text "P") m
> <+> me (\x -> case x of
> PrecCharacters -> text "CHARACTERS"
> PrecOctets -> text "OCTETS") u)
> typeName (CharTypeName t i cs col) =
> names t
> <> me (\x -> parens (text $ show x)) i
> <+> (if null cs
> then empty
> else text "character set" <+> names cs)
> <+> (if null col
> then empty
> else text "collate" <+> names col)
> typeName (TimeTypeName t i tz) =
> names t
> <> me (\x -> parens (text $ show x)) i
> <+> text (if tz
> then "with time zone"
> else "without time zone")
> typeName (RowTypeName cs) =
> text "row" <> parens (commaSep $ map f cs)
> where
> f (n,t) = name n <+> typeName t
> typeName (IntervalTypeName f t) =
> text "interval"
> <+> intervalTypeField f
> <+> me (\x -> text "to" <+> intervalTypeField x) t
> typeName (ArrayTypeName tn sz) =
> typeName tn <+> text "array" <+> me (brackets . text . show) sz
> typeName (MultisetTypeName tn) =
> typeName tn <+> text "multiset"
> intervalTypeField :: IntervalTypeField -> Doc
> intervalTypeField (Itf n p) =
> text n
> <+> me (\(x,x1) ->
> parens (text (show x)
> <+> me (\y -> (sep [comma,text (show y)])) x1)) p
= query expressions
> queryExpr :: Dialect -> QueryExpr -> Doc
> queryExpr dia (Select d sl fr wh gb hv od off fe) =
> sep [text "select"
> ,case d of
> SQDefault -> empty
> All -> text "all"
> Distinct -> text "distinct"
> ,nest 7 $ sep [selectList dia sl]
> ,from dia fr
> ,maybeScalarExpr dia "where" wh
> ,grpBy dia gb
> ,maybeScalarExpr dia "having" hv
> ,orderBy dia od
> ,me (\e -> text "offset" <+> scalarExpr dia e <+> text "rows") off
> ,fetchFirst
> ]
> where
> fetchFirst =
> me (\e -> if diLimit dia
> then text "limit" <+> scalarExpr dia e
> else text "fetch first" <+> scalarExpr dia e
> <+> text "rows only") fe
> queryExpr dia (QueryExprSetOp q1 ct d c q2) =
> sep [queryExpr dia q1
> ,text (case ct of
> Union -> "union"
> Intersect -> "intersect"
> Except -> "except")
> <+> case d of
> SQDefault -> empty
> All -> text "all"
> Distinct -> text "distinct"
> <+> case c of
> Corresponding -> text "corresponding"
> Respectively -> empty
> ,queryExpr dia q2]
> queryExpr d (With rc withs qe) =
> text "with" <+> (if rc then text "recursive" else empty)
> <+> vcat [nest 5
> (vcat $ punctuate comma $ flip map withs $ \(n,q) ->
> withAlias n <+> text "as" <+> parens (queryExpr d q))
> ,queryExpr d qe]
> where
> withAlias (Alias nm cols) = name nm
> <+> me (parens . commaSep . map name) cols
> queryExpr d (Values vs) =
> text "values"
> <+> nest 7 (commaSep (map (parens . commaSep . map (scalarExpr d)) vs))
> queryExpr _ (Table t) = text "table" <+> names t
> queryExpr d (QEComment cmt v) =
> vcat $ map comment cmt ++ [queryExpr d v]
> alias :: Alias -> Doc
> alias (Alias nm cols) =
> text "as" <+> name nm
> <+> me (parens . commaSep . map name) cols
> selectList :: Dialect -> [(ScalarExpr,Maybe Name)] -> Doc
> selectList d is = commaSep $ map si is
> where
> si (e,al) = scalarExpr d e <+> me als al
> als al = text "as" <+> name al
> from :: Dialect -> [TableRef] -> Doc
> from _ [] = empty
> from d ts =
> sep [text "from"
> ,nest 5 $ vcat $ punctuate comma $ map tr ts]
> where
> tr (TRSimple t) = names t
> tr (TRLateral t) = text "lateral" <+> tr t
> tr (TRFunction f as) =
> names f <> parens (commaSep $ map (scalarExpr d) as)
> tr (TRAlias t a) = sep [tr t, alias a]
> tr (TRParens t) = parens $ tr t
> tr (TRQueryExpr q) = parens $ queryExpr d q
> tr (TRJoin t0 b jt t1 jc) =
> sep [tr t0
> ,if b then text "natural" else empty
> ,joinText jt <+> tr t1
> ,joinCond jc]
> tr (TROdbc t) = text "{oj" <+> tr t <+> text "}"
> joinText jt =
> sep [case jt of
> JInner -> text "inner"
> JLeft -> text "left"
> JRight -> text "right"
> JFull -> text "full"
> JCross -> text "cross"
> ,text "join"]
> joinCond (Just (JoinOn e)) = text "on" <+> scalarExpr d e
> joinCond (Just (JoinUsing es)) =
> text "using" <+> parens (commaSep $ map name es)
> joinCond Nothing = empty
> maybeScalarExpr :: Dialect -> String -> Maybe ScalarExpr -> Doc
> maybeScalarExpr d k = me
> (\e -> sep [text k
> ,nest (length k + 1) $ scalarExpr d e])
> grpBy :: Dialect -> [GroupingExpr] -> Doc
> grpBy _ [] = empty
> grpBy d gs = sep [text "group by"
> ,nest 9 $ commaSep $ map ge gs]
> where
> ge (SimpleGroup e) = scalarExpr d e
> ge (GroupingParens g) = parens (commaSep $ map ge g)
> ge (Cube es) = text "cube" <> parens (commaSep $ map ge es)
> ge (Rollup es) = text "rollup" <> parens (commaSep $ map ge es)
> ge (GroupingSets es) = text "grouping sets" <> parens (commaSep $ map ge es)
> orderBy :: Dialect -> [SortSpec] -> Doc
> orderBy _ [] = empty
> orderBy dia os = sep [text "order by"
> ,nest 9 $ commaSep $ map f os]
> where
> f (SortSpec e d n) =
> scalarExpr dia e
> <+> (case d of
> Asc -> text "asc"
> Desc -> text "desc"
> DirDefault -> empty)
> <+> (case n of
> NullsOrderDefault -> empty
> NullsFirst -> text "nulls" <+> text "first"
> NullsLast -> text "nulls" <+> text "last")
= statements
> statement :: Dialect -> Statement -> Doc
== ddl
> statement _ (CreateSchema nm) =
> text "create" <+> text "schema" <+> names nm
> statement d (CreateTable nm cds) =
> text "create" <+> text "table" <+> names nm
> <+> parens (commaSep $ map cd cds)
> where
> cd (TableConstraintDef n con) =
> maybe empty (\s -> text "constraint" <+> names s) n
> <+> tableConstraint d con
> cd (TableColumnDef cd') = columnDef d cd'
> statement d (AlterTable t act) =
> texts ["alter","table"] <+> names t
> <+> alterTableAction d act
> statement _ (DropSchema nm db) =
> text "drop" <+> text "schema" <+> names nm <+> dropBehav db
> statement d (CreateDomain nm ty def cs) =
> text "create" <+> text "domain" <+> names nm
> <+> typeName ty
> <+> maybe empty (\def' -> text "default" <+> scalarExpr d def') def
> <+> sep (map con cs)
> where
> con (cn, e) =
> maybe empty (\cn' -> text "constraint" <+> names cn') cn
> <+> text "check" <> parens (scalarExpr d e)
> statement d (AlterDomain nm act) =
> texts ["alter","domain"]
> <+> names nm
> <+> a act
> where
> a (ADSetDefault v) = texts ["set","default"] <+> scalarExpr d v
> a (ADDropDefault) = texts ["drop","default"]
> a (ADAddConstraint cnm e) =
> text "add"
> <+> maybe empty (\cnm' -> text "constraint" <+> names cnm') cnm
> <+> text "check" <> parens (scalarExpr d e)
> a (ADDropConstraint cnm) = texts ["drop", "constraint"]
> <+> names cnm
> statement _ (DropDomain nm db) =
> text "drop" <+> text "domain" <+> names nm <+> dropBehav db
> statement _ (CreateSequence nm sgos) =
> texts ["create","sequence"] <+> names nm
> <+> sep (map sequenceGeneratorOption sgos)
> statement _ (AlterSequence nm sgos) =
> texts ["alter","sequence"] <+> names nm
> <+> sep (map sequenceGeneratorOption sgos)
> statement _ (DropSequence nm db) =
> text "drop" <+> text "sequence" <+> names nm <+> dropBehav db
> statement d (CreateAssertion nm ex) =
> texts ["create","assertion"] <+> names nm
> <+> text "check" <+> parens (scalarExpr d ex)
> statement _ (DropAssertion nm db) =
> text "drop" <+> text "assertion" <+> names nm <+> dropBehav db
> statement _ (CreateIndex un nm tbl cols) =
> texts (if un
> then ["create","unique","index"]
> else ["create","index"])
> <+> names nm
> <+> text "on"
> <+> names tbl
> <+> parens (commaSep $ map name cols)
== dml
> statement d (SelectStatement q) = queryExpr d q
> statement d (Delete t a w) =
> text "delete" <+> text "from"
> <+> names t <+> maybe empty (\x -> text "as" <+> name x) a
> <+> maybeScalarExpr d "where" w
> statement _ (Truncate t ir) =
> text "truncate" <+> text "table" <+> names t
> <+> case ir of
> DefaultIdentityRestart -> empty
> ContinueIdentity -> text "continue" <+> text "identity"
> RestartIdentity -> text "restart" <+> text "identity"
> statement d (Insert t cs s) =
> text "insert" <+> text "into" <+> names t
> <+> maybe empty (\cs' -> parens (commaSep $ map name cs')) cs
> <+> case s of
> DefaultInsertValues -> text "default" <+> text "values"
> InsertQuery q -> queryExpr d q
> statement d (Update t a sts whr) =
> text "update" <+> names t
> <+> maybe empty (\x -> text "as" <+> name x) a
> <+> text "set" <+> commaSep (map sc sts)
> <+> maybeScalarExpr d "where" whr
> where
> sc (Set tg v) = names tg <+> text "=" <+> scalarExpr d v
> sc (SetMultiple ts vs) = parens (commaSep $ map names ts) <+> text "="
> <+> parens (commaSep $ map (scalarExpr d) vs)
> statement _ (DropTable n b) =
> text "drop" <+> text "table" <+> names n <+> dropBehav b
> statement d (CreateView r nm al q co) =
> text "create" <+> (if r then text "recursive" else empty)
> <+> text "view" <+> names nm
> <+> (maybe empty (\al' -> parens $ commaSep $ map name al')) al
> <+> text "as"
> <+> queryExpr d q
> <+> case co of
> Nothing -> empty
> Just DefaultCheckOption -> texts ["with", "check", "option"]
> Just CascadedCheckOption -> texts ["with", "cascaded", "check", "option"]
> Just LocalCheckOption -> texts ["with", "local", "check", "option"]
> statement _ (DropView n b) =
> text "drop" <+> text "view" <+> names n <+> dropBehav b
== transactions
> statement _ StartTransaction =
> texts ["start", "transaction"]
> statement _ (Savepoint nm) =
> text "savepoint" <+> name nm
> statement _ (ReleaseSavepoint nm) =
> texts ["release", "savepoint"] <+> name nm
> statement _ Commit =
> text "commit"
> statement _ (Rollback mn) =
> text "rollback"
> <+> maybe empty (\n -> texts ["to","savepoint"] <+> name n) mn
== access control
> statement _ (GrantPrivilege pas po rs go) =
> text "grant" <+> commaSep (map privAct pas)
> <+> text "on" <+> privObj po
> <+> text "to" <+> commaSep (map name rs)
> <+> grantOpt go
> where
> grantOpt WithGrantOption = texts ["with","grant","option"]
> grantOpt WithoutGrantOption = empty
> statement _ (GrantRole rs trs ao) =
> text "grant" <+> commaSep (map name rs)
> <+> text "to" <+> commaSep (map name trs)
> <+> adminOpt ao
> where
> adminOpt WithAdminOption = texts ["with","admin","option"]
> adminOpt WithoutAdminOption = empty
> statement _ (CreateRole nm) =
> texts ["create","role"] <+> name nm
> statement _ (DropRole nm) =
> texts ["drop","role"] <+> name nm
> statement _ (RevokePrivilege go pas po rs db) =
> text "revoke"
> <+> grantOptFor go
> <+> commaSep (map privAct pas)
> <+> text "on" <+> privObj po
> <+> text "from" <+> commaSep (map name rs)
> <+> dropBehav db
> where
> grantOptFor GrantOptionFor = texts ["grant","option","for"]
> grantOptFor NoGrantOptionFor = empty
> statement _ (RevokeRole ao rs trs db) =
> text "revoke"
> <+> adminOptFor ao
> <+> commaSep (map name rs)
> <+> text "from" <+> commaSep (map name trs)
> <+> dropBehav db
> where
> adminOptFor AdminOptionFor = texts ["admin","option","for"]
> adminOptFor NoAdminOptionFor = empty
> statement _ (StatementComment cs) = vcat $ map comment cs
> statement _ EmptyStatement = empty
== sessions
== extras
> dropBehav :: DropBehaviour -> Doc
> dropBehav DefaultDropBehaviour = empty
> dropBehav Cascade = text "cascade"
> dropBehav Restrict = text "restrict"
> columnDef :: Dialect -> ColumnDef -> Doc
> columnDef d (ColumnDef n t mdef cons) =
> name n <+> typeName t
> <+> case mdef of
> Nothing -> empty
> Just (DefaultClause def) ->
> text "default" <+> scalarExpr d def
> Just (GenerationClause e) ->
> texts ["generated","always","as"] <+> parens (scalarExpr d e)
> Just (IdentityColumnSpec w o) ->
> text "generated"
> <+> (case w of
> GeneratedAlways -> text "always"
> GeneratedByDefault -> text "by" <+> text "default")
> <+> text "as" <+> text "identity"
> <+> (case o of
> [] -> empty
> os -> parens (sep $ map sequenceGeneratorOption os))
> <+> sep (map cdef cons)
> where
> cdef (ColConstraintDef cnm con) =
> maybe empty (\s -> text "constraint" <+> names s) cnm
> <+> pcon con
> pcon ColNotNullConstraint = texts ["not","null"]
> pcon ColUniqueConstraint = text "unique"
> pcon (ColPrimaryKeyConstraint autoincrement) =
> texts $ ["primary","key"] ++ ["autoincrement"|autoincrement]
> pcon (ColCheckConstraint v) = text "check" <+> parens (scalarExpr d v)
> pcon (ColReferencesConstraint tb c m u del) =
> text "references"
> <+> names tb
> <+> maybe empty (\c' -> parens (name c')) c
> <+> refMatch m
> <+> refAct "update" u
> <+> refAct "delete" del
> sequenceGeneratorOption :: SequenceGeneratorOption -> Doc
> sequenceGeneratorOption (SGODataType t) =
> text "as" <+> typeName t
> sequenceGeneratorOption (SGORestart mi) =
> text "restart" <+> maybe empty (\mi' -> texts ["with", show mi']) mi
> sequenceGeneratorOption (SGOStartWith i) = texts ["start", "with", show i]
> sequenceGeneratorOption (SGOIncrementBy i) = texts ["increment", "by", show i]
> sequenceGeneratorOption (SGOMaxValue i) = texts ["maxvalue", show i]
> sequenceGeneratorOption SGONoMaxValue = texts ["no", "maxvalue"]
> sequenceGeneratorOption (SGOMinValue i) = texts ["minvalue", show i]
> sequenceGeneratorOption SGONoMinValue = texts ["no", "minvalue"]
> sequenceGeneratorOption SGOCycle = text "cycle"
> sequenceGeneratorOption SGONoCycle = text "no cycle"
> refMatch :: ReferenceMatch -> Doc
> refMatch m = case m of
> DefaultReferenceMatch -> empty
> MatchFull -> texts ["match", "full"]
> MatchPartial -> texts ["match","partial"]
> MatchSimple -> texts ["match", "simple"]
> refAct :: String -> ReferentialAction -> Doc
> refAct t a = case a of
> DefaultReferentialAction -> empty
> RefCascade -> texts ["on", t, "cascade"]
> RefSetNull -> texts ["on", t, "set", "null"]
> RefSetDefault -> texts ["on", t, "set", "default"]
> RefRestrict -> texts ["on", t, "restrict"]
> RefNoAction -> texts ["on", t, "no", "action"]
> alterTableAction :: Dialect -> AlterTableAction -> Doc
> alterTableAction d (AddColumnDef cd) =
> texts ["add", "column"] <+> columnDef d cd
> alterTableAction d (AlterColumnSetDefault n v) =
> texts ["alter", "column"]
> <+> name n
> <+> texts ["set","default"] <+> scalarExpr d v
> alterTableAction _ (AlterColumnDropDefault n) =
> texts ["alter", "column"]
> <+> name n
> <+> texts ["drop","default"]
> alterTableAction _ (AlterColumnSetNotNull n) =
> texts ["alter", "column"]
> <+> name n
> <+> texts ["set","not","null"]
> alterTableAction _ (AlterColumnDropNotNull n) =
> texts ["alter", "column"]
> <+> name n
> <+> texts ["drop","not","null"]
> alterTableAction _ (AlterColumnSetDataType n t) =
> texts ["alter", "column"]
> <+> name n
> <+> texts ["set","data","Type"]
> <+> typeName t
> alterTableAction _ (DropColumn n b) =
> texts ["drop", "column"]
> <+> name n
> <+> dropBehav b
> alterTableAction d (AddTableConstraintDef n con) =
> text "add"
> <+> maybe empty (\s -> text "constraint" <+> names s) n
> <+> tableConstraint d con
> alterTableAction _ (DropTableConstraintDef n b) =
> texts ["drop", "constraint"]
> <+> names n
> <+> dropBehav b
> tableConstraint :: Dialect -> TableConstraint -> Doc
> tableConstraint _ (TableUniqueConstraint ns) =
> text "unique" <+> parens (commaSep $ map name ns)
> tableConstraint _ (TablePrimaryKeyConstraint ns) =
> texts ["primary","key"] <+> parens (commaSep $ map name ns)
> tableConstraint _ (TableReferencesConstraint cs t tcs m u del) =
> texts ["foreign", "key"]
> <+> parens (commaSep $ map name cs)
> <+> text "references"
> <+> names t
> <+> maybe empty (\c' -> parens (commaSep $ map name c')) tcs
> <+> refMatch m
> <+> refAct "update" u
> <+> refAct "delete" del
> tableConstraint d (TableCheckConstraint v) = text "check" <+> parens (scalarExpr d v)
> privAct :: PrivilegeAction -> Doc
> privAct PrivAll = texts ["all","privileges"]
> privAct (PrivSelect cs) = text "select" <+> maybeColList cs
> privAct (PrivInsert cs) = text "insert" <+> maybeColList cs
> privAct (PrivUpdate cs) = text "update" <+> maybeColList cs
> privAct (PrivReferences cs) = text "references" <+> maybeColList cs
> privAct PrivDelete = text "delete"
> privAct PrivUsage = text "usage"
> privAct PrivTrigger = text "trigger"
> privAct PrivExecute = text "execute"
> maybeColList :: [Name] -> Doc
> maybeColList cs =
> if null cs
> then empty
> else parens (commaSep $ map name cs)
> privObj :: PrivilegeObject -> Doc
> privObj (PrivTable nm) = names nm
> privObj (PrivDomain nm) = text "domain" <+> names nm
> privObj (PrivType nm) = text "type" <+> names nm
> privObj (PrivSequence nm) = text "sequence" <+> names nm
> privObj (PrivFunction nm) = texts ["specific", "function"] <+> names nm
= utils
> commaSep :: [Doc] -> Doc
> commaSep ds = sep $ punctuate comma ds
> me :: (a -> Doc) -> Maybe a -> Doc
> me = maybe empty
> comment :: Comment -> Doc
> comment (BlockComment str) = text "/*" <+> text str <+> text "*/"
> texts :: [String] -> Doc
> texts ts = sep $ map text ts

View file

@ -0,0 +1,744 @@
-- | The AST for SQL.
{-# LANGUAGE DeriveDataTypeable #-}
module Language.SQL.SimpleSQL.Syntax
(-- * Scalar expressions
ScalarExpr(..)
,Name(..)
,TypeName(..)
,IntervalTypeField(..)
,Sign(..)
,PrecMultiplier(..)
,PrecUnits(..)
,SetQuantifier(..)
,SortSpec(..)
,Direction(..)
,NullsOrder(..)
,InPredValue(..)
,SubQueryExprType(..)
,CompPredQuantifier(..)
,Frame(..)
,FrameRows(..)
,FramePos(..)
,OdbcLiteralType(..)
-- * Query expressions
,QueryExpr(..)
,makeSelect
,SetOperatorName(..)
,Corresponding(..)
,Alias(..)
,GroupingExpr(..)
-- ** From
,TableRef(..)
,JoinType(..)
,JoinCondition(..)
-- * Statements
,Statement(..)
,DropBehaviour(..)
,IdentityRestart(..)
,InsertSource(..)
,SetClause(..)
,TableElement(..)
,ColumnDef(..)
,DefaultClause(..)
,IdentityWhen(..)
,SequenceGeneratorOption(..)
,ColConstraintDef(..)
,AutoincrementClause
,ColConstraint(..)
,TableConstraint(..)
,ReferenceMatch(..)
,ReferentialAction(..)
,AlterTableAction(..)
,CheckOption(..)
,AlterDomainAction(..)
,AdminOption(..)
,GrantOption(..)
,PrivilegeObject(..)
,PrivilegeAction(..)
,AdminOptionFor(..)
,GrantOptionFor(..)
-- * Comment
,Comment(..)
) where
import Data.Data
-- | Represents a value expression. This is used for the expressions
-- in select lists. It is also used for expressions in where, group
-- by, having, order by and so on.
data ScalarExpr
= -- | a numeric literal optional decimal point, e+-
-- integral exponent, e.g
--
-- * 10
--
-- * 10.
--
-- * .1
--
-- * 10.1
--
-- * 1e5
--
-- * 12.34e-6
NumLit String
-- | string literal, with the start and end quote
-- e.g. 'test' -> StringLit "'" "'" "test"
| StringLit String String String
-- | text of interval literal, units of interval precision,
-- e.g. interval 3 days (3)
| IntervalLit
{ilSign :: Maybe Sign -- ^ if + or - used
,ilLiteral :: String -- ^ literal text
,ilFrom :: IntervalTypeField
,ilTo :: Maybe IntervalTypeField
}
-- | prefix 'typed literal', e.g. int '42'
| TypedLit TypeName String
-- | identifier with parts separated by dots
| Iden [Name]
-- | star, as in select *, t.*, count(*)
| Star
| Parameter -- ^ Represents a ? in a parameterized query
| PositionalArg Int -- ^ Represents an e.g. $1 in a parameterized query
| HostParameter String (Maybe String) -- ^ represents a host
-- parameter, e.g. :a. The
-- Maybe String is for the
-- indicator, e.g. :var
-- indicator :nl
-- | Infix binary operators. This is used for symbol operators
-- (a + b), keyword operators (a and b) and multiple keyword
-- operators (a is similar to b)
| BinOp ScalarExpr [Name] ScalarExpr
-- | Prefix unary operators. This is used for symbol
-- operators, keyword operators and multiple keyword operators.
| PrefixOp [Name] ScalarExpr
-- | Postfix unary operators. This is used for symbol
-- operators, keyword operators and multiple keyword operators.
| PostfixOp [Name] ScalarExpr
-- | Used for ternary, mixfix and other non orthodox
-- operators. Currently used for row constructors, and for
-- between.
| SpecialOp [Name] [ScalarExpr]
-- | function application (anything that looks like c style
-- function application syntactically)
| App [Name] [ScalarExpr]
-- | aggregate application, which adds distinct or all, and
-- order by, to regular function application
| AggregateApp
{aggName :: [Name] -- ^ aggregate function name
,aggDistinct :: SetQuantifier -- ^ distinct
,aggArgs :: [ScalarExpr]-- ^ args
,aggOrderBy :: [SortSpec] -- ^ order by
,aggFilter :: Maybe ScalarExpr -- ^ filter
}
-- | aggregates with within group
| AggregateAppGroup
{aggName :: [Name] -- ^ aggregate function name
,aggArgs :: [ScalarExpr] -- ^ args
,aggGroup :: [SortSpec] -- ^ within group
}
-- | window application, which adds over (partition by a order
-- by b) to regular function application. Explicit frames are
-- not currently supported
| WindowApp
{wnName :: [Name] -- ^ window function name
,wnArgs :: [ScalarExpr] -- ^ args
,wnPartition :: [ScalarExpr] -- ^ partition by
,wnOrderBy :: [SortSpec] -- ^ order by
,wnFrame :: Maybe Frame -- ^ frame clause
}
-- | Used for the operators which look like functions
-- except the arguments are separated by keywords instead
-- of commas. The maybe is for the first unnamed argument
-- if it is present, and the list is for the keyword argument
-- pairs.
| SpecialOpK [Name] (Maybe ScalarExpr) [(String,ScalarExpr)]
-- | cast(a as typename)
| Cast ScalarExpr TypeName
-- | convert expression to given datatype @CONVERT(data_type(length), expression, style)@
| Convert TypeName ScalarExpr (Maybe Integer)
-- | case expression. both flavours supported
| Case
{caseTest :: Maybe ScalarExpr -- ^ test value
,caseWhens :: [([ScalarExpr],ScalarExpr)] -- ^ when branches
,caseElse :: Maybe ScalarExpr -- ^ else value
}
| Parens ScalarExpr
-- | in list literal and in subquery, if the bool is false it
-- means not in was used ('a not in (1,2)')
| In Bool ScalarExpr InPredValue
-- | exists, all, any, some subqueries
| SubQueryExpr SubQueryExprType QueryExpr
| QuantifiedComparison
ScalarExpr
[Name] -- operator
CompPredQuantifier
QueryExpr
| Match ScalarExpr Bool -- true if unique
QueryExpr
| Array ScalarExpr [ScalarExpr] -- ^ represents an array
-- access expression, or an array ctor
-- e.g. a[3]. The first
-- scalarExpr is the array, the
-- second is the subscripts/ctor args
| ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
{-
todo: special syntax for like, similar with escape - escape cannot go
in other places
-}
-- | Escape ScalarExpr Char
-- | UEscape ScalarExpr Char
| Collate ScalarExpr [Name]
| MultisetBinOp ScalarExpr SetOperatorName SetQuantifier ScalarExpr
| MultisetCtor [ScalarExpr]
| MultisetQueryCtor QueryExpr
| NextValueFor [Name]
| VEComment [Comment] ScalarExpr
| OdbcLiteral OdbcLiteralType String
-- ^ an odbc literal e.g. {d '2000-01-01'}
| OdbcFunc ScalarExpr
-- ^ an odbc function call e.g. {fn CHARACTER_LENGTH('test')}
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents an identifier name, which can be quoted or unquoted.
-- examples:
--
-- * test -> Name Nothing "test"
-- * "test" -> Name (Just "\"","\"") "test"
-- * `something` -> Name (Just ("`","`") "something"
-- * [ms] -> Name (Just ("[","]") "ms"
data Name = Name (Maybe (String,String)) String
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents a type name, used in casts.
data TypeName
= TypeName [Name]
| PrecTypeName [Name] Integer
| PrecScaleTypeName [Name] Integer Integer
| PrecLengthTypeName [Name] Integer (Maybe PrecMultiplier) (Maybe PrecUnits)
-- precision, characterset, collate
| CharTypeName [Name] (Maybe Integer) [Name] [Name]
| TimeTypeName [Name] (Maybe Integer) Bool -- true == with time zone
| RowTypeName [(Name,TypeName)]
| IntervalTypeName IntervalTypeField (Maybe IntervalTypeField)
| ArrayTypeName TypeName (Maybe Integer)
| MultisetTypeName TypeName
deriving (Eq,Show,Read,Data,Typeable)
data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer))
deriving (Eq,Show,Read,Data,Typeable)
data Sign = Plus | Minus
deriving (Eq,Show,Read,Data,Typeable)
data PrecMultiplier = PrecK | PrecM | PrecG | PrecT | PrecP
deriving (Eq,Show,Read,Data,Typeable)
data PrecUnits = PrecCharacters
| PrecOctets
deriving (Eq,Show,Read,Data,Typeable)
-- | Used for 'expr in (scalar expression list)', and 'expr in
-- (subquery)' syntax.
data InPredValue = InList [ScalarExpr]
| InQueryExpr QueryExpr
deriving (Eq,Show,Read,Data,Typeable)
-- not sure if scalar subquery, exists and unique should be represented like this
-- | A subquery in a scalar expression.
data SubQueryExprType
= -- | exists (query expr)
SqExists
-- | unique (query expr)
| SqUnique
-- | a scalar subquery
| SqSq
deriving (Eq,Show,Read,Data,Typeable)
data CompPredQuantifier
= CPAny
| CPSome
| CPAll
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents one field in an order by list.
data SortSpec = SortSpec ScalarExpr Direction NullsOrder
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents 'nulls first' or 'nulls last' in an order by clause.
data NullsOrder = NullsOrderDefault
| NullsFirst
| NullsLast
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents the frame clause of a window
-- this can be [range | rows] frame_start
-- or [range | rows] between frame_start and frame_end
data Frame = FrameFrom FrameRows FramePos
| FrameBetween FrameRows FramePos FramePos
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents whether a window frame clause is over rows or ranges.
data FrameRows = FrameRows | FrameRange
deriving (Eq,Show,Read,Data,Typeable)
-- | represents the start or end of a frame
data FramePos = UnboundedPreceding
| Preceding ScalarExpr
| Current
| Following ScalarExpr
| UnboundedFollowing
deriving (Eq,Show,Read,Data,Typeable)
-- | the type of an odbc literal (e.g. {d '2000-01-01'}),
-- correpsonding to the letter after the opening {
data OdbcLiteralType = OLDate
| OLTime
| OLTimestamp
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents a query expression, which can be:
--
-- * a regular select;
--
-- * a set operator (union, except, intersect);
--
-- * a common table expression (with);
--
-- * a table value constructor (values (1,2),(3,4)); or
--
-- * an explicit table (table t).
data QueryExpr
= Select
{qeSetQuantifier :: SetQuantifier
,qeSelectList :: [(ScalarExpr,Maybe Name)]
-- ^ the expressions and the column aliases
{-
TODO: consider breaking this up. The SQL grammar has
queryexpr = select <select list> [<table expression>]
table expression = <from> [where] [groupby] [having] ...
This would make some things a bit cleaner?
-}
,qeFrom :: [TableRef]
,qeWhere :: Maybe ScalarExpr
,qeGroupBy :: [GroupingExpr]
,qeHaving :: Maybe ScalarExpr
,qeOrderBy :: [SortSpec]
,qeOffset :: Maybe ScalarExpr
,qeFetchFirst :: Maybe ScalarExpr
}
| QueryExprSetOp
{qe0 :: QueryExpr
,qeCombOp :: SetOperatorName
,qeSetQuantifier :: SetQuantifier
,qeCorresponding :: Corresponding
,qe1 :: QueryExpr
}
| With
{qeWithRecursive :: Bool
,qeViews :: [(Alias,QueryExpr)]
,qeQueryExpression :: QueryExpr}
| Values [[ScalarExpr]]
| Table [Name]
| QEComment [Comment] QueryExpr
deriving (Eq,Show,Read,Data,Typeable)
{-
TODO: add queryexpr parens to deal with e.g.
(select 1 union select 2) union select 3
I'm not sure if this is valid syntax or not.
-}
-- | Helper/'default' value for query exprs to make creating query
-- expr values a little easier. It is defined like this:
--
-- > makeSelect :: QueryExpr
-- > makeSelect = Select {qeSetQuantifier = SQDefault
-- > ,qeSelectList = []
-- > ,qeFrom = []
-- > ,qeWhere = Nothing
-- > ,qeGroupBy = []
-- > ,qeHaving = Nothing
-- > ,qeOrderBy = []
-- > ,qeOffset = Nothing
-- > ,qeFetchFirst = Nothing}
makeSelect :: QueryExpr
makeSelect = Select {qeSetQuantifier = SQDefault
,qeSelectList = []
,qeFrom = []
,qeWhere = Nothing
,qeGroupBy = []
,qeHaving = Nothing
,qeOrderBy = []
,qeOffset = Nothing
,qeFetchFirst = Nothing}
-- | Represents the Distinct or All keywords, which can be used
-- before a select list, in an aggregate/window function
-- application, or in a query expression set operator.
data SetQuantifier = SQDefault | Distinct | All deriving (Eq,Show,Read,Data,Typeable)
-- | The direction for a column in order by.
data Direction = DirDefault | Asc | Desc deriving (Eq,Show,Read,Data,Typeable)
-- | Query expression set operators.
data SetOperatorName = Union | Except | Intersect deriving (Eq,Show,Read,Data,Typeable)
-- | Corresponding, an option for the set operators.
data Corresponding = Corresponding | Respectively deriving (Eq,Show,Read,Data,Typeable)
-- | Represents an item in a group by clause.
data GroupingExpr
= GroupingParens [GroupingExpr]
| Cube [GroupingExpr]
| Rollup [GroupingExpr]
| GroupingSets [GroupingExpr]
| SimpleGroup ScalarExpr
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents a entry in the csv of tables in the from clause.
data TableRef = -- | from t / from s.t
TRSimple [Name]
-- | from a join b, the bool is true if natural was used
| TRJoin TableRef Bool JoinType TableRef (Maybe JoinCondition)
-- | from (a)
| TRParens TableRef
-- | from a as b(c,d)
| TRAlias TableRef Alias
-- | from (query expr)
| TRQueryExpr QueryExpr
-- | from function(args)
| TRFunction [Name] [ScalarExpr]
-- | from lateral t
| TRLateral TableRef
-- | ODBC {oj t1 left outer join t2 on expr} syntax
| TROdbc TableRef
deriving (Eq,Show,Read,Data,Typeable)
-- | Represents an alias for a table valued expression, used in with
-- queries and in from alias, e.g. select a from t u, select a from t u(b),
-- with a(c) as select 1, select * from a.
data Alias = Alias Name (Maybe [Name])
deriving (Eq,Show,Read,Data,Typeable)
-- | The type of a join.
data JoinType = JInner | JLeft | JRight | JFull | JCross
deriving (Eq,Show,Read,Data,Typeable)
-- | The join condition.
data JoinCondition = JoinOn ScalarExpr -- ^ on expr
| JoinUsing [Name] -- ^ using (column list)
deriving (Eq,Show,Read,Data,Typeable)
-- ---------------------------
data Statement =
-- ddl
CreateSchema [Name]
| DropSchema [Name] DropBehaviour
| CreateTable [Name] [TableElement]
| AlterTable [Name] AlterTableAction
| DropTable [Name] DropBehaviour
| CreateIndex Bool [Name] [Name] [Name]
| CreateView Bool [Name] (Maybe [Name])
QueryExpr (Maybe CheckOption)
| DropView [Name] DropBehaviour
| CreateDomain [Name] TypeName (Maybe ScalarExpr)
[(Maybe [Name], ScalarExpr)]
| AlterDomain [Name] AlterDomainAction
| DropDomain [Name] DropBehaviour
-- probably won't do character sets, collations
-- and translations because I think they are too far from
-- reality
{- | CreateCharacterSet
| DropCharacterSet
| CreateCollation
| DropCollation
| CreateTranslation
| DropTranslation -}
| CreateAssertion [Name] ScalarExpr
| DropAssertion [Name] DropBehaviour
{- | CreateTrigger
| DropTrigger
| CreateType
| AlterType
| DropType
-- routine stuff? TODO
| CreateCast
| DropCast
| CreateOrdering
| DropOrdering -}
-- transforms
| CreateSequence [Name] [SequenceGeneratorOption]
| AlterSequence [Name] [SequenceGeneratorOption]
| DropSequence [Name] DropBehaviour
-- dml
| SelectStatement QueryExpr
{- | DeclareCursor
| OpenCursor
| FetchCursor
| CloseCursor
| SelectInto -}
-- | DeletePositioned
| Delete [Name] (Maybe Name) (Maybe ScalarExpr)
| Truncate [Name] IdentityRestart
| Insert [Name] (Maybe [Name]) InsertSource
-- | Merge
| Update [Name] (Maybe Name) [SetClause] (Maybe ScalarExpr)
{- | TemporaryTable
| FreeLocator
| HoldLocator -}
-- access control
| GrantPrivilege [PrivilegeAction] PrivilegeObject [Name] GrantOption
| GrantRole [Name] [Name] AdminOption
| CreateRole Name
| DropRole Name
| RevokePrivilege GrantOptionFor [PrivilegeAction] PrivilegeObject
[Name] DropBehaviour
| RevokeRole AdminOptionFor [Name] [Name] DropBehaviour
-- transaction management
| StartTransaction
-- | SetTransaction
-- | SetContraints
| Savepoint Name
| ReleaseSavepoint Name
| Commit
| Rollback (Maybe Name)
-- session
{- | SetSessionCharacteristics
| SetSessionAuthorization
| SetRole
| SetTimeZone
| SetCatalog
| SetSchema
| SetNames
| SetTransform
| SetCollation -}
| StatementComment [Comment]
| EmptyStatement
deriving (Eq,Show,Read,Data,Typeable)
data DropBehaviour =
Restrict
| Cascade
| DefaultDropBehaviour
deriving (Eq,Show,Read,Data,Typeable)
data IdentityRestart =
ContinueIdentity
| RestartIdentity
| DefaultIdentityRestart
deriving (Eq,Show,Read,Data,Typeable)
data InsertSource =
InsertQuery QueryExpr
| DefaultInsertValues
deriving (Eq,Show,Read,Data,Typeable)
data SetClause =
Set [Name] ScalarExpr
| SetMultiple [[Name]] [ScalarExpr]
deriving (Eq,Show,Read,Data,Typeable)
data TableElement =
TableColumnDef ColumnDef
| TableConstraintDef (Maybe [Name]) TableConstraint
deriving (Eq,Show,Read,Data,Typeable)
data ColumnDef = ColumnDef Name TypeName
(Maybe DefaultClause)
[ColConstraintDef]
-- (Maybe CollateClause)
deriving (Eq,Show,Read,Data,Typeable)
data ColConstraintDef =
ColConstraintDef (Maybe [Name]) ColConstraint
-- (Maybe [ConstraintCharacteristics])
deriving (Eq,Show,Read,Data,Typeable)
type AutoincrementClause = Bool
data ColConstraint =
ColNullableConstraint
| ColNotNullConstraint
| ColUniqueConstraint
| ColPrimaryKeyConstraint AutoincrementClause
| ColReferencesConstraint [Name] (Maybe Name)
ReferenceMatch
ReferentialAction
ReferentialAction
| ColCheckConstraint ScalarExpr
deriving (Eq,Show,Read,Data,Typeable)
data TableConstraint =
TableUniqueConstraint [Name]
| TablePrimaryKeyConstraint [Name]
| TableReferencesConstraint [Name] [Name] (Maybe [Name])
ReferenceMatch
ReferentialAction
ReferentialAction
| TableCheckConstraint ScalarExpr
deriving (Eq,Show,Read,Data,Typeable)
data ReferenceMatch =
DefaultReferenceMatch
| MatchFull
| MatchPartial
| MatchSimple
deriving (Eq,Show,Read,Data,Typeable)
data ReferentialAction =
DefaultReferentialAction
| RefCascade
| RefSetNull
| RefSetDefault
| RefRestrict
| RefNoAction
deriving (Eq,Show,Read,Data,Typeable)
data AlterTableAction =
AddColumnDef ColumnDef
| AlterColumnSetDefault Name ScalarExpr
| AlterColumnDropDefault Name
| AlterColumnSetNotNull Name
| AlterColumnDropNotNull Name
| AlterColumnSetDataType Name TypeName
{- | AlterColumnAlterIdentity
| AlterColumnDropIdentity
| AlterColumnDropColumnGeneration-}
| DropColumn Name DropBehaviour
| AddTableConstraintDef (Maybe [Name]) TableConstraint
-- | AlterTableConstraintDef
| DropTableConstraintDef [Name] DropBehaviour
deriving (Eq,Show,Read,Data,Typeable)
{-data ConstraintCharacteristics =
ConstraintCharacteristics
ConstraintCheckTime
Deferrable
ConstraintEnforcement
deriving (Eq,Show,Read,Data,Typeable)
data ConstraintCheckTime =
DefaultConstraintCheckTime
| InitiallyDeferred
| InitiallyImmeditate
deriving (Eq,Show,Read,Data,Typeable)
data Deferrable =
DefaultDefferable
| Deferrable
| NotDeferrable
deriving (Eq,Show,Read,Data,Typeable)
data ConstraintEnforcement =
DefaultConstraintEnforcement
| Enforced
| NotEnforced
deriving (Eq,Show,Read,Data,Typeable) -}
{-data TableConstraintDef
deriving (Eq,Show,Read,Data,Typeable) -}
data DefaultClause =
DefaultClause ScalarExpr
| IdentityColumnSpec IdentityWhen [SequenceGeneratorOption]
| GenerationClause ScalarExpr
deriving (Eq,Show,Read,Data,Typeable)
data IdentityWhen =
GeneratedAlways
| GeneratedByDefault
deriving (Eq,Show,Read,Data,Typeable)
data SequenceGeneratorOption =
SGODataType TypeName
| SGOStartWith Integer
| SGORestart (Maybe Integer)
| SGOIncrementBy Integer
| SGOMaxValue Integer
| SGONoMaxValue
| SGOMinValue Integer
| SGONoMinValue
| SGOCycle
| SGONoCycle
deriving (Eq,Show,Read,Data,Typeable)
data CheckOption =
DefaultCheckOption
| CascadedCheckOption
| LocalCheckOption
deriving (Eq,Show,Read,Data,Typeable)
data AlterDomainAction =
ADSetDefault ScalarExpr
| ADDropDefault
| ADAddConstraint (Maybe [Name]) ScalarExpr
| ADDropConstraint [Name]
deriving (Eq,Show,Read,Data,Typeable)
data AdminOption = WithAdminOption | WithoutAdminOption
deriving (Eq,Show,Read,Data,Typeable)
data GrantOption = WithGrantOption | WithoutGrantOption
deriving (Eq,Show,Read,Data,Typeable)
data AdminOptionFor = AdminOptionFor | NoAdminOptionFor
deriving (Eq,Show,Read,Data,Typeable)
data GrantOptionFor = GrantOptionFor | NoGrantOptionFor
deriving (Eq,Show,Read,Data,Typeable)
data PrivilegeObject =
PrivTable [Name]
| PrivDomain [Name]
| PrivType [Name]
| PrivSequence [Name]
| PrivFunction [Name]
deriving (Eq,Show,Read,Data,Typeable)
data PrivilegeAction =
PrivAll
| PrivSelect [Name]
| PrivDelete
| PrivInsert [Name]
| PrivUpdate [Name]
| PrivReferences [Name]
| PrivUsage
| PrivTrigger
| PrivExecute
deriving (Eq,Show,Read,Data,Typeable)
-- | Comment. Useful when generating SQL code programmatically. The
-- parser doesn't produce these.
newtype Comment = BlockComment String
deriving (Eq,Show,Read,Data,Typeable)

View file

@ -1,738 +0,0 @@
> -- | The AST for SQL.
> {-# LANGUAGE DeriveDataTypeable #-}
> module Language.SQL.SimpleSQL.Syntax
> (-- * Scalar expressions
> ScalarExpr(..)
> ,Name(..)
> ,TypeName(..)
> ,IntervalTypeField(..)
> ,Sign(..)
> ,PrecMultiplier(..)
> ,PrecUnits(..)
> ,SetQuantifier(..)
> ,SortSpec(..)
> ,Direction(..)
> ,NullsOrder(..)
> ,InPredValue(..)
> ,SubQueryExprType(..)
> ,CompPredQuantifier(..)
> ,Frame(..)
> ,FrameRows(..)
> ,FramePos(..)
> ,OdbcLiteralType(..)
> -- * Query expressions
> ,QueryExpr(..)
> ,makeSelect
> ,SetOperatorName(..)
> ,Corresponding(..)
> ,Alias(..)
> ,GroupingExpr(..)
> -- ** From
> ,TableRef(..)
> ,JoinType(..)
> ,JoinCondition(..)
> -- * Statements
> ,Statement(..)
> ,DropBehaviour(..)
> ,IdentityRestart(..)
> ,InsertSource(..)
> ,SetClause(..)
> ,TableElement(..)
> ,ColumnDef(..)
> ,DefaultClause(..)
> ,IdentityWhen(..)
> ,SequenceGeneratorOption(..)
> ,ColConstraintDef(..)
> ,AutoincrementClause
> ,ColConstraint(..)
> ,TableConstraint(..)
> ,ReferenceMatch(..)
> ,ReferentialAction(..)
> ,AlterTableAction(..)
> ,CheckOption(..)
> ,AlterDomainAction(..)
> ,AdminOption(..)
> ,GrantOption(..)
> ,PrivilegeObject(..)
> ,PrivilegeAction(..)
> ,AdminOptionFor(..)
> ,GrantOptionFor(..)
> -- * Comment
> ,Comment(..)
> ) where
> import Data.Data
> -- | Represents a value expression. This is used for the expressions
> -- in select lists. It is also used for expressions in where, group
> -- by, having, order by and so on.
> data ScalarExpr
> = -- | a numeric literal optional decimal point, e+-
> -- integral exponent, e.g
> --
> -- * 10
> --
> -- * 10.
> --
> -- * .1
> --
> -- * 10.1
> --
> -- * 1e5
> --
> -- * 12.34e-6
> NumLit String
> -- | string literal, with the start and end quote
> -- e.g. 'test' -> StringLit "'" "'" "test"
> | StringLit String String String
> -- | text of interval literal, units of interval precision,
> -- e.g. interval 3 days (3)
> | IntervalLit
> {ilSign :: Maybe Sign -- ^ if + or - used
> ,ilLiteral :: String -- ^ literal text
> ,ilFrom :: IntervalTypeField
> ,ilTo :: Maybe IntervalTypeField
> }
> -- | prefix 'typed literal', e.g. int '42'
> | TypedLit TypeName String
> -- | identifier with parts separated by dots
> | Iden [Name]
> -- | star, as in select *, t.*, count(*)
> | Star
> | Parameter -- ^ Represents a ? in a parameterized query
> | PositionalArg Int -- ^ Represents an e.g. $1 in a parameterized query
> | HostParameter String (Maybe String) -- ^ represents a host
> -- parameter, e.g. :a. The
> -- Maybe String is for the
> -- indicator, e.g. :var
> -- indicator :nl
> -- | Infix binary operators. This is used for symbol operators
> -- (a + b), keyword operators (a and b) and multiple keyword
> -- operators (a is similar to b)
> | BinOp ScalarExpr [Name] ScalarExpr
> -- | Prefix unary operators. This is used for symbol
> -- operators, keyword operators and multiple keyword operators.
> | PrefixOp [Name] ScalarExpr
> -- | Postfix unary operators. This is used for symbol
> -- operators, keyword operators and multiple keyword operators.
> | PostfixOp [Name] ScalarExpr
> -- | Used for ternary, mixfix and other non orthodox
> -- operators. Currently used for row constructors, and for
> -- between.
> | SpecialOp [Name] [ScalarExpr]
> -- | function application (anything that looks like c style
> -- function application syntactically)
> | App [Name] [ScalarExpr]
> -- | aggregate application, which adds distinct or all, and
> -- order by, to regular function application
> | AggregateApp
> {aggName :: [Name] -- ^ aggregate function name
> ,aggDistinct :: SetQuantifier -- ^ distinct
> ,aggArgs :: [ScalarExpr]-- ^ args
> ,aggOrderBy :: [SortSpec] -- ^ order by
> ,aggFilter :: Maybe ScalarExpr -- ^ filter
> }
> -- | aggregates with within group
> | AggregateAppGroup
> {aggName :: [Name] -- ^ aggregate function name
> ,aggArgs :: [ScalarExpr] -- ^ args
> ,aggGroup :: [SortSpec] -- ^ within group
> }
> -- | window application, which adds over (partition by a order
> -- by b) to regular function application. Explicit frames are
> -- not currently supported
> | WindowApp
> {wnName :: [Name] -- ^ window function name
> ,wnArgs :: [ScalarExpr] -- ^ args
> ,wnPartition :: [ScalarExpr] -- ^ partition by
> ,wnOrderBy :: [SortSpec] -- ^ order by
> ,wnFrame :: Maybe Frame -- ^ frame clause
> }
> -- | Used for the operators which look like functions
> -- except the arguments are separated by keywords instead
> -- of commas. The maybe is for the first unnamed argument
> -- if it is present, and the list is for the keyword argument
> -- pairs.
> | SpecialOpK [Name] (Maybe ScalarExpr) [(String,ScalarExpr)]
> -- | cast(a as typename)
> | Cast ScalarExpr TypeName
> -- | convert expression to given datatype @CONVERT(data_type(length), expression, style)@
> | Convert TypeName ScalarExpr (Maybe Integer)
> -- | case expression. both flavours supported
> | Case
> {caseTest :: Maybe ScalarExpr -- ^ test value
> ,caseWhens :: [([ScalarExpr],ScalarExpr)] -- ^ when branches
> ,caseElse :: Maybe ScalarExpr -- ^ else value
> }
> | Parens ScalarExpr
> -- | in list literal and in subquery, if the bool is false it
> -- means not in was used ('a not in (1,2)')
> | In Bool ScalarExpr InPredValue
> -- | exists, all, any, some subqueries
> | SubQueryExpr SubQueryExprType QueryExpr
> | QuantifiedComparison
> ScalarExpr
> [Name] -- operator
> CompPredQuantifier
> QueryExpr
> | Match ScalarExpr Bool -- true if unique
> QueryExpr
> | Array ScalarExpr [ScalarExpr] -- ^ represents an array
> -- access expression, or an array ctor
> -- e.g. a[3]. The first
> -- scalarExpr is the array, the
> -- second is the subscripts/ctor args
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
todo: special syntax for like, similar with escape - escape cannot go
in other places
> -- | Escape ScalarExpr Char
> -- | UEscape ScalarExpr Char
> | Collate ScalarExpr [Name]
> | MultisetBinOp ScalarExpr SetOperatorName SetQuantifier ScalarExpr
> | MultisetCtor [ScalarExpr]
> | MultisetQueryCtor QueryExpr
> | NextValueFor [Name]
> | VEComment [Comment] ScalarExpr
> | OdbcLiteral OdbcLiteralType String
> -- ^ an odbc literal e.g. {d '2000-01-01'}
> | OdbcFunc ScalarExpr
> -- ^ an odbc function call e.g. {fn CHARACTER_LENGTH('test')}
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents an identifier name, which can be quoted or unquoted.
> -- examples:
> --
> -- * test -> Name Nothing "test"
> -- * "test" -> Name (Just "\"","\"") "test"
> -- * `something` -> Name (Just ("`","`") "something"
> -- * [ms] -> Name (Just ("[","]") "ms"
> data Name = Name (Maybe (String,String)) String
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents a type name, used in casts.
> data TypeName
> = TypeName [Name]
> | PrecTypeName [Name] Integer
> | PrecScaleTypeName [Name] Integer Integer
> | PrecLengthTypeName [Name] Integer (Maybe PrecMultiplier) (Maybe PrecUnits)
> -- precision, characterset, collate
> | CharTypeName [Name] (Maybe Integer) [Name] [Name]
> | TimeTypeName [Name] (Maybe Integer) Bool -- true == with time zone
> | RowTypeName [(Name,TypeName)]
> | IntervalTypeName IntervalTypeField (Maybe IntervalTypeField)
> | ArrayTypeName TypeName (Maybe Integer)
> | MultisetTypeName TypeName
> deriving (Eq,Show,Read,Data,Typeable)
> data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer))
> deriving (Eq,Show,Read,Data,Typeable)
> data Sign = Plus | Minus
> deriving (Eq,Show,Read,Data,Typeable)
> data PrecMultiplier = PrecK | PrecM | PrecG | PrecT | PrecP
> deriving (Eq,Show,Read,Data,Typeable)
> data PrecUnits = PrecCharacters
> | PrecOctets
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Used for 'expr in (scalar expression list)', and 'expr in
> -- (subquery)' syntax.
> data InPredValue = InList [ScalarExpr]
> | InQueryExpr QueryExpr
> deriving (Eq,Show,Read,Data,Typeable)
not sure if scalar subquery, exists and unique should be represented like this
> -- | A subquery in a scalar expression.
> data SubQueryExprType
> = -- | exists (query expr)
> SqExists
> -- | unique (query expr)
> | SqUnique
> -- | a scalar subquery
> | SqSq
> deriving (Eq,Show,Read,Data,Typeable)
> data CompPredQuantifier
> = CPAny
> | CPSome
> | CPAll
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents one field in an order by list.
> data SortSpec = SortSpec ScalarExpr Direction NullsOrder
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents 'nulls first' or 'nulls last' in an order by clause.
> data NullsOrder = NullsOrderDefault
> | NullsFirst
> | NullsLast
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents the frame clause of a window
> -- this can be [range | rows] frame_start
> -- or [range | rows] between frame_start and frame_end
> data Frame = FrameFrom FrameRows FramePos
> | FrameBetween FrameRows FramePos FramePos
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents whether a window frame clause is over rows or ranges.
> data FrameRows = FrameRows | FrameRange
> deriving (Eq,Show,Read,Data,Typeable)
> -- | represents the start or end of a frame
> data FramePos = UnboundedPreceding
> | Preceding ScalarExpr
> | Current
> | Following ScalarExpr
> | UnboundedFollowing
> deriving (Eq,Show,Read,Data,Typeable)
> -- | the type of an odbc literal (e.g. {d '2000-01-01'}),
> -- correpsonding to the letter after the opening {
> data OdbcLiteralType = OLDate
> | OLTime
> | OLTimestamp
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents a query expression, which can be:
> --
> -- * a regular select;
> --
> -- * a set operator (union, except, intersect);
> --
> -- * a common table expression (with);
> --
> -- * a table value constructor (values (1,2),(3,4)); or
> --
> -- * an explicit table (table t).
> data QueryExpr
> = Select
> {qeSetQuantifier :: SetQuantifier
> ,qeSelectList :: [(ScalarExpr,Maybe Name)]
> -- ^ the expressions and the column aliases
TODO: consider breaking this up. The SQL grammar has
queryexpr = select <select list> [<table expression>]
table expression = <from> [where] [groupby] [having] ...
This would make some things a bit cleaner?
> ,qeFrom :: [TableRef]
> ,qeWhere :: Maybe ScalarExpr
> ,qeGroupBy :: [GroupingExpr]
> ,qeHaving :: Maybe ScalarExpr
> ,qeOrderBy :: [SortSpec]
> ,qeOffset :: Maybe ScalarExpr
> ,qeFetchFirst :: Maybe ScalarExpr
> }
> | QueryExprSetOp
> {qe0 :: QueryExpr
> ,qeCombOp :: SetOperatorName
> ,qeSetQuantifier :: SetQuantifier
> ,qeCorresponding :: Corresponding
> ,qe1 :: QueryExpr
> }
> | With
> {qeWithRecursive :: Bool
> ,qeViews :: [(Alias,QueryExpr)]
> ,qeQueryExpression :: QueryExpr}
> | Values [[ScalarExpr]]
> | Table [Name]
> | QEComment [Comment] QueryExpr
> deriving (Eq,Show,Read,Data,Typeable)
TODO: add queryexpr parens to deal with e.g.
(select 1 union select 2) union select 3
I'm not sure if this is valid syntax or not.
> -- | Helper/'default' value for query exprs to make creating query
> -- expr values a little easier. It is defined like this:
> --
> -- > makeSelect :: QueryExpr
> -- > makeSelect = Select {qeSetQuantifier = SQDefault
> -- > ,qeSelectList = []
> -- > ,qeFrom = []
> -- > ,qeWhere = Nothing
> -- > ,qeGroupBy = []
> -- > ,qeHaving = Nothing
> -- > ,qeOrderBy = []
> -- > ,qeOffset = Nothing
> -- > ,qeFetchFirst = Nothing}
> makeSelect :: QueryExpr
> makeSelect = Select {qeSetQuantifier = SQDefault
> ,qeSelectList = []
> ,qeFrom = []
> ,qeWhere = Nothing
> ,qeGroupBy = []
> ,qeHaving = Nothing
> ,qeOrderBy = []
> ,qeOffset = Nothing
> ,qeFetchFirst = Nothing}
> -- | Represents the Distinct or All keywords, which can be used
> -- before a select list, in an aggregate/window function
> -- application, or in a query expression set operator.
> data SetQuantifier = SQDefault | Distinct | All deriving (Eq,Show,Read,Data,Typeable)
> -- | The direction for a column in order by.
> data Direction = DirDefault | Asc | Desc deriving (Eq,Show,Read,Data,Typeable)
> -- | Query expression set operators.
> data SetOperatorName = Union | Except | Intersect deriving (Eq,Show,Read,Data,Typeable)
> -- | Corresponding, an option for the set operators.
> data Corresponding = Corresponding | Respectively deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents an item in a group by clause.
> data GroupingExpr
> = GroupingParens [GroupingExpr]
> | Cube [GroupingExpr]
> | Rollup [GroupingExpr]
> | GroupingSets [GroupingExpr]
> | SimpleGroup ScalarExpr
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents a entry in the csv of tables in the from clause.
> data TableRef = -- | from t / from s.t
> TRSimple [Name]
> -- | from a join b, the bool is true if natural was used
> | TRJoin TableRef Bool JoinType TableRef (Maybe JoinCondition)
> -- | from (a)
> | TRParens TableRef
> -- | from a as b(c,d)
> | TRAlias TableRef Alias
> -- | from (query expr)
> | TRQueryExpr QueryExpr
> -- | from function(args)
> | TRFunction [Name] [ScalarExpr]
> -- | from lateral t
> | TRLateral TableRef
> -- | ODBC {oj t1 left outer join t2 on expr} syntax
> | TROdbc TableRef
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents an alias for a table valued expression, used in with
> -- queries and in from alias, e.g. select a from t u, select a from t u(b),
> -- with a(c) as select 1, select * from a.
> data Alias = Alias Name (Maybe [Name])
> deriving (Eq,Show,Read,Data,Typeable)
> -- | The type of a join.
> data JoinType = JInner | JLeft | JRight | JFull | JCross
> deriving (Eq,Show,Read,Data,Typeable)
> -- | The join condition.
> data JoinCondition = JoinOn ScalarExpr -- ^ on expr
> | JoinUsing [Name] -- ^ using (column list)
> deriving (Eq,Show,Read,Data,Typeable)
---------------------------
> data Statement =
> -- ddl
> CreateSchema [Name]
> | DropSchema [Name] DropBehaviour
> | CreateTable [Name] [TableElement]
> | AlterTable [Name] AlterTableAction
> | DropTable [Name] DropBehaviour
> | CreateIndex Bool [Name] [Name] [Name]
> | CreateView Bool [Name] (Maybe [Name])
> QueryExpr (Maybe CheckOption)
> | DropView [Name] DropBehaviour
> | CreateDomain [Name] TypeName (Maybe ScalarExpr)
> [(Maybe [Name], ScalarExpr)]
> | AlterDomain [Name] AlterDomainAction
> | DropDomain [Name] DropBehaviour
> -- probably won't do character sets, collations
> -- and translations because I think they are too far from
> -- reality
> {- | CreateCharacterSet
> | DropCharacterSet
> | CreateCollation
> | DropCollation
> | CreateTranslation
> | DropTranslation -}
> | CreateAssertion [Name] ScalarExpr
> | DropAssertion [Name] DropBehaviour
> {- | CreateTrigger
> | DropTrigger
> | CreateType
> | AlterType
> | DropType
> -- routine stuff? TODO
> | CreateCast
> | DropCast
> | CreateOrdering
> | DropOrdering -}
> -- transforms
> | CreateSequence [Name] [SequenceGeneratorOption]
> | AlterSequence [Name] [SequenceGeneratorOption]
> | DropSequence [Name] DropBehaviour
> -- dml
> | SelectStatement QueryExpr
> {- | DeclareCursor
> | OpenCursor
> | FetchCursor
> | CloseCursor
> | SelectInto -}
> -- | DeletePositioned
> | Delete [Name] (Maybe Name) (Maybe ScalarExpr)
> | Truncate [Name] IdentityRestart
> | Insert [Name] (Maybe [Name]) InsertSource
> -- | Merge
> | Update [Name] (Maybe Name) [SetClause] (Maybe ScalarExpr)
> {- | TemporaryTable
> | FreeLocator
> | HoldLocator -}
> -- access control
> | GrantPrivilege [PrivilegeAction] PrivilegeObject [Name] GrantOption
> | GrantRole [Name] [Name] AdminOption
> | CreateRole Name
> | DropRole Name
> | RevokePrivilege GrantOptionFor [PrivilegeAction] PrivilegeObject
> [Name] DropBehaviour
> | RevokeRole AdminOptionFor [Name] [Name] DropBehaviour
> -- transaction management
> | StartTransaction
> -- | SetTransaction
> -- | SetContraints
> | Savepoint Name
> | ReleaseSavepoint Name
> | Commit
> | Rollback (Maybe Name)
> -- session
> {- | SetSessionCharacteristics
> | SetSessionAuthorization
> | SetRole
> | SetTimeZone
> | SetCatalog
> | SetSchema
> | SetNames
> | SetTransform
> | SetCollation -}
> | StatementComment [Comment]
> | EmptyStatement
> deriving (Eq,Show,Read,Data,Typeable)
> data DropBehaviour =
> Restrict
> | Cascade
> | DefaultDropBehaviour
> deriving (Eq,Show,Read,Data,Typeable)
> data IdentityRestart =
> ContinueIdentity
> | RestartIdentity
> | DefaultIdentityRestart
> deriving (Eq,Show,Read,Data,Typeable)
> data InsertSource =
> InsertQuery QueryExpr
> | DefaultInsertValues
> deriving (Eq,Show,Read,Data,Typeable)
> data SetClause =
> Set [Name] ScalarExpr
> | SetMultiple [[Name]] [ScalarExpr]
> deriving (Eq,Show,Read,Data,Typeable)
> data TableElement =
> TableColumnDef ColumnDef
> | TableConstraintDef (Maybe [Name]) TableConstraint
> deriving (Eq,Show,Read,Data,Typeable)
> data ColumnDef = ColumnDef Name TypeName
> (Maybe DefaultClause)
> [ColConstraintDef]
> -- (Maybe CollateClause)
> deriving (Eq,Show,Read,Data,Typeable)
> data ColConstraintDef =
> ColConstraintDef (Maybe [Name]) ColConstraint
> -- (Maybe [ConstraintCharacteristics])
> deriving (Eq,Show,Read,Data,Typeable)
> type AutoincrementClause = Bool
>
> data ColConstraint =
> ColNullableConstraint
> | ColNotNullConstraint
> | ColUniqueConstraint
> | ColPrimaryKeyConstraint AutoincrementClause
> | ColReferencesConstraint [Name] (Maybe Name)
> ReferenceMatch
> ReferentialAction
> ReferentialAction
> | ColCheckConstraint ScalarExpr
> deriving (Eq,Show,Read,Data,Typeable)
> data TableConstraint =
> TableUniqueConstraint [Name]
> | TablePrimaryKeyConstraint [Name]
> | TableReferencesConstraint [Name] [Name] (Maybe [Name])
> ReferenceMatch
> ReferentialAction
> ReferentialAction
> | TableCheckConstraint ScalarExpr
> deriving (Eq,Show,Read,Data,Typeable)
> data ReferenceMatch =
> DefaultReferenceMatch
> | MatchFull
> | MatchPartial
> | MatchSimple
> deriving (Eq,Show,Read,Data,Typeable)
> data ReferentialAction =
> DefaultReferentialAction
> | RefCascade
> | RefSetNull
> | RefSetDefault
> | RefRestrict
> | RefNoAction
> deriving (Eq,Show,Read,Data,Typeable)
> data AlterTableAction =
> AddColumnDef ColumnDef
> | AlterColumnSetDefault Name ScalarExpr
> | AlterColumnDropDefault Name
> | AlterColumnSetNotNull Name
> | AlterColumnDropNotNull Name
> | AlterColumnSetDataType Name TypeName
> {- | AlterColumnAlterIdentity
> | AlterColumnDropIdentity
> | AlterColumnDropColumnGeneration-}
> | DropColumn Name DropBehaviour
> | AddTableConstraintDef (Maybe [Name]) TableConstraint
> -- | AlterTableConstraintDef
> | DropTableConstraintDef [Name] DropBehaviour
> deriving (Eq,Show,Read,Data,Typeable)
> {-data ConstraintCharacteristics =
> ConstraintCharacteristics
> ConstraintCheckTime
> Deferrable
> ConstraintEnforcement
> deriving (Eq,Show,Read,Data,Typeable)
> data ConstraintCheckTime =
> DefaultConstraintCheckTime
> | InitiallyDeferred
> | InitiallyImmeditate
> deriving (Eq,Show,Read,Data,Typeable)
> data Deferrable =
> DefaultDefferable
> | Deferrable
> | NotDeferrable
> deriving (Eq,Show,Read,Data,Typeable)
> data ConstraintEnforcement =
> DefaultConstraintEnforcement
> | Enforced
> | NotEnforced
> deriving (Eq,Show,Read,Data,Typeable) -}
> {-data TableConstraintDef
> deriving (Eq,Show,Read,Data,Typeable) -}
> data DefaultClause =
> DefaultClause ScalarExpr
> | IdentityColumnSpec IdentityWhen [SequenceGeneratorOption]
> | GenerationClause ScalarExpr
> deriving (Eq,Show,Read,Data,Typeable)
> data IdentityWhen =
> GeneratedAlways
> | GeneratedByDefault
> deriving (Eq,Show,Read,Data,Typeable)
> data SequenceGeneratorOption =
> SGODataType TypeName
> | SGOStartWith Integer
> | SGORestart (Maybe Integer)
> | SGOIncrementBy Integer
> | SGOMaxValue Integer
> | SGONoMaxValue
> | SGOMinValue Integer
> | SGONoMinValue
> | SGOCycle
> | SGONoCycle
> deriving (Eq,Show,Read,Data,Typeable)
> data CheckOption =
> DefaultCheckOption
> | CascadedCheckOption
> | LocalCheckOption
> deriving (Eq,Show,Read,Data,Typeable)
> data AlterDomainAction =
> ADSetDefault ScalarExpr
> | ADDropDefault
> | ADAddConstraint (Maybe [Name]) ScalarExpr
> | ADDropConstraint [Name]
> deriving (Eq,Show,Read,Data,Typeable)
> data AdminOption = WithAdminOption | WithoutAdminOption
> deriving (Eq,Show,Read,Data,Typeable)
> data GrantOption = WithGrantOption | WithoutGrantOption
> deriving (Eq,Show,Read,Data,Typeable)
> data AdminOptionFor = AdminOptionFor | NoAdminOptionFor
> deriving (Eq,Show,Read,Data,Typeable)
> data GrantOptionFor = GrantOptionFor | NoGrantOptionFor
> deriving (Eq,Show,Read,Data,Typeable)
> data PrivilegeObject =
> PrivTable [Name]
> | PrivDomain [Name]
> | PrivType [Name]
> | PrivSequence [Name]
> | PrivFunction [Name]
> deriving (Eq,Show,Read,Data,Typeable)
> data PrivilegeAction =
> PrivAll
> | PrivSelect [Name]
> | PrivDelete
> | PrivInsert [Name]
> | PrivUpdate [Name]
> | PrivReferences [Name]
> | PrivUsage
> | PrivTrigger
> | PrivExecute
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Comment. Useful when generating SQL code programmatically. The
> -- parser doesn't produce these.
> newtype Comment = BlockComment String
> deriving (Eq,Show,Read,Data,Typeable)