From 020b33e729004b082423ef32dd72f04e5b04e747 Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Sat, 31 Aug 2019 13:40:23 +0100 Subject: [PATCH] improve the dialect handling to be more feature based --- Language/SQL/SimpleSQL/Dialect.lhs | 74 ++++++++++++++++++++---------- Language/SQL/SimpleSQL/Lex.lhs | 32 ++++++------- 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/Language/SQL/SimpleSQL/Dialect.lhs b/Language/SQL/SimpleSQL/Dialect.lhs index 59a5191..17229cf 100644 --- a/Language/SQL/SimpleSQL/Dialect.lhs +++ b/Language/SQL/SimpleSQL/Dialect.lhs @@ -5,7 +5,6 @@ Data types to represent different dialect options > {-# LANGUAGE DeriveDataTypeable #-} > module Language.SQL.SimpleSQL.Dialect > (Dialect(..) -> ,SyntaxFlavour(..) > ,ansi2011 > ,mysql > ,postgres @@ -16,49 +15,78 @@ Data types to represent different dialect options > import Data.Data - -hack for now, later will expand to flags on a feature by feature basis - -> data SyntaxFlavour = ANSI2011 -> | MySQL -> | Postgres -> | Oracle -> | SQLServer -> deriving (Eq,Show,Read,Data,Typeable) - > -- | Used to set the dialect used for parsing and pretty printing, > -- very unfinished at the moment. -> data Dialect = Dialect {diKeywords :: [String] -> ,diSyntaxFlavour :: SyntaxFlavour -> ,diFetchFirst :: Bool -> ,diLimit :: Bool -> ,diOdbc :: Bool} +> data Dialect = Dialect +> { -- | The list of reserved keywords +> diKeywords :: [String] +> -- | does the dialect support ansi fetch first syntax +> ,diFetchFirst :: Bool +> -- | does the dialect support limit keyword (mysql, postgres, ...) +> ,diLimit :: Bool +> -- | allow parsing ODBC syntax +> ,diOdbc :: Bool +> -- | allow quoting identifiers with `backquotes` +> ,diBackquotedIden :: Bool +> -- | allow quoting identifiers with [square brackets] +> ,diSquareBracketQuotedIden :: Bool +> -- | allow identifiers with a leading at @example +> ,diAtIdentifier :: Bool +> -- | allow identifiers with a leading # #example +> ,diHashIdentifier :: Bool +> -- | allow positional identifiers like this: $1 +> ,diPositionalArg :: Bool +> -- | allow postgres style dollar strings +> ,diDollarString :: Bool +> -- | allow strings with an e - e"example" +> ,diEString :: Bool +> -- | allow postgres style symbols +> ,diPostgresSymbols :: Bool +> -- | allow sql server style symbols +> ,diSqlServerSymbols :: Bool +> } > deriving (Eq,Show,Read,Data,Typeable) > -- | ansi sql 2011 dialect > ansi2011 :: Dialect > ansi2011 = Dialect {diKeywords = ansi2011ReservedKeywords -> ,diSyntaxFlavour = ANSI2011 > ,diFetchFirst = True > ,diLimit = False -> ,diOdbc = False} +> ,diOdbc = False +> ,diBackquotedIden = False +> ,diSquareBracketQuotedIden = False +> ,diAtIdentifier = False +> ,diHashIdentifier = False +> ,diPositionalArg = False +> ,diDollarString = False +> ,diEString = False +> ,diPostgresSymbols = False +> ,diSqlServerSymbols = False +> } > -- | mysql dialect > mysql :: Dialect -> mysql = addLimit ansi2011 {diSyntaxFlavour = MySQL -> ,diFetchFirst = False } +> mysql = addLimit ansi2011 {diFetchFirst = False +> ,diBackquotedIden = True +> } > -- | postgresql dialect > postgres :: Dialect -> postgres = addLimit ansi2011 {diSyntaxFlavour = Postgres} +> postgres = addLimit ansi2011 {diPositionalArg = True +> ,diDollarString = True +> ,diEString = True +> ,diPostgresSymbols = True} > -- | oracle dialect > oracle :: Dialect -> oracle = ansi2011 {diSyntaxFlavour = Oracle} +> oracle = ansi2011 -- {} > -- | microsoft sql server dialect > sqlserver :: Dialect -> sqlserver = ansi2011 {diSyntaxFlavour = SQLServer} +> sqlserver = ansi2011 {diSquareBracketQuotedIden = True +> ,diAtIdentifier = True +> ,diHashIdentifier = True +> ,diSqlServerSymbols = True } > addLimit :: Dialect -> Dialect > addLimit d = d {diKeywords = "limit": diKeywords d diff --git a/Language/SQL/SimpleSQL/Lex.lhs b/Language/SQL/SimpleSQL/Lex.lhs index 01c4192..83eab3e 100644 --- a/Language/SQL/SimpleSQL/Lex.lhs +++ b/Language/SQL/SimpleSQL/Lex.lhs @@ -174,8 +174,8 @@ u&"unicode quoted identifier" > [quotedIden > ,unicodeQuotedIden > ,regularIden -> ,guard (diSyntaxFlavour d == MySQL) >> mySqlQuotedIden -> ,guard (diSyntaxFlavour d == SQLServer) >> sqlServerQuotedIden +> ,guard (diBackquotedIden d) >> mySqlQuotedIden +> ,guard (diSquareBracketQuotedIden d) >> sqlServerQuotedIden > ] > where > regularIden = Identifier Nothing <$> identifierString @@ -217,15 +217,15 @@ use try because : and @ can be part of other things also > prefixedVariable :: Dialect -> Parser Token > prefixedVariable d = try $ choice > [PrefixedVariable <$> char ':' <*> identifierString -> ,guard (diSyntaxFlavour d == SQLServer) >> +> ,guard (diAtIdentifier d) >> > PrefixedVariable <$> char '@' <*> identifierString -> ,guard (diSyntaxFlavour d == SQLServer) >> +> ,guard (diHashIdentifier d) >> > PrefixedVariable <$> char '#' <*> identifierString > ] > positionalArg :: Dialect -> Parser Token > positionalArg d = -> guard (diSyntaxFlavour d == Postgres) >> +> guard (diPositionalArg d) >> > -- use try to avoid ambiguities with other syntax which starts with dollar > PositionalArg <$> try (char '$' *> (read <$> many1 digit)) @@ -243,7 +243,7 @@ x'hexidecimal string' > sqlString d = dollarString <|> csString <|> normalString > where > dollarString = do -> guard $ diSyntaxFlavour d == Postgres +> guard $ diDollarString d > -- use try because of ambiguity with symbols and with > -- positional arg > delim <- (\x -> concat ["$",x,"$"]) @@ -270,7 +270,7 @@ x'hexidecimal string' > -- error messages and user predictability make it a good > -- pragmatic choice > csString -> | diSyntaxFlavour d == Postgres = +> | diEString d = > choice [SqlString <$> try (string "e'" <|> string "E'") > <*> return "'" <*> normalStringSuffix True "" > ,csString'] @@ -304,7 +304,7 @@ constant. > SqlNumber <$> completeNumber > -- this is for definitely avoiding possibly ambiguous source > <* choice [-- special case to allow e.g. 1..2 -> guard (diSyntaxFlavour d == Postgres) +> guard (diPostgresSymbols d) > *> (void $ lookAhead $ try $ string "..") > <|> void (notFollowedBy (oneOf "eE.")) > ,notFollowedBy (oneOf "eE.") @@ -324,7 +324,7 @@ constant. > -- special case for postgresql, we backtrack if we see two adjacent dots > -- to parse 1..2, but in other dialects we commit to the failure > dot = let p = string "." <* notFollowedBy (char '.') -> in if (diSyntaxFlavour d == Postgres) +> in if diPostgresSymbols d > then try p > else p > expon = (:) <$> oneOf "eE" <*> sInt @@ -342,12 +342,12 @@ compared with ansi and other dialects > symbol :: Dialect -> Parser Token > symbol d = Symbol <$> choice (concat > [dots -> ,if (diSyntaxFlavour d == Postgres) +> ,if diPostgresSymbols d > then postgresExtraSymbols > else [] > ,miscSymbol > ,if diOdbc d then odbcSymbol else [] -> ,if (diSyntaxFlavour d == Postgres) +> ,if diPostgresSymbols d > then generalizedPostgresqlOperator > else basicAnsiOps > ]) @@ -360,10 +360,10 @@ compared with ansi and other dialects > ,try (string "::" <* notFollowedBy (char ':')) > ,try (string ":" <* notFollowedBy (char ':'))] > miscSymbol = map (string . (:[])) $ -> case diSyntaxFlavour d of -> SQLServer -> ",;():?" -> Postgres -> "[],;()" -> _ -> "[],;():?" +> case () of +> _ | diSqlServerSymbols d -> ",;():?" +> | diPostgresSymbols d -> "[],;()" +> | otherwise -> "[],;():?" try is used because most of the first characters of the two character symbols can also be part of a single character symbol @@ -562,7 +562,7 @@ followed by = or : makes a different symbol two symbols next to eachother will fail if the symbols can combine and (possibly just the prefix) look like a different symbol -> | Dialect {diSyntaxFlavour = Postgres} <- d +> | diPostgresSymbols d > , Symbol a' <- a > , Symbol b' <- b > , b' `notElem` ["+", "-"] || or (map (`elem` a') "~!@#%^&|`?") = False