1
Fork 0

new syntax for names and string literals

This commit is contained in:
Jake Wheat 2016-02-12 13:09:58 +02:00
parent 1b4eefc431
commit 52f035b718
8 changed files with 150 additions and 165 deletions

View file

@ -50,26 +50,20 @@ parsec
> --
> | Identifier String
>
> -- | This is an identifier quoted with "
> | QIdentifier String
> -- | This is an identifier quoted with u&"
> | UQIdentifier String
> -- | This is a dialect specific quoted identifier with the quote
> -- characters explicit. The first and second fields are the
> -- starting and ending quote characters.
> | DQIdentifier String String String
>
> -- | This is a quoted identifier, the quotes can be " or u&,
> -- etc. or something dialect specific like []
> -- the first two fields are the start and end quotes
> | QuotedIdentifier String -- start quote
> String -- end quote
> String -- content
> -- | This is a host param symbol, e.g. :param
> | HostParam String
>
> -- | This is a string literal.
> | SqlString String
>
> -- | This is a character set string literal. The first field is
> -- the character set (one of nNbBxX, or u&, U&).
> | CSSqlString String String
>
> -- | This is a string literal. The first two fields are the --
> -- start and end quotes, which are usually both ', but can be
> -- the character set (one of nNbBxX, or u&, U&), or a dialect
> -- specific string quoting (such as $$ in postgres)
> | SqlString String String String
> -- | A number literal (integral or otherwise), stored in original format
> -- unchanged
> | SqlNumber String
@ -95,15 +89,19 @@ parsec
> prettyToken :: Dialect -> Token -> String
> prettyToken _ (Symbol s) = s
> prettyToken _ (Identifier t) = t
> prettyToken _ (QIdentifier t) =
> "\"" ++ doubleChars '"' t ++ "\""
> prettyToken _ (UQIdentifier t) =
> "u&\"" ++ doubleChars '"' t ++ "\""
> prettyToken _ (DQIdentifier s e t) =
> s ++ t ++ e
> prettyToken _ (QuotedIdentifier q1 q2 t) =
> q1 ++
> -- todo: a bit hacky, do a better design
> (if '"' `elem` q1 then doubleChars '"' t else t)
> ++ q2
> --prettyToken _ (UQIdentifier t) =
> -- "u&\"" ++ doubleChars '"' t ++ "\""
> --prettyToken _ (DQIdentifier s e t) =
> -- s ++ t ++ e
> prettyToken _ (HostParam p) = ':':p
> prettyToken _ (SqlString t) = "'" ++ doubleChars '\'' t ++ "'"
> prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
> prettyToken _ (SqlString s e t) =
> s ++ (if '\'' `elem` s then doubleChars '\'' t else t) ++ e
> --prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
> prettyToken _ (SqlNumber r) = r
> prettyToken _ (Whitespace t) = t
> prettyToken _ (LineComment l) = l
@ -181,12 +179,14 @@ u&"unicode quoted identifier"
> identifier :: Dialect -> Parser Token
> identifier d =
> choice
> [QIdentifier <$> qiden
> [QuotedIdentifier "\"" "\"" <$> qiden
> -- try is used here to avoid a conflict with identifiers
> -- and quoted strings which also start with a 'u'
> ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
> ,QuotedIdentifier "u&\"" "\"" <$> (try (string "u&") *> qiden)
> ,QuotedIdentifier "U&\"" "\"" <$> (try (string "U&") *> qiden)
> ,Identifier <$> identifierString
> ,DQIdentifier "`" "`" <$> mySqlQIden
> -- todo: dialect protection
> ,QuotedIdentifier "`" "`" <$> mySqlQIden
> ]
> where
> qiden = char '"' *> qidenSuffix ""
@ -226,7 +226,7 @@ x'hexidecimal string'
> ,normalString
> ]
> where
> normalString = SqlString {-"'"-} <$> (char '\'' *> normalStringSuffix "")
> normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
> normalStringSuffix t = do
> s <- takeTill (=='\'')
> void $ char '\''
@ -239,10 +239,10 @@ x'hexidecimal string'
> -- identifiers which can start with n,b,x,u
> -- once we read the quote type and the starting '
> -- then we commit to a string
> csString = CSSqlString <$> try (cs <* char '\'') <*> normalStringSuffix ""
> cs = choice [(:[]) <$> oneOf "nNbBxX"
> ,string "u&"
> ,string "U&"]
> csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
> cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
> ++ [string "u&'"
> ,string "U&'"]
> hostParam :: Dialect -> Parser Token
> hostParam _ = HostParam <$> (char ':' *> identifierString)
@ -283,13 +283,15 @@ A symbol is one of the two character symbols, or one of the single
character symbols in the two lists below.
> symbol :: Dialect -> Parser Token
> symbol _ = Symbol <$> choice (many1 (char '.') :
> symbol _ = Symbol <$>
> choice (
> many1 (char '.') :
> -- try is used because most of the first
> -- characters of the two character symbols
> -- can also be part of a single character symbol
> -- maybe this would be better with left factoring?
> map (try . string) [">=","<=","!=","<>","||"]
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
> map (try . string) [">=","<=","!=","<>","||"]
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
> sqlWhitespace :: Dialect -> Parser Token
> sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)

View file

@ -330,10 +330,8 @@ u&"example quoted"
> name :: Parser Name
> name = do
> d <- getState
> choice [QName <$> qidentifierTok
> ,UQName <$> uqidentifierTok
> ,Name <$> identifierTok (blacklist d) Nothing
> ,(\(s,e,t) -> DQName s e t) <$> dqidentifierTok
> choice [Name <$> identifierTok (blacklist d) Nothing
> ,(\(s,e,t) -> QuotedName s e t) <$> qidentifierTok
> ]
todo: replace (:[]) with a named function all over
@ -558,16 +556,13 @@ factoring in this function, and it is a little dense.
See the stringToken lexer below for notes on string literal syntax.
> stringLit :: Parser ValueExpr
> stringLit = StringLit <$> stringTokExtend
> stringLit = (\(s,e,t) -> StringLit s e t) <$> stringTokExtend
> numberLit :: Parser ValueExpr
> numberLit = NumLit <$> sqlNumberTok False
> characterSetLit :: Parser ValueExpr
> characterSetLit = uncurry CSStringLit <$> csSqlStringLitTok
> simpleLiteral :: Parser ValueExpr
> simpleLiteral = numberLit <|> stringLit <|> characterSetLit
> simpleLiteral = numberLit <|> stringLit
== star, param, host param
@ -690,7 +685,7 @@ this. also fix the monad -> applicative
> intervalLit = try (keyword_ "interval" >> do
> s <- optionMaybe $ choice [True <$ symbol_ "+"
> ,False <$ symbol_ "-"]
> lit <- stringTok
> lit <- singleQuotesOnlyStringTok
> q <- optionMaybe intervalQualifier
> mkIt s lit q)
> where
@ -716,7 +711,7 @@ all the value expressions which start with an identifier
> idenExpr :: Parser ValueExpr
> idenExpr =
> -- todo: work out how to left factor this
> try (TypedLit <$> typeName <*> stringTokExtend)
> try (TypedLit <$> typeName <*> singleQuotesOnlyStringTok)
> <|> multisetSetFunction
> <|> (names <**> option Iden app)
> where
@ -831,7 +826,7 @@ in the source
> keyword "trim" >>
> parens (mkTrim
> <$> option "both" sides
> <*> option " " stringTok
> <*> option " " singleQuotesOnlyStringTok
> <*> (keyword_ "from" *> valueExpr))
> where
> sides = choice ["leading" <$ keyword_ "leading"
@ -839,7 +834,7 @@ in the source
> ,"both" <$ keyword_ "both"]
> mkTrim fa ch fr =
> SpecialOpK [Name "trim"] Nothing
> $ catMaybes [Just (fa,StringLit ch)
> $ catMaybes [Just (fa,StringLit "'" "'" ch)
> ,Just ("from", fr)]
=== app, aggregate, window
@ -1951,28 +1946,34 @@ unsigned integer match
symbol matching
keyword matching
> csSqlStringLitTok :: Parser (String,String)
> csSqlStringLitTok = mytoken (\tok ->
> case tok of
> L.CSSqlString p s -> Just (p,s)
> _ -> Nothing)
> stringTok :: Parser String
> stringTok :: Parser (String,String,String)
> stringTok = mytoken (\tok ->
> case tok of
> L.SqlString s -> Just s
> L.SqlString s e t -> Just (s,e,t)
> _ -> Nothing)
> singleQuotesOnlyStringTok :: Parser String
> singleQuotesOnlyStringTok = mytoken (\tok ->
> case tok of
> L.SqlString "'" "'" t -> Just t
> _ -> Nothing)
This is to support SQL strings where you can write
'part of a string' ' another part'
and it will parse as a single string
> stringTokExtend :: Parser String
It is only allowed when all the strings are quoted with ' atm.
> stringTokExtend :: Parser (String,String,String)
> stringTokExtend = do
> x <- stringTok
> (s,e,x) <- stringTok
> choice [
> ((x++) <$> stringTokExtend)
> ,return x
> do
> guard (s == "'" && e == "'")
> (s',e',y) <- stringTokExtend
> guard (s' == "'" && e' == "'")
> return $ (s,e,x ++ y)
> ,return (s,e,x)
> ]
> hostParamTok :: Parser String
@ -2002,25 +2003,12 @@ and it will parse as a single string
> (Just k, L.Identifier p) | k == map toLower p -> Just p
> _ -> Nothing)
> qidentifierTok :: Parser String
> qidentifierTok :: Parser (String,String,String)
> qidentifierTok = mytoken (\tok ->
> case tok of
> L.QIdentifier p -> Just p
> L.QuotedIdentifier s e t -> Just (s,e,t)
> _ -> Nothing)
> dqidentifierTok :: Parser (String,String,String)
> dqidentifierTok = mytoken (\tok ->
> case tok of
> L.DQIdentifier s e t -> Just (s,e,t)
> _ -> Nothing)
> uqidentifierTok :: Parser String
> uqidentifierTok = mytoken (\tok ->
> case tok of
> L.UQIdentifier p -> Just p
> _ -> Nothing)
> mytoken :: (L.Token -> Maybe a) -> Parser a
> mytoken test = token showToken posToken testToken
> where

View file

@ -16,7 +16,7 @@ which have been changed to try to improve the layout of the output.
> import Language.SQL.SimpleSQL.Dialect
> import Text.PrettyPrint (render, vcat, text, (<>), (<+>), empty, parens,
> nest, Doc, punctuate, comma, sep, quotes,
> doubleQuotes, brackets,hcat)
> brackets,hcat)
> import Data.Maybe (maybeToList, catMaybes)
> import Data.List (intercalate)
@ -40,7 +40,8 @@ which have been changed to try to improve the layout of the output.
= value expressions
> valueExpr :: Dialect -> ValueExpr -> Doc
> valueExpr _ (StringLit s) = quotes $ text $ doubleUpQuotes s
> valueExpr _ (StringLit s e t) =
> text (s ++ (if '\'' `elem` s then doubleUpQuotes t else t) ++ e)
> valueExpr _ (NumLit s) = text s
> valueExpr _ (IntervalLit s v f t) =
@ -210,11 +211,6 @@ which have been changed to try to improve the layout of the output.
> Distinct -> text "distinct"
> ,valueExpr d b]
> valueExpr _ (CSStringLit cs st) =
> text cs <> quotes (text $ doubleUpQuotes st)
> valueExpr d (Escape v e) =
> valueExpr d v <+> text "escape" <+> text [e]
@ -243,21 +239,18 @@ which have been changed to try to improve the layout of the output.
> unname :: Name -> String
> unname (QName n) = "\"" ++ doubleUpDoubleQuotes n ++ "\""
> unname (UQName n) = "U&\"" ++ doubleUpDoubleQuotes n ++ "\""
> unname (Name n) = n
> unname (DQName s e n) = s ++ n ++ e
> unname (QuotedName s e n) =
> s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e
> unnames :: [Name] -> String
> unnames ns = intercalate "." $ map unname ns
> name :: Name -> Doc
> name (QName n) = doubleQuotes $ text $ doubleUpDoubleQuotes n
> name (UQName n) =
> text "U&" <> doubleQuotes (text $ doubleUpDoubleQuotes n)
> name (Name n) = text n
> name (DQName s e n) = text s <> text n <> text e
> name (QuotedName s e n) =
> text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e
> names :: [Name] -> Doc
> names ns = hcat $ punctuate (text ".") $ map name ns

View file

@ -88,9 +88,9 @@
> --
> -- * 12.34e-6
> NumLit String
> -- | string literal, currently only basic strings between
> -- single quotes with a single quote escaped using ''
> | StringLit String
> -- | string literal, with the start and end quote
> -- e.g. 'test' -> StringLit "'" "'" "test"
> | StringLit String String String
> -- | text of interval literal, units of interval precision,
> -- e.g. interval 3 days (3)
> | IntervalLit
@ -203,8 +203,6 @@
> -- second is the subscripts/ctor args
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
> | CSStringLit String String
todo: special syntax for like, similar with escape - escape cannot go
in other places
@ -220,10 +218,8 @@ in other places
> -- | Represents an identifier name, which can be quoted or unquoted.
> data Name = Name String
> | QName String
> | UQName String
> | DQName String String String
> -- ^ dialect quoted name, the fields are start quote, end quote and the string itself, e.g. `something` is parsed to DQName "`" "`" "something, and $a$ test $a$ is parsed to DQName "$a$" "$a$" " test "
> | QuotedName String String String
> -- ^ quoted name, the fields are start quote, end quote and the string itself, these will usually be ", others are possible e.g. `something` is parsed to QuotedName "`" "`" "something, and $a$ test $a$ is parsed to QuotedName "$a$" "$a$" " test "
> deriving (Eq,Show,Read,Data,Typeable)
> -- | Represents a type name, used in casts.