new syntax for names and string literals

2016-02-12 13:09:58 +02:00 · 2016-02-12 13:09:58 +02:00 · 52f035b718
commit 52f035b718
parent 1b4eefc431
8 changed files with 150 additions and 165 deletions
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@ -50,26 +50,20 @@ parsec
 >     --
 >     | Identifier String
 >
->     -- | This is an identifier quoted with "
->     | QIdentifier String
->     -- | This is an identifier quoted with u&"
->     | UQIdentifier String
-
->     -- | This is a dialect specific quoted identifier with the quote
->     -- characters explicit. The first and second fields are the
->     -- starting and ending quote characters.
->     | DQIdentifier String String String
->
+>     -- | This is a quoted identifier, the quotes can be " or u&,
+>     -- etc. or something dialect specific like []
+>     -- the first two fields are the start and end quotes
+>     | QuotedIdentifier String -- start quote
+>                        String -- end quote
+>                        String -- content
 >     -- | This is a host param symbol, e.g. :param
 >     | HostParam String
 >
->     -- | This is a string literal.
->     | SqlString String
->
->     -- | This is a character set string literal. The first field is
->     -- the character set (one of nNbBxX, or u&, U&).
->     | CSSqlString String String
->
+>     -- | This is a string literal. The first two fields are the --
+>     -- start and end quotes, which are usually both ', but can be
+>     -- the character set (one of nNbBxX, or u&, U&), or a dialect
+>     -- specific string quoting (such as $$ in postgres)
+>     | SqlString String String String
 >     -- | A number literal (integral or otherwise), stored in original format
 >     -- unchanged
 >     | SqlNumber String
@ -95,15 +89,19 @@ parsec
 > prettyToken :: Dialect -> Token -> String
 > prettyToken _ (Symbol s) = s
 > prettyToken _ (Identifier t) = t
-> prettyToken _ (QIdentifier t) =
->     "\"" ++ doubleChars '"' t ++ "\""
-> prettyToken _ (UQIdentifier t) =
->     "u&\"" ++ doubleChars '"' t ++ "\""
-> prettyToken _ (DQIdentifier s e t) =
->     s ++ t ++ e
+> prettyToken _ (QuotedIdentifier q1 q2 t) =
+>     q1 ++
+>     -- todo: a bit hacky, do a better design
+>     (if '"' `elem` q1 then doubleChars '"' t else t)
+>     ++ q2
+> --prettyToken _ (UQIdentifier t) =
+> --    "u&\"" ++ doubleChars '"' t ++ "\""
+> --prettyToken _ (DQIdentifier s e t) =
+> --    s ++ t ++ e
 > prettyToken _ (HostParam p) = ':':p
-> prettyToken _ (SqlString t) = "'" ++ doubleChars '\'' t ++ "'"
-> prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
+> prettyToken _ (SqlString s e t) =
+>     s ++ (if '\'' `elem` s then doubleChars '\'' t else t) ++ e
+> --prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
 > prettyToken _ (SqlNumber r) = r
 > prettyToken _ (Whitespace t) = t
 > prettyToken _ (LineComment l) = l
@ -181,12 +179,14 @@ u&"unicode quoted identifier"
 > identifier :: Dialect -> Parser Token
 > identifier d =
 >     choice
->     [QIdentifier <$> qiden
+>     [QuotedIdentifier "\"" "\"" <$> qiden
 >      -- try is used here to avoid a conflict with identifiers
 >      -- and quoted strings which also start with a 'u'
->     ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
+>     ,QuotedIdentifier "u&\"" "\"" <$> (try (string "u&") *> qiden)
+>     ,QuotedIdentifier "U&\"" "\"" <$> (try (string "U&") *> qiden)
 >     ,Identifier <$> identifierString
->     ,DQIdentifier "`" "`" <$> mySqlQIden
+>      -- todo: dialect protection
+>     ,QuotedIdentifier "`" "`" <$> mySqlQIden
 >     ]
 >   where
 >     qiden = char '"' *> qidenSuffix ""
@ -226,7 +226,7 @@ x'hexidecimal string'
 >            ,normalString
 >            ]
 >   where
->     normalString = SqlString {-"'"-} <$> (char '\'' *> normalStringSuffix "")
+>     normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix "")
 >     normalStringSuffix t = do
 >         s <- takeTill (=='\'')
 >         void $ char '\''
@ -239,10 +239,10 @@ x'hexidecimal string'
 >     -- identifiers which can start with n,b,x,u
 >     -- once we read the quote type and the starting '
 >     -- then we commit to a string
->     csString = CSSqlString <$> try (cs  <* char '\'') <*> normalStringSuffix ""
->     cs = choice [(:[]) <$> oneOf "nNbBxX"
->                 ,string "u&"
->                 ,string "U&"]
+>     csString = SqlString <$> try cs <*> return "'" <*> normalStringSuffix ""
+>     cs = choice $ (map (\x -> string ([x] ++ "'")) "nNbBxX")
+>                   ++ [string "u&'"
+>                      ,string "U&'"]

 > hostParam :: Dialect -> Parser Token
 > hostParam _ = HostParam <$> (char ':' *> identifierString)
@ -283,13 +283,15 @@ A symbol is one of the two character symbols, or one of the single
 character symbols in the two lists below.

 > symbol :: Dialect -> Parser Token
-> symbol _ = Symbol <$> choice (many1 (char '.') :
+> symbol _ = Symbol <$>
+>     choice (
+>             many1 (char '.') :
 >                  -- try is used because most of the first
 >                  -- characters of the two character symbols
 >                  -- can also be part of a single character symbol
 >                  -- maybe this would be better with left factoring?
->                  map (try . string) [">=","<=","!=","<>","||"]
->                  ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
+>                 map (try . string) [">=","<=","!=","<>","||"]
+>                 ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")

 > sqlWhitespace :: Dialect -> Parser Token
 > sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
--- a/Language/SQL/SimpleSQL/Parse.lhs
+++ b/Language/SQL/SimpleSQL/Parse.lhs
@ -330,10 +330,8 @@ u&"example quoted"
 > name :: Parser Name
 > name = do
 >     d <- getState
->     choice [QName <$> qidentifierTok
->            ,UQName <$> uqidentifierTok
->            ,Name <$> identifierTok (blacklist d) Nothing
->            ,(\(s,e,t) -> DQName s e t) <$> dqidentifierTok
+>     choice [Name <$> identifierTok (blacklist d) Nothing
+>            ,(\(s,e,t) -> QuotedName s e t) <$> qidentifierTok
 >            ]

 todo: replace (:[]) with a named function all over
@ -558,16 +556,13 @@ factoring in this function, and it is a little dense.
 See the stringToken lexer below for notes on string literal syntax.

 > stringLit :: Parser ValueExpr
-> stringLit = StringLit <$> stringTokExtend
+> stringLit = (\(s,e,t) -> StringLit s e t) <$> stringTokExtend

 > numberLit :: Parser ValueExpr
 > numberLit = NumLit <$> sqlNumberTok False

-> characterSetLit :: Parser ValueExpr
-> characterSetLit = uncurry CSStringLit <$> csSqlStringLitTok
-
 > simpleLiteral :: Parser ValueExpr
-> simpleLiteral = numberLit <|> stringLit <|> characterSetLit
+> simpleLiteral = numberLit <|> stringLit

 == star, param, host param

@ -690,7 +685,7 @@ this. also fix the monad -> applicative
 > intervalLit = try (keyword_ "interval" >> do
 >     s <- optionMaybe $ choice [True <$ symbol_ "+"
 >                               ,False <$ symbol_ "-"]
->     lit <- stringTok
+>     lit <- singleQuotesOnlyStringTok
 >     q <- optionMaybe intervalQualifier
 >     mkIt s lit q)
 >   where
@ -716,7 +711,7 @@ all the value expressions which start with an identifier
 > idenExpr :: Parser ValueExpr
 > idenExpr =
 >     -- todo: work out how to left factor this
->     try (TypedLit <$> typeName <*> stringTokExtend)
+>     try (TypedLit <$> typeName <*> singleQuotesOnlyStringTok)
 >     <|> multisetSetFunction
 >     <|> (names <**> option Iden app)
 >   where
@ -831,7 +826,7 @@ in the source
 >     keyword "trim" >>
 >     parens (mkTrim
 >             <$> option "both" sides
->             <*> option " " stringTok
+>             <*> option " " singleQuotesOnlyStringTok
 >             <*> (keyword_ "from" *> valueExpr))
 >   where
 >     sides = choice ["leading" <$ keyword_ "leading"
@ -839,7 +834,7 @@ in the source
 >                    ,"both" <$ keyword_ "both"]
 >     mkTrim fa ch fr =
 >       SpecialOpK [Name "trim"] Nothing
->           $ catMaybes [Just (fa,StringLit ch)
+>           $ catMaybes [Just (fa,StringLit "'" "'" ch)
 >                       ,Just ("from", fr)]

 === app, aggregate, window
@ -1951,28 +1946,34 @@ unsigned integer match
 symbol matching
 keyword matching

-> csSqlStringLitTok :: Parser (String,String)
-> csSqlStringLitTok = mytoken (\tok ->
->     case tok of
->       L.CSSqlString p s -> Just (p,s)
->       _ -> Nothing)
-
-> stringTok :: Parser String
+> stringTok :: Parser (String,String,String)
 > stringTok = mytoken (\tok ->
 >     case tok of
->       L.SqlString s -> Just s
+>       L.SqlString s e t -> Just (s,e,t)
+>       _ -> Nothing)
+
+> singleQuotesOnlyStringTok :: Parser String
+> singleQuotesOnlyStringTok = mytoken (\tok ->
+>     case tok of
+>       L.SqlString "'" "'" t -> Just t
 >       _ -> Nothing)

 This is to support SQL strings where you can write
 'part of a string' ' another part'
 and it will parse as a single string

-> stringTokExtend :: Parser String
+It is only allowed when all the strings are quoted with ' atm.
+
+> stringTokExtend :: Parser (String,String,String)
 > stringTokExtend = do
->     x <- stringTok
+>     (s,e,x) <- stringTok
 >     choice [
->         ((x++) <$> stringTokExtend)
->         ,return x
+>          do
+>          guard (s == "'" && e == "'")
+>          (s',e',y) <- stringTokExtend
+>          guard (s' == "'" && e' == "'")
+>          return $ (s,e,x ++ y)
+>         ,return (s,e,x)
 >         ]

 > hostParamTok :: Parser String
@ -2002,25 +2003,12 @@ and it will parse as a single string
 >       (Just k, L.Identifier p) | k == map toLower p -> Just p
 >       _ -> Nothing)

-> qidentifierTok :: Parser String
+> qidentifierTok :: Parser (String,String,String)
 > qidentifierTok = mytoken (\tok ->
 >     case tok of
->       L.QIdentifier p -> Just p
+>       L.QuotedIdentifier s e t -> Just (s,e,t)
 >       _ -> Nothing)

-> dqidentifierTok :: Parser (String,String,String)
-> dqidentifierTok = mytoken (\tok ->
->     case tok of
->       L.DQIdentifier s e t -> Just (s,e,t)
->       _ -> Nothing)
-
-> uqidentifierTok :: Parser String
-> uqidentifierTok = mytoken (\tok ->
->     case tok of
->       L.UQIdentifier p -> Just p
->       _ -> Nothing)
-
-
 > mytoken :: (L.Token -> Maybe a) -> Parser a
 > mytoken test = token showToken posToken testToken
 >   where
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@ -16,7 +16,7 @@ which have been changed to try to improve the layout of the output.
 > import Language.SQL.SimpleSQL.Dialect
 > import Text.PrettyPrint (render, vcat, text, (<>), (<+>), empty, parens,
 >                          nest, Doc, punctuate, comma, sep, quotes,
->                          doubleQuotes, brackets,hcat)
+>                          brackets,hcat)
 > import Data.Maybe (maybeToList, catMaybes)
 > import Data.List (intercalate)

@ -40,7 +40,8 @@ which have been changed to try to improve the layout of the output.
 = value expressions

 > valueExpr :: Dialect -> ValueExpr -> Doc
-> valueExpr _ (StringLit s) = quotes $ text $ doubleUpQuotes s
+> valueExpr _ (StringLit s e t) =
+>     text (s ++ (if '\'' `elem` s then doubleUpQuotes t else t) ++ e)

 > valueExpr _ (NumLit s) = text s
 > valueExpr _ (IntervalLit s v f t) =
@ -210,11 +211,6 @@ which have been changed to try to improve the layout of the output.
 >          Distinct -> text "distinct"
 >     ,valueExpr d b]

-
-
-> valueExpr _ (CSStringLit cs st) =
->   text cs <> quotes (text $ doubleUpQuotes st)
-
 > valueExpr d (Escape v e) =
 >     valueExpr d v <+> text "escape" <+> text [e]

@ -243,21 +239,18 @@ which have been changed to try to improve the layout of the output.


 > unname :: Name -> String
-> unname (QName n) = "\"" ++ doubleUpDoubleQuotes n ++ "\""
-> unname (UQName n) = "U&\"" ++ doubleUpDoubleQuotes n ++ "\""
 > unname (Name n) = n
-> unname (DQName s e n) = s ++ n ++ e
+> unname (QuotedName s e n) =
+>     s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e

 > unnames :: [Name] -> String
 > unnames ns = intercalate "." $ map unname ns


 > name :: Name -> Doc
-> name (QName n) = doubleQuotes $ text $ doubleUpDoubleQuotes n
-> name (UQName n) =
->     text "U&" <> doubleQuotes (text $ doubleUpDoubleQuotes n)
 > name (Name n) = text n
-> name (DQName s e n) = text s <> text n <> text e
+> name (QuotedName s e n) =
+>     text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e

 > names :: [Name] -> Doc
 > names ns = hcat $ punctuate (text ".") $ map name ns
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@ -88,9 +88,9 @@
 >       --
 >       -- * 12.34e-6
 >       NumLit String
->       -- | string literal, currently only basic strings between
->       -- single quotes with a single quote escaped using ''
->     | StringLit String
+>       -- | string literal, with the start and end quote
+>       -- e.g. 'test' -> StringLit "'" "'" "test"
+>     | StringLit String String String
 >       -- | text of interval literal, units of interval precision,
 >       -- e.g. interval 3 days (3)
 >     | IntervalLit
@ -203,8 +203,6 @@
 >                                   -- second is the subscripts/ctor args
 >     | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)

->     | CSStringLit String String
-
 todo: special syntax for like, similar with escape - escape cannot go
 in other places

@ -220,10 +218,8 @@ in other places

 > -- | Represents an identifier name, which can be quoted or unquoted.
 > data Name = Name String
->           | QName String
->           | UQName String
->           | DQName String String String
->             -- ^ dialect quoted name, the fields are start quote, end quote and the string itself, e.g. `something` is parsed to DQName "`" "`" "something, and $a$ test $a$ is parsed to DQName "$a$" "$a$" " test "
+>           | QuotedName String String String
+>             -- ^ quoted name, the fields are start quote, end quote and the string itself, these will usually be ", others are possible e.g. `something` is parsed to QuotedName "`" "`" "something, and $a$ test $a$ is parsed to QuotedName "$a$" "$a$" " test "
 >             deriving (Eq,Show,Read,Data,Typeable)

 > -- | Represents a type name, used in casts.