support character set literals, e.g. N'stuff'

support collate postfix operator support escape and uescape as postfix operators change the collate support in substring and trim which isn't a special case in the sql 2003 grammar anymore but just a normal collate postfix operator, the old code had the collation name as a string, but the new style is as an identifier
2014-04-18 00:16:24 +03:00 · 2014-04-18 00:16:24 +03:00 · 37dca6596b
parent 211174cfb4
commit 37dca6596b
5 changed files with 90 additions and 34 deletions
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@ -17,7 +17,7 @@
 >                    ,option,between,sepBy,sepBy1,string,manyTill,anyChar
 >                    ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
 >                    ,optionMaybe,optional,many,letter,parse
->                    ,chainl1, (<?>),notFollowedBy,alphaNum)
+>                    ,chainl1, (<?>),notFollowedBy,alphaNum, lookAhead)
 > import Text.Parsec.String (Parser)
 > import qualified Text.Parsec as P (ParseError)
 > import Text.Parsec.Perm (permute,(<$?>), (<|?>))
@ -123,8 +123,20 @@ which parses as a typed literal
 >     mkIt val Nothing = TypedLit (TypeName "interval") val
 >     mkIt val (Just (a,b)) = IntervalLit val a b

+> characterSetLiteral :: Parser ValueExpr
+> characterSetLiteral =
+>     CSStringLit <$> shortCSPrefix <*> stringToken
+>   where
+>     shortCSPrefix =
+>         choice
+>         [(:[]) <$> oneOf "nNbBxX"
+>         ,string "u&"
+>         ,string "U&"
+>         ] <* lookAhead (char '\'')
+
 > literal :: Parser ValueExpr
-> literal = number <|> stringValue <|> interval
+> literal = number <|> stringValue <|> interval <|> try characterSetLiteral
+

 == Names

@ -348,7 +360,7 @@ for, but the parser doens't enforce this

 > substring :: Parser ValueExpr
 > substring = specialOpK "substring" SOKMandatory
->                 [("from", False),("for", False),("collate", False)]
+>                 [("from", False),("for", False)]

 > convert :: Parser ValueExpr
 > convert = specialOpK "convert" SOKMandatory [("using", True)]
@ -371,17 +383,15 @@ in the source
 >     parens (mkTrim
 >             <$> option "both" sides
 >             <*> option " " stringToken
->             <*> (keyword_ "from" *> valueExpr)
->             <*> optionMaybe (keyword_ "collate" *> stringToken))
+>             <*> (keyword_ "from" *> valueExpr))
 >   where
 >     sides = choice ["leading" <$ keyword_ "leading"
 >                    ,"trailing" <$ keyword_ "trailing"
 >                    ,"both" <$ keyword_ "both"]
->     mkTrim fa ch fr cl =
+>     mkTrim fa ch fr =
 >       SpecialOpK (Name "trim") Nothing
 >           $ catMaybes [Just (fa,StringLit ch)
->                       ,Just ("from", fr)
->                       ,fmap (("collate",) . StringLit) cl]
+>                       ,Just ("from", fr)]

 in: two variations:
 a in (expr0, expr1, ...)
@ -473,6 +483,21 @@ a match (select a from t)
 >     [ArrayCtor <$> parens queryExpr
 >     ,Array (Iden (Name "array")) <$> brackets (commaSep valueExpr)]

+> escape :: Parser (ValueExpr -> ValueExpr)
+> escape = do
+>     ctor <- choice
+>             [Escape <$ keyword_ "escape"
+>             ,UEscape <$ keyword_ "uescape"]
+>     c <- anyChar
+>     return $ \v -> ctor v c
+
+> collate :: Parser (ValueExpr -> ValueExpr)
+> collate = do
+>           keyword_ "collate"
+>           i <- identifier
+>           return $ \v -> Collate v i
+
+
 typename: used in casts. Special cases for the multi keyword typenames
 that SQL supports.

@ -538,9 +563,12 @@ TODO: carefully review the precedences and associativities.
 >           -- todo: left factor the quantified comparison with regular
 >           -- binary comparison, somehow
 >          [E.Postfix $ try quantifiedComparison
->          ,E.Postfix matchPredicate]
+>          ,E.Postfix matchPredicate
+>          ]
 >         ,[binarySym "." E.AssocLeft]
->         ,[postfix' arrayPostfix]
+>         ,[postfix' arrayPostfix
+>          ,postfix' escape
+>          ,postfix' collate]
 >         ,[prefixSym "+", prefixSym "-"]
 >         ,[binarySym "^" E.AssocLeft]
 >         ,[binarySym "*" E.AssocLeft
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@ -34,9 +34,6 @@ which have been changed to try to improve the layout of the output.

 > valueExpr :: ValueExpr -> Doc
 > valueExpr (StringLit s) = quotes $ text $ doubleUpQuotes s
->   where doubleUpQuotes [] = []
->         doubleUpQuotes ('\'':cs) = '\'':'\'':doubleUpQuotes cs
->         doubleUpQuotes (c:cs) = c:doubleUpQuotes cs

 > valueExpr (NumLit s) = text s
 > valueExpr (IntervalLit v u p) =
@ -177,6 +174,24 @@ which have been changed to try to improve the layout of the output.
 > valueExpr (ArrayCtor q) =
 >     text "array" <> parens (queryExpr q)

+> valueExpr (CSStringLit cs st) =
+>   text cs <> quotes (text $ doubleUpQuotes st)
+
+> valueExpr (Escape v e) =
+>     valueExpr v <+> text "escape" <+> text [e]
+
+> valueExpr (UEscape v e) =
+>     valueExpr v <+> text "uescape" <+> text [e]
+
+> valueExpr (Collate v c) =
+>     valueExpr v <+> text "collate" <+> text c
+
+
+> doubleUpQuotes :: String -> String
+> doubleUpQuotes [] = []
+> doubleUpQuotes ('\'':cs) = '\'':'\'':doubleUpQuotes cs
+> doubleUpQuotes (c:cs) = c:doubleUpQuotes cs
+

 > unname :: Name -> String
 > unname (QName n) = "\"" ++ n ++ "\""
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@ -140,6 +140,10 @@
 >                                   -- valueExpr is the array, the
 >                                   -- second is the subscripts/ctor args
 >     | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)>       deriving (Eq,Show,Read,Data,Typeable)
+>     | CSStringLit String String
+>     | Escape ValueExpr Char
+>     | UEscape ValueExpr Char
+>     | Collate ValueExpr String
 >       deriving (Eq,Show,Read,Data,Typeable)

 > -- | Represents an identifier name, which can be quoted or unquoted.
--- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs
@ -17,9 +17,9 @@ large amount of the SQL.
 > sql2003Tests :: TestItem
 > sql2003Tests = Group "sql2003Tests"
 >     [stringLiterals
->     --,nationalCharacterStringLiterals
->     --,unicodeStringLiterals
->     --,binaryStringLiterals
+>     ,nationalCharacterStringLiterals
+>     ,unicodeStringLiterals
+>     ,binaryStringLiterals
 >     ,numericLiterals
 >     ,dateAndTimeLiterals
 >     ,booleanLiterals
@ -43,6 +43,7 @@ large amount of the SQL.
 >     ,quantifiedComparisonPredicate
 >     ,uniquePredicate
 >     ,matchPredicate
+>     ,collateClause
 >     --,sortSpecificationList
 >     ]

@ -479,10 +480,16 @@ The <quote symbol> rule consists of two immediately adjacent <quote> marks with
 >      ,StringLit "something some moreand more")
 >     ,("'a quote: '', stuff'"
 >      ,StringLit "a quote: ', stuff")
+>     ,("''"
+>      ,StringLit "")
+>     ,("_francais 'français'"
+>      ,TypedLit (TypeName "_francais") "français")
 >     ]

 TODO: all the stuff with character set representations.

+
+
 == other string literals

 <national character string literal>    ::= 
@ -491,8 +498,8 @@ TODO: all the stuff with character set representations.

 > nationalCharacterStringLiterals :: TestItem
 > nationalCharacterStringLiterals = Group "national character string literals" $ map (uncurry TestValueExpr)
->     [("N'something'", undefined)
->     ,("n'something'", undefined)
+>     [("N'something'", CSStringLit "N" "something")
+>     ,("n'something'", CSStringLit "n" "something")
 >     ]

 <Unicode character string literal>    ::= 
@ -505,14 +512,13 @@ TODO: all the stuff with character set representations.

 > unicodeStringLiterals :: TestItem
 > unicodeStringLiterals = Group "national character string literals" $ map (uncurry TestValueExpr)
->     [("U&'something'", undefined)
->     ,("u&'something'", undefined)
+>     [("U&'something'", CSStringLit "U&" "something")
+>     ,("u&'something' escape ="
+>      ,Escape (CSStringLit "u&" "something") '=')
+>     ,("u&'something' uescape ="
+>      ,UEscape (CSStringLit "u&" "something") '=')
 >     ]

-TODO: put in some unicode and some unicode escape values plus work out
-the ESCAPE thing. I think this is to change the unicode escape value
-starting character.
-
 == other string literals

 <binary string literal>    ::= 
@ -529,8 +535,9 @@ TODO: how to escapes work here?

 > binaryStringLiterals :: TestItem
 > binaryStringLiterals = Group "bit and hex string literals" $ map (uncurry TestValueExpr)
->     [("B'101010'", undefined)
->     ,("X'7f7f7f'", undefined)
+>     [("B'101010'", CSStringLit "B" "101010")
+>     ,("X'7f7f7f'", CSStringLit "X" "7f7f7f")
+>     ,("X'7f7f7f' escape z", Escape (CSStringLit "X" "7f7f7f") 'z')
 >     ]

 TODO: separator stuff for all the string literals?
@ -2802,7 +2809,10 @@ Specify a default collating sequence.

 <collate clause>    ::=   COLLATE <collation name>

-covered elsewhere
+> collateClause :: TestItem
+> collateClause = Group "collate clause" $ map (uncurry TestValueExpr)
+>     [("a collate my_collation"
+>      ,Collate (Iden "a") "my_collation")]

 10.8 <constraint name definition> and <constraint characteristics> (p501)

--- a/tools/Language/SQL/SimpleSQL/ValueExprs.lhs
+++ b/tools/Language/SQL/SimpleSQL/ValueExprs.lhs
@ -243,10 +243,10 @@ keyword special operators
 >     ,("substring(x for 2)"
 >      ,SpecialOpK "substring" (Just $ Iden "x") [("for", NumLit "2")])

->     ,("substring(x from 1 for 2 collate 'C')"
->      ,SpecialOpK "substring" (Just $ Iden "x") [("from", NumLit "1")
->                                                ,("for", NumLit "2")
->                                                ,("collate", StringLit "C")])
+>     ,("substring(x from 1 for 2 collate C)"
+>      ,SpecialOpK "substring" (Just $ Iden "x")
+>           [("from", NumLit "1")
+>           ,("for", Collate (NumLit "2") "C")])

 this doesn't work because of a overlap in the 'in' parser

@ -312,11 +312,10 @@ target_string
 >           [("trailing", StringLit "y")
 >           ,("from", Iden "target_string")])

->     ,("trim(both 'z' from target_string collate 'C')"
+>     ,("trim(both 'z' from target_string collate C)"
 >      ,SpecialOpK "trim" Nothing
 >           [("both", StringLit "z")
->           ,("from", Iden "target_string")
->           ,("collate", StringLit "C")])
+>           ,("from", Collate (Iden "target_string") "C")])

 >     ,("trim(leading from target_string)"
 >      ,SpecialOpK "trim" Nothing