refactor the identifier syntax

2016-02-12 14:13:47 +02:00 · 2016-02-12 14:13:47 +02:00 · aa5c2e89c7
commit aa5c2e89c7
parent 52f035b718
16 changed files with 830 additions and 826 deletions
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@ -46,16 +46,11 @@ parsec
 >     --
 >     = Symbol String
 >
->     -- | This is an identifier or keyword.
->     --
->     | Identifier String
->
->     -- | This is a quoted identifier, the quotes can be " or u&,
->     -- etc. or something dialect specific like []
->     -- the first two fields are the start and end quotes
->     | QuotedIdentifier String -- start quote
->                        String -- end quote
->                        String -- content
+>     -- | This is an identifier or keyword. The first field is
+>     -- the quotes used, or nothing if no quotes were used. The quotes
+>     -- can be " or u& or something dialect specific like []
+>     | Identifier (Maybe (String,String)) String
+
 >     -- | This is a host param symbol, e.g. :param
 >     | HostParam String
 >
@ -88,10 +83,13 @@ parsec
 > -- print them, should should get back exactly the same string
 > prettyToken :: Dialect -> Token -> String
 > prettyToken _ (Symbol s) = s
-> prettyToken _ (Identifier t) = t
-> prettyToken _ (QuotedIdentifier q1 q2 t) =
+> prettyToken _ (Identifier Nothing t) = t
+> prettyToken _ (Identifier (Just (q1,q2)) t) =
 >     q1 ++
 >     -- todo: a bit hacky, do a better design
+>     -- the dialect will know how to escape and unescape
+>     -- contents, but the parser here also needs to know
+>     -- about parsing escaped quotes
 >     (if '"' `elem` q1 then doubleChars '"' t else t)
 >     ++ q2
 > --prettyToken _ (UQIdentifier t) =
@ -179,14 +177,14 @@ u&"unicode quoted identifier"
 > identifier :: Dialect -> Parser Token
 > identifier d =
 >     choice
->     [QuotedIdentifier "\"" "\"" <$> qiden
+>     [Identifier (Just ("\"","\"")) <$> qiden
 >      -- try is used here to avoid a conflict with identifiers
 >      -- and quoted strings which also start with a 'u'
->     ,QuotedIdentifier "u&\"" "\"" <$> (try (string "u&") *> qiden)
->     ,QuotedIdentifier "U&\"" "\"" <$> (try (string "U&") *> qiden)
->     ,Identifier <$> identifierString
+>     ,Identifier (Just ("u&\"","\"")) <$> (try (string "u&") *> qiden)
+>     ,Identifier (Just ("U&\"","\"")) <$> (try (string "U&") *> qiden)
+>     ,Identifier Nothing <$> identifierString
 >      -- todo: dialect protection
->     ,QuotedIdentifier "`" "`" <$> mySqlQIden
+>     ,Identifier (Just ("`","`")) <$> mySqlQIden
 >     ]
 >   where
 >     qiden = char '"' *> qidenSuffix ""
--- a/Language/SQL/SimpleSQL/Parse.lhs
+++ b/Language/SQL/SimpleSQL/Parse.lhs
@ -330,9 +330,7 @@ u&"example quoted"
 > name :: Parser Name
 > name = do
 >     d <- getState
->     choice [Name <$> identifierTok (blacklist d) Nothing
->            ,(\(s,e,t) -> QuotedName s e t) <$> qidentifierTok
->            ]
+>     uncurry Name <$> identifierTok (blacklist d)

 todo: replace (:[]) with a named function all over

@ -506,7 +504,7 @@ factoring in this function, and it is a little dense.
 >     -- this parser handles the fixed set of multi word
 >     -- type names, plus all the type names which are
 >     -- reserved words
->     reservedTypeNames = (:[]) . Name . unwords <$> makeKeywordTree
+>     reservedTypeNames = (:[]) . Name Nothing . unwords <$> makeKeywordTree
 >         ["double precision"
 >         ,"character varying"
 >         ,"char varying"
@ -598,7 +596,7 @@ value expression parens, row ctor and scalar subquery
 >     ,ctor <$> commaSep1 valueExpr]
 >   where
 >     ctor [a] = Parens a
->     ctor as = SpecialOp [Name "rowctor"] as
+>     ctor as = SpecialOp [Name Nothing "rowctor"] as

 == case, cast, exists, unique, array/multiset constructor, interval

@ -643,7 +641,7 @@ subquery expression:
 > arrayCtor = keyword_ "array" >>
 >     choice
 >     [ArrayCtor <$> parens queryExpr
->     ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
+>     ,Array (Iden [Name Nothing "array"]) <$> brackets (commaSep valueExpr)]

 As far as I can tell, table(query expr) is just syntax sugar for
 multiset(query expr). It must be there for compatibility or something.
@ -689,7 +687,7 @@ this. also fix the monad -> applicative
 >     q <- optionMaybe intervalQualifier
 >     mkIt s lit q)
 >   where
->     mkIt Nothing val Nothing = pure $ TypedLit (TypeName [Name "interval"]) val
+>     mkIt Nothing val Nothing = pure $ TypedLit (TypeName [Name Nothing "interval"]) val
 >     mkIt s val (Just (a,b)) = pure $ IntervalLit s val a b
 >     mkIt (Just {}) _val Nothing = fail "cannot use sign without interval qualifier"

@ -718,7 +716,7 @@ all the value expressions which start with an identifier
 >     -- this is a special case because set is a reserved keyword
 >     -- and the names parser won't parse it
 >     multisetSetFunction =
->         App [Name "set"] . (:[]) <$>
+>         App [Name Nothing "set"] . (:[]) <$>
 >         (try (keyword_ "set" *> openParen)
 >          *> valueExpr <* closeParen)

@ -750,7 +748,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
 >               -- check we haven't parsed the first
 >               -- keyword as an identifier
 >               case (e,kws) of
->                   (Iden [Name i], (k,_):_)
+>                   (Iden [Name Nothing i], (k,_):_)
 >                       | map toLower i == k ->
 >                           fail $ "cannot use keyword here: " ++ i
 >                   _ -> return ()
@ -761,7 +759,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
 >          SOKMandatory -> Just <$> pfa
 >     as <- mapM parseArg kws
 >     void closeParen
->     pure $ SpecialOpK [Name opName] fa $ catMaybes as
+>     pure $ SpecialOpK [Name Nothing opName] fa $ catMaybes as
 >   where
 >     parseArg (nm,mand) =
 >         let p = keyword_ nm >> valueExpr
@ -833,7 +831,7 @@ in the source
 >                    ,"trailing" <$ keyword_ "trailing"
 >                    ,"both" <$ keyword_ "both"]
 >     mkTrim fa ch fr =
->       SpecialOpK [Name "trim"] Nothing
+>       SpecialOpK [Name Nothing "trim"] Nothing
 >           $ catMaybes [Just (fa,StringLit "'" "'" ch)
 >                       ,Just ("from", fr)]

@ -959,7 +957,7 @@ and operator. This is the call to valueExprB.

 > betweenSuffix :: Parser (ValueExpr -> ValueExpr)
 > betweenSuffix =
->     makeOp <$> Name <$> opName
+>     makeOp <$> Name Nothing <$> opName
 >            <*> valueExprB
 >            <*> (keyword_ "and" *> valueExprB)
 >   where
@ -979,7 +977,7 @@ a = any (select * from t)
 >     q <- parens queryExpr
 >     pure $ \v -> QuantifiedComparison v [c] cq q
 >   where
->     comp = Name <$> choice (map symbol
+>     comp = Name Nothing <$> choice (map symbol
 >            ["=", "<>", "<=", "<", ">", ">="])
 >     compQuan = choice
 >                [CPAny <$ keyword_ "any"
@ -1009,7 +1007,10 @@ a match (select a from t)
 It is going to be really difficult to support an arbitrary character
 for the escape now there is a separate lexer ...

-> escapeSuffix :: Parser (ValueExpr -> ValueExpr)
+TODO: this needs fixing. Escape is only part of other nodes, and not a
+separate suffix.
+
+> {-escapeSuffix :: Parser (ValueExpr -> ValueExpr)
 > escapeSuffix = do
 >     ctor <- choice
 >             [Escape <$ keyword_ "escape"
@ -1023,6 +1024,7 @@ for the escape now there is a separate lexer ...
 >     oneOnly c = case c of
 >                    [c'] -> return c'
 >                    _ -> fail "escape char must be single char"
+> -}

 === collate

@ -1060,7 +1062,6 @@ messages, but both of these are too important.
 >         ,[binarySym "." E.AssocLeft]

 >         ,[postfix' arraySuffix
->          ,postfix' escapeSuffix
 >          ,postfix' collateSuffix]

 >         ,[prefixSym "+", prefixSym "-"]
@ -1129,14 +1130,14 @@ messages, but both of these are too important.
 >     binaryKeywords p =
 >         E.Infix (do
 >                  o <- try p
->                  pure (\a b -> BinOp a [Name $ unwords o] b))
+>                  pure (\a b -> BinOp a [Name Nothing $ unwords o] b))
 >             E.AssocNone
 >     postfixKeywords p =
 >       postfix' $ do
 >           o <- try p
->           pure $ PostfixOp [Name $ unwords o]
+>           pure $ PostfixOp [Name Nothing $ unwords o]
 >     binary p nm assoc =
->       E.Infix (p >> pure (\a b -> BinOp a [Name nm] b)) assoc
+>       E.Infix (p >> pure (\a b -> BinOp a [Name Nothing nm] b)) assoc
 >     multisetBinOp = E.Infix (do
 >         keyword_ "multiset"
 >         o <- choice [Union <$ keyword_ "union"
@ -1147,7 +1148,7 @@ messages, but both of these are too important.
 >           E.AssocLeft
 >     prefixKeyword nm = prefix (keyword_ nm) nm
 >     prefixSym nm = prefix (symbol_ nm) nm
->     prefix p nm = prefix' (p >> pure (PrefixOp [Name nm]))
+>     prefix p nm = prefix' (p >> pure (PrefixOp [Name Nothing nm]))
 >     -- hack from here
 >     -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
 >     -- not implemented properly yet
@ -1996,17 +1997,17 @@ It is only allowed when all the strings are quoted with ' atm.
 >       (Just s, L.Symbol p) | s == p -> Just p
 >       _ -> Nothing)

-> identifierTok :: [String] -> Maybe String -> Parser String
-> identifierTok blackList kw = mytoken (\tok ->
->     case (kw,tok) of
->       (Nothing, L.Identifier p) | map toLower p `notElem` blackList -> Just p
->       (Just k, L.Identifier p) | k == map toLower p -> Just p
+> identifierTok :: [String] -> Parser (Maybe (String,String), String)
+> identifierTok blackList = mytoken (\tok ->
+>     case tok of
+>       L.Identifier q p | map toLower p `notElem` blackList -> Just (q,p)
 >       _ -> Nothing)

-> qidentifierTok :: Parser (String,String,String)
-> qidentifierTok = mytoken (\tok ->
->     case tok of
->       L.QuotedIdentifier s e t -> Just (s,e,t)
+> unquotedIdentifierTok :: [String] -> Maybe String -> Parser String
+> unquotedIdentifierTok blackList kw = mytoken (\tok ->
+>     case (kw,tok) of
+>       (Nothing, L.Identifier Nothing p) | map toLower p `notElem` blackList -> Just p
+>       (Just k, L.Identifier Nothing p) | k == map toLower p -> Just p
 >       _ -> Nothing)

 > mytoken :: (L.Token -> Maybe a) -> Parser a
@ -2052,7 +2053,7 @@ todo: work out the symbol parsing better
 = helper functions

 > keyword :: String -> Parser String
-> keyword k = identifierTok [] (Just k) <?> k
+> keyword k = unquotedIdentifierTok [] (Just k) <?> k

 helper function to improve error messages

--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@ -100,13 +100,13 @@ which have been changed to try to improve the layout of the output.
 >     fpd (Preceding e) = valueExpr d e <+> text "preceding"
 >     fpd (Following e) = valueExpr d e <+> text "following"

-> valueExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name "between"]
->                                                  ,[Name "not between"]] =
+> valueExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name Nothing "between"]
+>                                                  ,[Name Nothing "not between"]] =
 >   sep [valueExpr dia a
 >       ,names nm <+> valueExpr dia b
 >       ,nest (length (unnames nm) + 1) $ text "and" <+> valueExpr dia c]

-> valueExpr d (SpecialOp [Name "rowctor"] as) =
+> valueExpr d (SpecialOp [Name Nothing "rowctor"] as) =
 >     parens $ commaSep $ map (valueExpr d) as

 > valueExpr d (SpecialOp nm es) =
@ -119,7 +119,8 @@ which have been changed to try to improve the layout of the output.

 > valueExpr d (PrefixOp f e) = names f <+> valueExpr d e
 > valueExpr d (PostfixOp f e) = valueExpr d e <+> names f
-> valueExpr d e@(BinOp _ op _) | op `elem` [[Name "and"], [Name "or"]] =
+> valueExpr d e@(BinOp _ op _) | op `elem` [[Name Nothing "and"]
+>                                          ,[Name Nothing "or"]] =
 >     -- special case for and, or, get all the ands so we can vcat them
 >     -- nicely
 >     case ands e of
@ -130,7 +131,7 @@ which have been changed to try to improve the layout of the output.
 >     ands (BinOp a op' b) | op == op' = ands a ++ ands b
 >     ands x = [x]
 > -- special case for . we don't use whitespace
-> valueExpr d (BinOp e0 [Name "."] e1) =
+> valueExpr d (BinOp e0 [Name Nothing "."] e1) =
 >     valueExpr d e0 <> text "." <> valueExpr d e1
 > valueExpr d (BinOp e0 f e1) =
 >     valueExpr d e0 <+> names f <+> valueExpr d e1
@ -211,11 +212,11 @@ which have been changed to try to improve the layout of the output.
 >          Distinct -> text "distinct"
 >     ,valueExpr d b]

-> valueExpr d (Escape v e) =
+> {-valueExpr d (Escape v e) =
 >     valueExpr d v <+> text "escape" <+> text [e]

 > valueExpr d (UEscape v e) =
->     valueExpr d v <+> text "uescape" <+> text [e]
+>     valueExpr d v <+> text "uescape" <+> text [e]-}

 > valueExpr d (Collate v c) =
 >     valueExpr d v <+> text "collate" <+> names c
@ -239,8 +240,8 @@ which have been changed to try to improve the layout of the output.


 > unname :: Name -> String
-> unname (Name n) = n
-> unname (QuotedName s e n) =
+> unname (Name Nothing n) = n
+> unname (Name (Just (s,e)) n) =
 >     s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e

 > unnames :: [Name] -> String
@ -248,8 +249,8 @@ which have been changed to try to improve the layout of the output.


 > name :: Name -> Doc
-> name (Name n) = text n
-> name (QuotedName s e n) =
+> name (Name Nothing n) = text n
+> name (Name (Just (s,e)) n) =
 >     text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e

 > names :: [Name] -> Doc
--- a/Language/SQL/SimpleSQL/Syntax.lhs
+++ b/Language/SQL/SimpleSQL/Syntax.lhs
@ -206,8 +206,8 @@
 todo: special syntax for like, similar with escape - escape cannot go
 in other places

->     | Escape ValueExpr Char
->     | UEscape ValueExpr Char
+>     --  | Escape ValueExpr Char
+>     --  | UEscape ValueExpr Char
 >     | Collate ValueExpr [Name]
 >     | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr
 >     | MultisetCtor [ValueExpr]
@ -217,9 +217,13 @@ in other places
 >       deriving (Eq,Show,Read,Data,Typeable)

 > -- | Represents an identifier name, which can be quoted or unquoted.
-> data Name = Name String
->           | QuotedName String String String
->             -- ^ quoted name, the fields are start quote, end quote and the string itself, these will usually be ", others are possible e.g. `something` is parsed to QuotedName "`" "`" "something, and $a$ test $a$ is parsed to QuotedName "$a$" "$a$" " test "
+> -- examples:
+> --
+> -- * test -> Name Nothing "test"
+> -- * "test" -> Name (Just "\"","\"") "test"
+> -- * `something` -> Name (Just ("`","`") "something"
+> -- * [ms] -> Name (Just ("[","]") "ms"
+> data Name = Name (Maybe (String,String)) String
 >             deriving (Eq,Show,Read,Data,Typeable)

 > -- | Represents a type name, used in casts.