refactor the identifier syntax
This commit is contained in:
parent
52f035b718
commit
aa5c2e89c7
16 changed files with 830 additions and 826 deletions
Language/SQL/SimpleSQL
|
@ -46,16 +46,11 @@ parsec
|
|||
> --
|
||||
> = Symbol String
|
||||
>
|
||||
> -- | This is an identifier or keyword.
|
||||
> --
|
||||
> | Identifier String
|
||||
>
|
||||
> -- | This is a quoted identifier, the quotes can be " or u&,
|
||||
> -- etc. or something dialect specific like []
|
||||
> -- the first two fields are the start and end quotes
|
||||
> | QuotedIdentifier String -- start quote
|
||||
> String -- end quote
|
||||
> String -- content
|
||||
> -- | This is an identifier or keyword. The first field is
|
||||
> -- the quotes used, or nothing if no quotes were used. The quotes
|
||||
> -- can be " or u& or something dialect specific like []
|
||||
> | Identifier (Maybe (String,String)) String
|
||||
|
||||
> -- | This is a host param symbol, e.g. :param
|
||||
> | HostParam String
|
||||
>
|
||||
|
@ -88,10 +83,13 @@ parsec
|
|||
> -- print them, should should get back exactly the same string
|
||||
> prettyToken :: Dialect -> Token -> String
|
||||
> prettyToken _ (Symbol s) = s
|
||||
> prettyToken _ (Identifier t) = t
|
||||
> prettyToken _ (QuotedIdentifier q1 q2 t) =
|
||||
> prettyToken _ (Identifier Nothing t) = t
|
||||
> prettyToken _ (Identifier (Just (q1,q2)) t) =
|
||||
> q1 ++
|
||||
> -- todo: a bit hacky, do a better design
|
||||
> -- the dialect will know how to escape and unescape
|
||||
> -- contents, but the parser here also needs to know
|
||||
> -- about parsing escaped quotes
|
||||
> (if '"' `elem` q1 then doubleChars '"' t else t)
|
||||
> ++ q2
|
||||
> --prettyToken _ (UQIdentifier t) =
|
||||
|
@ -179,14 +177,14 @@ u&"unicode quoted identifier"
|
|||
> identifier :: Dialect -> Parser Token
|
||||
> identifier d =
|
||||
> choice
|
||||
> [QuotedIdentifier "\"" "\"" <$> qiden
|
||||
> [Identifier (Just ("\"","\"")) <$> qiden
|
||||
> -- try is used here to avoid a conflict with identifiers
|
||||
> -- and quoted strings which also start with a 'u'
|
||||
> ,QuotedIdentifier "u&\"" "\"" <$> (try (string "u&") *> qiden)
|
||||
> ,QuotedIdentifier "U&\"" "\"" <$> (try (string "U&") *> qiden)
|
||||
> ,Identifier <$> identifierString
|
||||
> ,Identifier (Just ("u&\"","\"")) <$> (try (string "u&") *> qiden)
|
||||
> ,Identifier (Just ("U&\"","\"")) <$> (try (string "U&") *> qiden)
|
||||
> ,Identifier Nothing <$> identifierString
|
||||
> -- todo: dialect protection
|
||||
> ,QuotedIdentifier "`" "`" <$> mySqlQIden
|
||||
> ,Identifier (Just ("`","`")) <$> mySqlQIden
|
||||
> ]
|
||||
> where
|
||||
> qiden = char '"' *> qidenSuffix ""
|
||||
|
|
|
@ -330,9 +330,7 @@ u&"example quoted"
|
|||
> name :: Parser Name
|
||||
> name = do
|
||||
> d <- getState
|
||||
> choice [Name <$> identifierTok (blacklist d) Nothing
|
||||
> ,(\(s,e,t) -> QuotedName s e t) <$> qidentifierTok
|
||||
> ]
|
||||
> uncurry Name <$> identifierTok (blacklist d)
|
||||
|
||||
todo: replace (:[]) with a named function all over
|
||||
|
||||
|
@ -506,7 +504,7 @@ factoring in this function, and it is a little dense.
|
|||
> -- this parser handles the fixed set of multi word
|
||||
> -- type names, plus all the type names which are
|
||||
> -- reserved words
|
||||
> reservedTypeNames = (:[]) . Name . unwords <$> makeKeywordTree
|
||||
> reservedTypeNames = (:[]) . Name Nothing . unwords <$> makeKeywordTree
|
||||
> ["double precision"
|
||||
> ,"character varying"
|
||||
> ,"char varying"
|
||||
|
@ -598,7 +596,7 @@ value expression parens, row ctor and scalar subquery
|
|||
> ,ctor <$> commaSep1 valueExpr]
|
||||
> where
|
||||
> ctor [a] = Parens a
|
||||
> ctor as = SpecialOp [Name "rowctor"] as
|
||||
> ctor as = SpecialOp [Name Nothing "rowctor"] as
|
||||
|
||||
== case, cast, exists, unique, array/multiset constructor, interval
|
||||
|
||||
|
@ -643,7 +641,7 @@ subquery expression:
|
|||
> arrayCtor = keyword_ "array" >>
|
||||
> choice
|
||||
> [ArrayCtor <$> parens queryExpr
|
||||
> ,Array (Iden [Name "array"]) <$> brackets (commaSep valueExpr)]
|
||||
> ,Array (Iden [Name Nothing "array"]) <$> brackets (commaSep valueExpr)]
|
||||
|
||||
As far as I can tell, table(query expr) is just syntax sugar for
|
||||
multiset(query expr). It must be there for compatibility or something.
|
||||
|
@ -689,7 +687,7 @@ this. also fix the monad -> applicative
|
|||
> q <- optionMaybe intervalQualifier
|
||||
> mkIt s lit q)
|
||||
> where
|
||||
> mkIt Nothing val Nothing = pure $ TypedLit (TypeName [Name "interval"]) val
|
||||
> mkIt Nothing val Nothing = pure $ TypedLit (TypeName [Name Nothing "interval"]) val
|
||||
> mkIt s val (Just (a,b)) = pure $ IntervalLit s val a b
|
||||
> mkIt (Just {}) _val Nothing = fail "cannot use sign without interval qualifier"
|
||||
|
||||
|
@ -718,7 +716,7 @@ all the value expressions which start with an identifier
|
|||
> -- this is a special case because set is a reserved keyword
|
||||
> -- and the names parser won't parse it
|
||||
> multisetSetFunction =
|
||||
> App [Name "set"] . (:[]) <$>
|
||||
> App [Name Nothing "set"] . (:[]) <$>
|
||||
> (try (keyword_ "set" *> openParen)
|
||||
> *> valueExpr <* closeParen)
|
||||
|
||||
|
@ -750,7 +748,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
|||
> -- check we haven't parsed the first
|
||||
> -- keyword as an identifier
|
||||
> case (e,kws) of
|
||||
> (Iden [Name i], (k,_):_)
|
||||
> (Iden [Name Nothing i], (k,_):_)
|
||||
> | map toLower i == k ->
|
||||
> fail $ "cannot use keyword here: " ++ i
|
||||
> _ -> return ()
|
||||
|
@ -761,7 +759,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
|||
> SOKMandatory -> Just <$> pfa
|
||||
> as <- mapM parseArg kws
|
||||
> void closeParen
|
||||
> pure $ SpecialOpK [Name opName] fa $ catMaybes as
|
||||
> pure $ SpecialOpK [Name Nothing opName] fa $ catMaybes as
|
||||
> where
|
||||
> parseArg (nm,mand) =
|
||||
> let p = keyword_ nm >> valueExpr
|
||||
|
@ -833,7 +831,7 @@ in the source
|
|||
> ,"trailing" <$ keyword_ "trailing"
|
||||
> ,"both" <$ keyword_ "both"]
|
||||
> mkTrim fa ch fr =
|
||||
> SpecialOpK [Name "trim"] Nothing
|
||||
> SpecialOpK [Name Nothing "trim"] Nothing
|
||||
> $ catMaybes [Just (fa,StringLit "'" "'" ch)
|
||||
> ,Just ("from", fr)]
|
||||
|
||||
|
@ -959,7 +957,7 @@ and operator. This is the call to valueExprB.
|
|||
|
||||
> betweenSuffix :: Parser (ValueExpr -> ValueExpr)
|
||||
> betweenSuffix =
|
||||
> makeOp <$> Name <$> opName
|
||||
> makeOp <$> Name Nothing <$> opName
|
||||
> <*> valueExprB
|
||||
> <*> (keyword_ "and" *> valueExprB)
|
||||
> where
|
||||
|
@ -979,7 +977,7 @@ a = any (select * from t)
|
|||
> q <- parens queryExpr
|
||||
> pure $ \v -> QuantifiedComparison v [c] cq q
|
||||
> where
|
||||
> comp = Name <$> choice (map symbol
|
||||
> comp = Name Nothing <$> choice (map symbol
|
||||
> ["=", "<>", "<=", "<", ">", ">="])
|
||||
> compQuan = choice
|
||||
> [CPAny <$ keyword_ "any"
|
||||
|
@ -1009,7 +1007,10 @@ a match (select a from t)
|
|||
It is going to be really difficult to support an arbitrary character
|
||||
for the escape now there is a separate lexer ...
|
||||
|
||||
> escapeSuffix :: Parser (ValueExpr -> ValueExpr)
|
||||
TODO: this needs fixing. Escape is only part of other nodes, and not a
|
||||
separate suffix.
|
||||
|
||||
> {-escapeSuffix :: Parser (ValueExpr -> ValueExpr)
|
||||
> escapeSuffix = do
|
||||
> ctor <- choice
|
||||
> [Escape <$ keyword_ "escape"
|
||||
|
@ -1023,6 +1024,7 @@ for the escape now there is a separate lexer ...
|
|||
> oneOnly c = case c of
|
||||
> [c'] -> return c'
|
||||
> _ -> fail "escape char must be single char"
|
||||
> -}
|
||||
|
||||
=== collate
|
||||
|
||||
|
@ -1060,7 +1062,6 @@ messages, but both of these are too important.
|
|||
> ,[binarySym "." E.AssocLeft]
|
||||
|
||||
> ,[postfix' arraySuffix
|
||||
> ,postfix' escapeSuffix
|
||||
> ,postfix' collateSuffix]
|
||||
|
||||
> ,[prefixSym "+", prefixSym "-"]
|
||||
|
@ -1129,14 +1130,14 @@ messages, but both of these are too important.
|
|||
> binaryKeywords p =
|
||||
> E.Infix (do
|
||||
> o <- try p
|
||||
> pure (\a b -> BinOp a [Name $ unwords o] b))
|
||||
> pure (\a b -> BinOp a [Name Nothing $ unwords o] b))
|
||||
> E.AssocNone
|
||||
> postfixKeywords p =
|
||||
> postfix' $ do
|
||||
> o <- try p
|
||||
> pure $ PostfixOp [Name $ unwords o]
|
||||
> pure $ PostfixOp [Name Nothing $ unwords o]
|
||||
> binary p nm assoc =
|
||||
> E.Infix (p >> pure (\a b -> BinOp a [Name nm] b)) assoc
|
||||
> E.Infix (p >> pure (\a b -> BinOp a [Name Nothing nm] b)) assoc
|
||||
> multisetBinOp = E.Infix (do
|
||||
> keyword_ "multiset"
|
||||
> o <- choice [Union <$ keyword_ "union"
|
||||
|
@ -1147,7 +1148,7 @@ messages, but both of these are too important.
|
|||
> E.AssocLeft
|
||||
> prefixKeyword nm = prefix (keyword_ nm) nm
|
||||
> prefixSym nm = prefix (symbol_ nm) nm
|
||||
> prefix p nm = prefix' (p >> pure (PrefixOp [Name nm]))
|
||||
> prefix p nm = prefix' (p >> pure (PrefixOp [Name Nothing nm]))
|
||||
> -- hack from here
|
||||
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
|
||||
> -- not implemented properly yet
|
||||
|
@ -1996,17 +1997,17 @@ It is only allowed when all the strings are quoted with ' atm.
|
|||
> (Just s, L.Symbol p) | s == p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> identifierTok :: [String] -> Maybe String -> Parser String
|
||||
> identifierTok blackList kw = mytoken (\tok ->
|
||||
> case (kw,tok) of
|
||||
> (Nothing, L.Identifier p) | map toLower p `notElem` blackList -> Just p
|
||||
> (Just k, L.Identifier p) | k == map toLower p -> Just p
|
||||
> identifierTok :: [String] -> Parser (Maybe (String,String), String)
|
||||
> identifierTok blackList = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.Identifier q p | map toLower p `notElem` blackList -> Just (q,p)
|
||||
> _ -> Nothing)
|
||||
|
||||
> qidentifierTok :: Parser (String,String,String)
|
||||
> qidentifierTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.QuotedIdentifier s e t -> Just (s,e,t)
|
||||
> unquotedIdentifierTok :: [String] -> Maybe String -> Parser String
|
||||
> unquotedIdentifierTok blackList kw = mytoken (\tok ->
|
||||
> case (kw,tok) of
|
||||
> (Nothing, L.Identifier Nothing p) | map toLower p `notElem` blackList -> Just p
|
||||
> (Just k, L.Identifier Nothing p) | k == map toLower p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> mytoken :: (L.Token -> Maybe a) -> Parser a
|
||||
|
@ -2052,7 +2053,7 @@ todo: work out the symbol parsing better
|
|||
= helper functions
|
||||
|
||||
> keyword :: String -> Parser String
|
||||
> keyword k = identifierTok [] (Just k) <?> k
|
||||
> keyword k = unquotedIdentifierTok [] (Just k) <?> k
|
||||
|
||||
helper function to improve error messages
|
||||
|
||||
|
|
|
@ -100,13 +100,13 @@ which have been changed to try to improve the layout of the output.
|
|||
> fpd (Preceding e) = valueExpr d e <+> text "preceding"
|
||||
> fpd (Following e) = valueExpr d e <+> text "following"
|
||||
|
||||
> valueExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name "between"]
|
||||
> ,[Name "not between"]] =
|
||||
> valueExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name Nothing "between"]
|
||||
> ,[Name Nothing "not between"]] =
|
||||
> sep [valueExpr dia a
|
||||
> ,names nm <+> valueExpr dia b
|
||||
> ,nest (length (unnames nm) + 1) $ text "and" <+> valueExpr dia c]
|
||||
|
||||
> valueExpr d (SpecialOp [Name "rowctor"] as) =
|
||||
> valueExpr d (SpecialOp [Name Nothing "rowctor"] as) =
|
||||
> parens $ commaSep $ map (valueExpr d) as
|
||||
|
||||
> valueExpr d (SpecialOp nm es) =
|
||||
|
@ -119,7 +119,8 @@ which have been changed to try to improve the layout of the output.
|
|||
|
||||
> valueExpr d (PrefixOp f e) = names f <+> valueExpr d e
|
||||
> valueExpr d (PostfixOp f e) = valueExpr d e <+> names f
|
||||
> valueExpr d e@(BinOp _ op _) | op `elem` [[Name "and"], [Name "or"]] =
|
||||
> valueExpr d e@(BinOp _ op _) | op `elem` [[Name Nothing "and"]
|
||||
> ,[Name Nothing "or"]] =
|
||||
> -- special case for and, or, get all the ands so we can vcat them
|
||||
> -- nicely
|
||||
> case ands e of
|
||||
|
@ -130,7 +131,7 @@ which have been changed to try to improve the layout of the output.
|
|||
> ands (BinOp a op' b) | op == op' = ands a ++ ands b
|
||||
> ands x = [x]
|
||||
> -- special case for . we don't use whitespace
|
||||
> valueExpr d (BinOp e0 [Name "."] e1) =
|
||||
> valueExpr d (BinOp e0 [Name Nothing "."] e1) =
|
||||
> valueExpr d e0 <> text "." <> valueExpr d e1
|
||||
> valueExpr d (BinOp e0 f e1) =
|
||||
> valueExpr d e0 <+> names f <+> valueExpr d e1
|
||||
|
@ -211,11 +212,11 @@ which have been changed to try to improve the layout of the output.
|
|||
> Distinct -> text "distinct"
|
||||
> ,valueExpr d b]
|
||||
|
||||
> valueExpr d (Escape v e) =
|
||||
> {-valueExpr d (Escape v e) =
|
||||
> valueExpr d v <+> text "escape" <+> text [e]
|
||||
|
||||
> valueExpr d (UEscape v e) =
|
||||
> valueExpr d v <+> text "uescape" <+> text [e]
|
||||
> valueExpr d v <+> text "uescape" <+> text [e]-}
|
||||
|
||||
> valueExpr d (Collate v c) =
|
||||
> valueExpr d v <+> text "collate" <+> names c
|
||||
|
@ -239,8 +240,8 @@ which have been changed to try to improve the layout of the output.
|
|||
|
||||
|
||||
> unname :: Name -> String
|
||||
> unname (Name n) = n
|
||||
> unname (QuotedName s e n) =
|
||||
> unname (Name Nothing n) = n
|
||||
> unname (Name (Just (s,e)) n) =
|
||||
> s ++ (if '"' `elem` s then doubleUpDoubleQuotes n else n) ++ e
|
||||
|
||||
> unnames :: [Name] -> String
|
||||
|
@ -248,8 +249,8 @@ which have been changed to try to improve the layout of the output.
|
|||
|
||||
|
||||
> name :: Name -> Doc
|
||||
> name (Name n) = text n
|
||||
> name (QuotedName s e n) =
|
||||
> name (Name Nothing n) = text n
|
||||
> name (Name (Just (s,e)) n) =
|
||||
> text s <> text (if '"' `elem` s then doubleUpDoubleQuotes n else n) <> text e
|
||||
|
||||
> names :: [Name] -> Doc
|
||||
|
|
|
@ -206,8 +206,8 @@
|
|||
todo: special syntax for like, similar with escape - escape cannot go
|
||||
in other places
|
||||
|
||||
> | Escape ValueExpr Char
|
||||
> | UEscape ValueExpr Char
|
||||
> -- | Escape ValueExpr Char
|
||||
> -- | UEscape ValueExpr Char
|
||||
> | Collate ValueExpr [Name]
|
||||
> | MultisetBinOp ValueExpr CombineOp SetQuantifier ValueExpr
|
||||
> | MultisetCtor [ValueExpr]
|
||||
|
@ -217,9 +217,13 @@ in other places
|
|||
> deriving (Eq,Show,Read,Data,Typeable)
|
||||
|
||||
> -- | Represents an identifier name, which can be quoted or unquoted.
|
||||
> data Name = Name String
|
||||
> | QuotedName String String String
|
||||
> -- ^ quoted name, the fields are start quote, end quote and the string itself, these will usually be ", others are possible e.g. `something` is parsed to QuotedName "`" "`" "something, and $a$ test $a$ is parsed to QuotedName "$a$" "$a$" " test "
|
||||
> -- examples:
|
||||
> --
|
||||
> -- * test -> Name Nothing "test"
|
||||
> -- * "test" -> Name (Just "\"","\"") "test"
|
||||
> -- * `something` -> Name (Just ("`","`") "something"
|
||||
> -- * [ms] -> Name (Just ("[","]") "ms"
|
||||
> data Name = Name (Maybe (String,String)) String
|
||||
> deriving (Eq,Show,Read,Data,Typeable)
|
||||
|
||||
> -- | Represents a type name, used in casts.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue