1
Fork 0

make the typename and app parsers a bit more regular

This commit is contained in:
Jake Wheat 2014-05-12 22:06:29 +03:00
parent 6c9a291930
commit 8996230093
2 changed files with 133 additions and 128 deletions

View file

@ -7,12 +7,16 @@
> module Language.SQL.SimpleSQL.Combinators
> (optionSuffix
> ,(<??>)
> ,(<$$>)
> ,(<??.>)
> ,(<??*>)) where
> ,(<??*>)
> ,(<$$>)
> ,(<$$$>)
> ,(<$$$$>)
> ,(<$$$$$>)
> ) where
> import Control.Applicative --((<$>), (<*>), (<**>))
> import Text.Parsec --(option,many)
> import Control.Applicative ((<$>), (<*>), (<**>), pure, Applicative)
> import Text.Parsec (option,many)
> import Text.Parsec.String (Parser)
a possible issue with the option suffix is that it enforces left
@ -37,10 +41,37 @@ other operators so it can be used nicely
> p <??> q = p <**> option id q
this is analogous to <**>, flipped <$>
Help with left factored parsers. <$$> is like an analogy with <**>:
> (<$$>) :: (a -> b -> c) -> Parser b -> Parser (a -> c)
> (<$$>) = (<$>) . flip
f <$> a <*> b
is like
a <**> (b <$$> f)
f <$> a <*> b <*> c
is like
a <**> (b <**> (c <$$$> f))
> (<$$>) :: Applicative f =>
> f b -> (a -> b -> c) -> f (a -> c)
> (<$$>) pa c = pa <**> pure (flip c)
> (<$$$>) :: Applicative f =>
> f c -> (a -> b -> c -> t) -> f (b -> a -> t)
> p <$$$> c = p <**> pure (flip3 c)
> (<$$$$>) :: Applicative f =>
> f d -> (a -> b -> c -> d -> t) -> f (c -> b -> a -> t)
> p <$$$$> c = p <**> pure (flip4 c)
> (<$$$$$>) :: Applicative f =>
> f e -> (a -> b -> c -> d -> e -> t) -> f (d -> c -> b -> a -> t)
> p <$$$$$> c = p <**> pure (flip5 c)
Surely no-one would write code like this seriously?
composing suffix parsers, not sure about the name. This is used to add
@ -48,11 +79,29 @@ a second or more suffix parser contingent on the first suffix parser
succeeding.
> (<??.>) :: Parser (a -> a) -> Parser (a -> a) -> Parser (a -> a)
> (<??.>) pa pb = (.) <$$> pa <*> option id pb
> (<??.>) pa pb = (.) `c` pa <*> option id pb
> -- todo: fix this mess
> where c = (<$>) . flip
0 to many repeated applications of suffix parser
> (<??*>) :: Parser a -> Parser (a -> a) -> Parser a
> p <??*> q = p <**> chainl q (pure (flip (.))) id
> -- foldr ($) <$> p <*> (reverse <$> many q)
> p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
These are to help with left factored parsers:
a <**> (b <**> (c <**> pure (flip3 ctor)))
Not sure the names are correct, but they follow a pattern with flip
a <**> (b <**> pure (flip ctor))
> flip3 :: (a -> b -> c -> t) -> c -> b -> a -> t
> flip3 f a b c = f c b a
> flip4 :: (a -> b -> c -> d -> t) -> d -> c -> b -> a -> t
> flip4 f a b c d = f d c b a
> flip5 :: (a -> b -> c -> d -> e -> t) -> e -> d -> c -> b -> a -> t
> flip5 f a b c d e = f e d c b a

View file

@ -413,20 +413,8 @@ A type name will parse into the 'smallest' constructor it will fit in
syntactically, e.g. a clob(5) will parse to a precision type name, not
a lob type name.
Not sure if the factoring in this function is too far. It could do
with some work to improve the readability still. Ideas: the parsers of
each component, the various kinds combinators, the application of
choice particularly and the constuctors are a bit mixed together and
should be separated more. Probably start with:
P (a -> b), which combines parsing some extra fields and then
constructing a 'b' with these new fields and the 'a', decompose it
into
P c -- separate each field that contributes to a b to a separately
named function
separate out the ctor wrapper: c -> a -> b
and then create a P (a -> b) just by combining bits
Unfortunately, to improve the error messages, there is a lot of (left)
factoring in this function, and it is a little dense.
> typeName :: Parser TypeName
> typeName = lexeme $
@ -444,54 +432,40 @@ and then create a P (a -> b) just by combining bits
> otherTypeName =
> nameOfType <**>
> (typeNameWithParens
> <|> pure Nothing <**> timeTypeName
> <|> pure Nothing <**> charTypeName
> <|> pure Nothing <**> (timeTypeName <|> charTypeName)
> <|> pure TypeName)
> ----------------------------
> -- TODO: adapt the reserved type names to degenerate into
> -- names so try isn't needed (not sure it is needed anyway)
> nameOfType = try reservedTypeNames <|> names
> ----------------------------
> nameOfType = reservedTypeNames <|> names
> charTypeName = charSet <**> (option [] tcollate <$$$$> CharTypeName)
> <|> pure [] <**> (tcollate <$$$$> CharTypeName)
> typeNameWithParens =
> (openParen *> unsignedInteger)
> <**> (precMaybeSuffix <|> extrasInParens)
> precMaybeSuffix = closeParen *>
> ((. Just) <$> timeTypeName
> <|> ((. Just) <$> charTypeName)
> <|> pure (flip PrecTypeName))
> extrasInParens = (precScaleTypeName <|> precLengthTypeName) <* closeParen
> ----------------------------
> precScaleTypeName =
> (\s p nm -> PrecScaleTypeName nm p s) <$> (comma *> unsignedInteger)
> ----------------------------
> <**> (closeParen *> precMaybeSuffix
> <|> (precScaleTypeName <|> precLengthTypeName) <* closeParen)
> precMaybeSuffix = (. Just) <$> (timeTypeName <|> charTypeName)
> <|> pure (flip PrecTypeName)
> precScaleTypeName = (comma *> unsignedInteger) <$$$> PrecScaleTypeName
> precLengthTypeName =
> mkPrec <$> (Just <$> lobPrecSuffix) <*> optionMaybe lobUnits
> <|> mkPrec Nothing <$> (Just <$> lobUnits)
> mkPrec s u p nm = PrecLengthTypeName nm p s u
> lobPrecSuffix = choice
> [PrecK <$ keyword_ "k"
> ,PrecM <$ keyword_ "m"
> ,PrecG <$ keyword_ "g"
> ,PrecT <$ keyword_ "t"
> ,PrecP <$ keyword_ "p"]
> Just <$> lobPrecSuffix
> <**> (optionMaybe lobUnits <$$$$> PrecLengthTypeName)
> <|> pure Nothing <**> ((Just <$> lobUnits) <$$$$> PrecLengthTypeName)
> timeTypeName = tz <$$$> TimeTypeName
> ----------------------------
> lobPrecSuffix = PrecK <$ keyword_ "k"
> <|> PrecM <$ keyword_ "m"
> <|> PrecG <$ keyword_ "g"
> <|> PrecT <$ keyword_ "t"
> <|> PrecP <$ keyword_ "p"
> lobUnits = PrecCharacters <$ keyword_ "characters"
> <|> PrecOctets <$ keyword_ "octets"
> ----------------------------
> timeTypeName =
> (\tz p nm -> TimeTypeName nm p tz)
> <$> (True <$ keywords_ ["with", "time","zone"]
> <|> False <$ keywords_ ["without", "time","zone"])
> ----------------------------
> charTypeName = mkCT <$> charSet <*> option [] tcollate
> <|> mkCT [] <$> tcollate
> mkCT cs col p nm = CharTypeName nm p cs col
> tz = True <$ keywords_ ["with", "time","zone"]
> <|> False <$ keywords_ ["without", "time","zone"]
> charSet = keywords_ ["character", "set"] *> names
> tcollate = keyword_ "collate" *> names
> ----------------------------
> tnSuffix = multiset <|> array
> multiset = MultisetTypeName <$ keyword_ "multiset"
> array = keyword_ "array" *>
> (ArrayTypeName <$$> optionMaybe (brackets unsignedInteger))
> (optionMaybe (brackets unsignedInteger) <$$> ArrayTypeName)
> ----------------------------
> -- this parser handles the fixed set of multi word
> -- type names, plus all the type names which are
@ -635,9 +609,7 @@ subquery expression:
> subquery :: Parser ValueExpr
> subquery = SubQueryExpr <$> sqkw <*> parens queryExpr
> where
> sqkw = choice
> [SqExists <$ keyword_ "exists"
> ,SqUnique <$ keyword_ "unique"]
> sqkw = SqExists <$ keyword_ "exists" <|> SqUnique <$ keyword_ "unique"
=== array/multiset constructor
@ -837,49 +809,41 @@ aggregate variations (distinct, order by in parens, filter and where
suffixes)
window apps (fn/agg followed by over)
This code still needs some tidying, and eventually has to be left
factored with the typename 'literal' parser.
This code is also a little dense like the typename code because of
left factoring, later they will even have to be partially combined
together.
> app :: Parser ([Name] -> ValueExpr)
> app =
> openParen *> choice
> [((,,) <$> duplicates
> <*> commaSep1 valueExpr
> <*> (option [] orderBy <* closeParen))
> <**> ((\f (sq,es,ob) nm -> f sq es ob nm) <$> afilter
> <|> pure (\(d,es,ob) f -> AggregateApp f d es ob Nothing))
> -- separate cases with no all or distinct which have at least one
> -- value expr
> [duplicates
> <**> (commaSep1 valueExpr
> <**> (((option [] orderBy) <* closeParen)
> <**> (optionMaybe afilter <$$$$$> AggregateApp)))
> -- separate cases with no all or distinct which must have at
> -- least one value expr
> ,commaSep1 valueExpr
> <**> choice
> [closeParen *> choice [window
> ,withinGroup
> ,(\f es nm -> f SQDefault es [] nm) <$> afilter
> ,pure (flip App)]
> ,(orderBy <* closeParen)
> <**>
> choice [(\f ob es nm -> f SQDefault es ob nm) <$> afilter
> ,pure (\ob es f -> AggregateApp f SQDefault es ob Nothing)]]
> ,([] <$ closeParen)
> <**> choice [window
> [closeParen *> choice
> [window
> ,withinGroup
> ,(Just <$> afilter) <$$$> aggAppWithoutDupeOrd
> ,pure (flip App)]
> ,orderBy <* closeParen
> <**> (optionMaybe afilter <$$$$> aggAppWithoutDupe)]
> -- no valueExprs: duplicates and order by not allowed
> ,([] <$ closeParen) <**> (window <|> withinGroup <|> pure (flip App))
> ]
> afilter :: Parser (SetQuantifier
> -> [ValueExpr]
> -> [SortSpec]
> -> [Name]
> -> ValueExpr)
> afilter =
> keyword_ "filter" *> (ctor <$> parens (keyword_ "where" *> valueExpr))
> where
> ctor ve sq es ob f = AggregateApp f sq es ob (Just ve)
> aggAppWithoutDupeOrd n es f = AggregateApp n SQDefault es [] f
> aggAppWithoutDupe n = AggregateApp n SQDefault
> afilter :: Parser ValueExpr
> afilter = keyword_ "filter" *> parens (keyword_ "where" *> valueExpr)
> withinGroup :: Parser ([ValueExpr] -> [Name] -> ValueExpr)
> withinGroup =
> keywords_ ["within", "group"] *>
> ((\ob es nm -> AggregateAppGroup nm es ob) <$> parens orderBy)
> (keywords_ ["within", "group"] *> parens orderBy) <$$$> AggregateAppGroup
==== window
@ -893,40 +857,33 @@ TODO: add window support for other aggregate variations, needs some
changes to the syntax also
> window :: Parser ([ValueExpr] -> [Name] -> ValueExpr)
> window = keyword_ "over" *> parens (ctorWrap
> <$> option [] partitionBy
> <*> option [] orderBy
> <*> optionMaybe frameClause)
> window =
> keyword_ "over" *> openParen *> option [] partitionBy
> <**> (option [] orderBy
> <**> (((optionMaybe frameClause) <* closeParen) <$$$$$> WindowApp))
> where
> ctorWrap pb ob fc es f = WindowApp f es pb ob fc
> partitionBy = keywords_ ["partition","by"] >> commaSep1 valueExpr
> partitionBy = keywords_ ["partition","by"] *> commaSep1 valueExpr
> frameClause =
> mkFrame <$> choice [FrameRows <$ keyword_ "rows"
> ,FrameRange <$ keyword_ "range"]
> <*> frameStartEnd
> frameStartEnd =
> choice
> [keyword_ "between" >>
> mkFrameBetween <$> frameLimit True
> <*> (keyword_ "and" *> frameLimit True)
> ,mkFrameFrom <$> frameLimit False]
> -- use the bexpression style from the between parsing for frame between
> frameRowsRange -- TODO: this 'and' could be an issue
> <**> (choice [(keyword_ "between" *> frameLimit True)
> <**> ((keyword_ "and" *> frameLimit True)
> <$$$> FrameBetween)
> -- maybe this should still use a b expression
> -- for consistency
> ,frameLimit False <**> pure (flip FrameFrom)])
> frameRowsRange = FrameRows <$ keyword_ "rows"
> <|> FrameRange <$ keyword_ "range"
> frameLimit useB =
> choice
> [Current <$ keywords_ ["current", "row"]
> -- todo: create an automatic left factor for stuff like
> -- this
> ,keyword_ "unbounded" >>
> -- todo: create an automatic left factor for stuff like this
> ,keyword_ "unbounded" *>
> choice [UnboundedPreceding <$ keyword_ "preceding"
> ,UnboundedFollowing <$ keyword_ "following"]
> ,do
> e <- if useB then valueExprB else valueExpr
> choice [Preceding e <$ keyword_ "preceding"
> ,Following e <$ keyword_ "following"]
> ,(if useB then valueExprB else valueExpr)
> <**> (Preceding <$ keyword_ "preceding"
> <|> Following <$ keyword_ "following")
> ]
> mkFrameBetween s e rs = FrameBetween rs s e
> mkFrameFrom s rs = FrameFrom rs s
> mkFrame rs c = c rs
== suffixes
@ -1244,7 +1201,7 @@ tref
> choice [TRFunction n
> <$> parens (commaSep valueExpr)
> ,pure $ TRSimple n]] <??> aliasSuffix
> aliasSuffix = TRAlias <$$> fromAlias
> aliasSuffix = fromAlias <$$> TRAlias
> joinTrefSuffix t =
> (TRJoin t <$> option False (True <$ keyword_ "natural")
> <*> joinType
@ -1448,7 +1405,6 @@ thick.
> makeKeywordTree :: [String] -> Parser [String]
> makeKeywordTree sets =
> parseTrees (sort $ map words sets)
> -- ?? <?> intercalate "," sets
> where
> parseTrees :: [[String]] -> Parser [String]
> parseTrees ws = do
@ -1522,7 +1478,7 @@ make this choice.
> dot = string "."
> expon = (:) <$> oneOf "eE" <*> sInt
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
> pp = ((++) <$$>)
> pp = (<$$> (++))
> identifier :: Parser String