refactor some of the multi keyword/ multiword typename parsing
This commit is contained in:
parent
247c7a26b7
commit
b0f1e044b4
|
@ -22,8 +22,8 @@
|
|||
> import qualified Text.Parsec as P (ParseError)
|
||||
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
||||
> import qualified Text.Parsec.Expr as E
|
||||
> import Data.List (intercalate)
|
||||
|
||||
> import Data.List (intercalate,sort,groupBy)
|
||||
> import Data.Function (on)
|
||||
> import Language.SQL.SimpleSQL.Syntax
|
||||
|
||||
The public API functions.
|
||||
|
@ -513,15 +513,13 @@ typename: used in casts. Special cases for the multi keyword typenames
|
|||
that SQL supports.
|
||||
|
||||
> typeName :: Parser TypeName
|
||||
> typeName = (choice (multiWordParsers
|
||||
> ++ [TypeName <$> identifierBlacklist blacklist])
|
||||
> typeName =
|
||||
> (choice [multiWordParsers
|
||||
> ,TypeName <$> identifierBlacklist blacklist]
|
||||
> >>= optionSuffix precision
|
||||
> ) <?> "typename"
|
||||
> where
|
||||
> multiWordParsers =
|
||||
> flip map multiWordTypeNames
|
||||
> $ \ks -> (TypeName . unwords) <$> try (keywords ks)
|
||||
> multiWordTypeNames = map words
|
||||
> multiWordParsers = (TypeName . unwords) <$> makeKeywordTree
|
||||
> ["double precision"
|
||||
> ,"character varying"
|
||||
> ,"char varying"
|
||||
|
@ -559,6 +557,41 @@ todo: timestamp types:
|
|||
> ctor [a] = Parens a
|
||||
> ctor as = SpecialOp [Name "rowctor"] as
|
||||
|
||||
== multi keyword helper
|
||||
|
||||
This helper is to help parsing multiple options of multiple keywords
|
||||
with similar prefixes, e.g. parsing 'is null' and 'is not null'.
|
||||
|
||||
use to left factor/ improve:
|
||||
typed literal and general identifiers
|
||||
not like, not in, not between operators
|
||||
help with factoring keyword functions and other app-likes
|
||||
the join keyword sequences
|
||||
fetch first/next
|
||||
row/rows only
|
||||
|
||||
There is probably a simpler way of doing this but I am a bit
|
||||
thick.
|
||||
|
||||
> makeKeywordTree :: [String] -> Parser [String]
|
||||
> makeKeywordTree sets = do
|
||||
> reverse <$> parseTrees (sort $ map words sets)
|
||||
> -- ?? <?> intercalate "," sets
|
||||
> where
|
||||
> parseTrees :: [[String]] -> Parser [String]
|
||||
> parseTrees ws = do
|
||||
> let gs :: [[[String]]]
|
||||
> gs = groupBy ((==) `on` head) ws
|
||||
> choice $ map parseGroup gs
|
||||
> parseGroup :: [[String]] -> Parser [String]
|
||||
> parseGroup l = do
|
||||
> let k = head $ head l
|
||||
> keyword_ k
|
||||
> let tls = map tail l
|
||||
> pr = (k:) <$> parseTrees tls
|
||||
> if (or $ map null tls)
|
||||
> then pr <|> return [k]
|
||||
> else pr
|
||||
|
||||
== operator parsing
|
||||
|
||||
|
@ -569,6 +602,12 @@ can be symbols (a + b), single keywords (a and b) or multiple keywords
|
|||
|
||||
TODO: carefully review the precedences and associativities.
|
||||
|
||||
TODO: to fix the parsing completely, I think will need to parse
|
||||
without precedence and associativity and fix up afterwards, since SQL
|
||||
syntax is way too messy. It might be possible to avoid this if we
|
||||
wanted to avoid extensibility and to not be concerned with parse error
|
||||
messages, but both of these are considered too important.
|
||||
|
||||
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
|
||||
> opTable bExpr =
|
||||
> [-- parse match and quantified comparisons as postfix ops
|
||||
|
@ -598,13 +637,13 @@ TODO: carefully review the precedences and associativities.
|
|||
> ,binarySym "|" E.AssocRight
|
||||
> ,binaryKeyword "like" E.AssocNone
|
||||
> ,binaryKeyword "overlaps" E.AssocNone]
|
||||
> ++ map (`binaryKeywords` E.AssocNone)
|
||||
> ++ [binaryKeywords $ makeKeywordTree
|
||||
> ["not like"
|
||||
> ,"is similar to"
|
||||
> ,"is not similar to"
|
||||
> ,"is distinct from"
|
||||
> ,"is not distinct from"]
|
||||
> ++ map postfixKeywords
|
||||
> ,postfixKeywords $ makeKeywordTree
|
||||
> ["is null"
|
||||
> ,"is not null"
|
||||
> ,"is true"
|
||||
|
@ -613,6 +652,7 @@ TODO: carefully review the precedences and associativities.
|
|||
> ,"is not false"
|
||||
> ,"is unknown"
|
||||
> ,"is not unknown"]
|
||||
> ]
|
||||
> -- have to use try with inSuffix because of a conflict
|
||||
> -- with 'in' in position function, and not between
|
||||
> -- between also has a try in it to deal with 'not'
|
||||
|
@ -631,17 +671,20 @@ TODO: carefully review the precedences and associativities.
|
|||
> where
|
||||
> binarySym nm assoc = binary (symbol_ nm) nm assoc
|
||||
> binaryKeyword nm assoc = binary (keyword_ nm) nm assoc
|
||||
> -- use try with the multi keywords because of shared
|
||||
> -- prefixes to the sets of keywords. Should left factor
|
||||
> -- somehow
|
||||
> binaryKeywords nm assoc = binary (try $ keywords_ (words nm)) nm assoc
|
||||
> binaryKeywords p =
|
||||
> E.Infix (do
|
||||
> o <- p
|
||||
> return (\a b -> BinOp a [Name $ unwords o] b))
|
||||
> E.AssocNone
|
||||
> postfixKeywords p =
|
||||
> postfix' $ do
|
||||
> o <- p
|
||||
> return $ PostfixOp [Name $ unwords o]
|
||||
> binary p nm assoc =
|
||||
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
|
||||
> prefixKeyword nm = prefix (keyword_ nm) nm
|
||||
> prefixSym nm = prefix (symbol_ nm) nm
|
||||
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
|
||||
> postfixKeywords nm = postfix (try $ keywords_ (words nm)) nm
|
||||
> postfix p nm = postfix' (p >> return (PostfixOp [Name nm]))
|
||||
> -- hack from here
|
||||
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
|
||||
> -- not implemented properly yet
|
||||
|
@ -815,11 +858,9 @@ allows offset and fetch in either order
|
|||
> fetch :: Parser ValueExpr
|
||||
> fetch = choice [ansiFetch, limit]
|
||||
> where --todo: better left factoring
|
||||
> ansiFetch = keyword_ "fetch" >>
|
||||
> choice [keyword_ "first",keyword_ "next"]
|
||||
> *> valueExpr
|
||||
> <* choice [keyword_ "rows",keyword_ "row"]
|
||||
> <* keyword_ "only"
|
||||
> fs = makeKeywordTree ["fetch first", "fetch next"]
|
||||
> ro = makeKeywordTree ["rows only", "row only"]
|
||||
> ansiFetch = fs *> valueExpr <* ro
|
||||
> limit = keyword_ "limit" *> valueExpr
|
||||
|
||||
== common table expressions
|
||||
|
@ -1070,9 +1111,6 @@ todo: work out the symbol parsing better
|
|||
|
||||
helper function to improve error messages
|
||||
|
||||
> keywords :: [String] -> Parser [String]
|
||||
> keywords ks = mapM keyword ks <?> intercalate " " ks
|
||||
|
||||
> keywords_ :: [String] -> Parser ()
|
||||
> keywords_ ks = mapM_ keyword_ ks <?> intercalate " " ks
|
||||
|
||||
|
|
|
@ -139,7 +139,7 @@
|
|||
> -- e.g. a[3]. The first
|
||||
> -- valueExpr is the array, the
|
||||
> -- second is the subscripts/ctor args
|
||||
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)> deriving (Eq,Show,Read,Data,Typeable)
|
||||
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
|
||||
> | CSStringLit String String
|
||||
> | Escape ValueExpr Char
|
||||
> | UEscape ValueExpr Char
|
||||
|
|
32
TODO
32
TODO
|
@ -14,7 +14,8 @@ represent missing optional bits in the ast as nothing instead of the
|
|||
look at fixing the expression parsing completely
|
||||
represent natural and using/on in the syntax more close to the
|
||||
concrete syntax - don't combine in the ast
|
||||
|
||||
review haddock in the syntax and update
|
||||
review syntax names and representation
|
||||
careful review of token parses wrt trailing delimiters/junk
|
||||
|
||||
review places in the parse which should allow only a fixed set of
|
||||
|
@ -26,10 +27,12 @@ decide whether to represent numeric literals better, instead of a
|
|||
|
||||
rough SQL 2003 todo, including tests to write:
|
||||
|
||||
switch TypedLit to CSStringLit based on first char being underscore?
|
||||
idens: "", unicode, charset?, check dotted idens and contexts
|
||||
add missing type name support: lots of missing ones here, including
|
||||
simple stuff like lob variations, and new things like interval,
|
||||
row, ref, scope, array, multiset type names.
|
||||
add tests for all the typenames cast and typed literal
|
||||
date and time literals
|
||||
multisets
|
||||
review window functions, window clause
|
||||
|
@ -43,13 +46,15 @@ filter in aggs
|
|||
within group in aggs
|
||||
rows review
|
||||
|
||||
support needed MODULE syntax in identifiers - already covered?
|
||||
decide how to represent special identifiers including the session
|
||||
LNR: maybe leave until after next release
|
||||
|
||||
LNR support needed MODULE syntax in identifiers - already covered?
|
||||
LNR decide how to represent special identifiers including the session
|
||||
variables or whatever they are called like current_user
|
||||
grouping - needs special syntax?
|
||||
next value for
|
||||
probably leave for now: subtypes, methods, new /routine, dereference
|
||||
double check associativity, precedence (value exprs, joins, set ops)
|
||||
LNR grouping - needs special syntax?
|
||||
LNR next value for
|
||||
LNR subtypes, methods, new /routine, dereference
|
||||
LNR double check associativity, precedence (value exprs, joins, set ops)
|
||||
position expressions
|
||||
length expressions
|
||||
extract expression
|
||||
|
@ -61,7 +66,7 @@ convert
|
|||
translate
|
||||
trim
|
||||
overlay
|
||||
specifictype
|
||||
LNR specifictype
|
||||
datetime value expressions
|
||||
intervals
|
||||
row value constructors, expressions review
|
||||
|
@ -80,21 +85,24 @@ between symmetric/asymmetric
|
|||
in predicate review
|
||||
escape for like
|
||||
escape for [not] similar to
|
||||
regular expression syntax?
|
||||
LNR regular expression syntax?
|
||||
normalized predicate
|
||||
overlaps predicate
|
||||
distinct from predicate
|
||||
member predicate
|
||||
submultiset predicate
|
||||
set predicate
|
||||
type predicate
|
||||
LNR type predicate
|
||||
additional stuff review:
|
||||
interval stuff
|
||||
aggregate functions: lots of missing bits
|
||||
especially: filter where, within group
|
||||
complete list of keywords/reserved keywords
|
||||
select into
|
||||
other language format identifiers for host params?
|
||||
LNR select into
|
||||
LNR other language format identifiers for host params?
|
||||
|
||||
----
|
||||
above not marked LNR are for next release
|
||||
|
||||
review areas where this parser is too permissive, e.g. value
|
||||
expressions allowed where column reference names only should be
|
||||
|
|
|
@ -132,8 +132,7 @@ Tests for parsing value expressions
|
|||
> unaryOperators :: TestItem
|
||||
> unaryOperators = Group "unaryOperators" $ map (uncurry TestValueExpr)
|
||||
> [("not a", PrefixOp "not" $ Iden "a")
|
||||
> -- I think this is a missing feature or bug in parsec buildExpressionParser
|
||||
> --,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
|
||||
> ,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
|
||||
> ,("+a", PrefixOp "+" $ Iden "a")
|
||||
> ,("-a", PrefixOp "-" $ Iden "a")
|
||||
> ]
|
||||
|
|
Loading…
Reference in a new issue