1
Fork 0

refactor some of the multi keyword/ multiword typename parsing

This commit is contained in:
Jake Wheat 2014-04-18 14:50:54 +03:00
parent 247c7a26b7
commit b0f1e044b4
4 changed files with 99 additions and 54 deletions

View file

@ -22,8 +22,8 @@
> import qualified Text.Parsec as P (ParseError)
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
> import qualified Text.Parsec.Expr as E
> import Data.List (intercalate)
> import Data.List (intercalate,sort,groupBy)
> import Data.Function (on)
> import Language.SQL.SimpleSQL.Syntax
The public API functions.
@ -513,15 +513,13 @@ typename: used in casts. Special cases for the multi keyword typenames
that SQL supports.
> typeName :: Parser TypeName
> typeName = (choice (multiWordParsers
> ++ [TypeName <$> identifierBlacklist blacklist])
> >>= optionSuffix precision
> ) <?> "typename"
> typeName =
> (choice [multiWordParsers
> ,TypeName <$> identifierBlacklist blacklist]
> >>= optionSuffix precision
> ) <?> "typename"
> where
> multiWordParsers =
> flip map multiWordTypeNames
> $ \ks -> (TypeName . unwords) <$> try (keywords ks)
> multiWordTypeNames = map words
> multiWordParsers = (TypeName . unwords) <$> makeKeywordTree
> ["double precision"
> ,"character varying"
> ,"char varying"
@ -559,6 +557,41 @@ todo: timestamp types:
> ctor [a] = Parens a
> ctor as = SpecialOp [Name "rowctor"] as
== multi keyword helper
This helper is to help parsing multiple options of multiple keywords
with similar prefixes, e.g. parsing 'is null' and 'is not null'.
use to left factor/ improve:
typed literal and general identifiers
not like, not in, not between operators
help with factoring keyword functions and other app-likes
the join keyword sequences
fetch first/next
row/rows only
There is probably a simpler way of doing this but I am a bit
thick.
> makeKeywordTree :: [String] -> Parser [String]
> makeKeywordTree sets = do
> reverse <$> parseTrees (sort $ map words sets)
> -- ?? <?> intercalate "," sets
> where
> parseTrees :: [[String]] -> Parser [String]
> parseTrees ws = do
> let gs :: [[[String]]]
> gs = groupBy ((==) `on` head) ws
> choice $ map parseGroup gs
> parseGroup :: [[String]] -> Parser [String]
> parseGroup l = do
> let k = head $ head l
> keyword_ k
> let tls = map tail l
> pr = (k:) <$> parseTrees tls
> if (or $ map null tls)
> then pr <|> return [k]
> else pr
== operator parsing
@ -569,6 +602,12 @@ can be symbols (a + b), single keywords (a and b) or multiple keywords
TODO: carefully review the precedences and associativities.
TODO: to fix the parsing completely, I think will need to parse
without precedence and associativity and fix up afterwards, since SQL
syntax is way too messy. It might be possible to avoid this if we
wanted to avoid extensibility and to not be concerned with parse error
messages, but both of these are considered too important.
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
> opTable bExpr =
> [-- parse match and quantified comparisons as postfix ops
@ -598,21 +637,22 @@ TODO: carefully review the precedences and associativities.
> ,binarySym "|" E.AssocRight
> ,binaryKeyword "like" E.AssocNone
> ,binaryKeyword "overlaps" E.AssocNone]
> ++ map (`binaryKeywords` E.AssocNone)
> ["not like"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
> ++ map postfixKeywords
> ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
> ++ [binaryKeywords $ makeKeywordTree
> ["not like"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
> ,postfixKeywords $ makeKeywordTree
> ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
> ]
> -- have to use try with inSuffix because of a conflict
> -- with 'in' in position function, and not between
> -- between also has a try in it to deal with 'not'
@ -631,17 +671,20 @@ TODO: carefully review the precedences and associativities.
> where
> binarySym nm assoc = binary (symbol_ nm) nm assoc
> binaryKeyword nm assoc = binary (keyword_ nm) nm assoc
> -- use try with the multi keywords because of shared
> -- prefixes to the sets of keywords. Should left factor
> -- somehow
> binaryKeywords nm assoc = binary (try $ keywords_ (words nm)) nm assoc
> binaryKeywords p =
> E.Infix (do
> o <- p
> return (\a b -> BinOp a [Name $ unwords o] b))
> E.AssocNone
> postfixKeywords p =
> postfix' $ do
> o <- p
> return $ PostfixOp [Name $ unwords o]
> binary p nm assoc =
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
> prefixKeyword nm = prefix (keyword_ nm) nm
> prefixSym nm = prefix (symbol_ nm) nm
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
> postfixKeywords nm = postfix (try $ keywords_ (words nm)) nm
> postfix p nm = postfix' (p >> return (PostfixOp [Name nm]))
> -- hack from here
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
> -- not implemented properly yet
@ -815,11 +858,9 @@ allows offset and fetch in either order
> fetch :: Parser ValueExpr
> fetch = choice [ansiFetch, limit]
> where --todo: better left factoring
> ansiFetch = keyword_ "fetch" >>
> choice [keyword_ "first",keyword_ "next"]
> *> valueExpr
> <* choice [keyword_ "rows",keyword_ "row"]
> <* keyword_ "only"
> fs = makeKeywordTree ["fetch first", "fetch next"]
> ro = makeKeywordTree ["rows only", "row only"]
> ansiFetch = fs *> valueExpr <* ro
> limit = keyword_ "limit" *> valueExpr
== common table expressions
@ -1070,9 +1111,6 @@ todo: work out the symbol parsing better
helper function to improve error messages
> keywords :: [String] -> Parser [String]
> keywords ks = mapM keyword ks <?> intercalate " " ks
> keywords_ :: [String] -> Parser ()
> keywords_ ks = mapM_ keyword_ ks <?> intercalate " " ks

View file

@ -139,7 +139,7 @@
> -- e.g. a[3]. The first
> -- valueExpr is the array, the
> -- second is the subscripts/ctor args
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)> deriving (Eq,Show,Read,Data,Typeable)
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
> | CSStringLit String String
> | Escape ValueExpr Char
> | UEscape ValueExpr Char

32
TODO
View file

@ -14,7 +14,8 @@ represent missing optional bits in the ast as nothing instead of the
look at fixing the expression parsing completely
represent natural and using/on in the syntax more close to the
concrete syntax - don't combine in the ast
review haddock in the syntax and update
review syntax names and representation
careful review of token parses wrt trailing delimiters/junk
review places in the parse which should allow only a fixed set of
@ -26,10 +27,12 @@ decide whether to represent numeric literals better, instead of a
rough SQL 2003 todo, including tests to write:
switch TypedLit to CSStringLit based on first char being underscore?
idens: "", unicode, charset?, check dotted idens and contexts
add missing type name support: lots of missing ones here, including
simple stuff like lob variations, and new things like interval,
row, ref, scope, array, multiset type names.
add tests for all the typenames cast and typed literal
date and time literals
multisets
review window functions, window clause
@ -43,13 +46,15 @@ filter in aggs
within group in aggs
rows review
support needed MODULE syntax in identifiers - already covered?
decide how to represent special identifiers including the session
LNR: maybe leave until after next release
LNR support needed MODULE syntax in identifiers - already covered?
LNR decide how to represent special identifiers including the session
variables or whatever they are called like current_user
grouping - needs special syntax?
next value for
probably leave for now: subtypes, methods, new /routine, dereference
double check associativity, precedence (value exprs, joins, set ops)
LNR grouping - needs special syntax?
LNR next value for
LNR subtypes, methods, new /routine, dereference
LNR double check associativity, precedence (value exprs, joins, set ops)
position expressions
length expressions
extract expression
@ -61,7 +66,7 @@ convert
translate
trim
overlay
specifictype
LNR specifictype
datetime value expressions
intervals
row value constructors, expressions review
@ -80,21 +85,24 @@ between symmetric/asymmetric
in predicate review
escape for like
escape for [not] similar to
regular expression syntax?
LNR regular expression syntax?
normalized predicate
overlaps predicate
distinct from predicate
member predicate
submultiset predicate
set predicate
type predicate
LNR type predicate
additional stuff review:
interval stuff
aggregate functions: lots of missing bits
especially: filter where, within group
complete list of keywords/reserved keywords
select into
other language format identifiers for host params?
LNR select into
LNR other language format identifiers for host params?
----
above not marked LNR are for next release
review areas where this parser is too permissive, e.g. value
expressions allowed where column reference names only should be

View file

@ -132,8 +132,7 @@ Tests for parsing value expressions
> unaryOperators :: TestItem
> unaryOperators = Group "unaryOperators" $ map (uncurry TestValueExpr)
> [("not a", PrefixOp "not" $ Iden "a")
> -- I think this is a missing feature or bug in parsec buildExpressionParser
> --,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
> ,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
> ,("+a", PrefixOp "+" $ Iden "a")
> ,("-a", PrefixOp "-" $ Iden "a")
> ]