refactor some of the multi keyword/ multiword typename parsing
This commit is contained in:
parent
247c7a26b7
commit
b0f1e044b4
|
@ -22,8 +22,8 @@
|
||||||
> import qualified Text.Parsec as P (ParseError)
|
> import qualified Text.Parsec as P (ParseError)
|
||||||
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
||||||
> import qualified Text.Parsec.Expr as E
|
> import qualified Text.Parsec.Expr as E
|
||||||
> import Data.List (intercalate)
|
> import Data.List (intercalate,sort,groupBy)
|
||||||
|
> import Data.Function (on)
|
||||||
> import Language.SQL.SimpleSQL.Syntax
|
> import Language.SQL.SimpleSQL.Syntax
|
||||||
|
|
||||||
The public API functions.
|
The public API functions.
|
||||||
|
@ -513,15 +513,13 @@ typename: used in casts. Special cases for the multi keyword typenames
|
||||||
that SQL supports.
|
that SQL supports.
|
||||||
|
|
||||||
> typeName :: Parser TypeName
|
> typeName :: Parser TypeName
|
||||||
> typeName = (choice (multiWordParsers
|
> typeName =
|
||||||
> ++ [TypeName <$> identifierBlacklist blacklist])
|
> (choice [multiWordParsers
|
||||||
> >>= optionSuffix precision
|
> ,TypeName <$> identifierBlacklist blacklist]
|
||||||
> ) <?> "typename"
|
> >>= optionSuffix precision
|
||||||
|
> ) <?> "typename"
|
||||||
> where
|
> where
|
||||||
> multiWordParsers =
|
> multiWordParsers = (TypeName . unwords) <$> makeKeywordTree
|
||||||
> flip map multiWordTypeNames
|
|
||||||
> $ \ks -> (TypeName . unwords) <$> try (keywords ks)
|
|
||||||
> multiWordTypeNames = map words
|
|
||||||
> ["double precision"
|
> ["double precision"
|
||||||
> ,"character varying"
|
> ,"character varying"
|
||||||
> ,"char varying"
|
> ,"char varying"
|
||||||
|
@ -559,6 +557,41 @@ todo: timestamp types:
|
||||||
> ctor [a] = Parens a
|
> ctor [a] = Parens a
|
||||||
> ctor as = SpecialOp [Name "rowctor"] as
|
> ctor as = SpecialOp [Name "rowctor"] as
|
||||||
|
|
||||||
|
== multi keyword helper
|
||||||
|
|
||||||
|
This helper is to help parsing multiple options of multiple keywords
|
||||||
|
with similar prefixes, e.g. parsing 'is null' and 'is not null'.
|
||||||
|
|
||||||
|
use to left factor/ improve:
|
||||||
|
typed literal and general identifiers
|
||||||
|
not like, not in, not between operators
|
||||||
|
help with factoring keyword functions and other app-likes
|
||||||
|
the join keyword sequences
|
||||||
|
fetch first/next
|
||||||
|
row/rows only
|
||||||
|
|
||||||
|
There is probably a simpler way of doing this but I am a bit
|
||||||
|
thick.
|
||||||
|
|
||||||
|
> makeKeywordTree :: [String] -> Parser [String]
|
||||||
|
> makeKeywordTree sets = do
|
||||||
|
> reverse <$> parseTrees (sort $ map words sets)
|
||||||
|
> -- ?? <?> intercalate "," sets
|
||||||
|
> where
|
||||||
|
> parseTrees :: [[String]] -> Parser [String]
|
||||||
|
> parseTrees ws = do
|
||||||
|
> let gs :: [[[String]]]
|
||||||
|
> gs = groupBy ((==) `on` head) ws
|
||||||
|
> choice $ map parseGroup gs
|
||||||
|
> parseGroup :: [[String]] -> Parser [String]
|
||||||
|
> parseGroup l = do
|
||||||
|
> let k = head $ head l
|
||||||
|
> keyword_ k
|
||||||
|
> let tls = map tail l
|
||||||
|
> pr = (k:) <$> parseTrees tls
|
||||||
|
> if (or $ map null tls)
|
||||||
|
> then pr <|> return [k]
|
||||||
|
> else pr
|
||||||
|
|
||||||
== operator parsing
|
== operator parsing
|
||||||
|
|
||||||
|
@ -569,6 +602,12 @@ can be symbols (a + b), single keywords (a and b) or multiple keywords
|
||||||
|
|
||||||
TODO: carefully review the precedences and associativities.
|
TODO: carefully review the precedences and associativities.
|
||||||
|
|
||||||
|
TODO: to fix the parsing completely, I think will need to parse
|
||||||
|
without precedence and associativity and fix up afterwards, since SQL
|
||||||
|
syntax is way too messy. It might be possible to avoid this if we
|
||||||
|
wanted to avoid extensibility and to not be concerned with parse error
|
||||||
|
messages, but both of these are considered too important.
|
||||||
|
|
||||||
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
|
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
|
||||||
> opTable bExpr =
|
> opTable bExpr =
|
||||||
> [-- parse match and quantified comparisons as postfix ops
|
> [-- parse match and quantified comparisons as postfix ops
|
||||||
|
@ -598,21 +637,22 @@ TODO: carefully review the precedences and associativities.
|
||||||
> ,binarySym "|" E.AssocRight
|
> ,binarySym "|" E.AssocRight
|
||||||
> ,binaryKeyword "like" E.AssocNone
|
> ,binaryKeyword "like" E.AssocNone
|
||||||
> ,binaryKeyword "overlaps" E.AssocNone]
|
> ,binaryKeyword "overlaps" E.AssocNone]
|
||||||
> ++ map (`binaryKeywords` E.AssocNone)
|
> ++ [binaryKeywords $ makeKeywordTree
|
||||||
> ["not like"
|
> ["not like"
|
||||||
> ,"is similar to"
|
> ,"is similar to"
|
||||||
> ,"is not similar to"
|
> ,"is not similar to"
|
||||||
> ,"is distinct from"
|
> ,"is distinct from"
|
||||||
> ,"is not distinct from"]
|
> ,"is not distinct from"]
|
||||||
> ++ map postfixKeywords
|
> ,postfixKeywords $ makeKeywordTree
|
||||||
> ["is null"
|
> ["is null"
|
||||||
> ,"is not null"
|
> ,"is not null"
|
||||||
> ,"is true"
|
> ,"is true"
|
||||||
> ,"is not true"
|
> ,"is not true"
|
||||||
> ,"is false"
|
> ,"is false"
|
||||||
> ,"is not false"
|
> ,"is not false"
|
||||||
> ,"is unknown"
|
> ,"is unknown"
|
||||||
> ,"is not unknown"]
|
> ,"is not unknown"]
|
||||||
|
> ]
|
||||||
> -- have to use try with inSuffix because of a conflict
|
> -- have to use try with inSuffix because of a conflict
|
||||||
> -- with 'in' in position function, and not between
|
> -- with 'in' in position function, and not between
|
||||||
> -- between also has a try in it to deal with 'not'
|
> -- between also has a try in it to deal with 'not'
|
||||||
|
@ -631,17 +671,20 @@ TODO: carefully review the precedences and associativities.
|
||||||
> where
|
> where
|
||||||
> binarySym nm assoc = binary (symbol_ nm) nm assoc
|
> binarySym nm assoc = binary (symbol_ nm) nm assoc
|
||||||
> binaryKeyword nm assoc = binary (keyword_ nm) nm assoc
|
> binaryKeyword nm assoc = binary (keyword_ nm) nm assoc
|
||||||
> -- use try with the multi keywords because of shared
|
> binaryKeywords p =
|
||||||
> -- prefixes to the sets of keywords. Should left factor
|
> E.Infix (do
|
||||||
> -- somehow
|
> o <- p
|
||||||
> binaryKeywords nm assoc = binary (try $ keywords_ (words nm)) nm assoc
|
> return (\a b -> BinOp a [Name $ unwords o] b))
|
||||||
|
> E.AssocNone
|
||||||
|
> postfixKeywords p =
|
||||||
|
> postfix' $ do
|
||||||
|
> o <- p
|
||||||
|
> return $ PostfixOp [Name $ unwords o]
|
||||||
> binary p nm assoc =
|
> binary p nm assoc =
|
||||||
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
|
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
|
||||||
> prefixKeyword nm = prefix (keyword_ nm) nm
|
> prefixKeyword nm = prefix (keyword_ nm) nm
|
||||||
> prefixSym nm = prefix (symbol_ nm) nm
|
> prefixSym nm = prefix (symbol_ nm) nm
|
||||||
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
|
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
|
||||||
> postfixKeywords nm = postfix (try $ keywords_ (words nm)) nm
|
|
||||||
> postfix p nm = postfix' (p >> return (PostfixOp [Name nm]))
|
|
||||||
> -- hack from here
|
> -- hack from here
|
||||||
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
|
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
|
||||||
> -- not implemented properly yet
|
> -- not implemented properly yet
|
||||||
|
@ -815,11 +858,9 @@ allows offset and fetch in either order
|
||||||
> fetch :: Parser ValueExpr
|
> fetch :: Parser ValueExpr
|
||||||
> fetch = choice [ansiFetch, limit]
|
> fetch = choice [ansiFetch, limit]
|
||||||
> where --todo: better left factoring
|
> where --todo: better left factoring
|
||||||
> ansiFetch = keyword_ "fetch" >>
|
> fs = makeKeywordTree ["fetch first", "fetch next"]
|
||||||
> choice [keyword_ "first",keyword_ "next"]
|
> ro = makeKeywordTree ["rows only", "row only"]
|
||||||
> *> valueExpr
|
> ansiFetch = fs *> valueExpr <* ro
|
||||||
> <* choice [keyword_ "rows",keyword_ "row"]
|
|
||||||
> <* keyword_ "only"
|
|
||||||
> limit = keyword_ "limit" *> valueExpr
|
> limit = keyword_ "limit" *> valueExpr
|
||||||
|
|
||||||
== common table expressions
|
== common table expressions
|
||||||
|
@ -1070,9 +1111,6 @@ todo: work out the symbol parsing better
|
||||||
|
|
||||||
helper function to improve error messages
|
helper function to improve error messages
|
||||||
|
|
||||||
> keywords :: [String] -> Parser [String]
|
|
||||||
> keywords ks = mapM keyword ks <?> intercalate " " ks
|
|
||||||
|
|
||||||
> keywords_ :: [String] -> Parser ()
|
> keywords_ :: [String] -> Parser ()
|
||||||
> keywords_ ks = mapM_ keyword_ ks <?> intercalate " " ks
|
> keywords_ ks = mapM_ keyword_ ks <?> intercalate " " ks
|
||||||
|
|
||||||
|
|
|
@ -139,7 +139,7 @@
|
||||||
> -- e.g. a[3]. The first
|
> -- e.g. a[3]. The first
|
||||||
> -- valueExpr is the array, the
|
> -- valueExpr is the array, the
|
||||||
> -- second is the subscripts/ctor args
|
> -- second is the subscripts/ctor args
|
||||||
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)> deriving (Eq,Show,Read,Data,Typeable)
|
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
|
||||||
> | CSStringLit String String
|
> | CSStringLit String String
|
||||||
> | Escape ValueExpr Char
|
> | Escape ValueExpr Char
|
||||||
> | UEscape ValueExpr Char
|
> | UEscape ValueExpr Char
|
||||||
|
|
32
TODO
32
TODO
|
@ -14,7 +14,8 @@ represent missing optional bits in the ast as nothing instead of the
|
||||||
look at fixing the expression parsing completely
|
look at fixing the expression parsing completely
|
||||||
represent natural and using/on in the syntax more close to the
|
represent natural and using/on in the syntax more close to the
|
||||||
concrete syntax - don't combine in the ast
|
concrete syntax - don't combine in the ast
|
||||||
|
review haddock in the syntax and update
|
||||||
|
review syntax names and representation
|
||||||
careful review of token parses wrt trailing delimiters/junk
|
careful review of token parses wrt trailing delimiters/junk
|
||||||
|
|
||||||
review places in the parse which should allow only a fixed set of
|
review places in the parse which should allow only a fixed set of
|
||||||
|
@ -26,10 +27,12 @@ decide whether to represent numeric literals better, instead of a
|
||||||
|
|
||||||
rough SQL 2003 todo, including tests to write:
|
rough SQL 2003 todo, including tests to write:
|
||||||
|
|
||||||
|
switch TypedLit to CSStringLit based on first char being underscore?
|
||||||
idens: "", unicode, charset?, check dotted idens and contexts
|
idens: "", unicode, charset?, check dotted idens and contexts
|
||||||
add missing type name support: lots of missing ones here, including
|
add missing type name support: lots of missing ones here, including
|
||||||
simple stuff like lob variations, and new things like interval,
|
simple stuff like lob variations, and new things like interval,
|
||||||
row, ref, scope, array, multiset type names.
|
row, ref, scope, array, multiset type names.
|
||||||
|
add tests for all the typenames cast and typed literal
|
||||||
date and time literals
|
date and time literals
|
||||||
multisets
|
multisets
|
||||||
review window functions, window clause
|
review window functions, window clause
|
||||||
|
@ -43,13 +46,15 @@ filter in aggs
|
||||||
within group in aggs
|
within group in aggs
|
||||||
rows review
|
rows review
|
||||||
|
|
||||||
support needed MODULE syntax in identifiers - already covered?
|
LNR: maybe leave until after next release
|
||||||
decide how to represent special identifiers including the session
|
|
||||||
|
LNR support needed MODULE syntax in identifiers - already covered?
|
||||||
|
LNR decide how to represent special identifiers including the session
|
||||||
variables or whatever they are called like current_user
|
variables or whatever they are called like current_user
|
||||||
grouping - needs special syntax?
|
LNR grouping - needs special syntax?
|
||||||
next value for
|
LNR next value for
|
||||||
probably leave for now: subtypes, methods, new /routine, dereference
|
LNR subtypes, methods, new /routine, dereference
|
||||||
double check associativity, precedence (value exprs, joins, set ops)
|
LNR double check associativity, precedence (value exprs, joins, set ops)
|
||||||
position expressions
|
position expressions
|
||||||
length expressions
|
length expressions
|
||||||
extract expression
|
extract expression
|
||||||
|
@ -61,7 +66,7 @@ convert
|
||||||
translate
|
translate
|
||||||
trim
|
trim
|
||||||
overlay
|
overlay
|
||||||
specifictype
|
LNR specifictype
|
||||||
datetime value expressions
|
datetime value expressions
|
||||||
intervals
|
intervals
|
||||||
row value constructors, expressions review
|
row value constructors, expressions review
|
||||||
|
@ -80,21 +85,24 @@ between symmetric/asymmetric
|
||||||
in predicate review
|
in predicate review
|
||||||
escape for like
|
escape for like
|
||||||
escape for [not] similar to
|
escape for [not] similar to
|
||||||
regular expression syntax?
|
LNR regular expression syntax?
|
||||||
normalized predicate
|
normalized predicate
|
||||||
overlaps predicate
|
overlaps predicate
|
||||||
distinct from predicate
|
distinct from predicate
|
||||||
member predicate
|
member predicate
|
||||||
submultiset predicate
|
submultiset predicate
|
||||||
set predicate
|
set predicate
|
||||||
type predicate
|
LNR type predicate
|
||||||
additional stuff review:
|
additional stuff review:
|
||||||
interval stuff
|
interval stuff
|
||||||
aggregate functions: lots of missing bits
|
aggregate functions: lots of missing bits
|
||||||
especially: filter where, within group
|
especially: filter where, within group
|
||||||
complete list of keywords/reserved keywords
|
complete list of keywords/reserved keywords
|
||||||
select into
|
LNR select into
|
||||||
other language format identifiers for host params?
|
LNR other language format identifiers for host params?
|
||||||
|
|
||||||
|
----
|
||||||
|
above not marked LNR are for next release
|
||||||
|
|
||||||
review areas where this parser is too permissive, e.g. value
|
review areas where this parser is too permissive, e.g. value
|
||||||
expressions allowed where column reference names only should be
|
expressions allowed where column reference names only should be
|
||||||
|
|
|
@ -132,8 +132,7 @@ Tests for parsing value expressions
|
||||||
> unaryOperators :: TestItem
|
> unaryOperators :: TestItem
|
||||||
> unaryOperators = Group "unaryOperators" $ map (uncurry TestValueExpr)
|
> unaryOperators = Group "unaryOperators" $ map (uncurry TestValueExpr)
|
||||||
> [("not a", PrefixOp "not" $ Iden "a")
|
> [("not a", PrefixOp "not" $ Iden "a")
|
||||||
> -- I think this is a missing feature or bug in parsec buildExpressionParser
|
> ,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
|
||||||
> --,("not not a", PrefixOp "not" $ PrefixOp "not" $ Iden "a")
|
|
||||||
> ,("+a", PrefixOp "+" $ Iden "a")
|
> ,("+a", PrefixOp "+" $ Iden "a")
|
||||||
> ,("-a", PrefixOp "-" $ Iden "a")
|
> ,("-a", PrefixOp "-" $ Iden "a")
|
||||||
> ]
|
> ]
|
||||||
|
|
Loading…
Reference in a new issue