1
Fork 0

refactor some of the multi keyword/ multiword typename parsing

This commit is contained in:
Jake Wheat 2014-04-18 14:50:54 +03:00
parent 247c7a26b7
commit b0f1e044b4
4 changed files with 99 additions and 54 deletions
Language/SQL/SimpleSQL

View file

@ -22,8 +22,8 @@
> import qualified Text.Parsec as P (ParseError)
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
> import qualified Text.Parsec.Expr as E
> import Data.List (intercalate)
> import Data.List (intercalate,sort,groupBy)
> import Data.Function (on)
> import Language.SQL.SimpleSQL.Syntax
The public API functions.
@ -513,15 +513,13 @@ typename: used in casts. Special cases for the multi keyword typenames
that SQL supports.
> typeName :: Parser TypeName
> typeName = (choice (multiWordParsers
> ++ [TypeName <$> identifierBlacklist blacklist])
> >>= optionSuffix precision
> ) <?> "typename"
> typeName =
> (choice [multiWordParsers
> ,TypeName <$> identifierBlacklist blacklist]
> >>= optionSuffix precision
> ) <?> "typename"
> where
> multiWordParsers =
> flip map multiWordTypeNames
> $ \ks -> (TypeName . unwords) <$> try (keywords ks)
> multiWordTypeNames = map words
> multiWordParsers = (TypeName . unwords) <$> makeKeywordTree
> ["double precision"
> ,"character varying"
> ,"char varying"
@ -559,6 +557,41 @@ todo: timestamp types:
> ctor [a] = Parens a
> ctor as = SpecialOp [Name "rowctor"] as
== multi keyword helper
This helper is to help parsing multiple options of multiple keywords
with similar prefixes, e.g. parsing 'is null' and 'is not null'.
use to left factor/ improve:
typed literal and general identifiers
not like, not in, not between operators
help with factoring keyword functions and other app-likes
the join keyword sequences
fetch first/next
row/rows only
There is probably a simpler way of doing this but I am a bit
thick.
> makeKeywordTree :: [String] -> Parser [String]
> makeKeywordTree sets = do
> reverse <$> parseTrees (sort $ map words sets)
> -- ?? <?> intercalate "," sets
> where
> parseTrees :: [[String]] -> Parser [String]
> parseTrees ws = do
> let gs :: [[[String]]]
> gs = groupBy ((==) `on` head) ws
> choice $ map parseGroup gs
> parseGroup :: [[String]] -> Parser [String]
> parseGroup l = do
> let k = head $ head l
> keyword_ k
> let tls = map tail l
> pr = (k:) <$> parseTrees tls
> if (or $ map null tls)
> then pr <|> return [k]
> else pr
== operator parsing
@ -569,6 +602,12 @@ can be symbols (a + b), single keywords (a and b) or multiple keywords
TODO: carefully review the precedences and associativities.
TODO: to fix the parsing completely, I think will need to parse
without precedence and associativity and fix up afterwards, since SQL
syntax is way too messy. It might be possible to avoid this if we
wanted to avoid extensibility and to not be concerned with parse error
messages, but both of these are considered too important.
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
> opTable bExpr =
> [-- parse match and quantified comparisons as postfix ops
@ -598,21 +637,22 @@ TODO: carefully review the precedences and associativities.
> ,binarySym "|" E.AssocRight
> ,binaryKeyword "like" E.AssocNone
> ,binaryKeyword "overlaps" E.AssocNone]
> ++ map (`binaryKeywords` E.AssocNone)
> ["not like"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
> ++ map postfixKeywords
> ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
> ++ [binaryKeywords $ makeKeywordTree
> ["not like"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
> ,postfixKeywords $ makeKeywordTree
> ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
> ]
> -- have to use try with inSuffix because of a conflict
> -- with 'in' in position function, and not between
> -- between also has a try in it to deal with 'not'
@ -631,17 +671,20 @@ TODO: carefully review the precedences and associativities.
> where
> binarySym nm assoc = binary (symbol_ nm) nm assoc
> binaryKeyword nm assoc = binary (keyword_ nm) nm assoc
> -- use try with the multi keywords because of shared
> -- prefixes to the sets of keywords. Should left factor
> -- somehow
> binaryKeywords nm assoc = binary (try $ keywords_ (words nm)) nm assoc
> binaryKeywords p =
> E.Infix (do
> o <- p
> return (\a b -> BinOp a [Name $ unwords o] b))
> E.AssocNone
> postfixKeywords p =
> postfix' $ do
> o <- p
> return $ PostfixOp [Name $ unwords o]
> binary p nm assoc =
> E.Infix (p >> return (\a b -> BinOp a [Name nm] b)) assoc
> prefixKeyword nm = prefix (keyword_ nm) nm
> prefixSym nm = prefix (symbol_ nm) nm
> prefix p nm = prefix' (p >> return (PrefixOp [Name nm]))
> postfixKeywords nm = postfix (try $ keywords_ (words nm)) nm
> postfix p nm = postfix' (p >> return (PostfixOp [Name nm]))
> -- hack from here
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
> -- not implemented properly yet
@ -815,11 +858,9 @@ allows offset and fetch in either order
> fetch :: Parser ValueExpr
> fetch = choice [ansiFetch, limit]
> where --todo: better left factoring
> ansiFetch = keyword_ "fetch" >>
> choice [keyword_ "first",keyword_ "next"]
> *> valueExpr
> <* choice [keyword_ "rows",keyword_ "row"]
> <* keyword_ "only"
> fs = makeKeywordTree ["fetch first", "fetch next"]
> ro = makeKeywordTree ["rows only", "row only"]
> ansiFetch = fs *> valueExpr <* ro
> limit = keyword_ "limit" *> valueExpr
== common table expressions
@ -1070,9 +1111,6 @@ todo: work out the symbol parsing better
helper function to improve error messages
> keywords :: [String] -> Parser [String]
> keywords ks = mapM keyword ks <?> intercalate " " ks
> keywords_ :: [String] -> Parser ()
> keywords_ ks = mapM_ keyword_ ks <?> intercalate " " ks

View file

@ -139,7 +139,7 @@
> -- e.g. a[3]. The first
> -- valueExpr is the array, the
> -- second is the subscripts/ctor args
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)> deriving (Eq,Show,Read,Data,Typeable)
> | ArrayCtor QueryExpr -- ^ this is used for the query expression version of array constructors, e.g. array(select * from t)
> | CSStringLit String String
> | Escape ValueExpr Char
> | UEscape ValueExpr Char