change the ints in the syntax to be integers
partially add buildExprParser hack to deal with some nested prefix and postfix unary operators add new file which starts going through the sql2003 grammar to try to create lots of examples for comprehensive testing of sql2003 support. replace the lexers with lexers from the tutorial project
This commit is contained in:
parent
8b1fa81de7
commit
89015144f9
|
@ -16,7 +16,8 @@
|
||||||
> ,setPosition,setSourceColumn,setSourceLine,getPosition
|
> ,setPosition,setSourceColumn,setSourceLine,getPosition
|
||||||
> ,option,between,sepBy,sepBy1,string,manyTill,anyChar
|
> ,option,between,sepBy,sepBy1,string,manyTill,anyChar
|
||||||
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
||||||
> ,optionMaybe,optional,many,letter,alphaNum,parse)
|
> ,optionMaybe,optional,many,letter,parse
|
||||||
|
> ,chainl1)
|
||||||
> import Text.Parsec.String (Parser)
|
> import Text.Parsec.String (Parser)
|
||||||
> import qualified Text.Parsec as P (ParseError)
|
> import qualified Text.Parsec as P (ParseError)
|
||||||
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
||||||
|
@ -74,7 +75,7 @@ converts the error return to the nice wrapper
|
||||||
> -> Either ParseError a
|
> -> Either ParseError a
|
||||||
> wrapParse parser f p src =
|
> wrapParse parser f p src =
|
||||||
> either (Left . convParseError src) Right
|
> either (Left . convParseError src) Right
|
||||||
> $ parse (setPos p *> whiteSpace *> parser <* eof) f src
|
> $ parse (setPos p *> whitespace *> parser <* eof) f src
|
||||||
|
|
||||||
> -- | Type to represent parse errors.
|
> -- | Type to represent parse errors.
|
||||||
> data ParseError = ParseError
|
> data ParseError = ParseError
|
||||||
|
@ -95,10 +96,10 @@ converts the error return to the nice wrapper
|
||||||
|
|
||||||
== literals
|
== literals
|
||||||
|
|
||||||
See the stringLiteral lexer below for notes on string literal syntax.
|
See the stringToken lexer below for notes on string literal syntax.
|
||||||
|
|
||||||
> estring :: Parser ValueExpr
|
> estring :: Parser ValueExpr
|
||||||
> estring = StringLit <$> stringLiteral
|
> estring = StringLit <$> stringToken
|
||||||
|
|
||||||
> number :: Parser ValueExpr
|
> number :: Parser ValueExpr
|
||||||
> number = NumLit <$> numberLiteral
|
> number = NumLit <$> numberLiteral
|
||||||
|
@ -115,9 +116,9 @@ which parses as a typed literal
|
||||||
> interval :: Parser ValueExpr
|
> interval :: Parser ValueExpr
|
||||||
> interval = try (keyword_ "interval" >>
|
> interval = try (keyword_ "interval" >>
|
||||||
> IntervalLit
|
> IntervalLit
|
||||||
> <$> stringLiteral
|
> <$> stringToken
|
||||||
> <*> identifierString
|
> <*> identifierBlacklist blacklist
|
||||||
> <*> optionMaybe (try $ parens integerLiteral))
|
> <*> optionMaybe (try $ parens integer))
|
||||||
|
|
||||||
> literal :: Parser ValueExpr
|
> literal :: Parser ValueExpr
|
||||||
> literal = number <|> estring <|> interval
|
> literal = number <|> estring <|> interval
|
||||||
|
@ -129,10 +130,10 @@ identifiers.
|
||||||
|
|
||||||
> name :: Parser Name
|
> name :: Parser Name
|
||||||
> name = choice [QName <$> quotedIdentifier
|
> name = choice [QName <$> quotedIdentifier
|
||||||
> ,Name <$> identifierString]
|
> ,Name <$> identifierBlacklist blacklist]
|
||||||
|
|
||||||
> identifier :: Parser ValueExpr
|
> iden :: Parser ValueExpr
|
||||||
> identifier = Iden <$> name
|
> iden = Iden <$> name
|
||||||
|
|
||||||
== star
|
== star
|
||||||
|
|
||||||
|
@ -149,7 +150,7 @@ in any value expression context.
|
||||||
use in e.g. select * from t where a = ?
|
use in e.g. select * from t where a = ?
|
||||||
|
|
||||||
> parameter :: Parser ValueExpr
|
> parameter :: Parser ValueExpr
|
||||||
> parameter = Parameter <$ symbol "?"
|
> parameter = Parameter <$ questionMark
|
||||||
|
|
||||||
== function application, aggregates and windows
|
== function application, aggregates and windows
|
||||||
|
|
||||||
|
@ -250,13 +251,13 @@ to separate the arguments.
|
||||||
cast: cast(expr as type)
|
cast: cast(expr as type)
|
||||||
|
|
||||||
> cast :: Parser ValueExpr
|
> cast :: Parser ValueExpr
|
||||||
> cast = parensCast <|> prefixCast
|
> cast = (parensCast <|> prefixCast)
|
||||||
> where
|
> where
|
||||||
> parensCast = try (keyword_ "cast") >>
|
> parensCast = try (keyword_ "cast") >>
|
||||||
> parens (Cast <$> valueExpr
|
> parens (Cast <$> valueExpr
|
||||||
> <*> (keyword_ "as" *> typeName))
|
> <*> (keyword_ "as" *> typeName))
|
||||||
> prefixCast = try (TypedLit <$> typeName
|
> prefixCast = try (TypedLit <$> typeName
|
||||||
> <*> stringLiteral)
|
> <*> stringToken)
|
||||||
|
|
||||||
the special op keywords
|
the special op keywords
|
||||||
parse an operator which is
|
parse an operator which is
|
||||||
|
@ -273,7 +274,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
||||||
> -> Parser ValueExpr
|
> -> Parser ValueExpr
|
||||||
> specialOpK opName firstArg kws =
|
> specialOpK opName firstArg kws =
|
||||||
> keyword_ opName >> do
|
> keyword_ opName >> do
|
||||||
> void $ symbol "("
|
> void openParen
|
||||||
> let pfa = do
|
> let pfa = do
|
||||||
> e <- valueExpr
|
> e <- valueExpr
|
||||||
> -- check we haven't parsed the first
|
> -- check we haven't parsed the first
|
||||||
|
@ -287,7 +288,7 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
||||||
> SOKOptional -> optionMaybe (try pfa)
|
> SOKOptional -> optionMaybe (try pfa)
|
||||||
> SOKMandatory -> Just <$> pfa
|
> SOKMandatory -> Just <$> pfa
|
||||||
> as <- mapM parseArg kws
|
> as <- mapM parseArg kws
|
||||||
> void $ symbol ")"
|
> void closeParen
|
||||||
> return $ SpecialOpK (Name opName) fa $ catMaybes as
|
> return $ SpecialOpK (Name opName) fa $ catMaybes as
|
||||||
> where
|
> where
|
||||||
> parseArg (nm,mand) =
|
> parseArg (nm,mand) =
|
||||||
|
@ -353,9 +354,9 @@ in the source
|
||||||
> keyword "trim" >>
|
> keyword "trim" >>
|
||||||
> parens (mkTrim
|
> parens (mkTrim
|
||||||
> <$> option "both" sides
|
> <$> option "both" sides
|
||||||
> <*> option " " stringLiteral
|
> <*> option " " stringToken
|
||||||
> <*> (keyword_ "from" *> valueExpr)
|
> <*> (keyword_ "from" *> valueExpr)
|
||||||
> <*> optionMaybe (keyword_ "collate" *> stringLiteral))
|
> <*> optionMaybe (keyword_ "collate" *> stringToken))
|
||||||
> where
|
> where
|
||||||
> sides = choice ["leading" <$ keyword_ "leading"
|
> sides = choice ["leading" <$ keyword_ "leading"
|
||||||
> ,"trailing" <$ keyword_ "trailing"
|
> ,"trailing" <$ keyword_ "trailing"
|
||||||
|
@ -428,7 +429,7 @@ that SQL supports.
|
||||||
|
|
||||||
> typeName :: Parser TypeName
|
> typeName :: Parser TypeName
|
||||||
> typeName = choice (multiWordParsers
|
> typeName = choice (multiWordParsers
|
||||||
> ++ [TypeName <$> identifierString])
|
> ++ [TypeName <$> identifier])
|
||||||
> >>= optionSuffix precision
|
> >>= optionSuffix precision
|
||||||
> where
|
> where
|
||||||
> multiWordParsers =
|
> multiWordParsers =
|
||||||
|
@ -456,7 +457,7 @@ todo: timestamp types:
|
||||||
| TIMESTAMParser [ <left paren> <timestamp precision> <right paren> ] [ WITH TIME ZONE ]
|
| TIMESTAMParser [ <left paren> <timestamp precision> <right paren> ] [ WITH TIME ZONE ]
|
||||||
|
|
||||||
|
|
||||||
> precision t = try (parens (commaSep integerLiteral)) >>= makeWrap t
|
> precision t = try (parens (commaSep integer)) >>= makeWrap t
|
||||||
> makeWrap (TypeName t) [a] = return $ PrecTypeName t a
|
> makeWrap (TypeName t) [a] = return $ PrecTypeName t a
|
||||||
> makeWrap (TypeName t) [a,b] = return $ PrecScaleTypeName t a b
|
> makeWrap (TypeName t) [a,b] = return $ PrecScaleTypeName t a b
|
||||||
> makeWrap _ _ = fail "there must be one or two precision components"
|
> makeWrap _ _ = fail "there must be one or two precision components"
|
||||||
|
@ -535,9 +536,19 @@ TODO: carefully review the precedences and associativities.
|
||||||
> E.Infix (p >> return (\a b -> BinOp a (Name nm) b)) assoc
|
> E.Infix (p >> return (\a b -> BinOp a (Name nm) b)) assoc
|
||||||
> prefixKeyword nm = prefix (try $ keyword_ nm) nm
|
> prefixKeyword nm = prefix (try $ keyword_ nm) nm
|
||||||
> prefixSym nm = prefix (try $ symbol_ nm) nm
|
> prefixSym nm = prefix (try $ symbol_ nm) nm
|
||||||
> prefix p nm = E.Prefix (p >> return (PrefixOp (Name nm)))
|
> prefix p nm = prefix' (p >> return (PrefixOp (Name nm)))
|
||||||
> postfixKeywords nm = postfix (try $ mapM_ keyword_ (words nm)) nm
|
> postfixKeywords nm = postfix (try $ mapM_ keyword_ (words nm)) nm
|
||||||
> postfix p nm = E.Postfix (p >> return (PostfixOp (Name nm)))
|
> postfix p nm = postfix' (p >> return (PostfixOp (Name nm)))
|
||||||
|
|
||||||
|
> -- hack from here
|
||||||
|
> -- http://stackoverflow.com/questions/10475337/parsec-expr-repeated-prefix-postfix-operator-not-supported
|
||||||
|
> -- not implemented properly yet
|
||||||
|
> -- I don't think this will be enough for all cases
|
||||||
|
> -- at least it works for 'not not a'
|
||||||
|
> -- ok: "x is not true is not true"
|
||||||
|
> -- no work: "x is not true is not null"
|
||||||
|
> prefix' p = E.Prefix . chainl1 p $ return (.)
|
||||||
|
> postfix' p = E.Postfix . chainl1 p $ return (flip (.))
|
||||||
|
|
||||||
== value expressions
|
== value expressions
|
||||||
|
|
||||||
|
@ -560,7 +571,7 @@ fragile and could at least do with some heavy explanation.
|
||||||
> ,subquery
|
> ,subquery
|
||||||
> ,try app
|
> ,try app
|
||||||
> ,try star
|
> ,try star
|
||||||
> ,identifier
|
> ,iden
|
||||||
> ,sparens]
|
> ,sparens]
|
||||||
|
|
||||||
expose the b expression for window frame clause range between
|
expose the b expression for window frame clause range between
|
||||||
|
@ -757,7 +768,7 @@ wrapper for query expr which ignores optional trailing semicolon.
|
||||||
|
|
||||||
> topLevelQueryExpr :: Parser QueryExpr
|
> topLevelQueryExpr :: Parser QueryExpr
|
||||||
> topLevelQueryExpr =
|
> topLevelQueryExpr =
|
||||||
> queryExpr >>= optionSuffix ((symbol ";" *>) . return)
|
> queryExpr >>= optionSuffix ((semi *>) . return)
|
||||||
|
|
||||||
wrapper to parse a series of query exprs from a single source. They
|
wrapper to parse a series of query exprs from a single source. They
|
||||||
must be separated by semicolon, but for the last expression, the
|
must be separated by semicolon, but for the last expression, the
|
||||||
|
@ -766,91 +777,38 @@ trailing semicolon is optional.
|
||||||
> queryExprs :: Parser [QueryExpr]
|
> queryExprs :: Parser [QueryExpr]
|
||||||
> queryExprs =
|
> queryExprs =
|
||||||
> (:[]) <$> queryExpr
|
> (:[]) <$> queryExpr
|
||||||
> >>= optionSuffix ((symbol ";" *>) . return)
|
> >>= optionSuffix ((semi *>) . return)
|
||||||
> >>= optionSuffix (\p -> (p++) <$> queryExprs)
|
> >>= optionSuffix (\p -> (p++) <$> queryExprs)
|
||||||
|
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
|
||||||
= lexing parsers
|
= lexing parsers
|
||||||
|
|
||||||
The lexing is a bit 'virtual', in the usual parsec style. The
|
whitespace parser which skips comments also
|
||||||
convention in this file is to put all the parsers which access
|
|
||||||
characters directly or indirectly here (i.e. ones which use char,
|
|
||||||
string, digit, etc.), except for the parsers which only indirectly
|
|
||||||
access them via these functions, if you follow?
|
|
||||||
|
|
||||||
> symbol :: String -> Parser String
|
> whitespace :: Parser ()
|
||||||
> symbol s = string s
|
> whitespace =
|
||||||
> -- <* notFollowedBy (oneOf "+-/*<>=!|")
|
> choice [simpleWhitespace *> whitespace
|
||||||
> <* whiteSpace
|
> ,lineComment *> whitespace
|
||||||
|
> ,blockComment *> whitespace
|
||||||
> symbol_ :: String -> Parser ()
|
> ,return ()]
|
||||||
> symbol_ s = symbol s *> return ()
|
|
||||||
|
|
||||||
TODO: now that keyword has try in it, a lot of the trys above can be
|
|
||||||
removed
|
|
||||||
|
|
||||||
> keyword :: String -> Parser String
|
|
||||||
> keyword s = try $ do
|
|
||||||
> i <- identifierRaw
|
|
||||||
> guard (map toLower i == map toLower s)
|
|
||||||
> return i
|
|
||||||
|
|
||||||
> keyword_ :: String -> Parser ()
|
|
||||||
> keyword_ s = keyword s *> return ()
|
|
||||||
|
|
||||||
Identifiers are very simple at the moment: start with a letter or
|
|
||||||
underscore, and continue with letter, underscore or digit. It doesn't
|
|
||||||
support quoting other other sorts of identifiers yet. There is a
|
|
||||||
blacklist of keywords which aren't supported as identifiers.
|
|
||||||
|
|
||||||
the identifier raw doesn't check the blacklist since it is used by the
|
|
||||||
keyword parser also
|
|
||||||
|
|
||||||
> identifierRaw :: Parser String
|
|
||||||
> identifierRaw = (:) <$> letterOrUnderscore
|
|
||||||
> <*> many letterDigitOrUnderscore <* whiteSpace
|
|
||||||
> where
|
> where
|
||||||
> letterOrUnderscore = char '_' <|> letter
|
> lineComment = try (string "--")
|
||||||
> letterDigitOrUnderscore = char '_' <|> alphaNum
|
> *> manyTill anyChar (void (char '\n') <|> eof)
|
||||||
|
> blockComment = -- no nesting of block comments in SQL
|
||||||
|
> try (string "/*")
|
||||||
|
> -- try used here so it doesn't fail when we see a
|
||||||
|
> -- '*' which isn't followed by a '/'
|
||||||
|
> *> manyTill anyChar (try $ string "*/")
|
||||||
|
> -- use many1 so we can more easily avoid non terminating loops
|
||||||
|
> simpleWhitespace = void $ many1 (oneOf " \t\n")
|
||||||
|
|
||||||
> identifierString :: Parser String
|
> lexeme :: Parser a -> Parser a
|
||||||
> identifierString = do
|
> lexeme p = p <* whitespace
|
||||||
> s <- identifierRaw
|
|
||||||
> guard (map toLower s `notElem` blacklist)
|
|
||||||
> return s
|
|
||||||
|
|
||||||
> blacklist :: [String]
|
> integer :: Parser Integer
|
||||||
> blacklist =
|
> integer = read <$> lexeme (many1 digit)
|
||||||
> ["select", "as", "from", "where", "having", "group", "order"
|
|
||||||
> ,"limit", "offset", "fetch"
|
|
||||||
> ,"inner", "left", "right", "full", "natural", "join"
|
|
||||||
> ,"cross", "on", "using", "lateral"
|
|
||||||
> ,"when", "then", "case", "end", "in"
|
|
||||||
> ,"except", "intersect", "union"]
|
|
||||||
|
|
||||||
These blacklisted names are mostly needed when we parse something with
|
|
||||||
an optional alias, e.g. select a a from t. If we write select a from
|
|
||||||
t, we have to make sure the from isn't parsed as an alias. I'm not
|
|
||||||
sure what other places strictly need the blacklist, and in theory it
|
|
||||||
could be tuned differently for each place the identifierString/
|
|
||||||
identifier parsers are used to only blacklist the bare minimum.
|
|
||||||
|
|
||||||
> quotedIdentifier :: Parser String
|
|
||||||
> quotedIdentifier = char '"' *> manyTill anyChar (symbol_ "\"")
|
|
||||||
|
|
||||||
|
|
||||||
String literals: limited at the moment, no escaping \' or other
|
|
||||||
variations.
|
|
||||||
|
|
||||||
> stringLiteral :: Parser String
|
|
||||||
> stringLiteral = (char '\'' *> manyTill anyChar (char '\'')
|
|
||||||
> >>= optionSuffix moreString) <* whiteSpace
|
|
||||||
> where
|
|
||||||
> moreString s0 = try $ do
|
|
||||||
> void $ char '\''
|
|
||||||
> s <- manyTill anyChar (char '\'')
|
|
||||||
> optionSuffix moreString (s0 ++ "'" ++ s)
|
|
||||||
|
|
||||||
number literals
|
number literals
|
||||||
|
|
||||||
|
@ -866,13 +824,12 @@ making a decision on how to represent numbers, the client code can
|
||||||
make this choice.
|
make this choice.
|
||||||
|
|
||||||
> numberLiteral :: Parser String
|
> numberLiteral :: Parser String
|
||||||
> numberLiteral =
|
> numberLiteral = lexeme $
|
||||||
> choice [int
|
> choice [int
|
||||||
> >>= optionSuffix dot
|
> >>= optionSuffix dot
|
||||||
> >>= optionSuffix fracts
|
> >>= optionSuffix fracts
|
||||||
> >>= optionSuffix expon
|
> >>= optionSuffix expon
|
||||||
> ,fract "" >>= optionSuffix expon]
|
> ,fract "" >>= optionSuffix expon]
|
||||||
> <* whiteSpace
|
|
||||||
> where
|
> where
|
||||||
> int = many1 digit
|
> int = many1 digit
|
||||||
> fract p = dot p >>= fracts
|
> fract p = dot p >>= fracts
|
||||||
|
@ -884,30 +841,83 @@ make this choice.
|
||||||
> ,option "" (string "+" <|> string "-")
|
> ,option "" (string "+" <|> string "-")
|
||||||
> ,int]
|
> ,int]
|
||||||
|
|
||||||
lexer for integer literals which appear in some places in SQL
|
|
||||||
|
|
||||||
> integerLiteral :: Parser Int
|
> identifier :: Parser String
|
||||||
> integerLiteral = read <$> many1 digit <* whiteSpace
|
> identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
|
||||||
|
|
||||||
whitespace parser which skips comments also
|
|
||||||
|
|
||||||
> whiteSpace :: Parser ()
|
|
||||||
> whiteSpace =
|
|
||||||
> choice [simpleWhiteSpace *> whiteSpace
|
|
||||||
> ,lineComment *> whiteSpace
|
|
||||||
> ,blockComment *> whiteSpace
|
|
||||||
> ,return ()]
|
|
||||||
> where
|
> where
|
||||||
> lineComment = try (string "--")
|
> firstChar = letter <|> char '_'
|
||||||
> *> manyTill anyChar (void (char '\n') <|> eof)
|
> nonFirstChar = digit <|> firstChar
|
||||||
> blockComment = -- no nesting of block comments in SQL
|
|
||||||
> try (string "/*")
|
|
||||||
> -- TODO: why is try used herex
|
|
||||||
> *> manyTill anyChar (try $ string "*/")
|
|
||||||
> -- use many1 so we can more easily avoid non terminating loops
|
|
||||||
> simpleWhiteSpace = void $ many1 (oneOf " \t\n")
|
|
||||||
|
|
||||||
= generic parser helpers
|
> quotedIdentifier :: Parser String
|
||||||
|
> quotedIdentifier = char '"' *> manyTill anyChar doubleQuote
|
||||||
|
|
||||||
|
TODO: add "" inside quoted identifiers
|
||||||
|
|
||||||
|
todo: work out the symbol parsing better
|
||||||
|
|
||||||
|
> symbol :: String -> Parser String
|
||||||
|
> symbol s = try $ lexeme $ do
|
||||||
|
> u <- choice
|
||||||
|
> [string "."
|
||||||
|
> ,many1 (oneOf "<>=+-^%/*!|~&")
|
||||||
|
> ]
|
||||||
|
> guard (s == u)
|
||||||
|
> return s
|
||||||
|
|
||||||
|
> questionMark :: Parser Char
|
||||||
|
> questionMark = lexeme $ char '?'
|
||||||
|
|
||||||
|
> openParen :: Parser Char
|
||||||
|
> openParen = lexeme $ char '('
|
||||||
|
|
||||||
|
> closeParen :: Parser Char
|
||||||
|
> closeParen = lexeme $ char ')'
|
||||||
|
|
||||||
|
> comma :: Parser Char
|
||||||
|
> comma = lexeme $ char ','
|
||||||
|
|
||||||
|
> semi :: Parser Char
|
||||||
|
> semi = lexeme $ char ';'
|
||||||
|
|
||||||
|
> doubleQuote :: Parser Char
|
||||||
|
> doubleQuote = lexeme $ char '"'
|
||||||
|
|
||||||
|
> --stringToken :: Parser String
|
||||||
|
> --stringToken = lexeme (char '\'' *> manyTill anyChar (char '\''))
|
||||||
|
> -- todo: tidy this up, add the prefixes stuff, and add the multiple
|
||||||
|
> -- string stuff
|
||||||
|
> stringToken :: Parser String
|
||||||
|
> stringToken =
|
||||||
|
> lexeme (char '\'' *> manyTill anyChar (char '\'')
|
||||||
|
> >>= optionSuffix moreString)
|
||||||
|
> where
|
||||||
|
> moreString s0 = try $ do
|
||||||
|
> void $ char '\''
|
||||||
|
> s <- manyTill anyChar (char '\'')
|
||||||
|
> optionSuffix moreString (s0 ++ "'" ++ s)
|
||||||
|
|
||||||
|
= helper functions
|
||||||
|
|
||||||
|
> keyword :: String -> Parser String
|
||||||
|
> keyword k = try $ do
|
||||||
|
> i <- identifier
|
||||||
|
> guard (map toLower i == k)
|
||||||
|
> return k
|
||||||
|
|
||||||
|
> parens :: Parser a -> Parser a
|
||||||
|
> parens = between openParen closeParen
|
||||||
|
|
||||||
|
> commaSep :: Parser a -> Parser [a]
|
||||||
|
> commaSep = (`sepBy` comma)
|
||||||
|
|
||||||
|
> keyword_ :: String -> Parser ()
|
||||||
|
> keyword_ = void . keyword
|
||||||
|
|
||||||
|
> symbol_ :: String -> Parser ()
|
||||||
|
> symbol_ = void . symbol
|
||||||
|
|
||||||
|
> commaSep1 :: Parser a -> Parser [a]
|
||||||
|
> commaSep1 = (`sepBy1` comma)
|
||||||
|
|
||||||
a possible issue with the option suffix is that it enforces left
|
a possible issue with the option suffix is that it enforces left
|
||||||
associativity when chaining it recursively. Have to review
|
associativity when chaining it recursively. Have to review
|
||||||
|
@ -917,15 +927,30 @@ instead, and create an alternative suffix parser
|
||||||
> optionSuffix :: (a -> Parser a) -> a -> Parser a
|
> optionSuffix :: (a -> Parser a) -> a -> Parser a
|
||||||
> optionSuffix p a = option a (p a)
|
> optionSuffix p a = option a (p a)
|
||||||
|
|
||||||
> parens :: Parser a -> Parser a
|
> identifierBlacklist :: [String] -> Parser String
|
||||||
> parens = between (symbol_ "(") (symbol_ ")")
|
> identifierBlacklist bl = do
|
||||||
|
> i <- identifier
|
||||||
|
> guard (map toLower i `notElem` bl)
|
||||||
|
> return i
|
||||||
|
|
||||||
> commaSep :: Parser a -> Parser [a]
|
> blacklist :: [String]
|
||||||
> commaSep = (`sepBy` symbol_ ",")
|
> blacklist =
|
||||||
|
> [-- case
|
||||||
|
> "case", "when", "then", "else", "end"
|
||||||
|
> ,--join
|
||||||
|
> "natural","inner","outer","cross","left","right","full","join"
|
||||||
|
> ,"on","using","lateral"
|
||||||
|
> ,"from","where","group","having","order","limit", "offset", "fetch"
|
||||||
|
> ,"as","in"
|
||||||
|
> ,"except", "intersect", "union"
|
||||||
|
> ]
|
||||||
|
|
||||||
|
These blacklisted names are mostly needed when we parse something with
|
||||||
> commaSep1 :: Parser a -> Parser [a]
|
an optional alias, e.g. select a a from t. If we write select a from
|
||||||
> commaSep1 = (`sepBy1` symbol_ ",")
|
t, we have to make sure the from isn't parsed as an alias. I'm not
|
||||||
|
sure what other places strictly need the blacklist, and in theory it
|
||||||
|
could be tuned differently for each place the identifierString/
|
||||||
|
identifier parsers are used to only blacklist the bare minimum.
|
||||||
|
|
||||||
--------------------------------------------
|
--------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@
|
||||||
> | IntervalLit
|
> | IntervalLit
|
||||||
> {ilLiteral :: String -- ^ literal text
|
> {ilLiteral :: String -- ^ literal text
|
||||||
> ,ilUnits :: String -- ^ units
|
> ,ilUnits :: String -- ^ units
|
||||||
> ,ilPrecision :: Maybe Int -- ^ precision
|
> ,ilPrecision :: Maybe Integer -- ^ precision
|
||||||
> }
|
> }
|
||||||
> -- | identifier without dots
|
> -- | identifier without dots
|
||||||
> | Iden Name
|
> | Iden Name
|
||||||
|
@ -130,8 +130,8 @@
|
||||||
|
|
||||||
> -- | Represents a type name, used in casts.
|
> -- | Represents a type name, used in casts.
|
||||||
> data TypeName = TypeName String
|
> data TypeName = TypeName String
|
||||||
> | PrecTypeName String Int
|
> | PrecTypeName String Integer
|
||||||
> | PrecScaleTypeName String Int Int
|
> | PrecScaleTypeName String Integer Integer
|
||||||
> deriving (Eq,Show,Read,Data,Typeable)
|
> deriving (Eq,Show,Read,Data,Typeable)
|
||||||
|
|
||||||
|
|
||||||
|
|
60
TODO
60
TODO
|
@ -1,9 +1,33 @@
|
||||||
synchronize parsing and syntax design with tutorial parser?
|
continue 2003 review and tests
|
||||||
|
tutorial parser:
|
||||||
|
expr hack as best as can
|
||||||
|
left factor as much as possible
|
||||||
|
table expression in syntax
|
||||||
|
replace into this project
|
||||||
|
finish off ansi 2003 support or specific subset
|
||||||
|
summarize todos
|
||||||
|
start looking at error messages
|
||||||
|
change the booleans in the ast to better types for less ambiguity
|
||||||
|
represent missing optional bits in the ast as nothing instead of the
|
||||||
|
default
|
||||||
|
look at fixing the expression parsing completely
|
||||||
|
represent natural and using/on in the syntax more close to the
|
||||||
|
concrete syntax - don't combine
|
||||||
|
|
||||||
error handling work: <?> + left factor
|
big feature summary:
|
||||||
|
all ansi sql queries
|
||||||
|
better expression tree parsing
|
||||||
|
error messages, left factor
|
||||||
|
dml, ddl, procedural sql
|
||||||
|
position annotation
|
||||||
|
type checker/ etc.
|
||||||
|
lexer
|
||||||
|
dialects
|
||||||
|
quasi quotes
|
||||||
|
typesafe sql dbms wrapper support for haskell
|
||||||
|
extensibility
|
||||||
|
performance analysis
|
||||||
|
|
||||||
create list of features to add, maybe try to do up to sql 99 for the
|
|
||||||
next release?
|
|
||||||
|
|
||||||
= next release
|
= next release
|
||||||
|
|
||||||
|
@ -46,17 +70,12 @@ review internal sql collection for more syntax/tests
|
||||||
|
|
||||||
other
|
other
|
||||||
|
|
||||||
change any/some/all to be proper infix operators like in ??
|
|
||||||
|
|
||||||
review syntax to replace maybe and bool with better ctors
|
review syntax to replace maybe and bool with better ctors
|
||||||
|
|
||||||
----
|
----
|
||||||
|
|
||||||
demo program: convert tpch to sql server syntax exe processor
|
demo program: convert tpch to sql server syntax exe processor
|
||||||
|
|
||||||
dialect framework
|
|
||||||
try to implement fixity without the hse hack
|
|
||||||
source position annotation?
|
|
||||||
review abstract syntax (e.g. combine App with SpecialOp?)
|
review abstract syntax (e.g. combine App with SpecialOp?)
|
||||||
|
|
||||||
more operators
|
more operators
|
||||||
|
@ -77,12 +96,6 @@ run through other manuals for example queries and features: sql in a
|
||||||
check the order of exports, imports and functions/cases in the files
|
check the order of exports, imports and functions/cases in the files
|
||||||
fix up the import namespaces/explicit names nicely
|
fix up the import namespaces/explicit names nicely
|
||||||
|
|
||||||
do some tests for parse errors?
|
|
||||||
|
|
||||||
left factor parsing code in remaining places
|
|
||||||
|
|
||||||
quasi quotes?
|
|
||||||
|
|
||||||
ast checker: checks the ast represents valid syntax, the parser
|
ast checker: checks the ast represents valid syntax, the parser
|
||||||
doesn't check as much as it could, and this can also be used to
|
doesn't check as much as it could, and this can also be used to
|
||||||
check generated trees. Maybe this doesn't belong in this package
|
check generated trees. Maybe this doesn't belong in this package
|
||||||
|
@ -172,21 +185,12 @@ teradata
|
||||||
ms sql server
|
ms sql server
|
||||||
mysql?
|
mysql?
|
||||||
db2?
|
db2?
|
||||||
|
what other major dialects are there?
|
||||||
|
sqlite
|
||||||
|
sap dbmss (can't work out what are separate products or what are the
|
||||||
|
dialects)
|
||||||
|
|
||||||
maybe later: other dml
|
maybe later: other dml
|
||||||
insert, update, delete, truncate, merge + set, show?
|
insert, update, delete, truncate, merge + set, show?
|
||||||
copy, execute?, explain?, begin/end/rollback?
|
copy, execute?, explain?, begin/end/rollback?
|
||||||
|
|
||||||
big feature summary:
|
|
||||||
all ansi sql queries
|
|
||||||
error messages, left factor
|
|
||||||
position annotation
|
|
||||||
type checker/ etc.
|
|
||||||
lexer
|
|
||||||
dialects
|
|
||||||
dml, ddl, procedural sql
|
|
||||||
quasi quotes
|
|
||||||
typesafe sql dbms wrapper support for haskell
|
|
||||||
extensibility
|
|
||||||
better expression tree parsing
|
|
||||||
performance analysis
|
|
2796
tools/Language/SQL/SimpleSQL/SQL2003.lhs
Normal file
2796
tools/Language/SQL/SimpleSQL/SQL2003.lhs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue