1
Fork 0
simple-sql-parser/Language/SQL/SimpleSQL/Parser.lhs

423 lines
14 KiB
Plaintext
Raw Normal View History

2013-12-13 11:39:26 +01:00
2013-12-13 15:04:48 +01:00
> module Language.SQL.SimpleSQL.Parser
> (parseQueryExpr
> ,parseScalarExpr
2013-12-13 18:21:44 +01:00
> ,ParseError(..)) where
2013-12-13 11:39:26 +01:00
> import Text.Groom
2013-12-13 18:21:44 +01:00
> import Text.Parsec hiding (ParseError)
> import qualified Text.Parsec as P
2013-12-13 11:39:26 +01:00
> import Control.Monad.Identity
> import Control.Applicative hiding (many, (<|>), optional)
> import qualified Language.Haskell.Exts.Syntax as HSE
> import qualified Language.Haskell.Exts.Fixity as HSE
> import Data.Maybe
2013-12-13 19:01:57 +01:00
> import Data.List
2013-12-13 11:39:26 +01:00
2013-12-13 15:04:48 +01:00
> import Language.SQL.SimpleSQL.Syntax
2013-12-13 11:39:26 +01:00
2013-12-13 18:21:44 +01:00
> -- | Parse a complete SQL query expression.
> --
> -- The arguments are the source name used in error messages, an
> -- optional (line, column) to start position tracking from, and the
> -- SQL text.  Leading whitespace and comments are skipped and the
> -- whole input must be consumed.
> parseQueryExpr :: FilePath
>                -> Maybe (Int,Int)
>                -> String
>                -> Either ParseError QueryExpr
> parseQueryExpr f p src =
>     case parse topLevel f src of
>       Left err -> Left (convParseError src err)
>       Right qe -> Right qe
>   where
>     topLevel = setPos p *> whiteSpace *> queryExpr <* eof
2013-12-13 11:39:26 +01:00
2013-12-13 18:21:44 +01:00
> -- | Parse a single scalar expression.
> --
> -- Takes the source name for error messages, an optional starting
> -- (line, column) and the SQL text.  The expression may be preceded
> -- by whitespace or comments and must be followed by end of input.
> parseScalarExpr :: FilePath
>                 -> Maybe (Int,Int)
>                 -> String
>                 -> Either ParseError ScalarExpr
> parseScalarExpr f p src =
>     case parse wholeInput f src of
>       Left err -> Left (convParseError src err)
>       Right se -> Right se
>   where
>     wholeInput = setPos p *> whiteSpace *> scalarExpr <* eof
> -- | Override the parser's current line and column when a position
> -- is supplied; leave the position untouched for 'Nothing'.
> setPos :: Maybe (Int,Int) -> P ()
> setPos = maybe (return ()) relocate
>   where
>     -- keep the source name, replace line first and then column
>     relocate (l,c) = do
>         pos <- getPosition
>         setPosition (setSourceColumn (setSourceLine pos l) c)
2013-12-13 18:21:44 +01:00
> -- | A parse failure as reported by this module; built from a Parsec
> -- error by 'convParseError'.
> data ParseError = ParseError
>     {peErrorString :: String     -- Parsec's own rendering ('show')
>     ,peFilename :: FilePath      -- source name of the error position
>     ,pePosition :: (Int,Int)     -- (line, column) of the error
>     ,peFormattedError :: String  -- output of 'formatError'
>     } deriving (Eq,Show)
2013-12-13 18:21:44 +01:00
> -- | Convert a Parsec error into this module's 'ParseError'.  The
> -- source text is needed so that the formatted message can quote
> -- the offending line.
> convParseError :: String -> P.ParseError -> ParseError
> convParseError src e =
>     let pos = errorPos e
>     in ParseError
>          {peErrorString = show e
>          ,peFilename = sourceName pos
>          ,pePosition = (sourceLine pos, sourceColumn pos)
>          ,peFormattedError = formatError src e}
Format the error more nicely: an Emacs-style file:line:column: prefix for positioning, plus the offending source line as context.
> -- | Render a parse error as @file:line:column:@, then (when the
> -- source text has that line) the offending line with a caret under
> -- the error column, then Parsec's own message.
> --
> -- NOTE(review): the line is looked up in @src@ by the reported line
> -- number, so the context presumably only matches when the parse
> -- started at line 1 - confirm for callers that pass a start position.
> formatError :: String -> P.ParseError -> String
> formatError src e =
>     concat [sourceName pos, ":", show ln, ":", show col, ":"
>            ,context
>            ,show e]
>   where
>     pos = errorPos e
>     ln = sourceLine pos
>     col = sourceColumn pos
>     -- empty when the source has fewer lines than the reported line
>     context = case drop (ln - 1) (lines src) of
>         (x:_) -> "\n" ++ x ++ "\n"
>                  ++ replicate (col - 1) ' ' ++ "^\n"
>         [] -> ""
2013-12-13 11:39:26 +01:00
> -- | The parser type used throughout this module: Parsec over a
> -- 'String' input with no user state.
> type P = ParsecT String () Identity
------------------------------------------------
= scalar expressions
> -- | A single-quoted string literal.
> --
> -- The opening quote is matched with 'char' rather than 'symbol_':
> -- 'symbol_' skips whitespace and comments after the token, which
> -- would silently drop leading spaces (or treat a leading @--@ or
> -- @/*@ as a comment) inside the literal.  Whitespace is skipped
> -- only after the closing quote.  There is no escape mechanism: the
> -- literal ends at the next quote character.
> estring :: P ScalarExpr
> estring = StringLit <$> (char '\'' *> manyTill anyChar (symbol_ "'"))
digits
digits.[digits][e[+-]digits]
[digits].digits[e[+-]digits]
digitse[+-]digits
> -- | A numeric literal, kept as the text that was matched.  Accepts
> -- digits with an optional dot, fraction and exponent, or a literal
> -- starting with a dot followed by digits and an optional exponent.
> -- Trailing whitespace is skipped.
> number :: P ScalarExpr
> number = NumLit <$> (numberText <* whiteSpace)
>   where
>     -- each helper takes the text matched so far and appends to it
>     numberText = choice
>         [digits >>= optionSuffix dot
>                 >>= optionSuffix fraction
>                 >>= optionSuffix expon
>         ,dot "" >>= fraction >>= optionSuffix expon]
>     digits = many1 digit
>     dot pre = (\c -> pre ++ [c]) <$> char '.'
>     fraction pre = (pre ++) <$> digits
>     expon pre = do
>         _ <- char 'e'
>         sign <- option "" ((:[]) <$> (char '+' <|> char '-'))
>         ds <- digits
>         return (pre ++ "e" ++ sign ++ ds)
2013-12-13 11:39:26 +01:00
> -- | Any literal: a number or a quoted string.
> literal :: P ScalarExpr
> literal = choice [number, estring]
2013-12-13 11:39:26 +01:00
> -- | An identifier: a letter or underscore followed by letters,
> -- digits or underscores, with trailing whitespace skipped.
> --
> -- Words in the blacklist are rejected so that keywords are not
> -- taken for identifiers or aliases.  The rejection happens after
> -- input has been consumed, so callers that need to backtrack must
> -- wrap this parser in 'try'.  The failure now names the offending
> -- keyword instead of failing with no message.
> identifierString :: P String
> identifierString = do
>     s <- (:) <$> letterOrUnderscore
>              <*> many letterDigitOrUnderscore <* whiteSpace
>     if s `elem` blacklist
>       then unexpected ("keyword " ++ show s)
>       else return s
>   where
>     letterOrUnderscore = char '_' <|> letter
>     letterDigitOrUnderscore = char '_' <|> alphaNum
>     blacklist :: [String]
>     blacklist = ["as", "from", "where", "having", "group", "order"
>                 ,"inner", "left", "right", "full", "natural", "join"
>                 ,"on", "using", "when", "then", "case", "end"
>                 ,"limit", "offset"]
2013-12-13 11:39:26 +01:00
TODO: document which keywords must be in the blacklist, and which
do not need to be.
> -- | An unqualified identifier as a scalar expression.
> identifier :: P ScalarExpr
> identifier = fmap Iden identifierString
2013-12-13 11:39:26 +01:00
> -- | A two-part identifier of the form @a.b@.
> dottedIden :: P ScalarExpr
> dottedIden = do
>     qualifier <- identifierString
>     name <- symbol "." *> identifierString
>     return (Iden2 qualifier name)
> -- | Either a bare @*@ or a qualified @t.*@.
> star :: P ScalarExpr
> star = bare <|> qualified
>   where
>     bare = Star <$ symbol "*"
>     qualified = Star2 <$> (identifierString <* symbol "." <* symbol "*")
> -- | A function application: a name followed by a parenthesised
> -- argument list.  A lone star is accepted as the only argument so
> -- that @count(*)@ parses.
> app :: P ScalarExpr
> app = do
>     fn <- identifierString
>     args <- parens (choice [starArg, commaSep scalarExpr'])
>     return (App fn args)
>   where
>     -- support for count(*)
>     starArg = (:[]) <$> try star
2013-12-13 11:39:26 +01:00
> -- | A @case@ expression: an optional test expression, one or more
> -- @when ... then ...@ branches, an optional @else@ branch and a
> -- closing @end@.
> scase :: P ScalarExpr
> scase = do
>     try (keyword_ "case")
>     test <- optionMaybe (try scalarExpr')
>     branches <- many1 whenThen
>     fallback <- optionMaybe (try (keyword_ "else") *> scalarExpr')
>     keyword_ "end"
>     return (Case test branches fallback)
>   where
>     whenThen = do
>         keyword_ "when"
>         cond <- scalarExpr'
>         keyword_ "then"
>         result <- scalarExpr'
>         return (cond, result)
> -- | Symbolic binary operators, in the order they are tried.  The
> -- two-character comparison operators come before @<@ and @>@ so
> -- that the shorter symbols do not match a prefix of them.
> binOpSymbolNames :: [String]
> binOpSymbolNames =
>     ["=", "<=", ">=", "!=", "<>", "<", ">"
>     ,"*", "/", "+", "-", "||"]
> -- | Binary operators that are spelled as keywords.
> binOpKeywordNames :: [String]
> binOpKeywordNames =
>     ["and", "or", "like"]
2013-12-13 11:39:26 +01:00
2013-12-13 19:01:57 +01:00
> -- | Prefix operators that are spelled as keywords.
> unOpKeywordNames :: [String]
> unOpKeywordNames =
>     ["not"]
> -- | Symbolic prefix operators.
> unOpSymbolNames :: [String]
> unOpSymbolNames =
>     ["+", "-"]
2013-12-13 11:39:26 +01:00
> -- | A prefix operator applied to a scalar expression, represented
> -- as an 'Op' with a single argument.
> --
> -- NOTE(review): the operand is a full 'scalarExpr', so the operator
> -- appears to apply to everything that follows (e.g. @-a + b@) -
> -- confirm this is the intended precedence.
> unaryOp :: P ScalarExpr
> unaryOp = do
>     nm <- choice (symbolOps ++ keywordOps)
>     arg <- scalarExpr
>     return (Op nm [arg])
>   where
>     symbolOps = [try (symbol s) | s <- unOpSymbolNames]
>     keywordOps = [try (keyword k) | k <- unOpKeywordNames]
2013-12-13 11:39:26 +01:00
> -- | A scalar expression parsed without regard to operator
> -- precedence: a factor followed by any number of
> -- @operator factor@ suffixes, nested to the left.  'fixFixity'
> -- rearranges the result afterwards.
> scalarExpr' :: P ScalarExpr
> scalarExpr' = factor >>= trysuffix
>   where
>     factor = choice [literal
>                     ,scase
>                     ,unaryOp
>                     ,try app
>                     ,try dottedIden
>                     ,identifier
>                     ,sparens]
>     -- extend the expression if an operator follows, else stop
>     trysuffix e = option e (try (suffix e))
>     suffix lhs = do
>         op <- opSymbol
>         rhs <- factor
>         trysuffix (Op op [lhs,rhs])
>     opSymbol = choice ([try (symbol s) | s <- binOpSymbolNames]
>                        ++ [try (keyword k) | k <- binOpKeywordNames])
> -- | A parenthesised scalar expression; the parentheses are kept in
> -- the syntax tree as 'Parens'.
> sparens :: P ScalarExpr
> sparens = fmap Parens (parens scalarExpr')
2013-12-13 11:39:26 +01:00
> -- | Encode a scalar expression as a haskell-src-exts expression so
> -- that its fixity machinery can be reused.  'toSql' decodes the
> -- result.
> --
> -- String and number literals both become string literals, tagged
> -- with a leading @s@ or @n@; unary operators become applications
> -- of a function named @unary:@ plus the operator; @case@ becomes
> -- an application of @$case@ to a three-element list.
> toHaskell :: ScalarExpr -> HSE.Exp
> toHaskell expr = case expr of
>     Iden i -> unqual i
>     StringLit s -> tagged 's' s
>     NumLit n -> tagged 'n' n
>     App f args -> HSE.App (unqual f) $ hlist args
>     Op o [l,r] -> HSE.InfixApp (toHaskell l)
>                                (HSE.QVarOp $ HSE.UnQual $ HSE.Ident o)
>                                (toHaskell r)
>     Op o [x] -> toHaskell $ App ("unary:" ++ o) [x]
>     Op {} -> error $ "bad args to operator " ++ groom expr
>     Star -> unqual "*"
>     Iden2 q i -> qual q i
>     Star2 q -> qual q "*"
>     Parens x -> HSE.Paren $ toHaskell x
>     -- map the two maybes to lists with either 0 or 1 element
>     Case t ws el ->
>         HSE.App (unqual "$case")
>                 (HSE.List [hlist $ maybeToList t
>                           ,HSE.List [hlist [w,th] | (w,th) <- ws]
>                           ,hlist $ maybeToList el])
>   where
>     unqual = HSE.Var . HSE.UnQual . HSE.Ident
>     qual q i = HSE.Var $ HSE.Qual (HSE.ModuleName q) (HSE.Ident i)
>     tagged c = HSE.Lit . HSE.String . (c:)
>     hlist = HSE.List . map toHaskell
> -- | Decode a haskell-src-exts expression produced by 'toHaskell'
> -- back into a scalar expression.
> --
> -- The order of the alternatives matters: the star and @$case@ and
> -- @unary:@ encodings must be tried before the general identifier
> -- and application cases that would also match them.  Anything that
> -- is not one of the encodings calls 'error' with an
> -- \"unsupported haskell\" message; this now includes a malformed
> -- @when@/@then@ pair, which previously died with a pattern-match
> -- failure in a lambda.
> toSql :: HSE.Exp -> ScalarExpr
> toSql e = case e of
>     HSE.Var (HSE.UnQual (HSE.Ident "*")) -> Star
>     HSE.Var (HSE.Qual (HSE.ModuleName q) (HSE.Ident "*")) -> Star2 q
>     HSE.Var (HSE.Qual (HSE.ModuleName a) (HSE.Ident b)) -> Iden2 a b
>     HSE.Var (HSE.UnQual (HSE.Ident i)) -> Iden i
>     HSE.Lit (HSE.String ('s':l)) -> StringLit l
>     HSE.Lit (HSE.String ('n':l)) -> NumLit l
>     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident "$case"))) (HSE.List [v,ts,el]) ->
>         Case (ltom v) (pairs ts) (ltom el)
>     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident x)))
>             (HSE.List [ea]) | "unary:" `isPrefixOf` x ->
>         -- strip the six characters of "unary:"
>         Op (drop 6 x) [toSql ea]
>     HSE.App (HSE.Var (HSE.UnQual (HSE.Ident i)))
>             (HSE.List es) -> App i $ map toSql es
>     HSE.InfixApp e0 (HSE.QVarOp (HSE.UnQual (HSE.Ident n))) e1 ->
>         Op n [toSql e0, toSql e1]
>     HSE.Paren e0 -> Parens $ toSql e0
>     _ -> unsupported e
>   where
>     -- a list of zero or one element back to a Maybe
>     ltom (HSE.List []) = Nothing
>     ltom (HSE.List [ex]) = Just $ toSql ex
>     ltom ex = unsupported ex
>     -- a list of two-element lists back to (when, then) pairs
>     pairs (HSE.List l) = map pair l
>     pairs ex = unsupported ex
>     pair (HSE.List [a,b]) = (toSql a, toSql b)
>     pair ex = unsupported ex
>     unsupported ex = error $ "unsupported haskell " ++ groom ex
> -- | Operator fixities handed to haskell-src-exts, from tightest
> -- binding to loosest.
> --
> -- Each operator is listed once.  The list used to carry a second
> -- entry for @or@ (right associative, level 2) ahead of the level 0
> -- one; with a first-match lookup that entry wins and makes @or@
> -- bind tighter than @and@, so @a or b and c@ grouped as
> -- @(a or b) and c@.  With only the level 0 entry, @and@ binds
> -- tighter than @or@ as in SQL.
> sqlFixities :: [HSE.Fixity]
> sqlFixities = HSE.infixl_ 9 ["*", "/"]
>               ++ HSE.infixl_ 8 ["+", "-"]
>               ++ HSE.infixl_ 6 ["<=",">=","!=","<>","||", "like"]
>               ++ HSE.infix_ 4 ["<", ">"]
>               ++ HSE.infixr_ 3 ["="]
>               ++ HSE.infixl_ 1 ["and"]
>               ++ HSE.infixl_ 0 ["or"]
> -- | Re-associate the operators in a scalar expression according to
> -- 'sqlFixities', by round-tripping through haskell-src-exts.
> fixFixity :: ScalarExpr -> ScalarExpr
> fixFixity =
>     toSql . runIdentity . HSE.applyFixities sqlFixities . toHaskell
> -- | A scalar expression with operator precedence applied.  A star
> -- (bare or qualified) is also accepted here.
> scalarExpr :: P ScalarExpr
> scalarExpr = try star <|> (fixFixity <$> scalarExpr')
-------------------------------------------------
= query expressions
2013-12-13 16:27:02 +01:00
> -- | The optional @all@ / @distinct@ after @select@; defaults to
> -- 'All' when neither keyword is present.
> duplicates :: P Duplicates
> duplicates = option All (try explicit)
>   where
>     explicit = (All <$ keyword_ "all")
>                <|> (Distinct <$ keyword_ "distinct")
2013-12-13 11:39:26 +01:00
> -- | One entry of a select list: a scalar expression with an
> -- optional alias, where the @as@ before the alias is itself
> -- optional.  The alias comes first in the result.
> selectItem :: P (Maybe String, ScalarExpr)
> selectItem = do
>     e <- scalarExpr
>     nm <- optionMaybe (try alias)
>     return (nm, e)
>   where
>     alias = optional (try (keyword_ "as")) *> identifierString
> -- | One or more select items separated by commas.
> selectList :: P [(Maybe String,ScalarExpr)]
> selectList = selectItem `sepBy1` symbol_ ","
2013-12-13 11:39:26 +01:00
> -- | The optional @from@ clause: a comma separated list of table
> -- references, or the empty list when there is no @from@.
> --
> -- A table reference is a parenthesised query, a parenthesised table
> -- reference or a table name, followed by any number of joins and
> -- then an optional alias (with optional @as@).
> --
> -- Joins accepted: @natural [inner] join@, @join@, @inner join@,
> -- @left|right|full [outer] join@ and @cross join@.  The @inner@
> -- after @natural@ is optional; it used to be required, which
> -- rejected a plain @natural join@.  Natural and cross joins take no
> -- condition; the others take an optional @using (...)@ or
> -- @on expr@.
> from :: P [TableRef]
> from = option [] (try (keyword_ "from") *> commaSep1 tref)
>   where
>     tref = choice [try (JoinQueryExpr <$> parens queryExpr)
>                   ,JoinParens <$> parens tref
>                   ,SimpleTableRef <$> identifierString]
>            >>= optionSuffix pjoin
>            >>= optionSuffix alias
>     -- one join applied to the table reference parsed so far, then
>     -- any further joins chained onto the result
>     pjoin tref0 =
>         choice
>         [try (keyword_ "natural") *> optional (try (keyword_ "inner"))
>              *> conditionlessSuffix tref0 Inner (Just JoinNatural)
>         ,try (keyword_ "join")
>              *> (JoinTableRef Inner tref0 <$> tref <*> joinExpr)
>         ,try (keyword_ "inner")
>              *> conditionSuffix tref0 Inner
>         ,try (choice [JLeft <$ keyword_ "left"
>                      ,JRight <$ keyword_ "right"
>                      ,Full <$ keyword_ "full"])
>              >>= outerJoinSuffix tref0
>         ,try (keyword_ "cross")
>              *> conditionlessSuffix tref0 Cross Nothing
>         ]
>         >>= optionSuffix pjoin
>     outerJoinSuffix tref0 jt =
>         optional (keyword_ "outer") *> conditionSuffix tref0 jt
>     conditionSuffix tref0 jt =
>         keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> joinExpr)
>     conditionlessSuffix tref0 jt jc =
>         keyword_ "join" *> (JoinTableRef jt tref0 <$> tref <*> return jc)
>     joinExpr = choice
>         [(Just . JoinUsing)
>          <$> (try (keyword_ "using")
>               *> parens (commaSep1 identifierString))
>         ,(Just . JoinOn) <$> (try (keyword_ "on") *> scalarExpr)
>         ,return Nothing
>         ]
>     alias j = let a1 = optional (try (keyword_ "as")) *> identifierString
>               in option j (JoinAlias j <$> try a1)
2013-12-13 16:27:02 +01:00
> -- | An optional clause made of the given keyword followed by a
> -- scalar expression; 'Nothing' when the keyword is absent.
> optionalScalarExpr :: String -> P (Maybe ScalarExpr)
> optionalScalarExpr k = optionMaybe $ do
>     try (keyword_ k)
>     scalarExpr
2013-12-13 11:39:26 +01:00
> -- | The optional @where@ clause.
> swhere :: P (Maybe ScalarExpr)
> swhere = optionalScalarExpr "where"
2013-12-13 11:39:26 +01:00
> -- | The optional @group by@ clause: a comma separated list of
> -- scalar expressions, empty when the clause is absent.
> sgroupBy :: P [ScalarExpr]
> sgroupBy = option [] $ do
>     try (keyword_ "group")
>     keyword_ "by"
>     commaSep1 scalarExpr
> -- | The optional @having@ clause.
> having :: P (Maybe ScalarExpr)
> having = optionalScalarExpr "having"
2013-12-13 11:39:26 +01:00
2013-12-13 16:08:10 +01:00
> -- | The optional @order by@ clause: a comma separated list of
> -- scalar expressions, each with an optional @asc@ or @desc@.  The
> -- direction defaults to 'Asc'; the list is empty when the clause is
> -- absent.
> orderBy :: P [(ScalarExpr,Direction)]
> orderBy = option [] $ do
>     try (keyword_ "order")
>     keyword_ "by"
>     commaSep1 sortKey
>   where
>     sortKey = do
>         e <- scalarExpr
>         dir <- option Asc (choice [Asc <$ keyword_ "asc"
>                                   ,Desc <$ keyword_ "desc"])
>         return (e, dir)
2013-12-13 11:39:26 +01:00
2013-12-13 16:27:02 +01:00
> -- | The optional @limit@ clause: the keyword followed by a scalar
> -- expression.
> limit :: P (Maybe ScalarExpr)
> limit = optionalScalarExpr "limit"
> -- | The optional @offset@ clause: the keyword followed by a scalar
> -- expression.
> offset :: P (Maybe ScalarExpr)
> offset = optionalScalarExpr "offset"
2013-12-13 11:39:26 +01:00
> -- | A @select@ query: the keyword, then in this order the
> -- duplicates setting, select list, from, where, group by, having,
> -- order by, limit and offset clauses.
> queryExpr :: P QueryExpr
> queryExpr = do
>     try (keyword_ "select")
>     d <- duplicates
>     sl <- selectList
>     fr <- from
>     wh <- swhere
>     gb <- sgroupBy
>     hv <- having
>     ob <- orderBy
>     lm <- limit
>     off <- offset
>     return (Select d sl fr wh gb hv ob lm off)
2013-12-13 11:39:26 +01:00
------------------------------------------------
= helper functions
> -- | Skip any run of whitespace, @--@ line comments and
> -- @/* ... */@ block comments.  Always succeeds, possibly without
> -- consuming anything.
> --
> -- Carriage return counts as whitespace so that input with CRLF
> -- line endings parses.
> whiteSpace :: P ()
> whiteSpace =
>     choice [simpleWhiteSpace *> whiteSpace
>            ,lineComment *> whiteSpace
>            ,blockComment *> whiteSpace
>            ,return ()]
>   where
>     -- a line comment runs to the end of the line or of the input
>     lineComment = try (string "--")
>                   *> manyTill anyChar (void (char '\n') <|> eof)
>     blockComment = -- no nesting of block comments in SQL
>         try (string "/*")
>         -- try is needed on the terminator: string "*/" consumes a
>         -- '*' before failing when no '/' follows, and manyTill only
>         -- carries on if its end parser fails without consuming
>         *> manyTill anyChar (try $ string "*/")
>     -- use many1 so we can more easily avoid non terminating loops
>     simpleWhiteSpace = void $ many1 (oneOf " \t\r\n")
> -- | Try to extend a value with the given suffix parser; if the
> -- suffix parser fails without consuming input, return the value
> -- unchanged.
> optionSuffix :: (a -> P a) -> a -> P a
> optionSuffix p a = p a <|> return a
> -- | Run a parser between an opening and a closing parenthesis.
> parens :: P a -> P a
> parens p = symbol_ "(" *> p <* symbol_ ")"
2013-12-13 11:39:26 +01:00
> -- | Zero or more occurrences of a parser, separated by commas.
> commaSep :: P a -> P [a]
> commaSep p = sepBy p (symbol_ ",")
> -- | Match the given text exactly, then skip trailing whitespace
> -- and comments.  Returns the matched text.
> --
> -- A check that the symbol is not followed by another operator
> -- character, notFollowedBy (oneOf "+-/*<>=!|"), is currently
> -- disabled.
> symbol :: String -> P String
> symbol s = do
>     tok <- string s
>     whiteSpace
>     return tok
2013-12-13 11:39:26 +01:00
> -- | Like 'symbol', discarding the matched text.
> symbol_ :: String -> P ()
> symbol_ = void . symbol
> -- | Match the given keyword exactly (the match is case sensitive),
> -- require that it is not followed by a letter, digit or
> -- underscore, then skip trailing whitespace and comments.  Returns
> -- the keyword.
> keyword :: String -> P String
> keyword s = do
>     kw <- string s
>     notFollowedBy (char '_' <|> alphaNum)
>     whiteSpace
>     return kw
2013-12-13 11:39:26 +01:00
> -- | Like 'keyword', discarding the matched text.
> keyword_ :: String -> P ()
> keyword_ = void . keyword
> -- | One or more occurrences of a parser, separated by commas.
> commaSep1 :: P a -> P [a]
> commaSep1 p = sepBy1 p (symbol_ ",")