add separate lexer
This commit is contained in:
parent
1364c58534
commit
2df76e3095
|
@ -17,9 +17,8 @@
|
|||
|
||||
> import Control.Applicative ((<$>), (<*>), (<**>), pure, Applicative)
|
||||
> import Text.Parsec (option,many)
|
||||
> import Text.Parsec.Prim (Parsec)
|
||||
> import Text.Parsec.String (GenParser)
|
||||
|
||||
> type Parser s = Parsec String s
|
||||
|
||||
a possible issue with the option suffix is that it enforces left
|
||||
associativity when chaining it recursively. Have to review
|
||||
|
@ -29,7 +28,7 @@ instead, and create an alternative suffix parser
|
|||
This function style is not good, and should be replaced with chain and
|
||||
<??> which has a different type
|
||||
|
||||
> optionSuffix :: (a -> Parser s a) -> a -> Parser s a
|
||||
> optionSuffix :: (a -> GenParser t s a) -> a -> GenParser t s a
|
||||
> optionSuffix p a = option a (p a)
|
||||
|
||||
|
||||
|
@ -39,7 +38,7 @@ hand result, taken from uu-parsinglib
|
|||
TODO: make sure the precedence higher than <|> and lower than the
|
||||
other operators so it can be used nicely
|
||||
|
||||
> (<??>) :: Parser s a -> Parser s (a -> a) -> Parser s a
|
||||
> (<??>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
|
||||
> p <??> q = p <**> option id q
|
||||
|
||||
|
||||
|
@ -80,7 +79,7 @@ composing suffix parsers, not sure about the name. This is used to add
|
|||
a second or more suffix parser contingent on the first suffix parser
|
||||
succeeding.
|
||||
|
||||
> (<??.>) :: Parser s (a -> a) -> Parser s (a -> a) -> Parser s (a -> a)
|
||||
> (<??.>) :: GenParser t s (a -> a) -> GenParser t s (a -> a) -> GenParser t s (a -> a)
|
||||
> (<??.>) pa pb = (.) `c` pa <*> option id pb
|
||||
> -- todo: fix this mess
|
||||
> where c = (<$>) . flip
|
||||
|
@ -88,7 +87,7 @@ succeeding.
|
|||
|
||||
0 to many repeated applications of suffix parser
|
||||
|
||||
> (<??*>) :: Parser s a -> Parser s (a -> a) -> Parser s a
|
||||
> (<??*>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
|
||||
> p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
|
||||
|
||||
|
||||
|
|
296
Language/SQL/SimpleSQL/Lexer.lhs
Normal file
296
Language/SQL/SimpleSQL/Lexer.lhs
Normal file
|
@ -0,0 +1,296 @@
|
|||
|
||||
Lexer TODO:
|
||||
|
||||
left factor to get rid of trys
|
||||
|
||||
> -- | This is the module contains a Lexer for SQL.
|
||||
> {-# LANGUAGE TupleSections #-}
|
||||
> module Language.SQL.SimpleSQL.Lexer
|
||||
> (lexSQL
|
||||
> ,Token(..)
|
||||
> ,prettyToken
|
||||
> ,prettyTokens
|
||||
> ,Position
|
||||
> ,ParseError(..)
|
||||
> ,Dialect(..)) where
|
||||
|
||||
> import Language.SQL.SimpleSQL.Syntax (Dialect(..))
|
||||
|
||||
> import Text.Parsec (option,string,manyTill,anyChar
|
||||
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
||||
> ,many,runParser,lookAhead,satisfy)
|
||||
> import Language.SQL.SimpleSQL.Combinators
|
||||
> import Language.SQL.SimpleSQL.Errors
|
||||
> import Control.Applicative hiding ((<|>), many)
|
||||
> import Data.Char
|
||||
> import Control.Monad
|
||||
> import Prelude hiding (takeWhile)
|
||||
> import Text.Parsec.String (Parser)
|
||||
|
||||
|
||||
> -- | Represents a lexed token
|
||||
> data Token
|
||||
> -- | a symbol is one of the following
|
||||
> -- * multi char symbols <> <= >= != ||
|
||||
> -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( )
|
||||
> --
|
||||
> = Symbol String
|
||||
>
|
||||
> -- | This is an identifier or keyword.
|
||||
> --
|
||||
> | Identifier String
|
||||
>
|
||||
> -- | This is an identifier quoted with "
|
||||
> | QIdentifier String
|
||||
> -- | This is an identifier quoted with u&"
|
||||
> | UQIdentifier String
|
||||
|
||||
> -- | This is a dialect specific quoted identifier with the quote
|
||||
> -- characters explicit. The first and second fields are the
|
||||
> -- starting and ending quote characters.n
|
||||
> | DQIdentifier String String String
|
||||
>
|
||||
> -- | This is a host param symbol, e.g. :param
|
||||
> | HostParam String
|
||||
>
|
||||
> -- | This is a string literal.
|
||||
> | SqlString String
|
||||
>
|
||||
> -- | This is a character set string literal. The first field is
|
||||
> -- the charatecter set (one of nNbBxX).
|
||||
> | CSSqlString String String
|
||||
>
|
||||
> -- | a number literal (integral or otherwise), stored in original format
|
||||
> -- unchanged
|
||||
> | SqlNumber String
|
||||
>
|
||||
> -- | non-significant whitespace (space, tab, newline) (strictly speaking,
|
||||
> -- it is up to the client to decide whether the whitespace is significant
|
||||
> -- or not)
|
||||
> | Whitespace String
|
||||
>
|
||||
> -- | a commented line using --, contains every character starting with the
|
||||
> -- \'--\' and including the terminating newline character if there is one
|
||||
> -- - this will be missing if the last line in the source is a line comment
|
||||
> -- with no trailing newline
|
||||
> | LineComment String
|
||||
>
|
||||
> -- | a block comment, \/* stuff *\/, includes the comment delimiters
|
||||
> | BlockComment String
|
||||
>
|
||||
> deriving (Eq,Show)
|
||||
|
||||
|
||||
|
||||
> -- | Accurate pretty printing, if you lex a bunch of tokens,
|
||||
> -- then pretty print them, should should get back exactly the
|
||||
> -- same string
|
||||
> prettyToken :: Dialect -> Token -> String
|
||||
> prettyToken _ (Symbol s) = s
|
||||
> prettyToken _ (Identifier t) = t
|
||||
> prettyToken _ (QIdentifier t) =
|
||||
> "\"" ++ doubleChars '"' t ++ "\""
|
||||
> prettyToken _ (UQIdentifier t) =
|
||||
> "u&\"" ++ doubleChars '"' t ++ "\""
|
||||
> prettyToken _ (DQIdentifier s e t) =
|
||||
> s ++ t ++ e
|
||||
> prettyToken _ (HostParam p) = ':':p
|
||||
> prettyToken _ (SqlString t) = "'" ++ doubleChars '\'' t ++ "'"
|
||||
> prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
|
||||
> prettyToken _ (SqlNumber r) = r
|
||||
> prettyToken _ (Whitespace t) = t
|
||||
> prettyToken _ (LineComment l) = l
|
||||
> prettyToken _ (BlockComment c) = c
|
||||
|
||||
> prettyTokens :: Dialect -> [Token] -> String
|
||||
> prettyTokens d ts = concat $ map (prettyToken d) ts
|
||||
|
||||
> doubleChars :: Char -> String -> String
|
||||
> doubleChars _ [] = []
|
||||
> doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
|
||||
> | otherwise = d:doubleChars c ds
|
||||
|
||||
TODO: try to make all parsers applicative only
|
||||
|
||||
> type Position = (String,Int,Int)
|
||||
|
||||
> addPosition :: Position -> String -> Position
|
||||
> addPosition = addPosition'
|
||||
|
||||
> addPosition' :: Position -> String -> Position
|
||||
> addPosition' (f,l,c) [] = (f,l,c)
|
||||
> addPosition' (f,l,_) ('\n':xs) = addPosition' (f,l+1,0) xs
|
||||
> addPosition' (f,l,c) (_:xs) = addPosition' (f,l,c+1) xs
|
||||
|
||||
|
||||
|
||||
> lexSQL :: Dialect -> Position -> String -> Either ParseError [(Position,Token)]
|
||||
> lexSQL dialect pos@(fn,_,_) txt =
|
||||
> either (Left . convParseError fn) Right
|
||||
> $ runParser (many_p pos <* eof) () "" txt
|
||||
> where
|
||||
|
||||
> many_p pos' = some_p pos' `mplus` return []
|
||||
> some_p pos' = do
|
||||
> tok <- sqlToken dialect pos'
|
||||
> let pos'' = advancePos dialect pos' (snd tok)
|
||||
> (tok:) <$> many_p pos''
|
||||
|
||||
> advancePos :: Dialect -> Position -> Token -> Position
|
||||
> advancePos dialect pos tok =
|
||||
> let pt = prettyToken dialect tok
|
||||
> in addPosition pos pt
|
||||
|
||||
> -- | parser for a sql token
|
||||
> sqlToken :: Dialect -> Position -> Parser (Position,Token)
|
||||
> sqlToken d p =
|
||||
> (p,) <$> choice [sqlString d
|
||||
> ,identifier d
|
||||
> ,hostParam d
|
||||
> ,lineComment d
|
||||
> ,blockComment d
|
||||
> ,sqlNumber d
|
||||
> ,symbol d
|
||||
> ,sqlWhitespace d]
|
||||
|
||||
> takeWhile1 :: (Char -> Bool) -> Parser String
|
||||
> takeWhile1 p = many1 (satisfy p)
|
||||
|
||||
> takeWhile :: (Char -> Bool) -> Parser String
|
||||
> takeWhile p = many (satisfy p)
|
||||
|
||||
> takeTill :: (Char -> Bool) -> Parser String
|
||||
> takeTill p =
|
||||
> try (manyTill anyChar (peekSatisfy p))
|
||||
|
||||
> peekSatisfy :: (Char -> Bool) -> Parser ()
|
||||
> peekSatisfy p = do
|
||||
> void $ try $ lookAhead (satisfy p)
|
||||
|
||||
> identifier :: Dialect -> Parser Token
|
||||
> identifier d =
|
||||
> choice
|
||||
> [QIdentifier <$> qiden
|
||||
> ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
|
||||
> ,Identifier <$> identifierString
|
||||
> ,DQIdentifier "`" "`" <$> mySqlQIden
|
||||
> ]
|
||||
> where
|
||||
> qiden = char '"' *> qidenSuffix ""
|
||||
> qidenSuffix t = do
|
||||
> s <- takeTill (=='"')
|
||||
> void $ char '"'
|
||||
> -- deal with "" as literal double quote character
|
||||
> choice [do
|
||||
> void $ char '"'
|
||||
> qidenSuffix $ concat [t,s,"\""]
|
||||
> ,return $ concat [t,s]]
|
||||
> -- mysql can quote identifiers with `
|
||||
> mySqlQIden = do
|
||||
> guard (d == MySQL)
|
||||
> char '`' *> takeWhile1 (/='`') <* char '`'
|
||||
|
||||
> identifierString :: Parser String
|
||||
> identifierString =
|
||||
> startsWith (\c -> c == '_' || isAlpha c)
|
||||
> (\c -> c == '_' || isAlphaNum c)
|
||||
|
||||
|
||||
> sqlString :: Dialect -> Parser Token
|
||||
> sqlString _ =
|
||||
> choice [csString
|
||||
> ,normalString
|
||||
> ]
|
||||
> where
|
||||
> normalString = SqlString {-"'"-} <$> (char '\'' *> normalStringSuffix "")
|
||||
> normalStringSuffix t = do
|
||||
> s <- takeTill (=='\'')
|
||||
> void $ char '\''
|
||||
> -- deal with '' as literal quote character
|
||||
> choice [do
|
||||
> void $ char '\''
|
||||
> normalStringSuffix $ concat [t,s,"'"]
|
||||
> ,return $ concat [t,s]]
|
||||
> csString = CSSqlString <$> try (cs <* char '\'') <*> normalStringSuffix ""
|
||||
> cs = choice [(:[]) <$> oneOf "nNbBxX"
|
||||
> ,string "u&"
|
||||
> ,string "U&"]
|
||||
|
||||
> hostParam :: Dialect -> Parser Token
|
||||
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
||||
|
||||
|
||||
> sqlNumber :: Dialect -> Parser Token
|
||||
> sqlNumber _ = SqlNumber <$>
|
||||
> (int <??> (pp dot <??.> pp int)
|
||||
> <|> try ((++) <$> dot <*> int))
|
||||
> <??> pp expon
|
||||
> where
|
||||
> int = many1 digit
|
||||
> dot = string "."
|
||||
> expon = (:) <$> oneOf "eE" <*> sInt
|
||||
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
||||
> pp = (<$$> (++))
|
||||
|
||||
digits
|
||||
digits.[digits][e[+-]digits]
|
||||
[digits].digits[e[+-]digits]
|
||||
digitse[+-]digits
|
||||
|
||||
where digits is one or more decimal digits (0 through 9). At least one
|
||||
digit must be before or after the decimal point, if one is used. At
|
||||
least one digit must follow the exponent marker (e), if one is
|
||||
present. There cannot be any spaces or other characters embedded in
|
||||
the constant. Note that any leading plus or minus sign is not actually
|
||||
considered part of the constant; it is an operator applied to the
|
||||
constant.
|
||||
|
||||
|
||||
A symbol is one of the two character symbols, or one of the single
|
||||
character symbols in the two lists below.
|
||||
|
||||
> symbol :: Dialect -> Parser Token
|
||||
> symbol _ = Symbol <$> choice (many1 (char '.') :
|
||||
> map (try . string) [">=","<=","!=","<>","||"]
|
||||
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
|
||||
|
||||
> sqlWhitespace :: Dialect -> Parser Token
|
||||
> sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
|
||||
|
||||
> lineComment :: Dialect -> Parser Token
|
||||
> lineComment _ =
|
||||
> (\s -> LineComment $ concat ["--",s]) <$>
|
||||
> (try (string "--") *> choice
|
||||
> [flip snoc '\n' <$> takeTill (=='\n') <* char '\n'
|
||||
> ,takeWhile (/='\n') <* eof
|
||||
> ])
|
||||
> where
|
||||
> snoc :: String -> Char -> String
|
||||
> snoc s a = s ++ [a]
|
||||
|
||||
> blockComment :: Dialect -> Parser Token
|
||||
> blockComment _ =
|
||||
> (\s -> BlockComment $ concat ["/*",s]) <$>
|
||||
> (try (string "/*") *> commentSuffix 0)
|
||||
> where
|
||||
> commentSuffix :: Int -> Parser String
|
||||
> commentSuffix n = do
|
||||
> -- read until a possible end comment or nested comment
|
||||
> x <- takeWhile (\e -> e /= '/' && e /= '*')
|
||||
> choice [-- close comment: if the nesting is 0, done
|
||||
> -- otherwise recurse on commentSuffix
|
||||
> try (string "*/") *> let t = concat [x,"*/"]
|
||||
> in if n == 0
|
||||
> then return t
|
||||
> else (\s -> concat [t,s]) <$> commentSuffix (n - 1)
|
||||
> -- nested comment, recurse
|
||||
> ,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
|
||||
> -- not an end comment or nested comment, continue
|
||||
> ,(:) <$> anyChar <*> commentSuffix n]
|
||||
|
||||
|
||||
> startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
|
||||
> startsWith p ps = do
|
||||
> c <- satisfy p
|
||||
> choice [(:) c <$> (takeWhile1 ps)
|
||||
> ,return [c]]
|
|
@ -47,7 +47,7 @@ isn't done because of the impact on the parser error
|
|||
messages. Apparently it can also help the speed but this hasn't been
|
||||
looked into.
|
||||
|
||||
== Parser rrror messages
|
||||
== Parser error messages
|
||||
|
||||
A lot of care has been given to generating good parser error messages
|
||||
for invalid syntax. There are a few utils below which partially help
|
||||
|
@ -185,22 +185,25 @@ fixing them in the syntax but leaving them till the semantic checking
|
|||
> import Control.Monad.Identity (Identity)
|
||||
> import Control.Monad (guard, void, when)
|
||||
> import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>), (<**>), pure)
|
||||
> import Data.Maybe (catMaybes)
|
||||
> import Data.Char (toLower)
|
||||
> import Data.Char (toLower, isDigit)
|
||||
> import Text.Parsec (setPosition,setSourceColumn,setSourceLine,getPosition
|
||||
> ,option,between,sepBy,sepBy1,string,manyTill,anyChar
|
||||
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
||||
> ,optionMaybe,optional,many,letter,runParser
|
||||
> ,chainl1, chainr1,(<?>) {-,notFollowedBy,alphaNum-}, lookAhead)
|
||||
> ,option,between,sepBy,sepBy1
|
||||
> ,try,many1,(<|>),choice,eof
|
||||
> ,optionMaybe,optional,runParser
|
||||
> ,chainl1, chainr1,(<?>))
|
||||
> -- import Text.Parsec.String (Parser)
|
||||
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
||||
> import Text.Parsec.Prim (Parsec, getState)
|
||||
> import Text.Parsec.Prim (getState, token)
|
||||
> import Text.Parsec.Pos (newPos)
|
||||
> import qualified Text.Parsec.Expr as E
|
||||
> import Data.List (intercalate,sort,groupBy)
|
||||
> import Data.Function (on)
|
||||
> import Language.SQL.SimpleSQL.Syntax
|
||||
> import Language.SQL.SimpleSQL.Combinators
|
||||
> import Language.SQL.SimpleSQL.Errors
|
||||
> import qualified Language.SQL.SimpleSQL.Lexer as L
|
||||
> import Data.Maybe
|
||||
> import Text.Parsec.String (GenParser)
|
||||
|
||||
= Public API
|
||||
|
||||
|
@ -257,14 +260,21 @@ converts the error return to the nice wrapper
|
|||
> -> Maybe (Int,Int)
|
||||
> -> String
|
||||
> -> Either ParseError a
|
||||
> wrapParse parser d f p src =
|
||||
> wrapParse parser d f p src = do
|
||||
> let (l,c) = fromMaybe (1,0) p
|
||||
> lx <- L.lexSQL d (f,l,c) src
|
||||
> either (Left . convParseError src) Right
|
||||
> $ runParser (setPos p *> whitespace *> parser <* eof)
|
||||
> d f src
|
||||
> $ runParser (setPos p *> parser <* eof)
|
||||
> d f $ filter keep lx
|
||||
> where
|
||||
> setPos Nothing = pure ()
|
||||
> setPos (Just (l,c)) = fmap up getPosition >>= setPosition
|
||||
> where up = flip setSourceColumn c . flip setSourceLine l
|
||||
> keep (_,L.Whitespace {}) = False
|
||||
> keep (_,L.LineComment {}) = False
|
||||
> keep (_,L.BlockComment {}) = False
|
||||
> keep _ = True
|
||||
|
||||
|
||||
------------------------------------------------
|
||||
|
||||
|
@ -304,15 +314,11 @@ u&"example quoted"
|
|||
> name :: Parser Name
|
||||
> name = do
|
||||
> d <- getState
|
||||
> choice [QName <$> quotedIdentifier
|
||||
> ,UQName <$> uquotedIdentifier
|
||||
> choice [QName <$> qidentifierTok
|
||||
> ,UQName <$> uqidentifierTok
|
||||
> ,Name <$> identifierBlacklist (blacklist d)
|
||||
> ,dqName]
|
||||
> where
|
||||
> dqName = guardDialect [MySQL] *>
|
||||
> lexeme (DQName "`" "`"
|
||||
> <$> (char '`'
|
||||
> *> manyTill anyChar (char '`')))
|
||||
> ,(\(s,e,t) -> DQName s e t) <$> dqidentifierTok
|
||||
> ]
|
||||
|
||||
todo: replace (:[]) with a named function all over
|
||||
|
||||
|
@ -433,7 +439,7 @@ Unfortunately, to improve the error messages, there is a lot of (left)
|
|||
factoring in this function, and it is a little dense.
|
||||
|
||||
> typeName :: Parser TypeName
|
||||
> typeName = lexeme $
|
||||
> typeName =
|
||||
> (rowTypeName <|> intervalTypeName <|> otherTypeName)
|
||||
> <??*> tnSuffix
|
||||
> where
|
||||
|
@ -536,20 +542,13 @@ factoring in this function, and it is a little dense.
|
|||
See the stringToken lexer below for notes on string literal syntax.
|
||||
|
||||
> stringLit :: Parser ValueExpr
|
||||
> stringLit = StringLit <$> stringToken
|
||||
> stringLit = StringLit <$> stringTokExtend
|
||||
|
||||
> numberLit :: Parser ValueExpr
|
||||
> numberLit = NumLit <$> numberLiteral
|
||||
> numberLit = NumLit <$> sqlNumberTok
|
||||
|
||||
> characterSetLit :: Parser ValueExpr
|
||||
> characterSetLit =
|
||||
> CSStringLit <$> shortCSPrefix <*> stringToken
|
||||
> where
|
||||
> shortCSPrefix = try $ choice
|
||||
> [(:[]) <$> oneOf "nNbBxX"
|
||||
> ,string "u&"
|
||||
> ,string "U&"
|
||||
> ] <* lookAhead quote
|
||||
> characterSetLit = uncurry CSStringLit <$> csSqlStringLitTok
|
||||
|
||||
> simpleLiteral :: Parser ValueExpr
|
||||
> simpleLiteral = numberLit <|> stringLit <|> characterSetLit
|
||||
|
@ -575,8 +574,8 @@ select x from t where x > :param
|
|||
> parameter = choice
|
||||
> [Parameter <$ questionMark
|
||||
> ,HostParameter
|
||||
> <$> hostParameterToken
|
||||
> <*> optionMaybe (keyword "indicator" *> hostParameterToken)]
|
||||
> <$> hostParamTok
|
||||
> <*> optionMaybe (keyword "indicator" *> hostParamTok)]
|
||||
|
||||
== parens
|
||||
|
||||
|
@ -675,7 +674,7 @@ this. also fix the monad -> applicative
|
|||
> intervalLit = try (keyword_ "interval" >> do
|
||||
> s <- optionMaybe $ choice [True <$ symbol_ "+"
|
||||
> ,False <$ symbol_ "-"]
|
||||
> lit <- stringToken
|
||||
> lit <- stringTok
|
||||
> q <- optionMaybe intervalQualifier
|
||||
> mkIt s lit q)
|
||||
> where
|
||||
|
@ -701,7 +700,7 @@ all the value expressions which start with an identifier
|
|||
> idenExpr :: Parser ValueExpr
|
||||
> idenExpr =
|
||||
> -- todo: work out how to left factor this
|
||||
> try (TypedLit <$> typeName <*> stringToken)
|
||||
> try (TypedLit <$> typeName <*> stringTokExtend)
|
||||
> <|> (names <**> option Iden app)
|
||||
|
||||
=== special
|
||||
|
@ -731,9 +730,11 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
|||
> e <- valueExpr
|
||||
> -- check we haven't parsed the first
|
||||
> -- keyword as an identifier
|
||||
> guard (case (e,kws) of
|
||||
> (Iden [Name i], (k,_):_) | map toLower i == k -> False
|
||||
> _ -> True)
|
||||
> case (e,kws) of
|
||||
> (Iden [Name i], (k,_):_)
|
||||
> | map toLower i == k ->
|
||||
> fail $ "cannot use keyword here: " ++ i
|
||||
> _ -> return ()
|
||||
> pure e
|
||||
> fa <- case firstArg of
|
||||
> SOKNone -> pure Nothing
|
||||
|
@ -806,7 +807,7 @@ in the source
|
|||
> keyword "trim" >>
|
||||
> parens (mkTrim
|
||||
> <$> option "both" sides
|
||||
> <*> option " " stringToken
|
||||
> <*> option " " stringTok
|
||||
> <*> (keyword_ "from" *> valueExpr))
|
||||
> where
|
||||
> sides = choice ["leading" <$ keyword_ "leading"
|
||||
|
@ -986,13 +987,28 @@ a match (select a from t)
|
|||
|
||||
=== escape
|
||||
|
||||
It is going to be really difficult to support an arbitrary character
|
||||
for the escape now there is a separate lexer ...
|
||||
|
||||
> escapeSuffix :: Parser (ValueExpr -> ValueExpr)
|
||||
> escapeSuffix = do
|
||||
> ctor <- choice
|
||||
> [Escape <$ keyword_ "escape"
|
||||
> ,UEscape <$ keyword_ "uescape"]
|
||||
> c <- anyChar
|
||||
> c <- escapeChar
|
||||
> pure $ \v -> ctor v c
|
||||
> where
|
||||
> escapeChar = escapeIden <|> escapeSym
|
||||
> escapeIden = do
|
||||
> c <- identifierTok
|
||||
> case c of
|
||||
> [c'] -> return c'
|
||||
> _ -> fail "escape char must be single char"
|
||||
> escapeSym = do
|
||||
> c <- symbolTok
|
||||
> case c of
|
||||
> [c'] -> return c'
|
||||
> _ -> fail "escape char must be single char"
|
||||
|
||||
=== collate
|
||||
|
||||
|
@ -1018,7 +1034,7 @@ syntax is way too messy. It might be possible to avoid this if we
|
|||
wanted to avoid extensibility and to not be concerned with parse error
|
||||
messages, but both of these are too important.
|
||||
|
||||
> opTable :: Bool -> [[E.Operator String ParseState Identity ValueExpr]]
|
||||
> opTable :: Bool -> [[E.Operator [Token] ParseState Identity ValueExpr]]
|
||||
> opTable bExpr =
|
||||
> [-- parse match and quantified comparisons as postfix ops
|
||||
> -- todo: left factor the quantified comparison with regular
|
||||
|
@ -1452,163 +1468,134 @@ thick.
|
|||
|
||||
------------------------------------------------
|
||||
|
||||
= lexing parsers
|
||||
= lexing
|
||||
|
||||
whitespace parser which skips comments also
|
||||
TODO: push checks into here:
|
||||
keyword blacklists
|
||||
unsigned integer match
|
||||
symbol matching
|
||||
keyword matching
|
||||
|
||||
> whitespace :: Parser ()
|
||||
> whitespace =
|
||||
> choice [simpleWhitespace *> whitespace
|
||||
> ,lineComment *> whitespace
|
||||
> ,blockComment *> whitespace
|
||||
> ,pure ()] <?> "whitespace"
|
||||
> csSqlStringLitTok :: Parser (String,String)
|
||||
> csSqlStringLitTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.CSSqlString p s -> Just (p,s)
|
||||
> _ -> Nothing)
|
||||
|
||||
> stringTok :: Parser String
|
||||
> stringTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.SqlString s -> Just s
|
||||
> _ -> Nothing)
|
||||
|
||||
This is to support SQL strings where you can write
|
||||
'part of a string' ' another part'
|
||||
and it will parse as a single string
|
||||
|
||||
> stringTokExtend :: Parser String
|
||||
> stringTokExtend = do
|
||||
> x <- stringTok
|
||||
> choice [
|
||||
> ((x++) <$> stringTokExtend)
|
||||
> ,return x
|
||||
> ]
|
||||
|
||||
> hostParamTok :: Parser String
|
||||
> hostParamTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.HostParam p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> sqlNumberTok :: Parser String
|
||||
> sqlNumberTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.SqlNumber p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
|
||||
> symbolTok :: Parser String
|
||||
> symbolTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.Symbol p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> identifierTok :: Parser String
|
||||
> identifierTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.Identifier p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> qidentifierTok :: Parser String
|
||||
> qidentifierTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.QIdentifier p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
> dqidentifierTok :: Parser (String,String,String)
|
||||
> dqidentifierTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.DQIdentifier s e t -> Just (s,e,t)
|
||||
> _ -> Nothing)
|
||||
|
||||
> uqidentifierTok :: Parser String
|
||||
> uqidentifierTok = mytoken (\tok ->
|
||||
> case tok of
|
||||
> L.UQIdentifier p -> Just p
|
||||
> _ -> Nothing)
|
||||
|
||||
|
||||
> mytoken :: (L.Token -> Maybe a) -> Parser a
|
||||
> mytoken test = token showToken posToken testToken
|
||||
> where
|
||||
> lineComment = try (string "--")
|
||||
> *> manyTill anyChar (void (char '\n') <|> eof)
|
||||
> blockComment = -- no nesting of block comments in SQL
|
||||
> try (string "/*")
|
||||
> -- try used here so it doesn't fail when we see a
|
||||
> -- '*' which isn't followed by a '/'
|
||||
> *> manyTill anyChar (try $ string "*/")
|
||||
> -- use many1 so we can more easily avoid non terminating loops
|
||||
> simpleWhitespace = void $ many1 (oneOf " \t\n")
|
||||
|
||||
> lexeme :: Parser a -> Parser a
|
||||
> lexeme p = p <* whitespace
|
||||
> showToken (_,tok) = show tok
|
||||
> posToken ((a,b,c),_) = newPos a b c
|
||||
> testToken (_,tok) = test tok
|
||||
|
||||
> unsignedInteger :: Parser Integer
|
||||
> unsignedInteger = read <$> lexeme (many1 digit) <?> "integer"
|
||||
|
||||
|
||||
number literals
|
||||
|
||||
here is the rough grammar target:
|
||||
|
||||
digits
|
||||
digits.[digits][e[+-]digits]
|
||||
[digits].digits[e[+-]digits]
|
||||
digitse[+-]digits
|
||||
|
||||
numbers are parsed to strings, not to a numeric type. This is to avoid
|
||||
making a decision on how to represent numbers, the client code can
|
||||
make this choice.
|
||||
|
||||
> numberLiteral :: Parser String
|
||||
> numberLiteral = lexeme (
|
||||
> (int <??> (pp dot <??.> pp int)
|
||||
> <|> (++) <$> dot <*> int)
|
||||
> <??> pp expon)
|
||||
> where
|
||||
> int = many1 digit
|
||||
> dot = string "."
|
||||
> expon = (:) <$> oneOf "eE" <*> sInt
|
||||
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
||||
> pp = (<$$> (++))
|
||||
|
||||
|
||||
> identifier :: Parser String
|
||||
> identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
|
||||
> <?> "identifier"
|
||||
> where
|
||||
> firstChar = letter <|> char '_' <?> "identifier"
|
||||
> nonFirstChar = digit <|> firstChar <?> ""
|
||||
|
||||
> quotedIdentifier :: Parser String
|
||||
> quotedIdentifier = quotedIdenHelper
|
||||
|
||||
> quotedIdenHelper :: Parser String
|
||||
> quotedIdenHelper =
|
||||
> lexeme (dq *> manyTill anyChar dq >>= optionSuffix moreIden)
|
||||
> <?> "identifier"
|
||||
> where
|
||||
> moreIden s0 = do
|
||||
> void dq
|
||||
> s <- manyTill anyChar dq
|
||||
> optionSuffix moreIden (s0 ++ "\"" ++ s)
|
||||
> dq = char '"' <?> "double quote"
|
||||
|
||||
> uquotedIdentifier :: Parser String
|
||||
> uquotedIdentifier =
|
||||
> try (string "u&" <|> string "U&") *> quotedIdenHelper
|
||||
> <?> "identifier"
|
||||
|
||||
parses an identifier with a : prefix. The : isn't included in the
|
||||
return value
|
||||
|
||||
> hostParameterToken :: Parser String
|
||||
> hostParameterToken = lexeme $ char ':' *> identifier
|
||||
> unsignedInteger = try (do
|
||||
> x <- sqlNumberTok
|
||||
> guard (all isDigit x)
|
||||
> return $ read x
|
||||
> ) <?> "integer"
|
||||
|
||||
todo: work out the symbol parsing better
|
||||
|
||||
> symbol :: String -> Parser String
|
||||
> symbol s = try (lexeme $ do
|
||||
> u <- choice (many1 (char '.') :
|
||||
> map (try . string) [">=","<=","!=","<>","||"]
|
||||
> ++ map (string . (:[])) "+-^*/%~&|<>=")
|
||||
> guard (s == u)
|
||||
> pure s)
|
||||
> <?> s
|
||||
> symbol s = try (do
|
||||
> u <- symbolTok
|
||||
> guard (s == u)
|
||||
> pure s) <?> s
|
||||
|
||||
> singleCharSymbol :: Char -> Parser Char
|
||||
> singleCharSymbol c = c <$ symbol [c]
|
||||
|
||||
> questionMark :: Parser Char
|
||||
> questionMark = lexeme (char '?') <?> "question mark"
|
||||
> questionMark = singleCharSymbol '?' <?> "question mark"
|
||||
|
||||
> openParen :: Parser Char
|
||||
> openParen = lexeme $ char '('
|
||||
> openParen = singleCharSymbol '('
|
||||
|
||||
> closeParen :: Parser Char
|
||||
> closeParen = lexeme $ char ')'
|
||||
> closeParen = singleCharSymbol ')'
|
||||
|
||||
> openBracket :: Parser Char
|
||||
> openBracket = lexeme $ char '['
|
||||
> openBracket = singleCharSymbol '['
|
||||
|
||||
> closeBracket :: Parser Char
|
||||
> closeBracket = lexeme $ char ']'
|
||||
> closeBracket = singleCharSymbol ']'
|
||||
|
||||
|
||||
> comma :: Parser Char
|
||||
> comma = lexeme (char ',') <?> "comma"
|
||||
> comma = singleCharSymbol ','
|
||||
|
||||
> semi :: Parser Char
|
||||
> semi = lexeme (char ';') <?> "semicolon"
|
||||
|
||||
> quote :: Parser Char
|
||||
> quote = lexeme (char '\'') <?> "single quote"
|
||||
|
||||
> --stringToken :: Parser String
|
||||
> --stringToken = lexeme (char '\'' *> manyTill anyChar (char '\''))
|
||||
> -- todo: tidy this up, add the prefixes stuff, and add the multiple
|
||||
> -- string stuff
|
||||
> stringToken :: Parser String
|
||||
> stringToken =
|
||||
> lexeme (nlquote *> manyTill anyChar nlquote
|
||||
> >>= optionSuffix moreString)
|
||||
> <?> "string"
|
||||
> where
|
||||
> moreString s0 = choice
|
||||
> [-- handle two adjacent quotes
|
||||
> do
|
||||
> void nlquote
|
||||
> s <- manyTill anyChar nlquote
|
||||
> optionSuffix moreString (s0 ++ "'" ++ s)
|
||||
> ,-- handle string in separate parts
|
||||
> -- e.g. 'part 1' 'part 2'
|
||||
> do --can this whitespace be factored out?
|
||||
> -- since it will be parsed twice when there is no more literal
|
||||
> -- yes: split the adjacent quote and multiline literal
|
||||
> -- into two different suffixes
|
||||
> -- won't need to call lexeme at the top level anymore after this
|
||||
> try (whitespace <* nlquote)
|
||||
> s <- manyTill anyChar nlquote
|
||||
> optionSuffix moreString (s0 ++ s)
|
||||
> ]
|
||||
> -- non lexeme quote
|
||||
> nlquote = char '\'' <?> "single quote"
|
||||
> semi = singleCharSymbol ';'
|
||||
|
||||
= helper functions
|
||||
|
||||
> keyword :: String -> Parser String
|
||||
> keyword k = try (do
|
||||
> i <- identifier
|
||||
> i <- identifierTok
|
||||
> guard (map toLower i == k)
|
||||
> pure k) <?> k
|
||||
|
||||
|
@ -1638,7 +1625,7 @@ helper function to improve error messages
|
|||
|
||||
> identifierBlacklist :: [String] -> Parser String
|
||||
> identifierBlacklist bl = try (do
|
||||
> i <- identifier
|
||||
> i <- identifierTok
|
||||
> when (map toLower i `elem` bl) $
|
||||
> fail $ "keyword not allowed here: " ++ i
|
||||
> pure i)
|
||||
|
@ -2001,7 +1988,10 @@ different parsers can be used for different dialects
|
|||
|
||||
> type ParseState = Dialect
|
||||
|
||||
> type Parser = Parsec String ParseState
|
||||
> type Token = (L.Position,L.Token)
|
||||
|
||||
> --type Parser = Parsec String ParseState
|
||||
> type Parser = GenParser Token ParseState
|
||||
|
||||
> guardDialect :: [Dialect] -> Parser ()
|
||||
> guardDialect ds = do
|
||||
|
|
|
@ -29,6 +29,7 @@ Flag sqlindent
|
|||
library
|
||||
exposed-modules: Language.SQL.SimpleSQL.Pretty,
|
||||
Language.SQL.SimpleSQL.Parser,
|
||||
Language.SQL.SimpleSQL.Lexer,
|
||||
Language.SQL.SimpleSQL.Syntax
|
||||
Other-Modules: Language.SQL.SimpleSQL.Errors,
|
||||
Language.SQL.SimpleSQL.Combinators
|
||||
|
@ -55,6 +56,7 @@ Test-Suite Tests
|
|||
|
||||
Other-Modules: Language.SQL.SimpleSQL.Pretty,
|
||||
Language.SQL.SimpleSQL.Parser,
|
||||
Language.SQL.SimpleSQL.Lexer,
|
||||
Language.SQL.SimpleSQL.Syntax,
|
||||
Language.SQL.SimpleSQL.Errors,
|
||||
Language.SQL.SimpleSQL.Combinators
|
||||
|
@ -71,11 +73,12 @@ Test-Suite Tests
|
|||
Language.SQL.SimpleSQL.TestTypes,
|
||||
Language.SQL.SimpleSQL.Tests,
|
||||
Language.SQL.SimpleSQL.Tpch,
|
||||
Language.SQL.SimpleSQL.ValueExprs
|
||||
Language.SQL.SimpleSQL.ValueExprs,
|
||||
Language.SQL.SimpleSQL.LexerTests
|
||||
|
||||
other-extensions: TupleSections,DeriveDataTypeable
|
||||
default-language: Haskell2010
|
||||
ghc-options: -Wall
|
||||
ghc-options: -Wall -threaded
|
||||
|
||||
executable SQLIndent
|
||||
main-is: SQLIndent.lhs
|
||||
|
@ -91,3 +94,15 @@ executable SQLIndent
|
|||
buildable: True
|
||||
else
|
||||
buildable: False
|
||||
|
||||
executable TestLex
|
||||
main-is: TestLex.lhs
|
||||
hs-source-dirs: .,tools
|
||||
Build-Depends: base >=4.5 && <4.9,
|
||||
parsec >=3.1 && <3.2,
|
||||
mtl >=2.1 && <2.3,
|
||||
pretty >= 1.1 && < 1.2
|
||||
other-extensions: TupleSections,DeriveDataTypeable
|
||||
default-language: Haskell2010
|
||||
ghc-options: -Wall
|
||||
buildable: False
|
||||
|
|
144
tools/Language/SQL/SimpleSQL/LexerTests.lhs
Normal file
144
tools/Language/SQL/SimpleSQL/LexerTests.lhs
Normal file
|
@ -0,0 +1,144 @@
|
|||
|
||||
|
||||
Test for the lexer
|
||||
|
||||
> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where
|
||||
|
||||
> import Language.SQL.SimpleSQL.TestTypes
|
||||
> import Language.SQL.SimpleSQL.Lexer (Token(..))
|
||||
> --import Debug.Trace
|
||||
|
||||
> lexerTable :: [(String,[Token])]
|
||||
> lexerTable =
|
||||
> -- single char symbols
|
||||
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
||||
> -- multi char symbols
|
||||
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
||||
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||
> -- simple identifiers
|
||||
> in map (\i -> (i, [Identifier i])) idens
|
||||
> ++ map (\i -> ("\"" ++ i ++ "\"", [QIdentifier i])) idens
|
||||
> -- todo: in order to make lex . pretty id, need to
|
||||
> -- preserve the case of the u
|
||||
> ++ map (\i -> ("u&\"" ++ i ++ "\"", [UQIdentifier i])) idens
|
||||
> -- host param
|
||||
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||
> )
|
||||
> -- quoted identifiers with embedded double quotes
|
||||
> ++ [("\"normal \"\" iden\"", [QIdentifier "normal \" iden"])]
|
||||
> -- strings
|
||||
> ++ [("'string'", [SqlString "string"])
|
||||
> ,("'normal '' quote'", [SqlString "normal ' quote"])
|
||||
> ,("'normalendquote '''", [SqlString "normalendquote '"])]
|
||||
> -- csstrings
|
||||
> ++ map (\c -> (c ++ "'test'", [CSSqlString c "test"]))
|
||||
> ["n", "N","b", "B","x", "X", "u&"]
|
||||
> -- numbers
|
||||
> ++ [("10", [SqlNumber "10"])
|
||||
> ,(".1", [SqlNumber ".1"])
|
||||
> ,("5e3", [SqlNumber "5e3"])
|
||||
> ,("5e+3", [SqlNumber "5e+3"])
|
||||
> ,("5e-3", [SqlNumber "5e-3"])
|
||||
> ,("10.2", [SqlNumber "10.2"])
|
||||
> ,("10.2e7", [SqlNumber "10.2e7"])]
|
||||
> -- whitespace
|
||||
> ++ concat [[([a],[Whitespace [a]])
|
||||
> ,([a,b], [Whitespace [a,b]])]
|
||||
> | a <- " \n\t", b <- " \n\t"]
|
||||
> -- line comment
|
||||
> ++ map (\c -> (c, [LineComment c]))
|
||||
> ["--", "-- ", "-- this is a comment"]
|
||||
> -- block comment
|
||||
> ++ map (\c -> (c, [BlockComment c]))
|
||||
> ["/**/", "/* */","/* this is a comment */"
|
||||
> -- todo: bug in the lexer, fix this
|
||||
> --,"/* this *is/ a comment */"
|
||||
> ]
|
||||
|
||||
|
||||
> lexerTests :: TestItem
|
||||
> lexerTests = Group "lexerTests" $
|
||||
> [LexerTest SQL2011 s t | (s,t) <- lexerTable]
|
||||
> ++
|
||||
> [ LexerTest SQL2011 (s ++ s1) (t ++ t1)
|
||||
> | (s,t) <- lexerTable
|
||||
> , (s1,t1) <- lexerTable
|
||||
|
||||
which combinations won't work:
|
||||
<> <= >= || two single symbols which make a double char symbol
|
||||
identifier + identifier if both are quoted or unquoted
|
||||
string string
|
||||
csstring string
|
||||
line comment anything (can add newline?)
|
||||
number number (todo: double check more carefully)
|
||||
|
||||
> , isGood $ t ++ t1
|
||||
|
||||
> ]
|
||||
> ++ map (uncurry $ LexerTest SQL2011)
|
||||
> [("", [])
|
||||
> ]
|
||||
|
||||
> where
|
||||
> isGood :: [Token] -> Bool
|
||||
> isGood l = {-let b =-} and $ map not [p l | p <- map listPred badCombos]
|
||||
> -- in trace ("isGood " ++ show (l,b)) b
|
||||
> badCombos :: [((Token -> Bool),(Token -> Bool))]
|
||||
> badCombos = [symbolPair "<" ">"
|
||||
> ,symbolPair "<" "="
|
||||
> ,symbolPair ">" "="
|
||||
> ,symbolPair "!" "="
|
||||
> ,symbolPair "|" "|"
|
||||
> ,symbolPair "||" "|"
|
||||
> ,symbolPair "|" "||"
|
||||
> ,symbolPair "||" "||"
|
||||
> ,symbolPair "<" ">="
|
||||
|
||||
> ,symbolPair "-" "-"
|
||||
> ,symbolPair "/" "*"
|
||||
> ,symbolPair "*" "/"
|
||||
|
||||
> ,(isIdentifier, isIdentifier)
|
||||
> ,(isQIdentifier, isQIdentifier)
|
||||
> ,(isUQIdentifier, isQIdentifier)
|
||||
> ,(isString, isString)
|
||||
> ,(isCsString, isString)
|
||||
> ,(isLineComment, const True)
|
||||
> ,(isNumber, isNumber)
|
||||
> ,(isHostParam,isIdentifier)
|
||||
> ,(isHostParam,isCsString)
|
||||
> ,(isHostParam,isUQIdentifier)
|
||||
> ,(isIdentifier,isCsString)
|
||||
> ,(isIdentifier,isUQIdentifier)
|
||||
> ,(isWhitespace, isWhitespace)
|
||||
> ,(isIdentifier, isNumber)
|
||||
> ,(isHostParam, isNumber)
|
||||
> ,(isMinus, isLineComment)
|
||||
> ]
|
||||
> isIdentifier (Identifier _) = True
|
||||
> isIdentifier _ = False
|
||||
> isQIdentifier (QIdentifier _) = True
|
||||
> isQIdentifier _ = False
|
||||
> isUQIdentifier (UQIdentifier _) = True
|
||||
> isUQIdentifier _ = False
|
||||
> isCsString (CSSqlString {}) = True
|
||||
> isCsString _ = False
|
||||
> isLineComment (LineComment{}) = True
|
||||
> isLineComment _ = False
|
||||
> isNumber (SqlNumber{}) = True
|
||||
> isNumber _ = False
|
||||
> isHostParam (HostParam{}) = True
|
||||
> isHostParam _ = False
|
||||
> isWhitespace (Whitespace{}) = True
|
||||
> isWhitespace _ = False
|
||||
> isMinus (Symbol "-") = True
|
||||
> isMinus _ = False
|
||||
|
||||
> isString (SqlString _) = True
|
||||
> isString _ = False
|
||||
> symbolPair a b = ((==Symbol a), (==Symbol b))
|
||||
> listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool
|
||||
> listPred _ [] = False
|
||||
> listPred _ [_] = False
|
||||
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
|
||||
> | otherwise = listPred (p,p1) (t1:ts)
|
|
@ -7,6 +7,7 @@ Tests.lhs module for the 'interpreter'.
|
|||
> ,Dialect(..)) where
|
||||
|
||||
> import Language.SQL.SimpleSQL.Syntax
|
||||
> import Language.SQL.SimpleSQL.Lexer (Token)
|
||||
|
||||
TODO: maybe make the dialect args into [dialect], then each test
|
||||
checks all the dialects mentioned work, and all the dialects not
|
||||
|
@ -28,4 +29,5 @@ check that the string given fails to parse
|
|||
|
||||
> | ParseQueryExprFails Dialect String
|
||||
> | ParseValueExprFails Dialect String
|
||||
> | LexerTest Dialect String [Token]
|
||||
> deriving (Eq,Show)
|
||||
|
|
|
@ -15,6 +15,7 @@ test data to the Test.Framework tests.
|
|||
> --import Language.SQL.SimpleSQL.Syntax
|
||||
> import Language.SQL.SimpleSQL.Pretty
|
||||
> import Language.SQL.SimpleSQL.Parser
|
||||
> import Language.SQL.SimpleSQL.Lexer
|
||||
|
||||
> import Language.SQL.SimpleSQL.TestTypes
|
||||
|
||||
|
@ -26,6 +27,7 @@ test data to the Test.Framework tests.
|
|||
> import Language.SQL.SimpleSQL.TableRefs
|
||||
> import Language.SQL.SimpleSQL.ValueExprs
|
||||
> import Language.SQL.SimpleSQL.Tpch
|
||||
> import Language.SQL.SimpleSQL.LexerTests
|
||||
|
||||
> import Language.SQL.SimpleSQL.SQL2011
|
||||
|
||||
|
@ -37,7 +39,8 @@ order on the generated documentation.
|
|||
> testData :: TestItem
|
||||
> testData =
|
||||
> Group "parserTest"
|
||||
> [valueExprTests
|
||||
> [lexerTests
|
||||
> ,valueExprTests
|
||||
> ,queryExprComponentTests
|
||||
> ,queryExprsTests
|
||||
> ,tableRefTests
|
||||
|
@ -73,6 +76,14 @@ order on the generated documentation.
|
|||
> itemToTest (ParseValueExprFails d str) =
|
||||
> toFTest parseValueExpr prettyValueExpr d str
|
||||
|
||||
> itemToTest (LexerTest d s ts) = makeLexerTest d s ts
|
||||
|
||||
> makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree
|
||||
> makeLexerTest d s ts = H.testCase s $ do
|
||||
> let lx = either (error . show) id $ lexSQL d ("", 1, 1) s
|
||||
> H.assertEqual "" ts $ map snd lx
|
||||
> let s' = prettyTokens d $ map snd lx
|
||||
> H.assertEqual "pretty print" s s'
|
||||
|
||||
> toTest :: (Eq a, Show a) =>
|
||||
> (Dialect -> String -> Maybe (Int,Int) -> String -> Either ParseError a)
|
||||
|
@ -123,9 +134,9 @@ order on the generated documentation.
|
|||
> -> Dialect
|
||||
> -> String
|
||||
> -> T.TestTree
|
||||
> toFTest parser pp d str = H.testCase str $ do
|
||||
> toFTest parser _pp d str = H.testCase str $ do
|
||||
> let egot = parser d "" Nothing str
|
||||
> case egot of
|
||||
> Left e -> return ()
|
||||
> Right got ->
|
||||
> Left _e -> return ()
|
||||
> Right _got ->
|
||||
> H.assertFailure $ "parse didn't fail: " ++ show d ++ "\n" ++ str
|
||||
|
|
Loading…
Reference in a new issue