add separate lexer
This commit is contained in:
parent
1364c58534
commit
2df76e3095
|
@ -17,9 +17,8 @@
|
||||||
|
|
||||||
> import Control.Applicative ((<$>), (<*>), (<**>), pure, Applicative)
|
> import Control.Applicative ((<$>), (<*>), (<**>), pure, Applicative)
|
||||||
> import Text.Parsec (option,many)
|
> import Text.Parsec (option,many)
|
||||||
> import Text.Parsec.Prim (Parsec)
|
> import Text.Parsec.String (GenParser)
|
||||||
|
|
||||||
> type Parser s = Parsec String s
|
|
||||||
|
|
||||||
a possible issue with the option suffix is that it enforces left
|
a possible issue with the option suffix is that it enforces left
|
||||||
associativity when chaining it recursively. Have to review
|
associativity when chaining it recursively. Have to review
|
||||||
|
@ -29,7 +28,7 @@ instead, and create an alternative suffix parser
|
||||||
This function style is not good, and should be replaced with chain and
|
This function style is not good, and should be replaced with chain and
|
||||||
<??> which has a different type
|
<??> which has a different type
|
||||||
|
|
||||||
> optionSuffix :: (a -> Parser s a) -> a -> Parser s a
|
> optionSuffix :: (a -> GenParser t s a) -> a -> GenParser t s a
|
||||||
> optionSuffix p a = option a (p a)
|
> optionSuffix p a = option a (p a)
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,7 +38,7 @@ hand result, taken from uu-parsinglib
|
||||||
TODO: make sure the precedence higher than <|> and lower than the
|
TODO: make sure the precedence higher than <|> and lower than the
|
||||||
other operators so it can be used nicely
|
other operators so it can be used nicely
|
||||||
|
|
||||||
> (<??>) :: Parser s a -> Parser s (a -> a) -> Parser s a
|
> (<??>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
|
||||||
> p <??> q = p <**> option id q
|
> p <??> q = p <**> option id q
|
||||||
|
|
||||||
|
|
||||||
|
@ -80,7 +79,7 @@ composing suffix parsers, not sure about the name. This is used to add
|
||||||
a second or more suffix parser contingent on the first suffix parser
|
a second or more suffix parser contingent on the first suffix parser
|
||||||
succeeding.
|
succeeding.
|
||||||
|
|
||||||
> (<??.>) :: Parser s (a -> a) -> Parser s (a -> a) -> Parser s (a -> a)
|
> (<??.>) :: GenParser t s (a -> a) -> GenParser t s (a -> a) -> GenParser t s (a -> a)
|
||||||
> (<??.>) pa pb = (.) `c` pa <*> option id pb
|
> (<??.>) pa pb = (.) `c` pa <*> option id pb
|
||||||
> -- todo: fix this mess
|
> -- todo: fix this mess
|
||||||
> where c = (<$>) . flip
|
> where c = (<$>) . flip
|
||||||
|
@ -88,7 +87,7 @@ succeeding.
|
||||||
|
|
||||||
0 to many repeated applications of suffix parser
|
0 to many repeated applications of suffix parser
|
||||||
|
|
||||||
> (<??*>) :: Parser s a -> Parser s (a -> a) -> Parser s a
|
> (<??*>) :: GenParser t s a -> GenParser t s (a -> a) -> GenParser t s a
|
||||||
> p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
|
> p <??*> q = foldr ($) <$> p <*> (reverse <$> many q)
|
||||||
|
|
||||||
|
|
||||||
|
|
296
Language/SQL/SimpleSQL/Lexer.lhs
Normal file
296
Language/SQL/SimpleSQL/Lexer.lhs
Normal file
|
@ -0,0 +1,296 @@
|
||||||
|
|
||||||
|
Lexer TODO:
|
||||||
|
|
||||||
|
left factor to get rid of trys
|
||||||
|
|
||||||
|
> -- | This is the module contains a Lexer for SQL.
|
||||||
|
> {-# LANGUAGE TupleSections #-}
|
||||||
|
> module Language.SQL.SimpleSQL.Lexer
|
||||||
|
> (lexSQL
|
||||||
|
> ,Token(..)
|
||||||
|
> ,prettyToken
|
||||||
|
> ,prettyTokens
|
||||||
|
> ,Position
|
||||||
|
> ,ParseError(..)
|
||||||
|
> ,Dialect(..)) where
|
||||||
|
|
||||||
|
> import Language.SQL.SimpleSQL.Syntax (Dialect(..))
|
||||||
|
|
||||||
|
> import Text.Parsec (option,string,manyTill,anyChar
|
||||||
|
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
||||||
|
> ,many,runParser,lookAhead,satisfy)
|
||||||
|
> import Language.SQL.SimpleSQL.Combinators
|
||||||
|
> import Language.SQL.SimpleSQL.Errors
|
||||||
|
> import Control.Applicative hiding ((<|>), many)
|
||||||
|
> import Data.Char
|
||||||
|
> import Control.Monad
|
||||||
|
> import Prelude hiding (takeWhile)
|
||||||
|
> import Text.Parsec.String (Parser)
|
||||||
|
|
||||||
|
|
||||||
|
> -- | Represents a lexed token
|
||||||
|
> data Token
|
||||||
|
> -- | a symbol is one of the following
|
||||||
|
> -- * multi char symbols <> <= >= != ||
|
||||||
|
> -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( )
|
||||||
|
> --
|
||||||
|
> = Symbol String
|
||||||
|
>
|
||||||
|
> -- | This is an identifier or keyword.
|
||||||
|
> --
|
||||||
|
> | Identifier String
|
||||||
|
>
|
||||||
|
> -- | This is an identifier quoted with "
|
||||||
|
> | QIdentifier String
|
||||||
|
> -- | This is an identifier quoted with u&"
|
||||||
|
> | UQIdentifier String
|
||||||
|
|
||||||
|
> -- | This is a dialect specific quoted identifier with the quote
|
||||||
|
> -- characters explicit. The first and second fields are the
|
||||||
|
> -- starting and ending quote characters.n
|
||||||
|
> | DQIdentifier String String String
|
||||||
|
>
|
||||||
|
> -- | This is a host param symbol, e.g. :param
|
||||||
|
> | HostParam String
|
||||||
|
>
|
||||||
|
> -- | This is a string literal.
|
||||||
|
> | SqlString String
|
||||||
|
>
|
||||||
|
> -- | This is a character set string literal. The first field is
|
||||||
|
> -- the charatecter set (one of nNbBxX).
|
||||||
|
> | CSSqlString String String
|
||||||
|
>
|
||||||
|
> -- | a number literal (integral or otherwise), stored in original format
|
||||||
|
> -- unchanged
|
||||||
|
> | SqlNumber String
|
||||||
|
>
|
||||||
|
> -- | non-significant whitespace (space, tab, newline) (strictly speaking,
|
||||||
|
> -- it is up to the client to decide whether the whitespace is significant
|
||||||
|
> -- or not)
|
||||||
|
> | Whitespace String
|
||||||
|
>
|
||||||
|
> -- | a commented line using --, contains every character starting with the
|
||||||
|
> -- \'--\' and including the terminating newline character if there is one
|
||||||
|
> -- - this will be missing if the last line in the source is a line comment
|
||||||
|
> -- with no trailing newline
|
||||||
|
> | LineComment String
|
||||||
|
>
|
||||||
|
> -- | a block comment, \/* stuff *\/, includes the comment delimiters
|
||||||
|
> | BlockComment String
|
||||||
|
>
|
||||||
|
> deriving (Eq,Show)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
> -- | Accurate pretty printing, if you lex a bunch of tokens,
|
||||||
|
> -- then pretty print them, should should get back exactly the
|
||||||
|
> -- same string
|
||||||
|
> prettyToken :: Dialect -> Token -> String
|
||||||
|
> prettyToken _ (Symbol s) = s
|
||||||
|
> prettyToken _ (Identifier t) = t
|
||||||
|
> prettyToken _ (QIdentifier t) =
|
||||||
|
> "\"" ++ doubleChars '"' t ++ "\""
|
||||||
|
> prettyToken _ (UQIdentifier t) =
|
||||||
|
> "u&\"" ++ doubleChars '"' t ++ "\""
|
||||||
|
> prettyToken _ (DQIdentifier s e t) =
|
||||||
|
> s ++ t ++ e
|
||||||
|
> prettyToken _ (HostParam p) = ':':p
|
||||||
|
> prettyToken _ (SqlString t) = "'" ++ doubleChars '\'' t ++ "'"
|
||||||
|
> prettyToken _ (CSSqlString cs t) = cs ++ "'" ++ t ++ "'"
|
||||||
|
> prettyToken _ (SqlNumber r) = r
|
||||||
|
> prettyToken _ (Whitespace t) = t
|
||||||
|
> prettyToken _ (LineComment l) = l
|
||||||
|
> prettyToken _ (BlockComment c) = c
|
||||||
|
|
||||||
|
> prettyTokens :: Dialect -> [Token] -> String
|
||||||
|
> prettyTokens d ts = concat $ map (prettyToken d) ts
|
||||||
|
|
||||||
|
> doubleChars :: Char -> String -> String
|
||||||
|
> doubleChars _ [] = []
|
||||||
|
> doubleChars c (d:ds) | c == d = c:d:doubleChars c ds
|
||||||
|
> | otherwise = d:doubleChars c ds
|
||||||
|
|
||||||
|
TODO: try to make all parsers applicative only
|
||||||
|
|
||||||
|
> type Position = (String,Int,Int)
|
||||||
|
|
||||||
|
> addPosition :: Position -> String -> Position
|
||||||
|
> addPosition = addPosition'
|
||||||
|
|
||||||
|
> addPosition' :: Position -> String -> Position
|
||||||
|
> addPosition' (f,l,c) [] = (f,l,c)
|
||||||
|
> addPosition' (f,l,_) ('\n':xs) = addPosition' (f,l+1,0) xs
|
||||||
|
> addPosition' (f,l,c) (_:xs) = addPosition' (f,l,c+1) xs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
> lexSQL :: Dialect -> Position -> String -> Either ParseError [(Position,Token)]
|
||||||
|
> lexSQL dialect pos@(fn,_,_) txt =
|
||||||
|
> either (Left . convParseError fn) Right
|
||||||
|
> $ runParser (many_p pos <* eof) () "" txt
|
||||||
|
> where
|
||||||
|
|
||||||
|
> many_p pos' = some_p pos' `mplus` return []
|
||||||
|
> some_p pos' = do
|
||||||
|
> tok <- sqlToken dialect pos'
|
||||||
|
> let pos'' = advancePos dialect pos' (snd tok)
|
||||||
|
> (tok:) <$> many_p pos''
|
||||||
|
|
||||||
|
> advancePos :: Dialect -> Position -> Token -> Position
|
||||||
|
> advancePos dialect pos tok =
|
||||||
|
> let pt = prettyToken dialect tok
|
||||||
|
> in addPosition pos pt
|
||||||
|
|
||||||
|
> -- | parser for a sql token
|
||||||
|
> sqlToken :: Dialect -> Position -> Parser (Position,Token)
|
||||||
|
> sqlToken d p =
|
||||||
|
> (p,) <$> choice [sqlString d
|
||||||
|
> ,identifier d
|
||||||
|
> ,hostParam d
|
||||||
|
> ,lineComment d
|
||||||
|
> ,blockComment d
|
||||||
|
> ,sqlNumber d
|
||||||
|
> ,symbol d
|
||||||
|
> ,sqlWhitespace d]
|
||||||
|
|
||||||
|
> takeWhile1 :: (Char -> Bool) -> Parser String
|
||||||
|
> takeWhile1 p = many1 (satisfy p)
|
||||||
|
|
||||||
|
> takeWhile :: (Char -> Bool) -> Parser String
|
||||||
|
> takeWhile p = many (satisfy p)
|
||||||
|
|
||||||
|
> takeTill :: (Char -> Bool) -> Parser String
|
||||||
|
> takeTill p =
|
||||||
|
> try (manyTill anyChar (peekSatisfy p))
|
||||||
|
|
||||||
|
> peekSatisfy :: (Char -> Bool) -> Parser ()
|
||||||
|
> peekSatisfy p = do
|
||||||
|
> void $ try $ lookAhead (satisfy p)
|
||||||
|
|
||||||
|
> identifier :: Dialect -> Parser Token
|
||||||
|
> identifier d =
|
||||||
|
> choice
|
||||||
|
> [QIdentifier <$> qiden
|
||||||
|
> ,UQIdentifier <$> ((try (string "u&" <|> string "U&")) *> qiden)
|
||||||
|
> ,Identifier <$> identifierString
|
||||||
|
> ,DQIdentifier "`" "`" <$> mySqlQIden
|
||||||
|
> ]
|
||||||
|
> where
|
||||||
|
> qiden = char '"' *> qidenSuffix ""
|
||||||
|
> qidenSuffix t = do
|
||||||
|
> s <- takeTill (=='"')
|
||||||
|
> void $ char '"'
|
||||||
|
> -- deal with "" as literal double quote character
|
||||||
|
> choice [do
|
||||||
|
> void $ char '"'
|
||||||
|
> qidenSuffix $ concat [t,s,"\""]
|
||||||
|
> ,return $ concat [t,s]]
|
||||||
|
> -- mysql can quote identifiers with `
|
||||||
|
> mySqlQIden = do
|
||||||
|
> guard (d == MySQL)
|
||||||
|
> char '`' *> takeWhile1 (/='`') <* char '`'
|
||||||
|
|
||||||
|
> identifierString :: Parser String
|
||||||
|
> identifierString =
|
||||||
|
> startsWith (\c -> c == '_' || isAlpha c)
|
||||||
|
> (\c -> c == '_' || isAlphaNum c)
|
||||||
|
|
||||||
|
|
||||||
|
> sqlString :: Dialect -> Parser Token
|
||||||
|
> sqlString _ =
|
||||||
|
> choice [csString
|
||||||
|
> ,normalString
|
||||||
|
> ]
|
||||||
|
> where
|
||||||
|
> normalString = SqlString {-"'"-} <$> (char '\'' *> normalStringSuffix "")
|
||||||
|
> normalStringSuffix t = do
|
||||||
|
> s <- takeTill (=='\'')
|
||||||
|
> void $ char '\''
|
||||||
|
> -- deal with '' as literal quote character
|
||||||
|
> choice [do
|
||||||
|
> void $ char '\''
|
||||||
|
> normalStringSuffix $ concat [t,s,"'"]
|
||||||
|
> ,return $ concat [t,s]]
|
||||||
|
> csString = CSSqlString <$> try (cs <* char '\'') <*> normalStringSuffix ""
|
||||||
|
> cs = choice [(:[]) <$> oneOf "nNbBxX"
|
||||||
|
> ,string "u&"
|
||||||
|
> ,string "U&"]
|
||||||
|
|
||||||
|
> hostParam :: Dialect -> Parser Token
|
||||||
|
> hostParam _ = HostParam <$> (char ':' *> identifierString)
|
||||||
|
|
||||||
|
|
||||||
|
> sqlNumber :: Dialect -> Parser Token
|
||||||
|
> sqlNumber _ = SqlNumber <$>
|
||||||
|
> (int <??> (pp dot <??.> pp int)
|
||||||
|
> <|> try ((++) <$> dot <*> int))
|
||||||
|
> <??> pp expon
|
||||||
|
> where
|
||||||
|
> int = many1 digit
|
||||||
|
> dot = string "."
|
||||||
|
> expon = (:) <$> oneOf "eE" <*> sInt
|
||||||
|
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
||||||
|
> pp = (<$$> (++))
|
||||||
|
|
||||||
|
digits
|
||||||
|
digits.[digits][e[+-]digits]
|
||||||
|
[digits].digits[e[+-]digits]
|
||||||
|
digitse[+-]digits
|
||||||
|
|
||||||
|
where digits is one or more decimal digits (0 through 9). At least one
|
||||||
|
digit must be before or after the decimal point, if one is used. At
|
||||||
|
least one digit must follow the exponent marker (e), if one is
|
||||||
|
present. There cannot be any spaces or other characters embedded in
|
||||||
|
the constant. Note that any leading plus or minus sign is not actually
|
||||||
|
considered part of the constant; it is an operator applied to the
|
||||||
|
constant.
|
||||||
|
|
||||||
|
|
||||||
|
A symbol is one of the two character symbols, or one of the single
|
||||||
|
character symbols in the two lists below.
|
||||||
|
|
||||||
|
> symbol :: Dialect -> Parser Token
|
||||||
|
> symbol _ = Symbol <$> choice (many1 (char '.') :
|
||||||
|
> map (try . string) [">=","<=","!=","<>","||"]
|
||||||
|
> ++ map (string . (:[])) "+-^*/%~&|?<>[]=,;()")
|
||||||
|
|
||||||
|
> sqlWhitespace :: Dialect -> Parser Token
|
||||||
|
> sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
|
||||||
|
|
||||||
|
> lineComment :: Dialect -> Parser Token
|
||||||
|
> lineComment _ =
|
||||||
|
> (\s -> LineComment $ concat ["--",s]) <$>
|
||||||
|
> (try (string "--") *> choice
|
||||||
|
> [flip snoc '\n' <$> takeTill (=='\n') <* char '\n'
|
||||||
|
> ,takeWhile (/='\n') <* eof
|
||||||
|
> ])
|
||||||
|
> where
|
||||||
|
> snoc :: String -> Char -> String
|
||||||
|
> snoc s a = s ++ [a]
|
||||||
|
|
||||||
|
> blockComment :: Dialect -> Parser Token
|
||||||
|
> blockComment _ =
|
||||||
|
> (\s -> BlockComment $ concat ["/*",s]) <$>
|
||||||
|
> (try (string "/*") *> commentSuffix 0)
|
||||||
|
> where
|
||||||
|
> commentSuffix :: Int -> Parser String
|
||||||
|
> commentSuffix n = do
|
||||||
|
> -- read until a possible end comment or nested comment
|
||||||
|
> x <- takeWhile (\e -> e /= '/' && e /= '*')
|
||||||
|
> choice [-- close comment: if the nesting is 0, done
|
||||||
|
> -- otherwise recurse on commentSuffix
|
||||||
|
> try (string "*/") *> let t = concat [x,"*/"]
|
||||||
|
> in if n == 0
|
||||||
|
> then return t
|
||||||
|
> else (\s -> concat [t,s]) <$> commentSuffix (n - 1)
|
||||||
|
> -- nested comment, recurse
|
||||||
|
> ,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
|
||||||
|
> -- not an end comment or nested comment, continue
|
||||||
|
> ,(:) <$> anyChar <*> commentSuffix n]
|
||||||
|
|
||||||
|
|
||||||
|
> startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
|
||||||
|
> startsWith p ps = do
|
||||||
|
> c <- satisfy p
|
||||||
|
> choice [(:) c <$> (takeWhile1 ps)
|
||||||
|
> ,return [c]]
|
|
@ -47,7 +47,7 @@ isn't done because of the impact on the parser error
|
||||||
messages. Apparently it can also help the speed but this hasn't been
|
messages. Apparently it can also help the speed but this hasn't been
|
||||||
looked into.
|
looked into.
|
||||||
|
|
||||||
== Parser rrror messages
|
== Parser error messages
|
||||||
|
|
||||||
A lot of care has been given to generating good parser error messages
|
A lot of care has been given to generating good parser error messages
|
||||||
for invalid syntax. There are a few utils below which partially help
|
for invalid syntax. There are a few utils below which partially help
|
||||||
|
@ -185,22 +185,25 @@ fixing them in the syntax but leaving them till the semantic checking
|
||||||
> import Control.Monad.Identity (Identity)
|
> import Control.Monad.Identity (Identity)
|
||||||
> import Control.Monad (guard, void, when)
|
> import Control.Monad (guard, void, when)
|
||||||
> import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>), (<**>), pure)
|
> import Control.Applicative ((<$), (<$>), (<*>) ,(<*), (*>), (<**>), pure)
|
||||||
> import Data.Maybe (catMaybes)
|
> import Data.Char (toLower, isDigit)
|
||||||
> import Data.Char (toLower)
|
|
||||||
> import Text.Parsec (setPosition,setSourceColumn,setSourceLine,getPosition
|
> import Text.Parsec (setPosition,setSourceColumn,setSourceLine,getPosition
|
||||||
> ,option,between,sepBy,sepBy1,string,manyTill,anyChar
|
> ,option,between,sepBy,sepBy1
|
||||||
> ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
|
> ,try,many1,(<|>),choice,eof
|
||||||
> ,optionMaybe,optional,many,letter,runParser
|
> ,optionMaybe,optional,runParser
|
||||||
> ,chainl1, chainr1,(<?>) {-,notFollowedBy,alphaNum-}, lookAhead)
|
> ,chainl1, chainr1,(<?>))
|
||||||
> -- import Text.Parsec.String (Parser)
|
> -- import Text.Parsec.String (Parser)
|
||||||
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
> import Text.Parsec.Perm (permute,(<$?>), (<|?>))
|
||||||
> import Text.Parsec.Prim (Parsec, getState)
|
> import Text.Parsec.Prim (getState, token)
|
||||||
|
> import Text.Parsec.Pos (newPos)
|
||||||
> import qualified Text.Parsec.Expr as E
|
> import qualified Text.Parsec.Expr as E
|
||||||
> import Data.List (intercalate,sort,groupBy)
|
> import Data.List (intercalate,sort,groupBy)
|
||||||
> import Data.Function (on)
|
> import Data.Function (on)
|
||||||
> import Language.SQL.SimpleSQL.Syntax
|
> import Language.SQL.SimpleSQL.Syntax
|
||||||
> import Language.SQL.SimpleSQL.Combinators
|
> import Language.SQL.SimpleSQL.Combinators
|
||||||
> import Language.SQL.SimpleSQL.Errors
|
> import Language.SQL.SimpleSQL.Errors
|
||||||
|
> import qualified Language.SQL.SimpleSQL.Lexer as L
|
||||||
|
> import Data.Maybe
|
||||||
|
> import Text.Parsec.String (GenParser)
|
||||||
|
|
||||||
= Public API
|
= Public API
|
||||||
|
|
||||||
|
@ -257,14 +260,21 @@ converts the error return to the nice wrapper
|
||||||
> -> Maybe (Int,Int)
|
> -> Maybe (Int,Int)
|
||||||
> -> String
|
> -> String
|
||||||
> -> Either ParseError a
|
> -> Either ParseError a
|
||||||
> wrapParse parser d f p src =
|
> wrapParse parser d f p src = do
|
||||||
|
> let (l,c) = fromMaybe (1,0) p
|
||||||
|
> lx <- L.lexSQL d (f,l,c) src
|
||||||
> either (Left . convParseError src) Right
|
> either (Left . convParseError src) Right
|
||||||
> $ runParser (setPos p *> whitespace *> parser <* eof)
|
> $ runParser (setPos p *> parser <* eof)
|
||||||
> d f src
|
> d f $ filter keep lx
|
||||||
> where
|
> where
|
||||||
> setPos Nothing = pure ()
|
> setPos Nothing = pure ()
|
||||||
> setPos (Just (l,c)) = fmap up getPosition >>= setPosition
|
> setPos (Just (l,c)) = fmap up getPosition >>= setPosition
|
||||||
> where up = flip setSourceColumn c . flip setSourceLine l
|
> where up = flip setSourceColumn c . flip setSourceLine l
|
||||||
|
> keep (_,L.Whitespace {}) = False
|
||||||
|
> keep (_,L.LineComment {}) = False
|
||||||
|
> keep (_,L.BlockComment {}) = False
|
||||||
|
> keep _ = True
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
|
||||||
|
@ -304,15 +314,11 @@ u&"example quoted"
|
||||||
> name :: Parser Name
|
> name :: Parser Name
|
||||||
> name = do
|
> name = do
|
||||||
> d <- getState
|
> d <- getState
|
||||||
> choice [QName <$> quotedIdentifier
|
> choice [QName <$> qidentifierTok
|
||||||
> ,UQName <$> uquotedIdentifier
|
> ,UQName <$> uqidentifierTok
|
||||||
> ,Name <$> identifierBlacklist (blacklist d)
|
> ,Name <$> identifierBlacklist (blacklist d)
|
||||||
> ,dqName]
|
> ,(\(s,e,t) -> DQName s e t) <$> dqidentifierTok
|
||||||
> where
|
> ]
|
||||||
> dqName = guardDialect [MySQL] *>
|
|
||||||
> lexeme (DQName "`" "`"
|
|
||||||
> <$> (char '`'
|
|
||||||
> *> manyTill anyChar (char '`')))
|
|
||||||
|
|
||||||
todo: replace (:[]) with a named function all over
|
todo: replace (:[]) with a named function all over
|
||||||
|
|
||||||
|
@ -433,7 +439,7 @@ Unfortunately, to improve the error messages, there is a lot of (left)
|
||||||
factoring in this function, and it is a little dense.
|
factoring in this function, and it is a little dense.
|
||||||
|
|
||||||
> typeName :: Parser TypeName
|
> typeName :: Parser TypeName
|
||||||
> typeName = lexeme $
|
> typeName =
|
||||||
> (rowTypeName <|> intervalTypeName <|> otherTypeName)
|
> (rowTypeName <|> intervalTypeName <|> otherTypeName)
|
||||||
> <??*> tnSuffix
|
> <??*> tnSuffix
|
||||||
> where
|
> where
|
||||||
|
@ -536,20 +542,13 @@ factoring in this function, and it is a little dense.
|
||||||
See the stringToken lexer below for notes on string literal syntax.
|
See the stringToken lexer below for notes on string literal syntax.
|
||||||
|
|
||||||
> stringLit :: Parser ValueExpr
|
> stringLit :: Parser ValueExpr
|
||||||
> stringLit = StringLit <$> stringToken
|
> stringLit = StringLit <$> stringTokExtend
|
||||||
|
|
||||||
> numberLit :: Parser ValueExpr
|
> numberLit :: Parser ValueExpr
|
||||||
> numberLit = NumLit <$> numberLiteral
|
> numberLit = NumLit <$> sqlNumberTok
|
||||||
|
|
||||||
> characterSetLit :: Parser ValueExpr
|
> characterSetLit :: Parser ValueExpr
|
||||||
> characterSetLit =
|
> characterSetLit = uncurry CSStringLit <$> csSqlStringLitTok
|
||||||
> CSStringLit <$> shortCSPrefix <*> stringToken
|
|
||||||
> where
|
|
||||||
> shortCSPrefix = try $ choice
|
|
||||||
> [(:[]) <$> oneOf "nNbBxX"
|
|
||||||
> ,string "u&"
|
|
||||||
> ,string "U&"
|
|
||||||
> ] <* lookAhead quote
|
|
||||||
|
|
||||||
> simpleLiteral :: Parser ValueExpr
|
> simpleLiteral :: Parser ValueExpr
|
||||||
> simpleLiteral = numberLit <|> stringLit <|> characterSetLit
|
> simpleLiteral = numberLit <|> stringLit <|> characterSetLit
|
||||||
|
@ -575,8 +574,8 @@ select x from t where x > :param
|
||||||
> parameter = choice
|
> parameter = choice
|
||||||
> [Parameter <$ questionMark
|
> [Parameter <$ questionMark
|
||||||
> ,HostParameter
|
> ,HostParameter
|
||||||
> <$> hostParameterToken
|
> <$> hostParamTok
|
||||||
> <*> optionMaybe (keyword "indicator" *> hostParameterToken)]
|
> <*> optionMaybe (keyword "indicator" *> hostParamTok)]
|
||||||
|
|
||||||
== parens
|
== parens
|
||||||
|
|
||||||
|
@ -675,7 +674,7 @@ this. also fix the monad -> applicative
|
||||||
> intervalLit = try (keyword_ "interval" >> do
|
> intervalLit = try (keyword_ "interval" >> do
|
||||||
> s <- optionMaybe $ choice [True <$ symbol_ "+"
|
> s <- optionMaybe $ choice [True <$ symbol_ "+"
|
||||||
> ,False <$ symbol_ "-"]
|
> ,False <$ symbol_ "-"]
|
||||||
> lit <- stringToken
|
> lit <- stringTok
|
||||||
> q <- optionMaybe intervalQualifier
|
> q <- optionMaybe intervalQualifier
|
||||||
> mkIt s lit q)
|
> mkIt s lit q)
|
||||||
> where
|
> where
|
||||||
|
@ -701,7 +700,7 @@ all the value expressions which start with an identifier
|
||||||
> idenExpr :: Parser ValueExpr
|
> idenExpr :: Parser ValueExpr
|
||||||
> idenExpr =
|
> idenExpr =
|
||||||
> -- todo: work out how to left factor this
|
> -- todo: work out how to left factor this
|
||||||
> try (TypedLit <$> typeName <*> stringToken)
|
> try (TypedLit <$> typeName <*> stringTokExtend)
|
||||||
> <|> (names <**> option Iden app)
|
> <|> (names <**> option Iden app)
|
||||||
|
|
||||||
=== special
|
=== special
|
||||||
|
@ -731,9 +730,11 @@ operatorname(firstArg keyword0 arg0 keyword1 arg1 etc.)
|
||||||
> e <- valueExpr
|
> e <- valueExpr
|
||||||
> -- check we haven't parsed the first
|
> -- check we haven't parsed the first
|
||||||
> -- keyword as an identifier
|
> -- keyword as an identifier
|
||||||
> guard (case (e,kws) of
|
> case (e,kws) of
|
||||||
> (Iden [Name i], (k,_):_) | map toLower i == k -> False
|
> (Iden [Name i], (k,_):_)
|
||||||
> _ -> True)
|
> | map toLower i == k ->
|
||||||
|
> fail $ "cannot use keyword here: " ++ i
|
||||||
|
> _ -> return ()
|
||||||
> pure e
|
> pure e
|
||||||
> fa <- case firstArg of
|
> fa <- case firstArg of
|
||||||
> SOKNone -> pure Nothing
|
> SOKNone -> pure Nothing
|
||||||
|
@ -806,7 +807,7 @@ in the source
|
||||||
> keyword "trim" >>
|
> keyword "trim" >>
|
||||||
> parens (mkTrim
|
> parens (mkTrim
|
||||||
> <$> option "both" sides
|
> <$> option "both" sides
|
||||||
> <*> option " " stringToken
|
> <*> option " " stringTok
|
||||||
> <*> (keyword_ "from" *> valueExpr))
|
> <*> (keyword_ "from" *> valueExpr))
|
||||||
> where
|
> where
|
||||||
> sides = choice ["leading" <$ keyword_ "leading"
|
> sides = choice ["leading" <$ keyword_ "leading"
|
||||||
|
@ -986,13 +987,28 @@ a match (select a from t)
|
||||||
|
|
||||||
=== escape
|
=== escape
|
||||||
|
|
||||||
|
It is going to be really difficult to support an arbitrary character
|
||||||
|
for the escape now there is a separate lexer ...
|
||||||
|
|
||||||
> escapeSuffix :: Parser (ValueExpr -> ValueExpr)
|
> escapeSuffix :: Parser (ValueExpr -> ValueExpr)
|
||||||
> escapeSuffix = do
|
> escapeSuffix = do
|
||||||
> ctor <- choice
|
> ctor <- choice
|
||||||
> [Escape <$ keyword_ "escape"
|
> [Escape <$ keyword_ "escape"
|
||||||
> ,UEscape <$ keyword_ "uescape"]
|
> ,UEscape <$ keyword_ "uescape"]
|
||||||
> c <- anyChar
|
> c <- escapeChar
|
||||||
> pure $ \v -> ctor v c
|
> pure $ \v -> ctor v c
|
||||||
|
> where
|
||||||
|
> escapeChar = escapeIden <|> escapeSym
|
||||||
|
> escapeIden = do
|
||||||
|
> c <- identifierTok
|
||||||
|
> case c of
|
||||||
|
> [c'] -> return c'
|
||||||
|
> _ -> fail "escape char must be single char"
|
||||||
|
> escapeSym = do
|
||||||
|
> c <- symbolTok
|
||||||
|
> case c of
|
||||||
|
> [c'] -> return c'
|
||||||
|
> _ -> fail "escape char must be single char"
|
||||||
|
|
||||||
=== collate
|
=== collate
|
||||||
|
|
||||||
|
@ -1018,7 +1034,7 @@ syntax is way too messy. It might be possible to avoid this if we
|
||||||
wanted to avoid extensibility and to not be concerned with parse error
|
wanted to avoid extensibility and to not be concerned with parse error
|
||||||
messages, but both of these are too important.
|
messages, but both of these are too important.
|
||||||
|
|
||||||
> opTable :: Bool -> [[E.Operator String ParseState Identity ValueExpr]]
|
> opTable :: Bool -> [[E.Operator [Token] ParseState Identity ValueExpr]]
|
||||||
> opTable bExpr =
|
> opTable bExpr =
|
||||||
> [-- parse match and quantified comparisons as postfix ops
|
> [-- parse match and quantified comparisons as postfix ops
|
||||||
> -- todo: left factor the quantified comparison with regular
|
> -- todo: left factor the quantified comparison with regular
|
||||||
|
@ -1452,163 +1468,134 @@ thick.
|
||||||
|
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
|
||||||
= lexing parsers
|
= lexing
|
||||||
|
|
||||||
whitespace parser which skips comments also
|
TODO: push checks into here:
|
||||||
|
keyword blacklists
|
||||||
|
unsigned integer match
|
||||||
|
symbol matching
|
||||||
|
keyword matching
|
||||||
|
|
||||||
> whitespace :: Parser ()
|
> csSqlStringLitTok :: Parser (String,String)
|
||||||
> whitespace =
|
> csSqlStringLitTok = mytoken (\tok ->
|
||||||
> choice [simpleWhitespace *> whitespace
|
> case tok of
|
||||||
> ,lineComment *> whitespace
|
> L.CSSqlString p s -> Just (p,s)
|
||||||
> ,blockComment *> whitespace
|
> _ -> Nothing)
|
||||||
> ,pure ()] <?> "whitespace"
|
|
||||||
|
> stringTok :: Parser String
|
||||||
|
> stringTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.SqlString s -> Just s
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
This is to support SQL strings where you can write
|
||||||
|
'part of a string' ' another part'
|
||||||
|
and it will parse as a single string
|
||||||
|
|
||||||
|
> stringTokExtend :: Parser String
|
||||||
|
> stringTokExtend = do
|
||||||
|
> x <- stringTok
|
||||||
|
> choice [
|
||||||
|
> ((x++) <$> stringTokExtend)
|
||||||
|
> ,return x
|
||||||
|
> ]
|
||||||
|
|
||||||
|
> hostParamTok :: Parser String
|
||||||
|
> hostParamTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.HostParam p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
> sqlNumberTok :: Parser String
|
||||||
|
> sqlNumberTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.SqlNumber p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
|
||||||
|
> symbolTok :: Parser String
|
||||||
|
> symbolTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.Symbol p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
> identifierTok :: Parser String
|
||||||
|
> identifierTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.Identifier p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
> qidentifierTok :: Parser String
|
||||||
|
> qidentifierTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.QIdentifier p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
> dqidentifierTok :: Parser (String,String,String)
|
||||||
|
> dqidentifierTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.DQIdentifier s e t -> Just (s,e,t)
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
> uqidentifierTok :: Parser String
|
||||||
|
> uqidentifierTok = mytoken (\tok ->
|
||||||
|
> case tok of
|
||||||
|
> L.UQIdentifier p -> Just p
|
||||||
|
> _ -> Nothing)
|
||||||
|
|
||||||
|
|
||||||
|
> mytoken :: (L.Token -> Maybe a) -> Parser a
|
||||||
|
> mytoken test = token showToken posToken testToken
|
||||||
> where
|
> where
|
||||||
> lineComment = try (string "--")
|
> showToken (_,tok) = show tok
|
||||||
> *> manyTill anyChar (void (char '\n') <|> eof)
|
> posToken ((a,b,c),_) = newPos a b c
|
||||||
> blockComment = -- no nesting of block comments in SQL
|
> testToken (_,tok) = test tok
|
||||||
> try (string "/*")
|
|
||||||
> -- try used here so it doesn't fail when we see a
|
|
||||||
> -- '*' which isn't followed by a '/'
|
|
||||||
> *> manyTill anyChar (try $ string "*/")
|
|
||||||
> -- use many1 so we can more easily avoid non terminating loops
|
|
||||||
> simpleWhitespace = void $ many1 (oneOf " \t\n")
|
|
||||||
|
|
||||||
> lexeme :: Parser a -> Parser a
|
|
||||||
> lexeme p = p <* whitespace
|
|
||||||
|
|
||||||
> unsignedInteger :: Parser Integer
|
> unsignedInteger :: Parser Integer
|
||||||
> unsignedInteger = read <$> lexeme (many1 digit) <?> "integer"
|
> unsignedInteger = try (do
|
||||||
|
> x <- sqlNumberTok
|
||||||
|
> guard (all isDigit x)
|
||||||
number literals
|
> return $ read x
|
||||||
|
> ) <?> "integer"
|
||||||
here is the rough grammar target:
|
|
||||||
|
|
||||||
digits
|
|
||||||
digits.[digits][e[+-]digits]
|
|
||||||
[digits].digits[e[+-]digits]
|
|
||||||
digitse[+-]digits
|
|
||||||
|
|
||||||
numbers are parsed to strings, not to a numeric type. This is to avoid
|
|
||||||
making a decision on how to represent numbers, the client code can
|
|
||||||
make this choice.
|
|
||||||
|
|
||||||
> numberLiteral :: Parser String
|
|
||||||
> numberLiteral = lexeme (
|
|
||||||
> (int <??> (pp dot <??.> pp int)
|
|
||||||
> <|> (++) <$> dot <*> int)
|
|
||||||
> <??> pp expon)
|
|
||||||
> where
|
|
||||||
> int = many1 digit
|
|
||||||
> dot = string "."
|
|
||||||
> expon = (:) <$> oneOf "eE" <*> sInt
|
|
||||||
> sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
|
||||||
> pp = (<$$> (++))
|
|
||||||
|
|
||||||
|
|
||||||
> identifier :: Parser String
|
|
||||||
> identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
|
|
||||||
> <?> "identifier"
|
|
||||||
> where
|
|
||||||
> firstChar = letter <|> char '_' <?> "identifier"
|
|
||||||
> nonFirstChar = digit <|> firstChar <?> ""
|
|
||||||
|
|
||||||
> quotedIdentifier :: Parser String
|
|
||||||
> quotedIdentifier = quotedIdenHelper
|
|
||||||
|
|
||||||
> quotedIdenHelper :: Parser String
|
|
||||||
> quotedIdenHelper =
|
|
||||||
> lexeme (dq *> manyTill anyChar dq >>= optionSuffix moreIden)
|
|
||||||
> <?> "identifier"
|
|
||||||
> where
|
|
||||||
> moreIden s0 = do
|
|
||||||
> void dq
|
|
||||||
> s <- manyTill anyChar dq
|
|
||||||
> optionSuffix moreIden (s0 ++ "\"" ++ s)
|
|
||||||
> dq = char '"' <?> "double quote"
|
|
||||||
|
|
||||||
> uquotedIdentifier :: Parser String
|
|
||||||
> uquotedIdentifier =
|
|
||||||
> try (string "u&" <|> string "U&") *> quotedIdenHelper
|
|
||||||
> <?> "identifier"
|
|
||||||
|
|
||||||
parses an identifier with a : prefix. The : isn't included in the
|
|
||||||
return value
|
|
||||||
|
|
||||||
> hostParameterToken :: Parser String
|
|
||||||
> hostParameterToken = lexeme $ char ':' *> identifier
|
|
||||||
|
|
||||||
todo: work out the symbol parsing better
|
todo: work out the symbol parsing better
|
||||||
|
|
||||||
> symbol :: String -> Parser String
|
> symbol :: String -> Parser String
|
||||||
> symbol s = try (lexeme $ do
|
> symbol s = try (do
|
||||||
> u <- choice (many1 (char '.') :
|
> u <- symbolTok
|
||||||
> map (try . string) [">=","<=","!=","<>","||"]
|
> guard (s == u)
|
||||||
> ++ map (string . (:[])) "+-^*/%~&|<>=")
|
> pure s) <?> s
|
||||||
> guard (s == u)
|
|
||||||
> pure s)
|
> singleCharSymbol :: Char -> Parser Char
|
||||||
> <?> s
|
> singleCharSymbol c = c <$ symbol [c]
|
||||||
|
|
||||||
> questionMark :: Parser Char
|
> questionMark :: Parser Char
|
||||||
> questionMark = lexeme (char '?') <?> "question mark"
|
> questionMark = singleCharSymbol '?' <?> "question mark"
|
||||||
|
|
||||||
> openParen :: Parser Char
|
> openParen :: Parser Char
|
||||||
> openParen = lexeme $ char '('
|
> openParen = singleCharSymbol '('
|
||||||
|
|
||||||
> closeParen :: Parser Char
|
> closeParen :: Parser Char
|
||||||
> closeParen = lexeme $ char ')'
|
> closeParen = singleCharSymbol ')'
|
||||||
|
|
||||||
> openBracket :: Parser Char
|
> openBracket :: Parser Char
|
||||||
> openBracket = lexeme $ char '['
|
> openBracket = singleCharSymbol '['
|
||||||
|
|
||||||
> closeBracket :: Parser Char
|
> closeBracket :: Parser Char
|
||||||
> closeBracket = lexeme $ char ']'
|
> closeBracket = singleCharSymbol ']'
|
||||||
|
|
||||||
|
|
||||||
> comma :: Parser Char
|
> comma :: Parser Char
|
||||||
> comma = lexeme (char ',') <?> "comma"
|
> comma = singleCharSymbol ','
|
||||||
|
|
||||||
> semi :: Parser Char
|
> semi :: Parser Char
|
||||||
> semi = lexeme (char ';') <?> "semicolon"
|
> semi = singleCharSymbol ';'
|
||||||
|
|
||||||
> quote :: Parser Char
|
|
||||||
> quote = lexeme (char '\'') <?> "single quote"
|
|
||||||
|
|
||||||
> --stringToken :: Parser String
|
|
||||||
> --stringToken = lexeme (char '\'' *> manyTill anyChar (char '\''))
|
|
||||||
> -- todo: tidy this up, add the prefixes stuff, and add the multiple
|
|
||||||
> -- string stuff
|
|
||||||
> stringToken :: Parser String
|
|
||||||
> stringToken =
|
|
||||||
> lexeme (nlquote *> manyTill anyChar nlquote
|
|
||||||
> >>= optionSuffix moreString)
|
|
||||||
> <?> "string"
|
|
||||||
> where
|
|
||||||
> moreString s0 = choice
|
|
||||||
> [-- handle two adjacent quotes
|
|
||||||
> do
|
|
||||||
> void nlquote
|
|
||||||
> s <- manyTill anyChar nlquote
|
|
||||||
> optionSuffix moreString (s0 ++ "'" ++ s)
|
|
||||||
> ,-- handle string in separate parts
|
|
||||||
> -- e.g. 'part 1' 'part 2'
|
|
||||||
> do --can this whitespace be factored out?
|
|
||||||
> -- since it will be parsed twice when there is no more literal
|
|
||||||
> -- yes: split the adjacent quote and multiline literal
|
|
||||||
> -- into two different suffixes
|
|
||||||
> -- won't need to call lexeme at the top level anymore after this
|
|
||||||
> try (whitespace <* nlquote)
|
|
||||||
> s <- manyTill anyChar nlquote
|
|
||||||
> optionSuffix moreString (s0 ++ s)
|
|
||||||
> ]
|
|
||||||
> -- non lexeme quote
|
|
||||||
> nlquote = char '\'' <?> "single quote"
|
|
||||||
|
|
||||||
= helper functions
|
= helper functions
|
||||||
|
|
||||||
> keyword :: String -> Parser String
|
> keyword :: String -> Parser String
|
||||||
> keyword k = try (do
|
> keyword k = try (do
|
||||||
> i <- identifier
|
> i <- identifierTok
|
||||||
> guard (map toLower i == k)
|
> guard (map toLower i == k)
|
||||||
> pure k) <?> k
|
> pure k) <?> k
|
||||||
|
|
||||||
|
@ -1638,7 +1625,7 @@ helper function to improve error messages
|
||||||
|
|
||||||
> identifierBlacklist :: [String] -> Parser String
|
> identifierBlacklist :: [String] -> Parser String
|
||||||
> identifierBlacklist bl = try (do
|
> identifierBlacklist bl = try (do
|
||||||
> i <- identifier
|
> i <- identifierTok
|
||||||
> when (map toLower i `elem` bl) $
|
> when (map toLower i `elem` bl) $
|
||||||
> fail $ "keyword not allowed here: " ++ i
|
> fail $ "keyword not allowed here: " ++ i
|
||||||
> pure i)
|
> pure i)
|
||||||
|
@ -2001,7 +1988,10 @@ different parsers can be used for different dialects
|
||||||
|
|
||||||
> type ParseState = Dialect
|
> type ParseState = Dialect
|
||||||
|
|
||||||
> type Parser = Parsec String ParseState
|
> type Token = (L.Position,L.Token)
|
||||||
|
|
||||||
|
> --type Parser = Parsec String ParseState
|
||||||
|
> type Parser = GenParser Token ParseState
|
||||||
|
|
||||||
> guardDialect :: [Dialect] -> Parser ()
|
> guardDialect :: [Dialect] -> Parser ()
|
||||||
> guardDialect ds = do
|
> guardDialect ds = do
|
||||||
|
|
|
@ -29,6 +29,7 @@ Flag sqlindent
|
||||||
library
|
library
|
||||||
exposed-modules: Language.SQL.SimpleSQL.Pretty,
|
exposed-modules: Language.SQL.SimpleSQL.Pretty,
|
||||||
Language.SQL.SimpleSQL.Parser,
|
Language.SQL.SimpleSQL.Parser,
|
||||||
|
Language.SQL.SimpleSQL.Lexer,
|
||||||
Language.SQL.SimpleSQL.Syntax
|
Language.SQL.SimpleSQL.Syntax
|
||||||
Other-Modules: Language.SQL.SimpleSQL.Errors,
|
Other-Modules: Language.SQL.SimpleSQL.Errors,
|
||||||
Language.SQL.SimpleSQL.Combinators
|
Language.SQL.SimpleSQL.Combinators
|
||||||
|
@ -55,6 +56,7 @@ Test-Suite Tests
|
||||||
|
|
||||||
Other-Modules: Language.SQL.SimpleSQL.Pretty,
|
Other-Modules: Language.SQL.SimpleSQL.Pretty,
|
||||||
Language.SQL.SimpleSQL.Parser,
|
Language.SQL.SimpleSQL.Parser,
|
||||||
|
Language.SQL.SimpleSQL.Lexer,
|
||||||
Language.SQL.SimpleSQL.Syntax,
|
Language.SQL.SimpleSQL.Syntax,
|
||||||
Language.SQL.SimpleSQL.Errors,
|
Language.SQL.SimpleSQL.Errors,
|
||||||
Language.SQL.SimpleSQL.Combinators
|
Language.SQL.SimpleSQL.Combinators
|
||||||
|
@ -71,11 +73,12 @@ Test-Suite Tests
|
||||||
Language.SQL.SimpleSQL.TestTypes,
|
Language.SQL.SimpleSQL.TestTypes,
|
||||||
Language.SQL.SimpleSQL.Tests,
|
Language.SQL.SimpleSQL.Tests,
|
||||||
Language.SQL.SimpleSQL.Tpch,
|
Language.SQL.SimpleSQL.Tpch,
|
||||||
Language.SQL.SimpleSQL.ValueExprs
|
Language.SQL.SimpleSQL.ValueExprs,
|
||||||
|
Language.SQL.SimpleSQL.LexerTests
|
||||||
|
|
||||||
other-extensions: TupleSections,DeriveDataTypeable
|
other-extensions: TupleSections,DeriveDataTypeable
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
ghc-options: -Wall
|
ghc-options: -Wall -threaded
|
||||||
|
|
||||||
executable SQLIndent
|
executable SQLIndent
|
||||||
main-is: SQLIndent.lhs
|
main-is: SQLIndent.lhs
|
||||||
|
@ -91,3 +94,15 @@ executable SQLIndent
|
||||||
buildable: True
|
buildable: True
|
||||||
else
|
else
|
||||||
buildable: False
|
buildable: False
|
||||||
|
|
||||||
|
executable TestLex
|
||||||
|
main-is: TestLex.lhs
|
||||||
|
hs-source-dirs: .,tools
|
||||||
|
Build-Depends: base >=4.5 && <4.9,
|
||||||
|
parsec >=3.1 && <3.2,
|
||||||
|
mtl >=2.1 && <2.3,
|
||||||
|
pretty >= 1.1 && < 1.2
|
||||||
|
other-extensions: TupleSections,DeriveDataTypeable
|
||||||
|
default-language: Haskell2010
|
||||||
|
ghc-options: -Wall
|
||||||
|
buildable: False
|
||||||
|
|
144
tools/Language/SQL/SimpleSQL/LexerTests.lhs
Normal file
144
tools/Language/SQL/SimpleSQL/LexerTests.lhs
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
|
||||||
|
|
||||||
|
Test for the lexer
|
||||||
|
|
||||||
|
> module Language.SQL.SimpleSQL.LexerTests (lexerTests) where
|
||||||
|
|
||||||
|
> import Language.SQL.SimpleSQL.TestTypes
|
||||||
|
> import Language.SQL.SimpleSQL.Lexer (Token(..))
|
||||||
|
> --import Debug.Trace
|
||||||
|
|
||||||
|
> lexerTable :: [(String,[Token])]
|
||||||
|
> lexerTable =
|
||||||
|
> -- single char symbols
|
||||||
|
> map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
||||||
|
> -- multi char symbols
|
||||||
|
> ++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
||||||
|
> ++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||||
|
> -- simple identifiers
|
||||||
|
> in map (\i -> (i, [Identifier i])) idens
|
||||||
|
> ++ map (\i -> ("\"" ++ i ++ "\"", [QIdentifier i])) idens
|
||||||
|
> -- todo: in order to make lex . pretty id, need to
|
||||||
|
> -- preserve the case of the u
|
||||||
|
> ++ map (\i -> ("u&\"" ++ i ++ "\"", [UQIdentifier i])) idens
|
||||||
|
> -- host param
|
||||||
|
> ++ map (\i -> (':':i, [HostParam i])) idens
|
||||||
|
> )
|
||||||
|
> -- quoted identifiers with embedded double quotes
|
||||||
|
> ++ [("\"normal \"\" iden\"", [QIdentifier "normal \" iden"])]
|
||||||
|
> -- strings
|
||||||
|
> ++ [("'string'", [SqlString "string"])
|
||||||
|
> ,("'normal '' quote'", [SqlString "normal ' quote"])
|
||||||
|
> ,("'normalendquote '''", [SqlString "normalendquote '"])]
|
||||||
|
> -- csstrings
|
||||||
|
> ++ map (\c -> (c ++ "'test'", [CSSqlString c "test"]))
|
||||||
|
> ["n", "N","b", "B","x", "X", "u&"]
|
||||||
|
> -- numbers
|
||||||
|
> ++ [("10", [SqlNumber "10"])
|
||||||
|
> ,(".1", [SqlNumber ".1"])
|
||||||
|
> ,("5e3", [SqlNumber "5e3"])
|
||||||
|
> ,("5e+3", [SqlNumber "5e+3"])
|
||||||
|
> ,("5e-3", [SqlNumber "5e-3"])
|
||||||
|
> ,("10.2", [SqlNumber "10.2"])
|
||||||
|
> ,("10.2e7", [SqlNumber "10.2e7"])]
|
||||||
|
> -- whitespace
|
||||||
|
> ++ concat [[([a],[Whitespace [a]])
|
||||||
|
> ,([a,b], [Whitespace [a,b]])]
|
||||||
|
> | a <- " \n\t", b <- " \n\t"]
|
||||||
|
> -- line comment
|
||||||
|
> ++ map (\c -> (c, [LineComment c]))
|
||||||
|
> ["--", "-- ", "-- this is a comment"]
|
||||||
|
> -- block comment
|
||||||
|
> ++ map (\c -> (c, [BlockComment c]))
|
||||||
|
> ["/**/", "/* */","/* this is a comment */"
|
||||||
|
> -- todo: bug in the lexer, fix this
|
||||||
|
> --,"/* this *is/ a comment */"
|
||||||
|
> ]
|
||||||
|
|
||||||
|
|
||||||
|
> lexerTests :: TestItem
|
||||||
|
> lexerTests = Group "lexerTests" $
|
||||||
|
> [LexerTest SQL2011 s t | (s,t) <- lexerTable]
|
||||||
|
> ++
|
||||||
|
> [ LexerTest SQL2011 (s ++ s1) (t ++ t1)
|
||||||
|
> | (s,t) <- lexerTable
|
||||||
|
> , (s1,t1) <- lexerTable
|
||||||
|
|
||||||
|
which combinations won't work:
|
||||||
|
<> <= >= || two single symbols which make a double char symbol
|
||||||
|
identifier + identifier if both are quoted or unquoted
|
||||||
|
string string
|
||||||
|
csstring string
|
||||||
|
line comment anything (can add newline?)
|
||||||
|
number number (todo: double check more carefully)
|
||||||
|
|
||||||
|
> , isGood $ t ++ t1
|
||||||
|
|
||||||
|
> ]
|
||||||
|
> ++ map (uncurry $ LexerTest SQL2011)
|
||||||
|
> [("", [])
|
||||||
|
> ]
|
||||||
|
|
||||||
|
> where
|
||||||
|
> isGood :: [Token] -> Bool
|
||||||
|
> isGood l = {-let b =-} and $ map not [p l | p <- map listPred badCombos]
|
||||||
|
> -- in trace ("isGood " ++ show (l,b)) b
|
||||||
|
> badCombos :: [((Token -> Bool),(Token -> Bool))]
|
||||||
|
> badCombos = [symbolPair "<" ">"
|
||||||
|
> ,symbolPair "<" "="
|
||||||
|
> ,symbolPair ">" "="
|
||||||
|
> ,symbolPair "!" "="
|
||||||
|
> ,symbolPair "|" "|"
|
||||||
|
> ,symbolPair "||" "|"
|
||||||
|
> ,symbolPair "|" "||"
|
||||||
|
> ,symbolPair "||" "||"
|
||||||
|
> ,symbolPair "<" ">="
|
||||||
|
|
||||||
|
> ,symbolPair "-" "-"
|
||||||
|
> ,symbolPair "/" "*"
|
||||||
|
> ,symbolPair "*" "/"
|
||||||
|
|
||||||
|
> ,(isIdentifier, isIdentifier)
|
||||||
|
> ,(isQIdentifier, isQIdentifier)
|
||||||
|
> ,(isUQIdentifier, isQIdentifier)
|
||||||
|
> ,(isString, isString)
|
||||||
|
> ,(isCsString, isString)
|
||||||
|
> ,(isLineComment, const True)
|
||||||
|
> ,(isNumber, isNumber)
|
||||||
|
> ,(isHostParam,isIdentifier)
|
||||||
|
> ,(isHostParam,isCsString)
|
||||||
|
> ,(isHostParam,isUQIdentifier)
|
||||||
|
> ,(isIdentifier,isCsString)
|
||||||
|
> ,(isIdentifier,isUQIdentifier)
|
||||||
|
> ,(isWhitespace, isWhitespace)
|
||||||
|
> ,(isIdentifier, isNumber)
|
||||||
|
> ,(isHostParam, isNumber)
|
||||||
|
> ,(isMinus, isLineComment)
|
||||||
|
> ]
|
||||||
|
> isIdentifier (Identifier _) = True
|
||||||
|
> isIdentifier _ = False
|
||||||
|
> isQIdentifier (QIdentifier _) = True
|
||||||
|
> isQIdentifier _ = False
|
||||||
|
> isUQIdentifier (UQIdentifier _) = True
|
||||||
|
> isUQIdentifier _ = False
|
||||||
|
> isCsString (CSSqlString {}) = True
|
||||||
|
> isCsString _ = False
|
||||||
|
> isLineComment (LineComment{}) = True
|
||||||
|
> isLineComment _ = False
|
||||||
|
> isNumber (SqlNumber{}) = True
|
||||||
|
> isNumber _ = False
|
||||||
|
> isHostParam (HostParam{}) = True
|
||||||
|
> isHostParam _ = False
|
||||||
|
> isWhitespace (Whitespace{}) = True
|
||||||
|
> isWhitespace _ = False
|
||||||
|
> isMinus (Symbol "-") = True
|
||||||
|
> isMinus _ = False
|
||||||
|
|
||||||
|
> isString (SqlString _) = True
|
||||||
|
> isString _ = False
|
||||||
|
> symbolPair a b = ((==Symbol a), (==Symbol b))
|
||||||
|
> listPred :: ((Token -> Bool),(Token -> Bool)) -> [Token] -> Bool
|
||||||
|
> listPred _ [] = False
|
||||||
|
> listPred _ [_] = False
|
||||||
|
> listPred (p,p1) (t:t1:ts) | p t && p1 t1 = True
|
||||||
|
> | otherwise = listPred (p,p1) (t1:ts)
|
|
@ -7,6 +7,7 @@ Tests.lhs module for the 'interpreter'.
|
||||||
> ,Dialect(..)) where
|
> ,Dialect(..)) where
|
||||||
|
|
||||||
> import Language.SQL.SimpleSQL.Syntax
|
> import Language.SQL.SimpleSQL.Syntax
|
||||||
|
> import Language.SQL.SimpleSQL.Lexer (Token)
|
||||||
|
|
||||||
TODO: maybe make the dialect args into [dialect], then each test
|
TODO: maybe make the dialect args into [dialect], then each test
|
||||||
checks all the dialects mentioned work, and all the dialects not
|
checks all the dialects mentioned work, and all the dialects not
|
||||||
|
@ -28,4 +29,5 @@ check that the string given fails to parse
|
||||||
|
|
||||||
> | ParseQueryExprFails Dialect String
|
> | ParseQueryExprFails Dialect String
|
||||||
> | ParseValueExprFails Dialect String
|
> | ParseValueExprFails Dialect String
|
||||||
|
> | LexerTest Dialect String [Token]
|
||||||
> deriving (Eq,Show)
|
> deriving (Eq,Show)
|
||||||
|
|
|
@ -15,6 +15,7 @@ test data to the Test.Framework tests.
|
||||||
> --import Language.SQL.SimpleSQL.Syntax
|
> --import Language.SQL.SimpleSQL.Syntax
|
||||||
> import Language.SQL.SimpleSQL.Pretty
|
> import Language.SQL.SimpleSQL.Pretty
|
||||||
> import Language.SQL.SimpleSQL.Parser
|
> import Language.SQL.SimpleSQL.Parser
|
||||||
|
> import Language.SQL.SimpleSQL.Lexer
|
||||||
|
|
||||||
> import Language.SQL.SimpleSQL.TestTypes
|
> import Language.SQL.SimpleSQL.TestTypes
|
||||||
|
|
||||||
|
@ -26,6 +27,7 @@ test data to the Test.Framework tests.
|
||||||
> import Language.SQL.SimpleSQL.TableRefs
|
> import Language.SQL.SimpleSQL.TableRefs
|
||||||
> import Language.SQL.SimpleSQL.ValueExprs
|
> import Language.SQL.SimpleSQL.ValueExprs
|
||||||
> import Language.SQL.SimpleSQL.Tpch
|
> import Language.SQL.SimpleSQL.Tpch
|
||||||
|
> import Language.SQL.SimpleSQL.LexerTests
|
||||||
|
|
||||||
> import Language.SQL.SimpleSQL.SQL2011
|
> import Language.SQL.SimpleSQL.SQL2011
|
||||||
|
|
||||||
|
@ -37,7 +39,8 @@ order on the generated documentation.
|
||||||
> testData :: TestItem
|
> testData :: TestItem
|
||||||
> testData =
|
> testData =
|
||||||
> Group "parserTest"
|
> Group "parserTest"
|
||||||
> [valueExprTests
|
> [lexerTests
|
||||||
|
> ,valueExprTests
|
||||||
> ,queryExprComponentTests
|
> ,queryExprComponentTests
|
||||||
> ,queryExprsTests
|
> ,queryExprsTests
|
||||||
> ,tableRefTests
|
> ,tableRefTests
|
||||||
|
@ -73,6 +76,14 @@ order on the generated documentation.
|
||||||
> itemToTest (ParseValueExprFails d str) =
|
> itemToTest (ParseValueExprFails d str) =
|
||||||
> toFTest parseValueExpr prettyValueExpr d str
|
> toFTest parseValueExpr prettyValueExpr d str
|
||||||
|
|
||||||
|
> itemToTest (LexerTest d s ts) = makeLexerTest d s ts
|
||||||
|
|
||||||
|
> makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree
|
||||||
|
> makeLexerTest d s ts = H.testCase s $ do
|
||||||
|
> let lx = either (error . show) id $ lexSQL d ("", 1, 1) s
|
||||||
|
> H.assertEqual "" ts $ map snd lx
|
||||||
|
> let s' = prettyTokens d $ map snd lx
|
||||||
|
> H.assertEqual "pretty print" s s'
|
||||||
|
|
||||||
> toTest :: (Eq a, Show a) =>
|
> toTest :: (Eq a, Show a) =>
|
||||||
> (Dialect -> String -> Maybe (Int,Int) -> String -> Either ParseError a)
|
> (Dialect -> String -> Maybe (Int,Int) -> String -> Either ParseError a)
|
||||||
|
@ -123,9 +134,9 @@ order on the generated documentation.
|
||||||
> -> Dialect
|
> -> Dialect
|
||||||
> -> String
|
> -> String
|
||||||
> -> T.TestTree
|
> -> T.TestTree
|
||||||
> toFTest parser pp d str = H.testCase str $ do
|
> toFTest parser _pp d str = H.testCase str $ do
|
||||||
> let egot = parser d "" Nothing str
|
> let egot = parser d "" Nothing str
|
||||||
> case egot of
|
> case egot of
|
||||||
> Left e -> return ()
|
> Left _e -> return ()
|
||||||
> Right got ->
|
> Right _got ->
|
||||||
> H.assertFailure $ "parse didn't fail: " ++ show d ++ "\n" ++ str
|
> H.assertFailure $ "parse didn't fail: " ++ show d ++ "\n" ++ str
|
||||||
|
|
Loading…
Reference in a new issue