1
Fork 0

switch in megaparsec with stub lexing code

This commit is contained in:
Jake Wheat 2024-01-09 17:53:12 +00:00
parent d80796b1dd
commit 9396aa8cba
6 changed files with 345 additions and 129 deletions

View file

@ -52,38 +52,70 @@ start writing the error message tests:
still be manual) still be manual)
try again to add annotation to the ast try again to add annotation to the ast
-} -}
-- | Lexer for SQL. -- | Lexer for SQL.
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.SQL.SimpleSQL.Lex module Language.SQL.SimpleSQL.Lex
(Token(..) (Token(..)
,WithPos(..)
,lexSQL ,lexSQL
,prettyToken ,prettyToken
,prettyTokens ,prettyTokens
,ParseError(..) ,ParseError(..)
,prettyError
,tokenListWillPrintAndLex ,tokenListWillPrintAndLex
,ansi2011 ,ansi2011
) where ) where
import Language.SQL.SimpleSQL.Dialect import Language.SQL.SimpleSQL.Dialect
(Dialect(..)
,ansi2011
)
import Text.Megaparsec
(Parsec
,runParser'
,ParseErrorBundle(..)
,errorBundlePretty
,SourcePos(..)
,getSourcePos
,getOffset
,pstateSourcePos
,statePosState
,mkPos
,choice
,satisfy
,takeWhileP
,takeWhile1P
,(<?>)
,eof
,many
,try
,option
)
import Text.Megaparsec.Char
(string
,char
)
import Text.Megaparsec.State (initialState)
import Data.Void (Void)
import Text.Parsec (option,string,manyTill,anyChar
,try,string,many1,oneOf,digit,(<|>),choice,char,eof
,many,runParser,lookAhead,satisfy
,setPosition,getPosition
,setSourceColumn,setSourceLine
,sourceName, setSourceName
,sourceLine, sourceColumn
,notFollowedBy)
import Language.SQL.SimpleSQL.Combinators
import Language.SQL.SimpleSQL.Errors
import Control.Applicative hiding ((<|>), many)
import Data.Char import Data.Char
import Control.Monad (isAlphaNum
import Prelude hiding (takeWhile) ,isAlpha
import Text.Parsec.String (Parser) ,isSpace
import Data.Maybe ,isDigit
)
import Control.Monad (void)
import Data.Text (Text)
import qualified Data.Text as T
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -96,33 +128,33 @@ data Token
-- * multi char symbols <> \<= \>= != || -- * multi char symbols <> \<= \>= != ||
-- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( ) -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( )
-- --
= Symbol String = Symbol Text
-- | This is an identifier or keyword. The first field is -- | This is an identifier or keyword. The first field is
-- the quotes used, or nothing if no quotes were used. The quotes -- the quotes used, or nothing if no quotes were used. The quotes
-- can be " or u& or something dialect specific like [] -- can be " or u& or something dialect specific like []
| Identifier (Maybe (String,String)) String | Identifier (Maybe (Text,Text)) Text
-- | This is a prefixed variable symbol, such as :var, @var or #var -- | This is a prefixed variable symbol, such as :var, @var or #var
-- (only :var is used in ansi dialect) -- (only :var is used in ansi dialect)
| PrefixedVariable Char String | PrefixedVariable Char Text
-- | This is a positional arg identifier e.g. $1 -- | This is a positional arg identifier e.g. $1
| PositionalArg Int | PositionalArg Int
-- | This is a string literal. The first two fields are the -- -- | This is a string literal. The first two fields are the --
-- start and end quotes, which are usually both ', but can be -- start and end quotes, which are usually both ', but can be
-- the character set (one of nNbBxX, or u&, U&), or a dialect -- the character set (one of nNbBxX, or u&, U&), or a dialect
-- specific string quoting (such as $$ in postgres) -- specific string quoting (such as $$ in postgres)
| SqlString String String String | SqlString Text Text Text
-- | A number literal (integral or otherwise), stored in original format -- | A number literal (integral or otherwise), stored in original format
-- unchanged -- unchanged
| SqlNumber String | SqlNumber Text
-- | Whitespace, one or more of space, tab or newline. -- | Whitespace, one or more of space, tab or newline.
| Whitespace String | Whitespace Text
-- | A commented line using --, contains every character starting with the -- | A commented line using --, contains every character starting with the
-- \'--\' and including the terminating newline character if there is one -- \'--\' and including the terminating newline character if there is one
-- - this will be missing if the last line in the source is a line comment -- - this will be missing if the last line in the source is a line comment
-- with no trailing newline -- with no trailing newline
| LineComment String | LineComment Text
-- | A block comment, \/* stuff *\/, includes the comment delimiters -- | A block comment, \/* stuff *\/, includes the comment delimiters
| BlockComment String | BlockComment Text
deriving (Eq,Show) deriving (Eq,Show)
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -131,79 +163,98 @@ data Token
-- | Lex some SQL to a list of tokens. -- | Lex some SQL to a list of tokens.
lexSQL :: Dialect lexSQL :: Dialect
-- ^ dialect of SQL to use -- ^ dialect of SQL to use
-> FilePath -> Text
-- ^ filename to use in error messages -- ^ filename to use in error messages
-> Maybe (Int,Int) -> Maybe (Int,Int)
-- ^ line number and column number of the first character -- ^ line number and column number of the first character
-- in the source to use in error messages -- in the source to use in error messages
-> String -> Text
-- ^ the SQL source to lex -- ^ the SQL source to lex
-> Either ParseError [((String,Int,Int),Token)] -> Either ParseError [WithPos Token]
lexSQL dialect fn' p src = lexSQL dialect fn p src = myParse fn p (many (sqlToken dialect) <* (eof <?> "")) src
let (l',c') = fromMaybe (1,1) p
in either (Left . convParseError src) Right myParse :: Text -> (Maybe (Int,Int)) -> Parser a -> Text -> Either ParseError a
$ runParser (setPos (fn',l',c') *> many (sqlToken dialect) <* eof) () fn' src myParse name sp' p s =
where let sp = maybe (1,1) id sp'
setPos (fn,l,c) = do ps = SourcePos (T.unpack name) (mkPos $ fst sp) (mkPos $ snd sp)
fmap (flip setSourceName fn is = (initialState (T.unpack name) s)
. flip setSourceLine l sps = (statePosState is) {pstateSourcePos = ps}
. flip setSourceColumn c) getPosition is' = is {statePosState = sps}
>>= setPosition in snd $ runParser' p is'
prettyError :: ParseError -> Text
prettyError = T.pack . errorBundlePretty
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- pretty printing -- parsing boilerplate
type ParseError = ParseErrorBundle Text Void
type Parser = Parsec Void Text
-- | Positional information added to tokens to preserve source positions
-- for the parser
data WithPos a = WithPos
{ startPos :: SourcePos
, endPos :: SourcePos
, tokenLength :: Int
, tokenVal :: a
} deriving (Eq, Ord, Show)
------------------------------------------------------------------------------
-- pretty print
-- | Pretty printing, if you lex a bunch of tokens, then pretty -- | Pretty printing, if you lex a bunch of tokens, then pretty
-- print them, should should get back exactly the same string -- print them, should should get back exactly the same string
prettyToken :: Dialect -> Token -> String prettyToken :: Dialect -> Token -> Text
prettyToken _ (Symbol s) = s prettyToken _ (Symbol s) = s
prettyToken _ (Identifier Nothing t) = t prettyToken _ (Identifier Nothing t) = t
prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2 prettyToken _ (Identifier (Just (q1,q2)) t) = q1 <> t <> q2
prettyToken _ (PrefixedVariable c p) = c:p prettyToken _ (PrefixedVariable c p) = T.cons c p
prettyToken _ (PositionalArg p) = '$':show p prettyToken _ (PositionalArg p) = T.cons '$' $ T.pack $ show p
prettyToken _ (SqlString s e t) = s ++ t ++ e prettyToken _ (SqlString s e t) = s <> t <> e
prettyToken _ (SqlNumber r) = r prettyToken _ (SqlNumber r) = r
prettyToken _ (Whitespace t) = t prettyToken _ (Whitespace t) = t
prettyToken _ (LineComment l) = l prettyToken _ (LineComment l) = l
prettyToken _ (BlockComment c) = c prettyToken _ (BlockComment c) = c
prettyTokens :: Dialect -> [Token] -> String prettyTokens :: Dialect -> [Token] -> Text
prettyTokens d ts = concat $ map (prettyToken d) ts prettyTokens d ts = T.concat $ map (prettyToken d) ts
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- token parsers -- token parsers
-- | parser for a sql token -- | parser for a sql token
sqlToken :: Dialect -> Parser ((String,Int,Int),Token) sqlToken :: Dialect -> Parser (WithPos Token)
sqlToken d = do sqlToken d = do
p' <- getPosition -- possibly there's a more efficient way of doing the source positions?
let p = (sourceName p',sourceLine p', sourceColumn p') sp <- getSourcePos
off <- getOffset
{- t <- choice
The order of parsers is important: strings and quoted identifiers can [sqlString d
start out looking like normal identifiers, so we try to parse these ,identifier d
first and use a little bit of try. Line and block comments start like ,lineComment d
symbols, so we try these before symbol. Numbers can start with a . so ,blockComment d
this is also tried before symbol (a .1 will be parsed as a number, but ,sqlNumber d
. otherwise will be parsed as a symbol). ,positionalArg d
-} --,dontParseEndBlockComment d
,prefixedVariable d
(p,) <$> choice [sqlString d ,symbol d
,identifier d ,sqlWhitespace d]
,lineComment d off1 <- getOffset
,blockComment d ep <- getSourcePos
,sqlNumber d pure $ WithPos sp ep (off1 - off) t
,positionalArg d
,dontParseEndBlockComment d
,prefixedVariable d
,symbol d
,sqlWhitespace d]
-------------------------------------- --------------------------------------
sqlString :: Dialect -> Parser Token
sqlString d =
SqlString "'" "'" <$> (char_ '\'' *> takeWhileP (Just "non quote char") (/= '\'') <* char_ '\'')
{- {-
Parse a SQL string. Examples: Parse a SQL string. Examples:
@ -214,7 +265,7 @@ b'binary string'
x'hexidecimal string' x'hexidecimal string'
-} -}
{-
sqlString :: Dialect -> Parser Token sqlString :: Dialect -> Parser Token
sqlString d = dollarString <|> csString <|> normalString sqlString d = dollarString <|> csString <|> normalString
where where
@ -259,10 +310,27 @@ sqlString d = dollarString <|> csString <|> normalString
cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes) cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
++ [string "u&'" ++ [string "u&'"
,string "U&'"] ,string "U&'"]
-}
-------------------------------------- --------------------------------------
-- TODO: this reconstitutes the string from bits, instead of lifting
-- it in one piece from the source. This is a performance issue, not
-- sure if it will be significant. The same comment applies to most of
-- the other parsers
identifier :: Dialect -> Parser Token
identifier d = Identifier Nothing <$> identifierString d
identifierString :: Dialect -> Parser Text
identifierString _ = (do
c <- satisfy isFirstLetter
choice
[T.cons c <$> (takeWhileP (Just "identifier char") isNonFirstLetter)
,pure $ T.singleton c]) <?> "identifier"
where
isFirstLetter c = c == '_' || isAlpha c
isNonFirstLetter c = c == '_' || isAlphaNum c
{- {-
Parses identifiers: Parses identifiers:
@ -273,6 +341,7 @@ u&"unicode quoted identifier"
`mysql quoted identifier` `mysql quoted identifier`
-} -}
{-
identifier :: Dialect -> Parser Token identifier :: Dialect -> Parser Token
identifier d = identifier d =
choice choice
@ -317,9 +386,28 @@ identifierString =
isIdentifierChar :: Char -> Bool isIdentifierChar :: Char -> Bool
isIdentifierChar c = c == '_' || isAlphaNum c isIdentifierChar c = c == '_' || isAlphaNum c
-}
-------------------------------------- --------------------------------------
{-
I think it's always faster to use a string locally created in the parser code,
than to use one taken from the parsed source, unless you take it without modifying it,
the example here is using -- and \n. this won't be needed in this case if can work out
how to lift the entire comment as a single string from the source.
this concept does apply to things like symbols
-}
lineComment :: Dialect -> Parser Token lineComment :: Dialect -> Parser Token
lineComment _ = do
try (string_ "--") <?> ""
rest <- takeWhileP (Just "non newline character") (/='\n')
-- can you optionally read the \n to terminate the takewhilep without reparsing it?
suf <- option "" ("\n" <$ char_ '\n')
pure $ LineComment $ T.concat ["--", rest, suf]
{-lineComment :: Dialect -> Parser Token
lineComment _ = lineComment _ =
(\s -> LineComment $ concat ["--",s]) <$> (\s -> LineComment $ concat ["--",s]) <$>
-- try is used here in case we see a - symbol -- try is used here in case we see a - symbol
@ -332,16 +420,28 @@ lineComment _ =
conc a (Just b) = a ++ b conc a (Just b) = a ++ b
lineCommentEnd = lineCommentEnd =
Just "\n" <$ char '\n' Just "\n" <$ char '\n'
<|> Nothing <$ eof <|> Nothing <$ eof-}
-------------------------------------- --------------------------------------
blockComment :: Dialect -> Parser Token
blockComment _ = (do
try $ string_ "/*"
BlockComment . T.concat . ("/*":) <$> more) <?> ""
where
more = choice
[["*/"] <$ try (string_ "*/")
,char_ '*' *> (("*":) <$> more)
,(:) <$> takeWhile1P (Just "non comment terminator text") (/= '*') <*> more]
{- {-
Try is used in the block comment for the two symbol bits because we Try is used in the block comment for the two symbol bits because we
want to backtrack if we read the first symbol but the second symbol want to backtrack if we read the first symbol but the second symbol
isn't there. isn't there.
-} -}
{-
blockComment :: Dialect -> Parser Token blockComment :: Dialect -> Parser Token
blockComment _ = blockComment _ =
(\s -> BlockComment $ concat ["/*",s]) <$> (\s -> BlockComment $ concat ["/*",s]) <$>
@ -361,7 +461,7 @@ blockComment _ =
,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1)) ,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
-- not an end comment or nested comment, continue -- not an end comment or nested comment, continue
,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n] ,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]
-}
{- {-
This is to improve user experience: provide an error if we see */ This is to improve user experience: provide an error if we see */
@ -370,14 +470,22 @@ in them (which is a stupid thing to do). In other cases, the user
should write * / instead (I can't think of any cases when this would should write * / instead (I can't think of any cases when this would
be valid syntax though). be valid syntax though).
-} -}
{-
dontParseEndBlockComment :: Dialect -> Parser Token dontParseEndBlockComment :: Dialect -> Parser Token
dontParseEndBlockComment _ = dontParseEndBlockComment _ =
-- don't use try, then it should commit to the error -- don't use try, then it should commit to the error
try (string "*/") *> fail "comment end without comment start" try (string "*/") *> fail "comment end without comment start"
-}
-------------------------------------- --------------------------------------
sqlNumber :: Dialect -> Parser Token
sqlNumber _ =
SqlNumber <$> digits
digits :: Parser Text
digits = takeWhile1P (Just "digit") isDigit
{- {-
numbers numbers
@ -396,6 +504,7 @@ considered part of the constant; it is an operator applied to the
constant. constant.
-} -}
{-
sqlNumber :: Dialect -> Parser Token sqlNumber :: Dialect -> Parser Token
sqlNumber d = sqlNumber d =
SqlNumber <$> completeNumber SqlNumber <$> completeNumber
@ -427,20 +536,34 @@ sqlNumber d =
expon = (:) <$> oneOf "eE" <*> sInt expon = (:) <$> oneOf "eE" <*> sInt
sInt = (++) <$> option "" (string "+" <|> string "-") <*> int sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
pp = (<$$> (++)) pp = (<$$> (++))
-}
-------------------------------------- --------------------------------------
positionalArg :: Dialect -> Parser Token
positionalArg _ = PositionalArg <$> (char_ '$' *> (read . T.unpack <$> digits))
{-
positionalArg :: Dialect -> Parser Token positionalArg :: Dialect -> Parser Token
positionalArg d = positionalArg d =
guard (diPositionalArg d) >> guard (diPositionalArg d) >>
-- use try to avoid ambiguities with other syntax which starts with dollar -- use try to avoid ambiguities with other syntax which starts with dollar
PositionalArg <$> try (char '$' *> (read <$> many1 digit)) PositionalArg <$> try (char '$' *> (read <$> many1 digit))
-}
-------------------------------------- --------------------------------------
-- todo: I think the try here should read a prefix char, then a single valid
-- identifier char, then commit
prefixedVariable :: Dialect -> Parser Token
prefixedVariable d = try $ choice
[PrefixedVariable <$> (':' <$ char_ ':') <*> identifierString d
]
-- use try because : and @ can be part of other things also -- use try because : and @ can be part of other things also
{-
prefixedVariable :: Dialect -> Parser Token prefixedVariable :: Dialect -> Parser Token
prefixedVariable d = try $ choice prefixedVariable d = try $ choice
[PrefixedVariable <$> char ':' <*> identifierString [PrefixedVariable <$> char ':' <*> identifierString
@ -449,9 +572,21 @@ prefixedVariable d = try $ choice
,guard (diHashIdentifier d) >> ,guard (diHashIdentifier d) >>
PrefixedVariable <$> char '#' <*> identifierString PrefixedVariable <$> char '#' <*> identifierString
] ]
-}
-------------------------------------- --------------------------------------
symbol :: Dialect -> Parser Token
symbol _ =
Symbol <$> choice
[try $ choice $ map (\a -> string a) multiCharSymbols
,T.singleton <$> satisfy (`elem` singleLetterSymbol)
]
where
singleLetterSymbol = "(),-+*/<>=." :: String
multiCharSymbols = ["!=", "<>", ">=", "<=", "||"]
{- {-
Symbols Symbols
@ -461,7 +596,7 @@ A symbol is an operator, or one of the misc symbols which include:
The postgresql operator syntax allows a huge range of operators The postgresql operator syntax allows a huge range of operators
compared with ansi and other dialects compared with ansi and other dialects
-} -}
{-
symbol :: Dialect -> Parser Token symbol :: Dialect -> Parser Token
symbol d = Symbol <$> choice (concat symbol d = Symbol <$> choice (concat
[dots [dots
@ -576,16 +711,27 @@ generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
oneOf "<>=") oneOf "<>=")
<*> option [] opMoreChars <*> option [] opMoreChars
] ]
-}
-------------------------------------- --------------------------------------
sqlWhitespace :: Dialect -> Parser Token sqlWhitespace :: Dialect -> Parser Token
sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace) sqlWhitespace _ = Whitespace <$> takeWhile1P (Just "whitespace") isSpace <?> ""
--sqlWhitespace :: Dialect -> Parser Token
--sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- parser helpers -- parser helpers
char_ :: Char -> Parser ()
char_ = void . char
string_ :: Text -> Parser ()
string_ = void . string
{-
startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
startsWith p ps = do startsWith p ps = do
c <- satisfy p c <- satisfy p
@ -603,6 +749,7 @@ takeTill p = manyTill anyChar (peekSatisfy p)
peekSatisfy :: (Char -> Bool) -> Parser () peekSatisfy :: (Char -> Bool) -> Parser ()
peekSatisfy p = void $ lookAhead (satisfy p) peekSatisfy p = void $ lookAhead (satisfy p)
-}
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@ -629,7 +776,8 @@ successes. I don't think it succeeds this test at the moment
-- will pretty print then lex back to the same set of tokens. -- will pretty print then lex back to the same set of tokens.
-- Used internally, might be useful for generating SQL via lexical tokens. -- Used internally, might be useful for generating SQL via lexical tokens.
tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool
tokenListWillPrintAndLex _ [] = True tokenListWillPrintAndLex = undefined
{-tokenListWillPrintAndLex _ [] = True
tokenListWillPrintAndLex _ [_] = True tokenListWillPrintAndLex _ [_] = True
tokenListWillPrintAndLex d (a:b:xs) = tokenListWillPrintAndLex d (a:b:xs) =
tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs) tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs)
@ -770,3 +918,5 @@ TODO: not 100% on this always being bad
checkLastAChar f = case prettya of checkLastAChar f = case prettya of
(_:_) -> f $ last prettya (_:_) -> f $ last prettya
_ -> False _ -> False
-}

View file

@ -281,7 +281,7 @@ wrapParse :: Parser a
-> Maybe (Int,Int) -> Maybe (Int,Int)
-> String -> String
-> Either ParseError a -> Either ParseError a
wrapParse parser d f p src = do wrapParse parser d f p src = undefined {-do
let (l,c) = fromMaybe (1,1) p let (l,c) = fromMaybe (1,1) p
lx <- L.lexSQL d f (Just (l,c)) src lx <- L.lexSQL d f (Just (l,c)) src
either (Left . convParseError src) Right either (Left . convParseError src) Right
@ -294,7 +294,7 @@ wrapParse parser d f p src = do
keep (_,L.Whitespace {}) = False keep (_,L.Whitespace {}) = False
keep (_,L.LineComment {}) = False keep (_,L.LineComment {}) = False
keep (_,L.BlockComment {}) = False keep (_,L.BlockComment {}) = False
keep _ = True keep _ = True-}
{- {-
@ -2084,16 +2084,16 @@ keyword matching
-} -}
stringTok :: Parser (String,String,String) stringTok :: Parser (String,String,String)
stringTok = mytoken (\tok -> stringTok = undefined {-mytoken (\tok ->
case tok of case tok of
L.SqlString s e t -> Just (s,e,t) L.SqlString s e t -> Just (s,e,t)
_ -> Nothing) _ -> Nothing)-}
singleQuotesOnlyStringTok :: Parser String singleQuotesOnlyStringTok :: Parser String
singleQuotesOnlyStringTok = mytoken (\tok -> singleQuotesOnlyStringTok = undefined {-mytoken (\tok ->
case tok of case tok of
L.SqlString "'" "'" t -> Just t L.SqlString "'" "'" t -> Just t
_ -> Nothing) _ -> Nothing)-}
{- {-
This is to support SQL strings where you can write This is to support SQL strings where you can write
@ -2104,7 +2104,7 @@ It is only allowed when all the strings are quoted with ' atm.
-} -}
stringTokExtend :: Parser (String,String,String) stringTokExtend :: Parser (String,String,String)
stringTokExtend = do stringTokExtend = undefined {-do
(s,e,x) <- stringTok (s,e,x) <- stringTok
choice [ choice [
do do
@ -2113,48 +2113,48 @@ stringTokExtend = do
guard (s' == "'" && e' == "'") guard (s' == "'" && e' == "'")
return $ (s,e,x ++ y) return $ (s,e,x ++ y)
,return (s,e,x) ,return (s,e,x)
] ]-}
hostParamTok :: Parser String hostParamTok :: Parser String
hostParamTok = mytoken (\tok -> hostParamTok = undefined {-mytoken (\tok ->
case tok of case tok of
L.PrefixedVariable c p -> Just (c:p) L.PrefixedVariable c p -> Just (c:p)
_ -> Nothing) _ -> Nothing)-}
positionalArgTok :: Parser Int positionalArgTok :: Parser Int
positionalArgTok = mytoken (\tok -> positionalArgTok = undefined {-mytoken (\tok ->
case tok of case tok of
L.PositionalArg p -> Just p L.PositionalArg p -> Just p
_ -> Nothing) _ -> Nothing)-}
sqlNumberTok :: Bool -> Parser String sqlNumberTok :: Bool -> Parser String
sqlNumberTok intOnly = mytoken (\tok -> sqlNumberTok intOnly = undefined {-mytoken (\tok ->
case tok of case tok of
L.SqlNumber p | not intOnly || all isDigit p -> Just p L.SqlNumber p | not intOnly || all isDigit p -> Just p
_ -> Nothing) _ -> Nothing)-}
symbolTok :: Maybe String -> Parser String symbolTok :: Maybe String -> Parser String
symbolTok sym = mytoken (\tok -> symbolTok sym = undefined {-mytoken (\tok ->
case (sym,tok) of case (sym,tok) of
(Nothing, L.Symbol p) -> Just p (Nothing, L.Symbol p) -> Just p
(Just s, L.Symbol p) | s == p -> Just p (Just s, L.Symbol p) | s == p -> Just p
_ -> Nothing) _ -> Nothing)-}
identifierTok :: [String] -> Parser (Maybe (String,String), String) identifierTok :: [String] -> Parser (Maybe (String,String), String)
identifierTok blackList = mytoken (\tok -> identifierTok blackList = undefined {-mytoken (\tok ->
case tok of case tok of
L.Identifier q@(Just {}) p -> Just (q,p) L.Identifier q@(Just {}) p -> Just (q,p)
L.Identifier q p | map toLower p `notElem` blackList -> Just (q,p) L.Identifier q p | map toLower p `notElem` blackList -> Just (q,p)
_ -> Nothing) _ -> Nothing)-}
unquotedIdentifierTok :: [String] -> Maybe String -> Parser String unquotedIdentifierTok :: [String] -> Maybe String -> Parser String
unquotedIdentifierTok blackList kw = mytoken (\tok -> unquotedIdentifierTok blackList kw = undefined {-mytoken (\tok ->
case (kw,tok) of case (kw,tok) of
(Nothing, L.Identifier Nothing p) | map toLower p `notElem` blackList -> Just p (Nothing, L.Identifier Nothing p) | map toLower p `notElem` blackList -> Just p
(Just k, L.Identifier Nothing p) | k == map toLower p -> Just p (Just k, L.Identifier Nothing p) | k == map toLower p -> Just p
_ -> Nothing) _ -> Nothing)-}
mytoken :: (L.Token -> Maybe a) -> Parser a mytoken :: (L.Token -> Maybe a) -> Parser a
mytoken test = token showToken posToken testToken mytoken test = token showToken posToken testToken

View file

@ -39,7 +39,9 @@ Flag fixitytest
common shared-properties common shared-properties
default-language: Haskell2010 default-language: Haskell2010
build-depends: base >=4 && <5, build-depends: base >=4 && <5,
parsec >=3.1 && <3.2, megaparsec >=9.6 && <9.7,
parser-combinators >= 1.3 && < 1.4,
parsec,
mtl >=2.1 && <2.4, mtl >=2.1 && <2.4,
prettyprinter >= 1.7 && < 1.8, prettyprinter >= 1.7 && < 1.8,
text >= 2.1 && < 2.2 text >= 2.1 && < 2.2

View file

@ -2,23 +2,89 @@
-- Test for the lexer -- Test for the lexer
{-
TODO:
figure out a way to do quickcheck testing:
1. generate valid tokens and check they parse
2. combine two generated tokens together for the combo testing
this especially will work much better for the postgresql extensible
operator tests which doing exhaustively takes ages and doesn't bring
much benefit over testing a few using quickcheck.
-}
{-# LANGUAGE OverloadedStrings #-}
module Language.SQL.SimpleSQL.LexerTests (lexerTests) where module Language.SQL.SimpleSQL.LexerTests (lexerTests) where
import Language.SQL.SimpleSQL.TestTypes import Language.SQL.SimpleSQL.TestTypes
import Language.SQL.SimpleSQL.Lex (Token(..),tokenListWillPrintAndLex) import Language.SQL.SimpleSQL.Lex
(Token(..)
,tokenListWillPrintAndLex
)
import Language.SQL.SimpleSQL.Dialect
(ansi2011)
import qualified Data.Text as T
--import Debug.Trace --import Debug.Trace
--import Data.Char (isAlpha) --import Data.Char (isAlpha)
import Data.List -- import Data.List
lexerTests :: TestItem lexerTests :: TestItem
lexerTests = Group "lexerTests" $ lexerTests = Group "lexerTests" $
[Group "lexer token tests" [ansiLexerTests [bootstrapTests{-Group "lexer token tests" [ansiLexerTests
,postgresLexerTests ,postgresLexerTests
,sqlServerLexerTests ,sqlServerLexerTests
,oracleLexerTests ,oracleLexerTests
,mySqlLexerTests ,mySqlLexerTests
,odbcLexerTests]] ,odbcLexerTests]-}]
-- quick sanity tests to see something working
bootstrapTests :: TestItem
bootstrapTests = Group "bootstrap tests" $
map (uncurry (LexTest ansi2011)) (
[("iden", [Identifier Nothing "iden"])
,("'string'", [SqlString "'" "'" "string"])
,(" ", [Whitespace " "])
,("\t ", [Whitespace "\t "])
,(" \n ", [Whitespace " \n "])
,("--", [LineComment "--"])
,("--\n", [LineComment "--\n"])
,("--stuff", [LineComment "--stuff"])
,("-- stuff", [LineComment "-- stuff"])
,("-- stuff\n", [LineComment "-- stuff\n"])
,("--\nstuff", [LineComment "--\n", Identifier Nothing "stuff"])
,("-- com \nstuff", [LineComment "-- com \n", Identifier Nothing "stuff"])
,("/*test1*/", [BlockComment "/*test1*/"])
,("/**/", [BlockComment "/**/"])
,("/***/", [BlockComment "/***/"])
,("/* * */", [BlockComment "/* * */"])
,("/*test*/", [BlockComment "/*test*/"])
,("/*te/*st*/", [BlockComment "/*te/*st*/"])
,("/*te*st*/", [BlockComment "/*te*st*/"])
,("/*lines\nmore lines*/", [BlockComment "/*lines\nmore lines*/"])
,("/*test1*/\n", [BlockComment "/*test1*/", Whitespace "\n"])
,("/*test1*/stuff", [BlockComment "/*test1*/", Identifier Nothing "stuff"])
,("1", [SqlNumber "1"])
,("42", [SqlNumber "42"])
,("$1", [PositionalArg 1])
,("$200", [PositionalArg 200])
,(":test", [PrefixedVariable ':' "test"])
] ++ map (\a -> (a, [Symbol a])) (
["!=", "<>", ">=", "<=", "||"]
++ map T.singleton ("(),-+*/<>=." :: String)))
{-
ansiLexerTable :: [(String,[Token])] ansiLexerTable :: [(String,[Token])]
ansiLexerTable = ansiLexerTable =
-- single char symbols -- single char symbols
@ -331,13 +397,4 @@ combos :: [a] -> Int -> [[a]]
combos _ 0 = [[]] combos _ 0 = [[]]
combos l n = [ x:tl | x <- l, tl <- combos l (n - 1) ] combos l n = [ x:tl | x <- l, tl <- combos l (n - 1) ]
{-
figure out a way to do quickcheck testing:
1. generate valid tokens and check they parse
2. combine two generated tokens together for the combo testing
this especially will work much better for the postgresql extensible
operator tests which doing exhaustively takes ages and doesn't bring
much benefit over testing a few using quickcheck.
-} -}

View file

@ -13,6 +13,8 @@ import Language.SQL.SimpleSQL.Syntax
import Language.SQL.SimpleSQL.Lex (Token) import Language.SQL.SimpleSQL.Lex (Token)
import Language.SQL.SimpleSQL.Dialect import Language.SQL.SimpleSQL.Dialect
import Data.Text (Text)
{- {-
TODO: maybe make the dialect args into [dialect], then each test TODO: maybe make the dialect args into [dialect], then each test
checks all the dialects mentioned work, and all the dialects not checks all the dialects mentioned work, and all the dialects not
@ -38,6 +40,6 @@ should all be TODO to convert to a testqueryexpr test.
| ParseQueryExprFails Dialect String | ParseQueryExprFails Dialect String
| ParseScalarExprFails Dialect String | ParseScalarExprFails Dialect String
| LexTest Dialect String [Token] | LexTest Dialect Text [Token]
| LexFails Dialect String | LexFails Dialect String
deriving (Eq,Show) deriving (Eq,Show)

View file

@ -5,6 +5,7 @@ Test.Framework tests. It also contains the code which converts the
test data to the Test.Framework tests. test data to the Test.Framework tests.
-} -}
{-# LANGUAGE OverloadedStrings #-}
module Language.SQL.SimpleSQL.Tests module Language.SQL.SimpleSQL.Tests
(testData (testData
,tests ,tests
@ -17,7 +18,7 @@ import qualified Test.Tasty.HUnit as H
--import Language.SQL.SimpleSQL.Syntax --import Language.SQL.SimpleSQL.Syntax
import Language.SQL.SimpleSQL.Pretty import Language.SQL.SimpleSQL.Pretty
import Language.SQL.SimpleSQL.Parse import Language.SQL.SimpleSQL.Parse
import Language.SQL.SimpleSQL.Lex import qualified Language.SQL.SimpleSQL.Lex as Lex
import Language.SQL.SimpleSQL.TestTypes import Language.SQL.SimpleSQL.TestTypes
@ -44,6 +45,9 @@ import Language.SQL.SimpleSQL.MySQL
import Language.SQL.SimpleSQL.Oracle import Language.SQL.SimpleSQL.Oracle
import Language.SQL.SimpleSQL.CustomDialect import Language.SQL.SimpleSQL.CustomDialect
import Data.Text (Text)
import qualified Data.Text as T
{- {-
Order the tests to start from the simplest first. This is also the Order the tests to start from the simplest first. This is also the
@ -54,7 +58,7 @@ testData :: TestItem
testData = testData =
Group "parserTest" Group "parserTest"
[lexerTests [lexerTests
,scalarExprTests {-,scalarExprTests
,odbcTests ,odbcTests
,queryExprComponentTests ,queryExprComponentTests
,queryExprsTests ,queryExprsTests
@ -72,7 +76,7 @@ testData =
,oracleTests ,oracleTests
,customDialectTests ,customDialectTests
,emptyStatementTests ,emptyStatementTests
,createIndexTests ,createIndexTests-}
] ]
tests :: T.TestTree tests :: T.TestTree
@ -104,18 +108,19 @@ itemToTest (ParseScalarExprFails d str) =
itemToTest (LexTest d s ts) = makeLexerTest d s ts itemToTest (LexTest d s ts) = makeLexerTest d s ts
itemToTest (LexFails d s) = makeLexingFailsTest d s itemToTest (LexFails d s) = makeLexingFailsTest d s
makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree makeLexerTest :: Dialect -> Text -> [Lex.Token] -> T.TestTree
makeLexerTest d s ts = H.testCase s $ do makeLexerTest d s ts = H.testCase (T.unpack s) $ do
let lx = either (error . show) id $ lexSQL d "" Nothing s let lx = either (error . T.unpack . Lex.prettyError) id $ Lex.lexSQL d "" Nothing s
H.assertEqual "" ts $ map snd lx ts1 = map Lex.tokenVal lx
let s' = prettyTokens d $ map snd lx H.assertEqual "" ts ts1
let s' = Lex.prettyTokens d $ ts1
H.assertEqual "pretty print" s s' H.assertEqual "pretty print" s s'
makeLexingFailsTest :: Dialect -> String -> T.TestTree makeLexingFailsTest :: Dialect -> String -> T.TestTree
makeLexingFailsTest d s = H.testCase s $ do makeLexingFailsTest d s = H.testCase s $ do
case lexSQL d "" Nothing s of undefined {-case lexSQL d "" Nothing s of
Right x -> H.assertFailure $ "lexing should have failed: " ++ s ++ "\ngot: " ++ show x Right x -> H.assertFailure $ "lexing should have failed: " ++ s ++ "\ngot: " ++ show x
Left _ -> return () Left _ -> return ()-}
toTest :: (Eq a, Show a) => toTest :: (Eq a, Show a) =>