get old lexing code working again, now only 3 tests fail
This commit is contained in:
parent
0f307f51c7
commit
4e09fe9f45
|
@ -19,6 +19,21 @@ directly without the separately testing lexing stage.
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
|
|
||||||
|
optimisations:
|
||||||
|
|
||||||
|
check for left factor opportunities
|
||||||
|
check for places where it parses a few substrings from the source,
|
||||||
|
then puts them back together with a concatenate of some flavour
|
||||||
|
-> this is better if can find a way to parse the entire string
|
||||||
|
from the source and lift it in one go into the lexical token
|
||||||
|
before this is done, a smaller optimisation is when any code matches
|
||||||
|
a constant string in the lexer, use that constant string instead
|
||||||
|
of the string from the parser, it might make a small difference in
|
||||||
|
a few places
|
||||||
|
maybe every token should carry the exact source as well as any fields
|
||||||
|
it's been broken into - so pretty printing is trivial
|
||||||
|
|
||||||
|
|
||||||
make the tokenswill print more dialect accurate. Maybe add symbol
|
make the tokenswill print more dialect accurate. Maybe add symbol
|
||||||
chars and identifier chars to the dialect definition and use them from
|
chars and identifier chars to the dialect definition and use them from
|
||||||
here
|
here
|
||||||
|
@ -98,12 +113,19 @@ import Text.Megaparsec
|
||||||
,many
|
,many
|
||||||
,try
|
,try
|
||||||
,option
|
,option
|
||||||
|
,(<|>)
|
||||||
|
,notFollowedBy
|
||||||
|
,manyTill
|
||||||
|
,anySingle
|
||||||
|
,lookAhead
|
||||||
)
|
)
|
||||||
|
import qualified Text.Megaparsec as M
|
||||||
import Text.Megaparsec.Char
|
import Text.Megaparsec.Char
|
||||||
(string
|
(string
|
||||||
,char
|
,char
|
||||||
)
|
)
|
||||||
import Text.Megaparsec.State (initialState)
|
import Text.Megaparsec.State (initialState)
|
||||||
|
import Control.Applicative ((<**>))
|
||||||
|
|
||||||
import Data.Void (Void)
|
import Data.Void (Void)
|
||||||
|
|
||||||
|
@ -113,7 +135,7 @@ import Data.Char
|
||||||
,isSpace
|
,isSpace
|
||||||
,isDigit
|
,isDigit
|
||||||
)
|
)
|
||||||
import Control.Monad (void)
|
import Control.Monad (void, guard)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
@ -241,7 +263,7 @@ sqlToken d = do
|
||||||
,blockComment d
|
,blockComment d
|
||||||
,sqlNumber d
|
,sqlNumber d
|
||||||
,positionalArg d
|
,positionalArg d
|
||||||
--,dontParseEndBlockComment d
|
,dontParseEndBlockComment d
|
||||||
,prefixedVariable d
|
,prefixedVariable d
|
||||||
,symbol d
|
,symbol d
|
||||||
,sqlWhitespace d]
|
,sqlWhitespace d]
|
||||||
|
@ -251,10 +273,6 @@ sqlToken d = do
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
sqlString :: Dialect -> Parser Token
|
|
||||||
sqlString _d =
|
|
||||||
SqlString "'" "'" <$> (char_ '\'' *> takeWhileP (Just "non quote char") (/= '\'') <* char_ '\'')
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
Parse a SQL string. Examples:
|
Parse a SQL string. Examples:
|
||||||
|
|
||||||
|
@ -265,7 +283,6 @@ b'binary string'
|
||||||
x'hexidecimal string'
|
x'hexidecimal string'
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-
|
|
||||||
sqlString :: Dialect -> Parser Token
|
sqlString :: Dialect -> Parser Token
|
||||||
sqlString d = dollarString <|> csString <|> normalString
|
sqlString d = dollarString <|> csString <|> normalString
|
||||||
where
|
where
|
||||||
|
@ -273,21 +290,21 @@ sqlString d = dollarString <|> csString <|> normalString
|
||||||
guard $ diDollarString d
|
guard $ diDollarString d
|
||||||
-- use try because of ambiguity with symbols and with
|
-- use try because of ambiguity with symbols and with
|
||||||
-- positional arg
|
-- positional arg
|
||||||
delim <- (\x -> concat ["$",x,"$"])
|
delim <- (\x -> T.concat ["$",x,"$"])
|
||||||
<$> try (char '$' *> option "" identifierString <* char '$')
|
<$> try (char '$' *> option "" identifierString <* char '$')
|
||||||
SqlString delim delim <$> manyTill anyChar (try $ string delim)
|
SqlString delim delim . T.pack <$> manyTill anySingle (try $ string delim)
|
||||||
normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix False "")
|
normalString = SqlString "'" "'" <$> (char '\'' *> normalStringSuffix False "")
|
||||||
normalStringSuffix allowBackslash t = do
|
normalStringSuffix allowBackslash t = do
|
||||||
s <- takeTill $ if allowBackslash
|
s <- takeWhileP Nothing $ if allowBackslash
|
||||||
then (`elem` "'\\")
|
then (`notElemChar` "'\\")
|
||||||
else (== '\'')
|
else (/= '\'')
|
||||||
-- deal with '' or \' as literal quote character
|
-- deal with '' or \' as literal quote character
|
||||||
choice [do
|
choice [do
|
||||||
ctu <- choice ["''" <$ try (string "''")
|
ctu <- choice ["''" <$ try (string "''")
|
||||||
,"\\'" <$ string "\\'"
|
,"\\'" <$ string "\\'"
|
||||||
,"\\" <$ char '\\']
|
,"\\" <$ char '\\']
|
||||||
normalStringSuffix allowBackslash $ concat [t,s,ctu]
|
normalStringSuffix allowBackslash $ T.concat [t,s,ctu]
|
||||||
,concat [t,s] <$ char '\'']
|
,T.concat [t,s] <$ char '\'']
|
||||||
-- try is used to to avoid conflicts with
|
-- try is used to to avoid conflicts with
|
||||||
-- identifiers which can start with n,b,x,u
|
-- identifiers which can start with n,b,x,u
|
||||||
-- once we read the quote type and the starting '
|
-- once we read the quote type and the starting '
|
||||||
|
@ -299,38 +316,19 @@ sqlString d = dollarString <|> csString <|> normalString
|
||||||
csString
|
csString
|
||||||
| diEString d =
|
| diEString d =
|
||||||
choice [SqlString <$> try (string "e'" <|> string "E'")
|
choice [SqlString <$> try (string "e'" <|> string "E'")
|
||||||
<*> return "'" <*> normalStringSuffix True ""
|
<*> pure "'" <*> normalStringSuffix True ""
|
||||||
,csString']
|
,csString']
|
||||||
| otherwise = csString'
|
| otherwise = csString'
|
||||||
csString' = SqlString
|
csString' = SqlString
|
||||||
<$> try cs
|
<$> try cs
|
||||||
<*> return "'"
|
<*> pure "'"
|
||||||
<*> normalStringSuffix False ""
|
<*> normalStringSuffix False ""
|
||||||
csPrefixes = "nNbBxX"
|
csPrefixes = (map (flip T.cons "'") "nNbBxX") ++ ["u&'", "U&'"]
|
||||||
cs = choice $ (map (\x -> string ([x] ++ "'")) csPrefixes)
|
cs :: Parser Text
|
||||||
++ [string "u&'"
|
cs = choice $ map string csPrefixes
|
||||||
,string "U&'"]
|
|
||||||
-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
-- TODO: this reconstitutes the string from bits, instead of lifting
|
|
||||||
-- it in one piece from the source. This is a performance issue, not
|
|
||||||
-- sure if it will be significant. The same comment applies to most of
|
|
||||||
-- the other parsers
|
|
||||||
identifier :: Dialect -> Parser Token
|
|
||||||
identifier d = Identifier Nothing <$> identifierString d
|
|
||||||
|
|
||||||
identifierString :: Dialect -> Parser Text
|
|
||||||
identifierString _ = (do
|
|
||||||
c <- satisfy isFirstLetter
|
|
||||||
choice
|
|
||||||
[T.cons c <$> (takeWhileP (Just "identifier char") isNonFirstLetter)
|
|
||||||
,pure $ T.singleton c]) <?> "identifier"
|
|
||||||
where
|
|
||||||
isFirstLetter c = c == '_' || isAlpha c
|
|
||||||
isNonFirstLetter c = c == '_' || isAlphaNum c
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
Parses identifiers:
|
Parses identifiers:
|
||||||
|
|
||||||
|
@ -341,7 +339,6 @@ u&"unicode quoted identifier"
|
||||||
`mysql quoted identifier`
|
`mysql quoted identifier`
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-
|
|
||||||
identifier :: Dialect -> Parser Token
|
identifier :: Dialect -> Parser Token
|
||||||
identifier d =
|
identifier d =
|
||||||
choice
|
choice
|
||||||
|
@ -355,50 +352,39 @@ identifier d =
|
||||||
regularIden = Identifier Nothing <$> identifierString
|
regularIden = Identifier Nothing <$> identifierString
|
||||||
quotedIden = Identifier (Just ("\"","\"")) <$> qidenPart
|
quotedIden = Identifier (Just ("\"","\"")) <$> qidenPart
|
||||||
mySqlQuotedIden = Identifier (Just ("`","`"))
|
mySqlQuotedIden = Identifier (Just ("`","`"))
|
||||||
<$> (char '`' *> takeWhile1 (/='`') <* char '`')
|
<$> (char '`' *> takeWhile1P Nothing (/='`') <* char '`')
|
||||||
sqlServerQuotedIden = Identifier (Just ("[","]"))
|
sqlServerQuotedIden = Identifier (Just ("[","]"))
|
||||||
<$> (char '[' *> takeWhile1 (`notElem` "[]") <* char ']')
|
<$> (char '[' *> takeWhile1P Nothing (`notElemChar` "[]") <* char ']')
|
||||||
-- try is used here to avoid a conflict with identifiers
|
-- try is used here to avoid a conflict with identifiers
|
||||||
-- and quoted strings which also start with a 'u'
|
-- and quoted strings which also start with a 'u'
|
||||||
unicodeQuotedIden = Identifier
|
unicodeQuotedIden = Identifier
|
||||||
<$> (f <$> try (oneOf "uU" <* string "&"))
|
<$> (f <$> try ((oneOf "uU") <* string "&"))
|
||||||
<*> qidenPart
|
<*> qidenPart
|
||||||
where f x = Just (x: "&\"", "\"")
|
where f x = Just (T.cons x "&\"", "\"")
|
||||||
qidenPart = char '"' *> qidenSuffix ""
|
qidenPart = char '"' *> qidenSuffix ""
|
||||||
qidenSuffix t = do
|
qidenSuffix t = do
|
||||||
s <- takeTill (=='"')
|
s <- takeWhileP Nothing (/='"')
|
||||||
void $ char '"'
|
void $ char '"'
|
||||||
-- deal with "" as literal double quote character
|
-- deal with "" as literal double quote character
|
||||||
choice [do
|
choice [do
|
||||||
void $ char '"'
|
void $ char '"'
|
||||||
qidenSuffix $ concat [t,s,"\"\""]
|
qidenSuffix $ T.concat [t,s,"\"\""]
|
||||||
,return $ concat [t,s]]
|
,pure $ T.concat [t,s]]
|
||||||
|
|
||||||
|
identifierString :: Parser Text
|
||||||
-- This parses a valid identifier without quotes.
|
identifierString = (do
|
||||||
|
c <- satisfy isFirstLetter
|
||||||
identifierString :: Parser String
|
choice
|
||||||
identifierString =
|
[T.cons c <$> (takeWhileP (Just "identifier char") isIdentifierChar)
|
||||||
startsWith (\c -> c == '_' || isAlpha c) isIdentifierChar
|
,pure $ T.singleton c]) <?> "identifier"
|
||||||
|
where
|
||||||
-- this can be moved to the dialect at some point
|
isFirstLetter c = c == '_' || isAlpha c
|
||||||
|
|
||||||
isIdentifierChar :: Char -> Bool
|
isIdentifierChar :: Char -> Bool
|
||||||
isIdentifierChar c = c == '_' || isAlphaNum c
|
isIdentifierChar c = c == '_' || isAlphaNum c
|
||||||
|
|
||||||
-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
{-
|
|
||||||
I think it's always faster to use a string locally created in the parser code,
|
|
||||||
than to use one taken from the parsed source, unless you take it without modifying it,
|
|
||||||
the example here is using -- and \n. this won't be needed in this case if can work out
|
|
||||||
how to lift the entire comment as a single string from the source.
|
|
||||||
|
|
||||||
this concept does apply to things like symbols
|
|
||||||
-}
|
|
||||||
|
|
||||||
lineComment :: Dialect -> Parser Token
|
lineComment :: Dialect -> Parser Token
|
||||||
lineComment _ = do
|
lineComment _ = do
|
||||||
try (string_ "--") <?> ""
|
try (string_ "--") <?> ""
|
||||||
|
@ -407,62 +393,22 @@ lineComment _ = do
|
||||||
suf <- option "" ("\n" <$ char_ '\n')
|
suf <- option "" ("\n" <$ char_ '\n')
|
||||||
pure $ LineComment $ T.concat ["--", rest, suf]
|
pure $ LineComment $ T.concat ["--", rest, suf]
|
||||||
|
|
||||||
{-lineComment :: Dialect -> Parser Token
|
|
||||||
lineComment _ =
|
|
||||||
(\s -> LineComment $ concat ["--",s]) <$>
|
|
||||||
-- try is used here in case we see a - symbol
|
|
||||||
-- once we read two -- then we commit to the comment token
|
|
||||||
(try (string "--") *> (
|
|
||||||
-- todo: there must be a better way to do this
|
|
||||||
conc <$> manyTill anyChar (lookAhead lineCommentEnd) <*> lineCommentEnd))
|
|
||||||
where
|
|
||||||
conc a Nothing = a
|
|
||||||
conc a (Just b) = a ++ b
|
|
||||||
lineCommentEnd =
|
|
||||||
Just "\n" <$ char '\n'
|
|
||||||
<|> Nothing <$ eof-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
|
-- TODO: the parser before the switch to megaparsec parsed nested block comments
|
||||||
|
-- I don't know any dialects that use this, but I think it's useful, if needed,
|
||||||
|
-- add it back in under a dialect flag?
|
||||||
blockComment :: Dialect -> Parser Token
|
blockComment :: Dialect -> Parser Token
|
||||||
blockComment _ = (do
|
blockComment _ = (do
|
||||||
try $ string_ "/*"
|
try $ string_ "/*"
|
||||||
BlockComment . T.concat . ("/*":) <$> more) <?> ""
|
BlockComment . T.concat . ("/*":) <$> more) <?> ""
|
||||||
where
|
where
|
||||||
more = choice
|
more = choice
|
||||||
[["*/"] <$ try (string_ "*/")
|
[["*/"] <$ try (string_ "*/") -- comment ended
|
||||||
,char_ '*' *> (("*":) <$> more)
|
,char_ '*' *> (("*":) <$> more) -- comment contains * but this isn't the comment end token
|
||||||
|
-- not sure if there's an easy optimisation here
|
||||||
,(:) <$> takeWhile1P (Just "non comment terminator text") (/= '*') <*> more]
|
,(:) <$> takeWhile1P (Just "non comment terminator text") (/= '*') <*> more]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{-
|
|
||||||
Try is used in the block comment for the two symbol bits because we
|
|
||||||
want to backtrack if we read the first symbol but the second symbol
|
|
||||||
isn't there.
|
|
||||||
-}
|
|
||||||
{-
|
|
||||||
blockComment :: Dialect -> Parser Token
|
|
||||||
blockComment _ =
|
|
||||||
(\s -> BlockComment $ concat ["/*",s]) <$>
|
|
||||||
(try (string "/*") *> commentSuffix 0)
|
|
||||||
where
|
|
||||||
commentSuffix :: Int -> Parser String
|
|
||||||
commentSuffix n = do
|
|
||||||
-- read until a possible end comment or nested comment
|
|
||||||
x <- takeWhile (\e -> e /= '/' && e /= '*')
|
|
||||||
choice [-- close comment: if the nesting is 0, done
|
|
||||||
-- otherwise recurse on commentSuffix
|
|
||||||
try (string "*/") *> let t = concat [x,"*/"]
|
|
||||||
in if n == 0
|
|
||||||
then return t
|
|
||||||
else (\s -> concat [t,s]) <$> commentSuffix (n - 1)
|
|
||||||
-- nested comment, recurse
|
|
||||||
,try (string "/*") *> ((\s -> concat [x,"/*",s]) <$> commentSuffix (n + 1))
|
|
||||||
-- not an end comment or nested comment, continue
|
|
||||||
,(\c s -> x ++ [c] ++ s) <$> anyChar <*> commentSuffix n]
|
|
||||||
-}
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
This is to improve user experience: provide an error if we see */
|
This is to improve user experience: provide an error if we see */
|
||||||
outside a comment. This could potentially break postgres ops with */
|
outside a comment. This could potentially break postgres ops with */
|
||||||
|
@ -470,23 +416,14 @@ in them (which is a stupid thing to do). In other cases, the user
|
||||||
should write * / instead (I can't think of any cases when this would
|
should write * / instead (I can't think of any cases when this would
|
||||||
be valid syntax though).
|
be valid syntax though).
|
||||||
-}
|
-}
|
||||||
{-
|
|
||||||
dontParseEndBlockComment :: Dialect -> Parser Token
|
dontParseEndBlockComment :: Dialect -> Parser Token
|
||||||
dontParseEndBlockComment _ =
|
dontParseEndBlockComment _ =
|
||||||
-- don't use try, then it should commit to the error
|
-- don't use try, then it should commit to the error
|
||||||
try (string "*/") *> fail "comment end without comment start"
|
try (string "*/") *> fail "comment end without comment start"
|
||||||
-}
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
sqlNumber :: Dialect -> Parser Token
|
|
||||||
sqlNumber _ =
|
|
||||||
SqlNumber <$> digits
|
|
||||||
|
|
||||||
|
|
||||||
digits :: Parser Text
|
|
||||||
digits = takeWhile1P (Just "digit") isDigit
|
|
||||||
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
numbers
|
numbers
|
||||||
|
|
||||||
|
@ -502,9 +439,18 @@ present. There cannot be any spaces or other characters embedded in
|
||||||
the constant. Note that any leading plus or minus sign is not actually
|
the constant. Note that any leading plus or minus sign is not actually
|
||||||
considered part of the constant; it is an operator applied to the
|
considered part of the constant; it is an operator applied to the
|
||||||
constant.
|
constant.
|
||||||
|
|
||||||
|
|
||||||
|
algorithm:
|
||||||
|
either
|
||||||
|
parse 1 or more digits
|
||||||
|
then an optional dot which isn't two dots
|
||||||
|
then optional digits
|
||||||
|
or: parse a dot which isn't two dots
|
||||||
|
then digits
|
||||||
|
followed by an optional exponent
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-
|
|
||||||
sqlNumber :: Dialect -> Parser Token
|
sqlNumber :: Dialect -> Parser Token
|
||||||
sqlNumber d =
|
sqlNumber d =
|
||||||
SqlNumber <$> completeNumber
|
SqlNumber <$> completeNumber
|
||||||
|
@ -517,15 +463,14 @@ sqlNumber d =
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
completeNumber =
|
completeNumber =
|
||||||
(int <??> (pp dot <??.> pp int)
|
(digits <??> (pp dot <??.> pp digits)
|
||||||
-- try is used in case we read a dot
|
-- try is used in case we read a dot
|
||||||
-- and it isn't part of a number
|
-- and it isn't part of a number
|
||||||
-- if there are any following digits, then we commit
|
-- if there are any following digits, then we commit
|
||||||
-- to it being a number and not something else
|
-- to it being a number and not something else
|
||||||
<|> try ((++) <$> dot <*> int))
|
<|> try ((<>) <$> dot <*> digits))
|
||||||
<??> pp expon
|
<??> pp expon
|
||||||
|
|
||||||
int = many1 digit
|
|
||||||
-- make sure we don't parse two adjacent dots in a number
|
-- make sure we don't parse two adjacent dots in a number
|
||||||
-- special case for postgresql, we backtrack if we see two adjacent dots
|
-- special case for postgresql, we backtrack if we see two adjacent dots
|
||||||
-- to parse 1..2, but in other dialects we commit to the failure
|
-- to parse 1..2, but in other dialects we commit to the failure
|
||||||
|
@ -533,38 +478,31 @@ sqlNumber d =
|
||||||
in if diPostgresSymbols d
|
in if diPostgresSymbols d
|
||||||
then try p
|
then try p
|
||||||
else p
|
else p
|
||||||
expon = (:) <$> oneOf "eE" <*> sInt
|
expon = T.cons <$> oneOf "eE" <*> sInt
|
||||||
sInt = (++) <$> option "" (string "+" <|> string "-") <*> int
|
sInt = (<>) <$> option "" (T.singleton <$> oneOf "+-") <*> digits
|
||||||
pp = (<$$> (++))
|
pp = (<$$> (<>))
|
||||||
-}
|
p <??> q = p <**> option id q
|
||||||
|
pa <$$> c = pa <**> pure (flip c)
|
||||||
|
pa <??.> pb =
|
||||||
|
let c = (<$>) . flip
|
||||||
|
in (.) `c` pa <*> option id pb
|
||||||
|
|
||||||
|
digits :: Parser Text
|
||||||
|
digits = takeWhile1P (Just "digit") isDigit
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
positionalArg :: Dialect -> Parser Token
|
|
||||||
positionalArg _ = PositionalArg <$> (char_ '$' *> (read . T.unpack <$> digits))
|
|
||||||
|
|
||||||
{-
|
|
||||||
positionalArg :: Dialect -> Parser Token
|
positionalArg :: Dialect -> Parser Token
|
||||||
positionalArg d =
|
positionalArg d =
|
||||||
guard (diPositionalArg d) >>
|
guard (diPositionalArg d) >>
|
||||||
-- use try to avoid ambiguities with other syntax which starts with dollar
|
-- use try to avoid ambiguities with other syntax which starts with dollar
|
||||||
PositionalArg <$> try (char '$' *> (read <$> many1 digit))
|
PositionalArg <$> try (char_ '$' *> (read . T.unpack <$> digits))
|
||||||
-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
-- todo: I think the try here should read a prefix char, then a single valid
|
-- todo: I think the try here should read a prefix char, then a single valid
|
||||||
-- identifier char, then commit
|
-- identifier char, then commit
|
||||||
prefixedVariable :: Dialect -> Parser Token
|
prefixedVariable :: Dialect -> Parser Token
|
||||||
prefixedVariable d = try $ choice
|
|
||||||
[PrefixedVariable <$> (':' <$ char_ ':') <*> identifierString d
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
-- use try because : and @ can be part of other things also
|
|
||||||
|
|
||||||
{-
|
|
||||||
prefixedVariable :: Dialect -> Parser Token
|
|
||||||
prefixedVariable d = try $ choice
|
prefixedVariable d = try $ choice
|
||||||
[PrefixedVariable <$> char ':' <*> identifierString
|
[PrefixedVariable <$> char ':' <*> identifierString
|
||||||
,guard (diAtIdentifier d) >>
|
,guard (diAtIdentifier d) >>
|
||||||
|
@ -572,21 +510,9 @@ prefixedVariable d = try $ choice
|
||||||
,guard (diHashIdentifier d) >>
|
,guard (diHashIdentifier d) >>
|
||||||
PrefixedVariable <$> char '#' <*> identifierString
|
PrefixedVariable <$> char '#' <*> identifierString
|
||||||
]
|
]
|
||||||
-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
symbol :: Dialect -> Parser Token
|
|
||||||
symbol _ =
|
|
||||||
Symbol <$> choice
|
|
||||||
[try $ choice $ map (\a -> string a) multiCharSymbols
|
|
||||||
,T.singleton <$> satisfy (`elem` singleLetterSymbol)
|
|
||||||
]
|
|
||||||
where
|
|
||||||
singleLetterSymbol = "(),-+*/<>=." :: String
|
|
||||||
multiCharSymbols = ["!=", "<>", ">=", "<=", "||"]
|
|
||||||
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
Symbols
|
Symbols
|
||||||
|
|
||||||
|
@ -596,7 +522,7 @@ A symbol is an operator, or one of the misc symbols which include:
|
||||||
The postgresql operator syntax allows a huge range of operators
|
The postgresql operator syntax allows a huge range of operators
|
||||||
compared with ansi and other dialects
|
compared with ansi and other dialects
|
||||||
-}
|
-}
|
||||||
{-
|
|
||||||
symbol :: Dialect -> Parser Token
|
symbol :: Dialect -> Parser Token
|
||||||
symbol d = Symbol <$> choice (concat
|
symbol d = Symbol <$> choice (concat
|
||||||
[dots
|
[dots
|
||||||
|
@ -610,14 +536,14 @@ symbol d = Symbol <$> choice (concat
|
||||||
else basicAnsiOps
|
else basicAnsiOps
|
||||||
])
|
])
|
||||||
where
|
where
|
||||||
dots = [many1 (char '.')]
|
dots = [takeWhile1P (Just "dot") (=='.')]
|
||||||
odbcSymbol = [string "{", string "}"]
|
odbcSymbol = [string "{", string "}"]
|
||||||
postgresExtraSymbols =
|
postgresExtraSymbols =
|
||||||
[try (string ":=")
|
[try (string ":=")
|
||||||
-- parse :: and : and avoid allowing ::: or more
|
-- parse :: and : and avoid allowing ::: or more
|
||||||
,try (string "::" <* notFollowedBy (char ':'))
|
,try (string "::" <* notFollowedBy (char ':'))
|
||||||
,try (string ":" <* notFollowedBy (char ':'))]
|
,try (string ":" <* notFollowedBy (char ':'))]
|
||||||
miscSymbol = map (string . (:[])) $
|
miscSymbol = map (string . T.singleton) $
|
||||||
case () of
|
case () of
|
||||||
_ | diSqlServerSymbols d -> ",;():?"
|
_ | diSqlServerSymbols d -> ",;():?"
|
||||||
| diPostgresSymbols d -> "[],;()"
|
| diPostgresSymbols d -> "[],;()"
|
||||||
|
@ -629,14 +555,14 @@ symbols can also be part of a single character symbol
|
||||||
-}
|
-}
|
||||||
|
|
||||||
basicAnsiOps = map (try . string) [">=","<=","!=","<>"]
|
basicAnsiOps = map (try . string) [">=","<=","!=","<>"]
|
||||||
++ map (string . (:[])) "+-^*/%~&<>="
|
++ map (string . T.singleton) "+-^*/%~&<>="
|
||||||
++ pipes
|
++ pipes
|
||||||
pipes = -- what about using many1 (char '|'), then it will
|
pipes = -- what about using many1 (char '|'), then it will
|
||||||
-- fail in the parser? Not sure exactly how
|
-- fail in the parser? Not sure exactly how
|
||||||
-- standalone the lexer should be
|
-- standalone the lexer should be
|
||||||
[char '|' *>
|
[char '|' *>
|
||||||
choice ["||" <$ char '|' <* notFollowedBy (char '|')
|
choice ["||" <$ char '|' <* notFollowedBy (char '|')
|
||||||
,return "|"]]
|
,pure "|"]]
|
||||||
|
|
||||||
{-
|
{-
|
||||||
postgresql generalized operators
|
postgresql generalized operators
|
||||||
|
@ -662,7 +588,7 @@ which allows the last character of a multi character symbol to be + or
|
||||||
-
|
-
|
||||||
-}
|
-}
|
||||||
|
|
||||||
generalizedPostgresqlOperator :: [Parser String]
|
generalizedPostgresqlOperator :: [Parser Text]
|
||||||
generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
|
generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
|
||||||
where
|
where
|
||||||
allOpSymbols = "+-*/<>=~!@#%^&|`?"
|
allOpSymbols = "+-*/<>=~!@#%^&|`?"
|
||||||
|
@ -674,13 +600,13 @@ generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
|
||||||
singlePlusMinus = try $ do
|
singlePlusMinus = try $ do
|
||||||
c <- oneOf "+-"
|
c <- oneOf "+-"
|
||||||
notFollowedBy $ oneOf allOpSymbols
|
notFollowedBy $ oneOf allOpSymbols
|
||||||
return [c]
|
pure $ T.singleton c
|
||||||
|
|
||||||
-- this is used when we are parsing a potentially multi symbol
|
-- this is used when we are parsing a potentially multi symbol
|
||||||
-- operator and we have alread seen one of the 'exception chars'
|
-- operator and we have alread seen one of the 'exception chars'
|
||||||
-- and so we can end with a + or -
|
-- and so we can end with a + or -
|
||||||
moreOpCharsException = do
|
moreOpCharsException = do
|
||||||
c <- oneOf (filter (`notElem` "-/*") allOpSymbols)
|
c <- oneOf (filter (`notElemChar` "-/*") allOpSymbols)
|
||||||
-- make sure we don't parse a comment starting token
|
-- make sure we don't parse a comment starting token
|
||||||
-- as part of an operator
|
-- as part of an operator
|
||||||
<|> try (char '/' <* notFollowedBy (char '*'))
|
<|> try (char '/' <* notFollowedBy (char '*'))
|
||||||
|
@ -688,14 +614,14 @@ generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
|
||||||
-- and make sure we don't parse a block comment end
|
-- and make sure we don't parse a block comment end
|
||||||
-- as part of another symbol
|
-- as part of another symbol
|
||||||
<|> try (char '*' <* notFollowedBy (char '/'))
|
<|> try (char '*' <* notFollowedBy (char '/'))
|
||||||
(c:) <$> option [] moreOpCharsException
|
T.cons c <$> option "" moreOpCharsException
|
||||||
|
|
||||||
opMoreChars = choice
|
opMoreChars = choice
|
||||||
[-- parse an exception char, now we can finish with a + -
|
[-- parse an exception char, now we can finish with a + -
|
||||||
(:)
|
T.cons
|
||||||
<$> oneOf exceptionOpSymbols
|
<$> oneOf exceptionOpSymbols
|
||||||
<*> option [] moreOpCharsException
|
<*> option "" moreOpCharsException
|
||||||
,(:)
|
,T.cons
|
||||||
<$> (-- parse +, make sure it isn't the last symbol
|
<$> (-- parse +, make sure it isn't the last symbol
|
||||||
try (char '+' <* lookAhead (oneOf allOpSymbols))
|
try (char '+' <* lookAhead (oneOf allOpSymbols))
|
||||||
<|> -- parse -, make sure it isn't the last symbol
|
<|> -- parse -, make sure it isn't the last symbol
|
||||||
|
@ -709,18 +635,14 @@ generalizedPostgresqlOperator = [singlePlusMinus,opMoreChars]
|
||||||
try (char '*' <* notFollowedBy (char '/'))
|
try (char '*' <* notFollowedBy (char '/'))
|
||||||
<|> -- any other ansi operator symbol
|
<|> -- any other ansi operator symbol
|
||||||
oneOf "<>=")
|
oneOf "<>=")
|
||||||
<*> option [] opMoreChars
|
<*> option "" opMoreChars
|
||||||
]
|
]
|
||||||
-}
|
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
sqlWhitespace :: Dialect -> Parser Token
|
sqlWhitespace :: Dialect -> Parser Token
|
||||||
sqlWhitespace _ = Whitespace <$> takeWhile1P (Just "whitespace") isSpace <?> ""
|
sqlWhitespace _ = Whitespace <$> takeWhile1P (Just "whitespace") isSpace <?> ""
|
||||||
|
|
||||||
--sqlWhitespace :: Dialect -> Parser Token
|
|
||||||
--sqlWhitespace _ = Whitespace <$> many1 (satisfy isSpace)
|
|
||||||
|
|
||||||
----------------------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- parser helpers
|
-- parser helpers
|
||||||
|
@ -731,25 +653,11 @@ char_ = void . char
|
||||||
string_ :: Text -> Parser ()
|
string_ :: Text -> Parser ()
|
||||||
string_ = void . string
|
string_ = void . string
|
||||||
|
|
||||||
{-
|
oneOf :: [Char] -> Parser Char
|
||||||
startsWith :: (Char -> Bool) -> (Char -> Bool) -> Parser String
|
oneOf = M.oneOf
|
||||||
startsWith p ps = do
|
|
||||||
c <- satisfy p
|
|
||||||
choice [(:) c <$> (takeWhile1 ps)
|
|
||||||
,return [c]]
|
|
||||||
|
|
||||||
takeWhile1 :: (Char -> Bool) -> Parser String
|
notElemChar :: Char -> [Char] -> Bool
|
||||||
takeWhile1 p = many1 (satisfy p)
|
notElemChar a b = a `notElem` (b :: [Char])
|
||||||
|
|
||||||
takeWhile :: (Char -> Bool) -> Parser String
|
|
||||||
takeWhile p = many (satisfy p)
|
|
||||||
|
|
||||||
takeTill :: (Char -> Bool) -> Parser String
|
|
||||||
takeTill p = manyTill anyChar (peekSatisfy p)
|
|
||||||
|
|
||||||
peekSatisfy :: (Char -> Bool) -> Parser ()
|
|
||||||
peekSatisfy p = void $ lookAhead (satisfy p)
|
|
||||||
-}
|
|
||||||
|
|
||||||
----------------------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -776,8 +684,7 @@ successes. I don't think it succeeds this test at the moment
|
||||||
-- will pretty print then lex back to the same set of tokens.
|
-- will pretty print then lex back to the same set of tokens.
|
||||||
-- Used internally, might be useful for generating SQL via lexical tokens.
|
-- Used internally, might be useful for generating SQL via lexical tokens.
|
||||||
tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool
|
tokenListWillPrintAndLex :: Dialect -> [Token] -> Bool
|
||||||
tokenListWillPrintAndLex = undefined
|
tokenListWillPrintAndLex _ [] = True
|
||||||
{-tokenListWillPrintAndLex _ [] = True
|
|
||||||
tokenListWillPrintAndLex _ [_] = True
|
tokenListWillPrintAndLex _ [_] = True
|
||||||
tokenListWillPrintAndLex d (a:b:xs) =
|
tokenListWillPrintAndLex d (a:b:xs) =
|
||||||
tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs)
|
tokensWillPrintAndLex d a b && tokenListWillPrintAndLex d (b:xs)
|
||||||
|
@ -791,7 +698,7 @@ followed by = or : makes a different symbol
|
||||||
-}
|
-}
|
||||||
|
|
||||||
| Symbol ":" <- a
|
| Symbol ":" <- a
|
||||||
, checkFirstBChar (\x -> isIdentifierChar x || x `elem` ":=") = False
|
, checkFirstBChar (\x -> isIdentifierChar x || x `T.elem` ":=") = False
|
||||||
|
|
||||||
{-
|
{-
|
||||||
two symbols next to eachother will fail if the symbols can combine and
|
two symbols next to eachother will fail if the symbols can combine and
|
||||||
|
@ -801,7 +708,7 @@ two symbols next to eachother will fail if the symbols can combine and
|
||||||
| diPostgresSymbols d
|
| diPostgresSymbols d
|
||||||
, Symbol a' <- a
|
, Symbol a' <- a
|
||||||
, Symbol b' <- b
|
, Symbol b' <- b
|
||||||
, b' `notElem` ["+", "-"] || or (map (`elem` a') "~!@#%^&|`?") = False
|
, b' `notElem` ["+", "-"] || or (map (`T.elem` a') "~!@#%^&|`?") = False
|
||||||
|
|
||||||
{-
|
{-
|
||||||
check two adjacent symbols in non postgres where the combination
|
check two adjacent symbols in non postgres where the combination
|
||||||
|
@ -906,17 +813,13 @@ TODO: not 100% on this always being bad
|
||||||
-- helper function to run a predicate on the
|
-- helper function to run a predicate on the
|
||||||
-- last character of the first token and the first
|
-- last character of the first token and the first
|
||||||
-- character of the second token
|
-- character of the second token
|
||||||
checkBorderChars f
|
checkBorderChars f =
|
||||||
| (_:_) <- prettya
|
case (T.unsnoc prettya, T.uncons prettyb) of
|
||||||
, (fb:_) <- prettyb
|
(Just (_,la), Just (fb,_)) -> f la fb
|
||||||
, la <- last prettya
|
|
||||||
= f la fb
|
|
||||||
checkBorderChars _ = False
|
|
||||||
checkFirstBChar f = case prettyb of
|
|
||||||
(b':_) -> f b'
|
|
||||||
_ -> False
|
_ -> False
|
||||||
checkLastAChar f = case prettya of
|
checkFirstBChar f = case T.uncons prettyb of
|
||||||
(_:_) -> f $ last prettya
|
Just (b',_) -> f b'
|
||||||
|
_ -> False
|
||||||
|
checkLastAChar f = case T.unsnoc prettya of
|
||||||
|
Just (_,la) -> f la
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
-}
|
|
||||||
|
|
|
@ -24,10 +24,8 @@ import Language.SQL.SimpleSQL.Lex
|
||||||
,tokenListWillPrintAndLex
|
,tokenListWillPrintAndLex
|
||||||
)
|
)
|
||||||
|
|
||||||
import Language.SQL.SimpleSQL.Dialect
|
|
||||||
(ansi2011)
|
|
||||||
|
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
import Data.Text (Text)
|
||||||
|
|
||||||
--import Debug.Trace
|
--import Debug.Trace
|
||||||
--import Data.Char (isAlpha)
|
--import Data.Char (isAlpha)
|
||||||
|
@ -35,12 +33,13 @@ import qualified Data.Text as T
|
||||||
|
|
||||||
lexerTests :: TestItem
|
lexerTests :: TestItem
|
||||||
lexerTests = Group "lexerTests" $
|
lexerTests = Group "lexerTests" $
|
||||||
[bootstrapTests{-Group "lexer token tests" [ansiLexerTests
|
[bootstrapTests
|
||||||
|
,ansiLexerTests
|
||||||
,postgresLexerTests
|
,postgresLexerTests
|
||||||
,sqlServerLexerTests
|
,sqlServerLexerTests
|
||||||
,oracleLexerTests
|
,oracleLexerTests
|
||||||
,mySqlLexerTests
|
,mySqlLexerTests
|
||||||
,odbcLexerTests]-}]
|
,odbcLexerTests]
|
||||||
|
|
||||||
-- quick sanity tests to see something working
|
-- quick sanity tests to see something working
|
||||||
bootstrapTests :: TestItem
|
bootstrapTests :: TestItem
|
||||||
|
@ -75,8 +74,9 @@ bootstrapTests = Group "bootstrap tests" $
|
||||||
,("1", [SqlNumber "1"])
|
,("1", [SqlNumber "1"])
|
||||||
,("42", [SqlNumber "42"])
|
,("42", [SqlNumber "42"])
|
||||||
|
|
||||||
,("$1", [PositionalArg 1])
|
-- have to fix the dialect handling in the tests
|
||||||
,("$200", [PositionalArg 200])
|
--,("$1", [PositionalArg 1])
|
||||||
|
--,("$200", [PositionalArg 200])
|
||||||
|
|
||||||
,(":test", [PrefixedVariable ':' "test"])
|
,(":test", [PrefixedVariable ':' "test"])
|
||||||
|
|
||||||
|
@ -84,22 +84,22 @@ bootstrapTests = Group "bootstrap tests" $
|
||||||
["!=", "<>", ">=", "<=", "||"]
|
["!=", "<>", ">=", "<=", "||"]
|
||||||
++ map T.singleton ("(),-+*/<>=." :: String)))
|
++ map T.singleton ("(),-+*/<>=." :: String)))
|
||||||
|
|
||||||
{-
|
|
||||||
ansiLexerTable :: [(String,[Token])]
|
ansiLexerTable :: [(Text,[Token])]
|
||||||
ansiLexerTable =
|
ansiLexerTable =
|
||||||
-- single char symbols
|
-- single char symbols
|
||||||
map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;()"
|
map (\s -> (T.singleton s,[Symbol $ T.singleton s])) "+-^*/%~&|?<>[]=,;()"
|
||||||
-- multi char symbols
|
-- multi char symbols
|
||||||
++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||"]
|
||||||
++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||||
-- simple identifiers
|
-- simple identifiers
|
||||||
in map (\i -> (i, [Identifier Nothing i])) idens
|
in map (\i -> (i, [Identifier Nothing i])) idens
|
||||||
++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
|
<> map (\i -> ("\"" <> i <> "\"", [Identifier (Just ("\"","\"")) i])) idens
|
||||||
-- todo: in order to make lex . pretty id, need to
|
-- todo: in order to make lex . pretty id, need to
|
||||||
-- preserve the case of the u
|
-- preserve the case of the u
|
||||||
++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
|
<> map (\i -> ("u&\"" <> i <> "\"", [Identifier (Just ("u&\"","\"")) i])) idens
|
||||||
-- host param
|
-- host param
|
||||||
++ map (\i -> (':':i, [PrefixedVariable ':' i])) idens
|
<> map (\i -> (T.cons ':' i, [PrefixedVariable ':' i])) idens
|
||||||
)
|
)
|
||||||
-- quoted identifiers with embedded double quotes
|
-- quoted identifiers with embedded double quotes
|
||||||
-- the lexer doesn't unescape the quotes
|
-- the lexer doesn't unescape the quotes
|
||||||
|
@ -111,7 +111,7 @@ ansiLexerTable =
|
||||||
,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
|
,("'normalendquote '''", [SqlString "'" "'" "normalendquote ''"])
|
||||||
,("'\n'", [SqlString "'" "'" "\n"])]
|
,("'\n'", [SqlString "'" "'" "\n"])]
|
||||||
-- csstrings
|
-- csstrings
|
||||||
++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
++ map (\c -> (c <> "'test'", [SqlString (c <> "'") "'" "test"]))
|
||||||
["n", "N","b", "B","x", "X", "u&"]
|
["n", "N","b", "B","x", "X", "u&"]
|
||||||
-- numbers
|
-- numbers
|
||||||
++ [("10", [SqlNumber "10"])
|
++ [("10", [SqlNumber "10"])
|
||||||
|
@ -122,8 +122,8 @@ ansiLexerTable =
|
||||||
,("10.2", [SqlNumber "10.2"])
|
,("10.2", [SqlNumber "10.2"])
|
||||||
,("10.2e7", [SqlNumber "10.2e7"])]
|
,("10.2e7", [SqlNumber "10.2e7"])]
|
||||||
-- whitespace
|
-- whitespace
|
||||||
++ concat [[([a],[Whitespace [a]])
|
++ concat [[(T.singleton a,[Whitespace $ T.singleton a])
|
||||||
,([a,b], [Whitespace [a,b]])]
|
,(T.singleton a <> T.singleton b, [Whitespace (T.singleton a <> T.singleton b)])]
|
||||||
| a <- " \n\t", b <- " \n\t"]
|
| a <- " \n\t", b <- " \n\t"]
|
||||||
-- line comment
|
-- line comment
|
||||||
++ map (\c -> (c, [LineComment c]))
|
++ map (\c -> (c, [LineComment c]))
|
||||||
|
@ -134,14 +134,15 @@ ansiLexerTable =
|
||||||
,"/* this *is/ a comment */"
|
,"/* this *is/ a comment */"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
ansiLexerTests :: TestItem
|
ansiLexerTests :: TestItem
|
||||||
ansiLexerTests = Group "ansiLexerTests" $
|
ansiLexerTests = Group "ansiLexerTests" $
|
||||||
[Group "ansi lexer token tests" $ [LexTest ansi2011 s t | (s,t) <- ansiLexerTable]
|
[Group "ansi lexer token tests" $ [LexTest ansi2011 s t | (s,t) <- ansiLexerTable]
|
||||||
,Group "ansi generated combination lexer tests" $
|
,Group "ansi generated combination lexer tests" $
|
||||||
[ LexTest ansi2011 (s ++ s1) (t ++ t1)
|
[ LexTest ansi2011 (s <> s1) (t <> t1)
|
||||||
| (s,t) <- ansiLexerTable
|
| (s,t) <- ansiLexerTable
|
||||||
, (s1,t1) <- ansiLexerTable
|
, (s1,t1) <- ansiLexerTable
|
||||||
, tokenListWillPrintAndLex ansi2011 $ t ++ t1
|
, tokenListWillPrintAndLex ansi2011 $ t <> t1
|
||||||
|
|
||||||
]
|
]
|
||||||
,Group "ansiadhoclexertests" $
|
,Group "ansiadhoclexertests" $
|
||||||
|
@ -185,10 +186,10 @@ assurance.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
postgresLexerTable :: [(String,[Token])]
|
postgresLexerTable :: [(Text,[Token])]
|
||||||
postgresLexerTable =
|
postgresLexerTable =
|
||||||
-- single char symbols
|
-- single char symbols
|
||||||
map (\s -> ([s],[Symbol [s]])) "+-^*/%~&|?<>[]=,;():"
|
map (\s -> (T.singleton s,[Symbol $ T.singleton s])) "+-^*/%~&|?<>[]=,;():"
|
||||||
-- multi char symbols
|
-- multi char symbols
|
||||||
++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
|
++ map (\s -> (s,[Symbol s])) [">=","<=","!=","<>","||", "::","..",":="]
|
||||||
-- generic symbols
|
-- generic symbols
|
||||||
|
@ -196,12 +197,12 @@ postgresLexerTable =
|
||||||
++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
++ (let idens = ["a", "_a", "test", "table", "Stuff", "STUFF"]
|
||||||
-- simple identifiers
|
-- simple identifiers
|
||||||
in map (\i -> (i, [Identifier Nothing i])) idens
|
in map (\i -> (i, [Identifier Nothing i])) idens
|
||||||
++ map (\i -> ("\"" ++ i ++ "\"", [Identifier (Just ("\"","\"")) i])) idens
|
++ map (\i -> ("\"" <> i <> "\"", [Identifier (Just ("\"","\"")) i])) idens
|
||||||
-- todo: in order to make lex . pretty id, need to
|
-- todo: in order to make lex . pretty id, need to
|
||||||
-- preserve the case of the u
|
-- preserve the case of the u
|
||||||
++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
|
++ map (\i -> ("u&\"" <> i <> "\"", [Identifier (Just ("u&\"","\"")) i])) idens
|
||||||
-- host param
|
-- host param
|
||||||
++ map (\i -> (':':i, [PrefixedVariable ':' i])) idens
|
++ map (\i -> (T.cons ':' i, [PrefixedVariable ':' i])) idens
|
||||||
)
|
)
|
||||||
-- positional var
|
-- positional var
|
||||||
++ [("$1", [PositionalArg 1])]
|
++ [("$1", [PositionalArg 1])]
|
||||||
|
@ -223,7 +224,7 @@ postgresLexerTable =
|
||||||
,("$a$ $$string 3$$ $a$", [SqlString "$a$" "$a$" " $$string 3$$ "])
|
,("$a$ $$string 3$$ $a$", [SqlString "$a$" "$a$" " $$string 3$$ "])
|
||||||
]
|
]
|
||||||
-- csstrings
|
-- csstrings
|
||||||
++ map (\c -> (c ++ "'test'", [SqlString (c ++ "'") "'" "test"]))
|
++ map (\c -> (c <> "'test'", [SqlString (c <> "'") "'" "test"]))
|
||||||
["n", "N","b", "B","x", "X", "u&", "e", "E"]
|
["n", "N","b", "B","x", "X", "u&", "e", "E"]
|
||||||
-- numbers
|
-- numbers
|
||||||
++ [("10", [SqlNumber "10"])
|
++ [("10", [SqlNumber "10"])
|
||||||
|
@ -234,8 +235,8 @@ postgresLexerTable =
|
||||||
,("10.2", [SqlNumber "10.2"])
|
,("10.2", [SqlNumber "10.2"])
|
||||||
,("10.2e7", [SqlNumber "10.2e7"])]
|
,("10.2e7", [SqlNumber "10.2e7"])]
|
||||||
-- whitespace
|
-- whitespace
|
||||||
++ concat [[([a],[Whitespace [a]])
|
++ concat [[(T.singleton a,[Whitespace $ T.singleton a])
|
||||||
,([a,b], [Whitespace [a,b]])]
|
,(T.singleton a <> T.singleton b, [Whitespace $ T.singleton a <> T.singleton b])]
|
||||||
| a <- " \n\t", b <- " \n\t"]
|
| a <- " \n\t", b <- " \n\t"]
|
||||||
-- line comment
|
-- line comment
|
||||||
++ map (\c -> (c, [LineComment c]))
|
++ map (\c -> (c, [LineComment c]))
|
||||||
|
@ -267,24 +268,24 @@ operators without one of the exception chars
|
||||||
also: do the testing for the ansi compatibility special cases
|
also: do the testing for the ansi compatibility special cases
|
||||||
-}
|
-}
|
||||||
|
|
||||||
postgresShortOperatorTable :: [(String,[Token])]
|
postgresShortOperatorTable :: [(Text,[Token])]
|
||||||
postgresShortOperatorTable =
|
postgresShortOperatorTable =
|
||||||
[ (x, [Symbol x]) | x <- someValidPostgresOperators 2]
|
[ (x, [Symbol x]) | x <- someValidPostgresOperators 2]
|
||||||
|
|
||||||
|
|
||||||
postgresExtraOperatorTable :: [(String,[Token])]
|
postgresExtraOperatorTable :: [(Text,[Token])]
|
||||||
postgresExtraOperatorTable =
|
postgresExtraOperatorTable =
|
||||||
[ (x, [Symbol x]) | x <- someValidPostgresOperators 4]
|
[ (x, [Symbol x]) | x <- someValidPostgresOperators 4]
|
||||||
|
|
||||||
|
|
||||||
someValidPostgresOperators :: Int -> [String]
|
someValidPostgresOperators :: Int -> [Text]
|
||||||
someValidPostgresOperators l =
|
someValidPostgresOperators l =
|
||||||
[ x
|
[ x
|
||||||
| n <- [1..l]
|
| n <- [1..l]
|
||||||
, x <- combos "+-*/<>=~!@#%^&|`?" n
|
, x <- combos "+-*/<>=~!@#%^&|`?" n
|
||||||
, not ("--" `isInfixOf` x || "/*" `isInfixOf` x || "*/" `isInfixOf` x)
|
, not ("--" `T.isInfixOf` x || "/*" `T.isInfixOf` x || "*/" `T.isInfixOf` x)
|
||||||
, not (last x `elem` "+-")
|
, not (T.last x `T.elem` "+-")
|
||||||
|| or (map (`elem` x) "~!@#%^&|`?")
|
|| or (map (`T.elem` x) "~!@#%^&|`?")
|
||||||
]
|
]
|
||||||
|
|
||||||
{-
|
{-
|
||||||
|
@ -293,13 +294,13 @@ These are postgres operators, which if followed immediately by a + or
|
||||||
the + or -.
|
the + or -.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
somePostgresOpsWhichWontAddTrailingPlusMinus :: Int -> [String]
|
somePostgresOpsWhichWontAddTrailingPlusMinus :: Int -> [Text]
|
||||||
somePostgresOpsWhichWontAddTrailingPlusMinus l =
|
somePostgresOpsWhichWontAddTrailingPlusMinus l =
|
||||||
[ x
|
[ x
|
||||||
| n <- [1..l]
|
| n <- [1..l]
|
||||||
, x <- combos "+-*/<>=" n
|
, x <- combos "+-*/<>=" n
|
||||||
, not ("--" `isInfixOf` x || "/*" `isInfixOf` x || "*/" `isInfixOf` x)
|
, not ("--" `T.isInfixOf` x || "/*" `T.isInfixOf` x || "*/" `T.isInfixOf` x)
|
||||||
, not (last x `elem` "+-")
|
, not (T.last x `T.elem` "+-")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -310,7 +311,7 @@ postgresLexerTests = Group "postgresLexerTests" $
|
||||||
,Group "postgres generated lexer token tests" $
|
,Group "postgres generated lexer token tests" $
|
||||||
[LexTest postgres s t | (s,t) <- postgresShortOperatorTable ++ postgresExtraOperatorTable]
|
[LexTest postgres s t | (s,t) <- postgresShortOperatorTable ++ postgresExtraOperatorTable]
|
||||||
,Group "postgres generated combination lexer tests" $
|
,Group "postgres generated combination lexer tests" $
|
||||||
[ LexTest postgres (s ++ s1) (t ++ t1)
|
[ LexTest postgres (s <> s1) (t <> t1)
|
||||||
| (s,t) <- postgresLexerTable ++ postgresShortOperatorTable
|
| (s,t) <- postgresLexerTable ++ postgresShortOperatorTable
|
||||||
, (s1,t1) <- postgresLexerTable ++ postgresShortOperatorTable
|
, (s1,t1) <- postgresLexerTable ++ postgresShortOperatorTable
|
||||||
, tokenListWillPrintAndLex postgres $ t ++ t1
|
, tokenListWillPrintAndLex postgres $ t ++ t1
|
||||||
|
@ -344,18 +345,18 @@ postgresLexerTests = Group "postgresLexerTests" $
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
edgeCaseCommentOps =
|
edgeCaseCommentOps =
|
||||||
[ (x ++ "/*<test*/", [Symbol x, BlockComment "/*<test*/"])
|
[ (x <> "/*<test*/", [Symbol x, BlockComment "/*<test*/"])
|
||||||
| x <- eccops
|
| x <- eccops
|
||||||
, not (last x == '*')
|
, not (T.last x == '*')
|
||||||
] ++
|
] ++
|
||||||
[ (x ++ "--<test", [Symbol x, LineComment "--<test"])
|
[ (x <> "--<test", [Symbol x, LineComment "--<test"])
|
||||||
| x <- eccops
|
| x <- eccops
|
||||||
, not (last x == '-')
|
, not (T.last x == '-')
|
||||||
]
|
]
|
||||||
eccops = someValidPostgresOperators 2
|
eccops = someValidPostgresOperators 2
|
||||||
edgeCasePlusMinusOps = concat
|
edgeCasePlusMinusOps = concat
|
||||||
[ [ (x ++ "+", [Symbol x, Symbol "+"])
|
[ [ (x <> "+", [Symbol x, Symbol "+"])
|
||||||
, (x ++ "-", [Symbol x, Symbol "-"]) ]
|
, (x <> "-", [Symbol x, Symbol "-"]) ]
|
||||||
| x <- somePostgresOpsWhichWontAddTrailingPlusMinus 2
|
| x <- somePostgresOpsWhichWontAddTrailingPlusMinus 2
|
||||||
]
|
]
|
||||||
edgeCasePlusMinusComments =
|
edgeCasePlusMinusComments =
|
||||||
|
@ -365,7 +366,6 @@ postgresLexerTests = Group "postgresLexerTests" $
|
||||||
,("+/**/", [Symbol "+", BlockComment "/**/"])
|
,("+/**/", [Symbol "+", BlockComment "/**/"])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
sqlServerLexerTests :: TestItem
|
sqlServerLexerTests :: TestItem
|
||||||
sqlServerLexerTests = Group "sqlServerLexTests" $
|
sqlServerLexerTests = Group "sqlServerLexTests" $
|
||||||
[ LexTest sqlserver s t | (s,t) <-
|
[ LexTest sqlserver s t | (s,t) <-
|
||||||
|
@ -393,8 +393,6 @@ odbcLexerTests = Group "odbcLexTests" $
|
||||||
++ [LexFails sqlserver {diOdbc = False} "{"
|
++ [LexFails sqlserver {diOdbc = False} "{"
|
||||||
,LexFails sqlserver {diOdbc = False} "}"]
|
,LexFails sqlserver {diOdbc = False} "}"]
|
||||||
|
|
||||||
combos :: [a] -> Int -> [[a]]
|
combos :: [Char] -> Int -> [Text]
|
||||||
combos _ 0 = [[]]
|
combos _ 0 = [T.empty]
|
||||||
combos l n = [ x:tl | x <- l, tl <- combos l (n - 1) ]
|
combos l n = [ T.cons x tl | x <- l, tl <- combos l (n - 1) ]
|
||||||
|
|
||||||
-}
|
|
||||||
|
|
|
@ -36,7 +36,6 @@ module Language.SQL.SimpleSQL.SQL2011Queries (sql2011QueryTests) where
|
||||||
import Language.SQL.SimpleSQL.TestTypes
|
import Language.SQL.SimpleSQL.TestTypes
|
||||||
import Language.SQL.SimpleSQL.Syntax
|
import Language.SQL.SimpleSQL.Syntax
|
||||||
|
|
||||||
import qualified Data.Text as T
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
|
||||||
sql2011QueryTests :: TestItem
|
sql2011QueryTests :: TestItem
|
||||||
|
|
|
@ -7,8 +7,6 @@ module Language.SQL.SimpleSQL.ScalarExprs (scalarExprTests) where
|
||||||
import Language.SQL.SimpleSQL.TestTypes
|
import Language.SQL.SimpleSQL.TestTypes
|
||||||
import Language.SQL.SimpleSQL.Syntax
|
import Language.SQL.SimpleSQL.Syntax
|
||||||
|
|
||||||
import qualified Data.Text as T
|
|
||||||
|
|
||||||
scalarExprTests :: TestItem
|
scalarExprTests :: TestItem
|
||||||
scalarExprTests = Group "scalarExprTests"
|
scalarExprTests = Group "scalarExprTests"
|
||||||
[literals
|
[literals
|
||||||
|
|
|
@ -58,7 +58,7 @@ testData :: TestItem
|
||||||
testData =
|
testData =
|
||||||
Group "parserTest"
|
Group "parserTest"
|
||||||
[lexerTests
|
[lexerTests
|
||||||
{-,scalarExprTests
|
,scalarExprTests
|
||||||
,odbcTests
|
,odbcTests
|
||||||
,queryExprComponentTests
|
,queryExprComponentTests
|
||||||
,queryExprsTests
|
,queryExprsTests
|
||||||
|
@ -76,7 +76,7 @@ testData =
|
||||||
,oracleTests
|
,oracleTests
|
||||||
,customDialectTests
|
,customDialectTests
|
||||||
,emptyStatementTests
|
,emptyStatementTests
|
||||||
,createIndexTests-}
|
,createIndexTests
|
||||||
]
|
]
|
||||||
|
|
||||||
tests :: T.TestTree
|
tests :: T.TestTree
|
||||||
|
|
Loading…
Reference in a new issue