work on lexing: error cases

fix so following a number with either . or e or E without whitespace will cause a lexing error remove */ from symbols in postgres (although postgres strictly speaking allows this I think, it is not a good idea) reject */ anywhere with an error reject ||| (or more pipes) in ansi, etc. dialects instead of trying to parse it as something like '||', '|'
2016-02-15 20:31:06 +02:00 · 2016-02-15 20:31:06 +02:00 · 9fd2970f26
commit 9fd2970f26
parent 4bca2fa2ec
4 changed files with 136 additions and 65 deletions
--- a/tools/Language/SQL/SimpleSQL/LexerTests.lhs
+++ b/tools/Language/SQL/SimpleSQL/LexerTests.lhs
@ -68,19 +68,38 @@ Test for the lexer

 > ansiLexerTests :: TestItem
 > ansiLexerTests = Group "ansiLexerTests" $
->     [Group "ansi lexer token tests" $ [LexerTest ansi2011 s t |  (s,t) <- ansiLexerTable]
+>     [Group "ansi lexer token tests" $ [LexTest ansi2011 s t |  (s,t) <- ansiLexerTable]
 >     ,Group "ansi generated combination lexer tests" $
->     [ LexerTest ansi2011 (s ++ s1) (t ++ t1)
+>     [ LexTest ansi2011 (s ++ s1) (t ++ t1)
 >     | (s,t) <- ansiLexerTable
 >     , (s1,t1) <- ansiLexerTable
 >     , tokenListWillPrintAndLex ansi2011 $ t ++ t1

 >     ]
->     ,Group "adhoc lexer tests" $
->        map (uncurry $ LexerTest ansi2011)
+>     ,Group "ansiadhoclexertests" $
+>        map (uncurry $ LexTest ansi2011)
 >        [("", [])
 >        ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier Nothing "stuff"])
->        ]
+>        ] ++
+>        [-- want to make sure this gives a parse error
+>         LexFails ansi2011 "*/"
+>         -- combinations of pipes: make sure they fail because they could be
+>         -- ambiguous and it is really unclear when they are or not, and
+>         -- what the result is even when they are not ambiguous
+>        ,LexFails ansi2011 "|||"
+>        ,LexFails ansi2011 "||||"
+>        ,LexFails ansi2011 "|||||"
+>        -- another user experience thing: make sure extra trailing
+>        -- number chars are rejected rather than attempting to parse
+>        -- if the user means to write something that is rejected by this code,
+>        -- then they can use whitespace to make it clear and then it will parse
+>        ,LexFails ansi2011 "12e3e4"
+>        ,LexFails ansi2011 "12e3e4"
+>        ,LexFails ansi2011 "12e3e4"
+>        ,LexFails ansi2011 "12e3.4"
+>        ,LexFails ansi2011 "12.4.5"
+>        ,LexFails ansi2011 "12.4e5.6"
+>        ,LexFails ansi2011 "12.4e5e7"]
 >      ]

 todo: lexing tests
@ -187,7 +206,7 @@ also: do the testing for the ansi compatibility special cases
 >        [ x
 >        | n <- [1..l]
 >        , x <- combos "+-*/<>=~!@#%^&|`?" n
->        , not ("--" `isInfixOf` x || "/*" `isInfixOf` x)
+>        , not ("--" `isInfixOf` x || "/*" `isInfixOf` x || "*/" `isInfixOf` x)
 >        , not (last x `elem` "+-")
 >          || or (map (`elem` x) "~!@#%^&|`?")
 >        ]
@ -201,7 +220,7 @@ the + or -.
 >        [ x
 >        | n <- [1..l]
 >        , x <- combos "+-*/<>=" n
->        , not ("--" `isInfixOf` x || "/*" `isInfixOf` x)
+>        , not ("--" `isInfixOf` x || "/*" `isInfixOf` x || "*/" `isInfixOf` x)
 >        , not (last x `elem` "+-")
 >        ]

@ -209,27 +228,44 @@ the + or -.
 > postgresLexerTests :: TestItem
 > postgresLexerTests = Group "postgresLexerTests" $
 >     [Group "postgres lexer token tests" $
->      [LexerTest postgres s t | (s,t) <- postgresLexerTable ++ postgresExtraOperatorTable]
+>      [LexTest postgres s t | (s,t) <- postgresLexerTable ++ postgresExtraOperatorTable]
 >     ,Group "postgres generated combination lexer tests" $
->     [ LexerTest postgres (s ++ s1) (t ++ t1)
+>     [ LexTest postgres (s ++ s1) (t ++ t1)
 >     | (s,t) <- postgresLexerTable
 >     , (s1,t1) <- postgresLexerTable
 >     , tokenListWillPrintAndLex postgres $ t ++ t1

 >     ]
->     ,Group "adhoc postgres lexer tests" $
->      [LexerTest postgres s t
+>     ,Group "adhoc postgres lexertests" $
+>      [LexTest postgres s t
 >      | (s,t) <- edgeCaseCommentOps
 >                 ++ edgeCasePlusMinusOps
 >                 ++ edgeCasePlusMinusComments]
+>      ++
+>       -- need more tests for */ to make sure it is caught if it is in the middle of a
+>       -- sequence of symbol letters
+>         [LexFails postgres "*/"
+>         ,LexFails postgres "@*/"
+>         ,LexFails postgres "-*/"
+>         ,LexFails postgres "12e3e4"
+>         ,LexFails postgres "12e3e4"
+>         ,LexFails postgres "12e3e4"
+>         ,LexFails postgres "12e3.4"
+>         ,LexFails postgres "12.4.5"
+>         ,LexFails postgres "12.4e5.6"
+>         ,LexFails postgres "12.4e5e7"]
 >     ]
 >  where
->    edgeCaseCommentOps = concat
->      [ [ (x ++ "/*<test*/", [Symbol x, BlockComment "/*<test*/"])
->        , (x ++ "--<test", [Symbol x, LineComment "--<test"]) ]
->      | x <- someValidPostgresOperators 2
+>    edgeCaseCommentOps =
+>      [ (x ++ "/*<test*/", [Symbol x, BlockComment "/*<test*/"])
+>      | x <- eccops
+>      , not (last x == '*')
+>      ] ++
+>      [ (x ++ "--<test", [Symbol x, LineComment "--<test"])
+>      | x <- eccops
 >      , not (last x == '-')
 >      ]
+>    eccops = someValidPostgresOperators 2
 >    edgeCasePlusMinusOps = concat
 >      [ [ (x ++ "+", [Symbol x, Symbol "+"])
 >        , (x ++ "-", [Symbol x, Symbol "-"]) ]
@ -244,22 +280,22 @@ the + or -.


 > sqlServerLexerTests :: TestItem
-> sqlServerLexerTests = Group "sqlServerLexerTests" $
->     [ LexerTest sqlserver s t | (s,t) <-
+> sqlServerLexerTests = Group "sqlServerLexTests" $
+>     [ LexTest sqlserver s t | (s,t) <-
 >     [--("@variable", [(Identifier (Just ("@", "")) "variable")])
 >     --,("[quoted identifier]", [(Identifier (Just ("[", "]")) "variable")])
 >     ]]

 > oracleLexerTests :: TestItem
-> oracleLexerTests = Group "oracleLexerTests" $
->     [ LexerTest oracle s t | (s,t) <-
+> oracleLexerTests = Group "oracleLexTests" $
+>     [ LexTest oracle s t | (s,t) <-
 >     [--("#variable", [(Identifier (Just ("#", "")) "variable")])
 >     ]
 >     ]

 > odbcLexerTests :: TestItem
-> odbcLexerTests = Group "odbcLexerTests" $
->     [ LexerTest sqlserver {- {odbc = True} -} s t | (s,t) <-
+> odbcLexerTests = Group "odbcLexTests" $
+>     [ LexTest sqlserver {- {odbc = True} -} s t | (s,t) <-
 >     [--("{}", [Symbol "{", Symbol "}"])
 >     ]
 >     ]
--- a/tools/Language/SQL/SimpleSQL/TestTypes.lhs
+++ b/tools/Language/SQL/SimpleSQL/TestTypes.lhs
@ -30,5 +30,6 @@ check that the string given fails to parse

 >               | ParseQueryExprFails Dialect String
 >               | ParseValueExprFails Dialect String
->               | LexerTest Dialect String [Token]
+>               | LexTest Dialect String [Token]
+>               | LexFails Dialect String
 >                 deriving (Eq,Show)
--- a/tools/Language/SQL/SimpleSQL/Tests.lhs
+++ b/tools/Language/SQL/SimpleSQL/Tests.lhs
@ -86,7 +86,8 @@ order on the generated documentation.
 > itemToTest (ParseValueExprFails d str) =
 >     toFTest parseValueExpr prettyValueExpr d str

-> itemToTest (LexerTest d s ts) = makeLexerTest d s ts
+> itemToTest (LexTest d s ts) = makeLexerTest d s ts
+> itemToTest (LexFails d s) = makeLexingFailsTest d s

 > makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree
 > makeLexerTest d s ts = H.testCase s $ do
@ -95,6 +96,13 @@ order on the generated documentation.
 >     let s' = prettyTokens d $ map snd lx
 >     H.assertEqual "pretty print" s s'

+> makeLexingFailsTest :: Dialect -> String -> T.TestTree
+> makeLexingFailsTest d s = H.testCase s $ do
+>     case lexSQL d "" Nothing s of
+>          Right x -> H.assertFailure $ "lexing should have failed: " ++ s ++ "\ngot: " ++ show x
+>          Left _ -> return ()
+
+
 > toTest :: (Eq a, Show a) =>
 >           (Dialect -> String -> Maybe (Int,Int) -> String -> Either ParseError a)
 >        -> (Dialect -> a -> String)