From ee4098e189b393606c7c23db046ba9a47da5441b Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Mon, 15 Feb 2016 20:35:38 +0200
Subject: [PATCH] lexer tweaks

combine hostparam with prefixed variable
refactor some of the lexing code slightly
fix error in tests where it was using the ansi dialect instead of
  postgres for testing :::, etc.
---
 Language/SQL/SimpleSQL/Lex.lhs                | 43 ++++++-------------
 Language/SQL/SimpleSQL/Parse.lhs              |  2 +-
 Language/SQL/SimpleSQL/Pretty.lhs             |  4 +-
 tools/Language/SQL/SimpleSQL/LexerTests.lhs   | 10 ++---
 .../Language/SQL/SimpleSQL/SQL2011Queries.lhs |  6 +--
 5 files changed, 24 insertions(+), 41 deletions(-)

diff --git a/Language/SQL/SimpleSQL/Lex.lhs b/Language/SQL/SimpleSQL/Lex.lhs
index 3a2b8e2..5b8c93b 100644
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@@ -62,12 +62,8 @@ directly without the separately testing lexing stage.
 >     -- can be " or u& or something dialect specific like []
 >     | Identifier (Maybe (String,String)) String
 >
->     -- | This is a host param symbol, e.g. :param
->     | HostParam String
->
->     -- | This is a prefixed variable symbol, such as @var or #var
->     -- (not used in ansi dialect) TODO: maybe combine hostparam with
->     -- this
+>     -- | This is a prefixed variable symbol, such as :var, @var or #var
+>     -- (only :var is used in ansi dialect)
 >     | PrefixedVariable Char String
 >
 >     -- | This is a positional arg identifier e.g. $1
@@ -105,7 +101,6 @@ directly without the separately testing lexing stage.
 > prettyToken _ (Symbol s) = s
 > prettyToken _ (Identifier Nothing t) = t
 > prettyToken _ (Identifier (Just (q1,q2)) t) = q1 ++ t ++ q2
-> prettyToken _ (HostParam p) = ':':p
 > prettyToken _ (PrefixedVariable c p) = c:p
 > prettyToken _ (PositionalArg p) = '$':show p
 > prettyToken _ (SqlString s e t) = s ++ t ++ e
@@ -156,7 +151,6 @@ this is also tried before symbol (a .1 will be parsed as a number, but
 
 >     (p,) <$> choice [sqlString d
 >                     ,identifier d
->                     ,hostParam d
 >                     ,lineComment d
 >                     ,blockComment d
 >                     ,sqlNumber d
@@ -268,28 +262,22 @@ x'hexidecimal string'
 >                   ++ [string "u&'"
 >                      ,string "U&'"]
 
-> hostParam :: Dialect -> Parser Token
-
-use try for postgres because we also support : and :: as symbols
-There might be a problem with parsing e.g. a[1:b]
-
-> hostParam d | diSyntaxFlavour d == Postgres =
->     HostParam <$> try (char ':' *> identifierString)
-
-> hostParam _ = HostParam <$> (char ':' *> identifierString)
+use try because : and @ can be part of other things also
 
 > prefixedVariable :: Dialect -> Parser Token
-> prefixedVariable  d | diSyntaxFlavour d == SQLServer =
->     PrefixedVariable <$> char '@' <*> identifierString
-> prefixedVariable  d | diSyntaxFlavour d == Oracle =
->     PrefixedVariable <$> char '#' <*> identifierString
-> prefixedVariable _ = guard False *> fail "unpossible"
+> prefixedVariable  d = try $ choice
+>     [PrefixedVariable <$> char ':' <*> identifierString
+>     ,guard (diSyntaxFlavour d == SQLServer) >>
+>      PrefixedVariable <$> char '@' <*> identifierString
+>     ,guard (diSyntaxFlavour d == Oracle) >>
+>      PrefixedVariable <$> char '#' <*> identifierString
+>     ]
 
 > positionalArg :: Dialect -> Parser Token
-> positionalArg d | diSyntaxFlavour d == Postgres =
+> positionalArg d =
+>   guard (diSyntaxFlavour d == Postgres) >>
 >   -- use try to avoid ambiguities with other syntax which starts with dollar
 >   PositionalArg <$> try (char '$' *> (read <$> many1 digit))
-> positionalArg _ = guard False *> fail "unpossible"
 
 
 digits
@@ -625,12 +613,7 @@ a quoted identifier using ", followed by a " will fail
 >    | Identifier (Just (_,"\"")) _ <- a
 >    , checkFirstBChar (=='"') = False
 
-host param followed by an identifier char will be absorbed
-
->    | HostParam {} <- a
->    , checkFirstBChar isIdentifierChar = False
-
-prefixed variable same:
+prefixed variable followed by an identifier char will be absorbed
 
 >    | PrefixedVariable {} <- a
 >    , checkFirstBChar isIdentifierChar = False
diff --git a/Language/SQL/SimpleSQL/Parse.lhs b/Language/SQL/SimpleSQL/Parse.lhs
index 893b508..23ab162 100644
--- a/Language/SQL/SimpleSQL/Parse.lhs
+++ b/Language/SQL/SimpleSQL/Parse.lhs
@@ -1980,7 +1980,7 @@ It is only allowed when all the strings are quoted with ' atm.
 > hostParamTok :: Parser String
 > hostParamTok = mytoken (\tok ->
 >     case tok of
->       L.HostParam p -> Just p
+>       L.PrefixedVariable c p -> Just (c:p)
 >       _ -> Nothing)
 
 > sqlNumberTok :: Bool -> Parser String
diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs
index fb80b8c..4df03bf 100644
--- a/Language/SQL/SimpleSQL/Pretty.lhs
+++ b/Language/SQL/SimpleSQL/Pretty.lhs
@@ -53,8 +53,8 @@ which have been changed to try to improve the layout of the output.
 > valueExpr _ Star = text "*"
 > valueExpr _ Parameter = text "?"
 > valueExpr _ (HostParameter p i) =
->     text (':':p)
->     <+> me (\i' -> text "indicator" <+> text (':':i')) i
+>     text p
+>     <+> me (\i' -> text "indicator" <+> text i') i
 
 > valueExpr d (App f es) = names f <> parens (commaSep (map (valueExpr d) es))
 
diff --git a/tools/Language/SQL/SimpleSQL/LexerTests.lhs b/tools/Language/SQL/SimpleSQL/LexerTests.lhs
index 9b21b7e..fb2bdc1 100644
--- a/tools/Language/SQL/SimpleSQL/LexerTests.lhs
+++ b/tools/Language/SQL/SimpleSQL/LexerTests.lhs
@@ -33,7 +33,7 @@ Test for the lexer
 >            -- preserve the case of the u
 >            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
 >            -- host param
->            ++ map (\i -> (':':i, [HostParam i])) idens
+>            ++ map (\i -> (':':i, [PrefixedVariable ':' i])) idens
 >        )
 >     -- quoted identifiers with embedded double quotes
 >     -- the lexer doesn't unescape the quotes
@@ -154,7 +154,7 @@ also: do the testing for the ansi compatibility special cases
 >            -- preserve the case of the u
 >            ++ map (\i -> ("u&\"" ++ i ++ "\"", [Identifier (Just ("u&\"","\"")) i])) idens
 >            -- host param
->            ++ map (\i -> (':':i, [HostParam i])) idens
+>            ++ map (\i -> (':':i, [PrefixedVariable ':' i])) idens
 >        )
 >     -- positional var
 >     ++ [("$1", [PositionalArg 1])]
@@ -246,9 +246,9 @@ the + or -.
 >       -- need more tests for */ to make sure it is caught if it is in the middle of a
 >       -- sequence of symbol letters
 >         [LexFails postgres "*/"
->         ,LexFails ansi2011 ":::"
->         ,LexFails ansi2011 "::::"
->         ,LexFails ansi2011 ":::::"
+>         ,LexFails postgres ":::"
+>         ,LexFails postgres "::::"
+>         ,LexFails postgres ":::::"
 >         ,LexFails postgres "@*/"
 >         ,LexFails postgres "-*/"
 >         ,LexFails postgres "12e3e4"
diff --git a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
index 339afdd..bfa6505 100644
--- a/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
+++ b/tools/Language/SQL/SimpleSQL/SQL2011Queries.lhs
@@ -1384,11 +1384,11 @@ TODO: add the missing bits
 > parameterSpecification :: TestItem
 > parameterSpecification = Group "parameter specification"
 >     $ map (uncurry (TestValueExpr ansi2011))
->     [(":hostparam", HostParameter "hostparam" Nothing)
+>     [(":hostparam", HostParameter ":hostparam" Nothing)
 >     ,(":hostparam indicator :another_host_param"
->      ,HostParameter "hostparam" $ Just "another_host_param")
+>      ,HostParameter ":hostparam" $ Just ":another_host_param")
 >     ,("?", Parameter)
->     ,(":h[3]", Array (HostParameter "h" Nothing) [NumLit "3"])
+>     ,(":h[3]", Array (HostParameter ":h" Nothing) [NumLit "3"])
 >     ]
 
 <current collation specification> ::=