refactor the identifier lexer a bit

2016-02-22 23:24:02 +02:00 · 2016-02-22 23:24:02 +02:00 · 09223c3de9
commit 09223c3de9
parent 9457720d2c
1 changed file with 22 additions and 29 deletions
--- a/Language/SQL/SimpleSQL/Lex.lhs
+++ b/Language/SQL/SimpleSQL/Lex.lhs
@@ -171,22 +171,26 @@ u&"unicode quoted identifier"
 > identifier :: Dialect -> Parser Token
 > identifier d =
 >     choice
->     [Identifier (Just ("\"","\"")) <$> qiden
+>     [quotedIden
->      -- try is used here to avoid a conflict with identifiers
+>     ,unicodeQuotedIden
->      -- and quoted strings which also start with a 'u'
+>     ,regularIden
->     ,Identifier (Just ("u&\"","\"")) <$> (try (string "u&") *> qiden)
+>     ,guard (diSyntaxFlavour d == MySQL) >> mySqlQuotedIden
->     ,Identifier (Just ("U&\"","\"")) <$> (try (string "U&") *> qiden)
+>     ,guard (diSyntaxFlavour d == SQLServer) >> sqlServerQuotedIden
 >     ,Identifier Nothing <$> identifierString
 >      -- todo: dialect protection
 >     ,guard (diSyntaxFlavour d == MySQL) >>
 >      Identifier (Just ("`","`"))
 >      <$> (char '`' *> takeWhile1 (/='`') <* char '`')
 >     ,guard (diSyntaxFlavour d == SQLServer) >>
 >      Identifier (Just ("[","]"))
 >      <$> (char '[' *> takeWhile1 (`notElem` "[]") <* char ']')
 >     ]
 >   where
->     qiden = char '"' *> qidenSuffix ""
+>     regularIden = Identifier Nothing <$> identifierString
 >     quotedIden = Identifier (Just ("\"","\"")) <$> qidenPart
 >     mySqlQuotedIden = Identifier (Just ("`","`"))
 >                       <$> (char '`' *> takeWhile1 (/='`') <* char '`')
 >     sqlServerQuotedIden = Identifier (Just ("[","]"))
 >                           <$> (char '[' *> takeWhile1 (`notElem` "[]") <* char ']')
 >     -- try is used here to avoid a conflict with identifiers
 >     -- and quoted strings which also start with a 'u'
 >     unicodeQuotedIden = Identifier
 >                         <$> (f <$> try (oneOf "uU" <* string "&"))
 >                         <*> qidenPart
 >       where f x = Just (x: "&\"", "\"")
 >     qidenPart = char '"' *> qidenSuffix ""
 >     qidenSuffix t = do
 >         s <- takeTill (=='"')
 >         void $ char '"'
@@ -330,18 +334,7 @@ constant.
 Symbols
 A symbol is an operator, or one of the misc symbols which include:
-.
+. .. := : :: ( ) ? ; , { } (for odbc)
 ..
 :=
 :
 ::
 (
 )
 ?
 ;
 ,
 { (for odbc)
 }
 The postgresql operator syntax allows a huge range of operators
 compared with ansi and other dialects
@@ -502,9 +495,9 @@ isn't there.
 This is to improve user experience: provide an error if we see */
 outside a comment. This could potentially break postgres ops with */
-in (which is a stupid thing to do). In other cases, the user should
+in them (which is a stupid thing to do). In other cases, the user
-write * / instead (I can't think of any cases when this would be valid
+should write * / instead (I can't think of any cases when this would
-syntax though).
+be valid syntax though).
 > dontParseEndBlockComment :: Dialect -> Parser Token
 > dontParseEndBlockComment _ =