From 3b5deec2e5db36baba9009bdd520ec4c5ae7ff4a Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Sun, 2 Aug 2015 15:29:35 +0300 Subject: [PATCH] fix positioning in lexer? and fix line comment token missing trailing \n --- Language/SQL/SimpleSQL/Lexer.lhs | 27 +++++++++++++++------ Language/SQL/SimpleSQL/Syntax.lhs | 6 ++--- tools/Language/SQL/SimpleSQL/LexerTests.lhs | 1 + 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Language/SQL/SimpleSQL/Lexer.lhs b/Language/SQL/SimpleSQL/Lexer.lhs index eb6340a..b4f9ba5 100644 --- a/Language/SQL/SimpleSQL/Lexer.lhs +++ b/Language/SQL/SimpleSQL/Lexer.lhs @@ -25,6 +25,7 @@ parsec > ,many,runParser,lookAhead,satisfy > ,setPosition,getPosition > ,setSourceColumn,setSourceLine +> ,sourceName, setSourceName > ,sourceLine, sourceColumn) > import Language.SQL.SimpleSQL.Combinators > import Language.SQL.SimpleSQL.Errors @@ -136,19 +137,22 @@ TODO: try to make all parsers applicative only > -> String > -- ^ the SQL source to lex > -> Either ParseError [((String,Int,Int),Token)] -> lexSQL dialect fn p src = -> let (l,c) = fromMaybe (1,1) p +> lexSQL dialect fn' p src = +> let (l',c') = fromMaybe (1,1) p > in either (Left . convParseError src) Right -> $ runParser (setPos (l,c) *> many (sqlToken dialect) <* eof) () fn src +> $ runParser (setPos (fn',l',c') *> many (sqlToken dialect) <* eof) () fn' src > where -> setPos (l,c) = fmap up getPosition >>= setPosition -> where up = flip setSourceColumn c . flip setSourceLine l +> setPos (fn,l,c) = do +> fmap (flip setSourceName fn +> . flip setSourceLine l +> . flip setSourceColumn c) getPosition +> >>= setPosition > -- | parser for a sql token > sqlToken :: Dialect -> Parser ((String,Int,Int),Token) > sqlToken d = do > p' <- getPosition -> let p = ("",sourceLine p', sourceColumn p') +> let p = (sourceName p',sourceLine p', sourceColumn p') The order of parsers is important: strings and quoted identifiers can start out looking like normal identifiers, so we try to parse these @@ -295,8 +299,15 @@ character symbols in the two lists below. > (\s -> LineComment $ concat ["--",s]) <$> > -- try is used here in case we see a - symbol > -- once we read two -- then we commit to the comment token -> (try (string "--") *> -> manyTill anyChar (void (char '\n') <|> eof)) +> (try (string "--") *> ( +> -- todo: there must be a better way to do this +> conc <$> manyTill anyChar (lookAhead lineCommentEnd) <*> lineCommentEnd)) +> where +> conc a Nothing = a +> conc a (Just b) = a ++ b +> lineCommentEnd = +> Just "\n" <$ char '\n' +> <|> Nothing <$ eof Try is used in the block comment for the two symbol bits because we want to backtrack if we read the first symbol but the second symbol diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index 52bc5db..1b1df45 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -489,7 +489,7 @@ I'm not sure if this is valid syntax or not. > data TableElement = > ColumnDef Name TypeName -> (Maybe DefaultClause) +> -- (Maybe DefaultClause) > -- (Maybe ColumnConstraintDef) > -- (Maybe CollateClause) > -- | TableConstraintDef @@ -498,10 +498,10 @@ I'm not sure if this is valid syntax or not. > {-data TableConstraintDef > deriving (Eq,Show,Read,Data,Typeable) -} -> data DefaultClause = +> {-data DefaultClause = > DefaultClause ValueExpr > | IdentityColumnSpec -> | GenerationClause +> | GenerationClause-} > {-data ColumnConstraintDef = > | NotNullConstraint diff --git a/tools/Language/SQL/SimpleSQL/LexerTests.lhs b/tools/Language/SQL/SimpleSQL/LexerTests.lhs index 4e33cc1..fe77719 100644 --- a/tools/Language/SQL/SimpleSQL/LexerTests.lhs +++ b/tools/Language/SQL/SimpleSQL/LexerTests.lhs @@ -76,6 +76,7 @@ number number (todo: double check more carefully) > ] > ++ map (uncurry $ LexerTest SQL2011) > [("", []) +> ,("-- line com\nstuff", [LineComment "-- line com\n",Identifier "stuff"]) > ] > where