1
Fork 0

move the megaparsec stream stuff to the lexer, restore the old lex return without source positions, add a new lex function that does return source positions

This commit is contained in:
Jake Wheat 2024-01-10 11:57:13 +00:00
parent e76aa2818b
commit de121d1fd6
4 changed files with 132 additions and 117 deletions

View file

@ -73,16 +73,21 @@ try again to add annotation to the ast
-- | Lexer for SQL. -- | Lexer for SQL.
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
module Language.SQL.SimpleSQL.Lex module Language.SQL.SimpleSQL.Lex
(Token(..) (Token(..)
,WithPos(..) ,WithPos(..)
,lexSQL ,lexSQL
,lexSQLWithPositions
,prettyToken ,prettyToken
,prettyTokens ,prettyTokens
,ParseError ,ParseError
,prettyError ,prettyError
,tokenListWillPrintAndLex ,tokenListWillPrintAndLex
,ansi2011 ,ansi2011
,MyStream(..)
) where ) where
import Language.SQL.SimpleSQL.Dialect import Language.SQL.SimpleSQL.Dialect
@ -94,6 +99,10 @@ import Text.Megaparsec
(Parsec (Parsec
,runParser' ,runParser'
,PosState(..)
,TraversableStream(..)
,VisualStream(..)
,ParseErrorBundle(..) ,ParseErrorBundle(..)
,errorBundlePretty ,errorBundlePretty
@ -125,10 +134,13 @@ import Text.Megaparsec.Char
,char ,char
) )
import Text.Megaparsec.State (initialState) import Text.Megaparsec.State (initialState)
import Control.Applicative ((<**>))
import qualified Data.List as DL
import qualified Data.List.NonEmpty as NE
import Data.Proxy (Proxy(..))
import Data.Void (Void) import Data.Void (Void)
import Control.Applicative ((<**>))
import Data.Char import Data.Char
(isAlphaNum (isAlphaNum
,isAlpha ,isAlpha
@ -184,7 +196,8 @@ data Token
-- main api functions -- main api functions
-- | Lex some SQL to a list of tokens. -- | Lex some SQL to a list of tokens.
lexSQL :: Dialect lexSQLWithPositions
:: Dialect
-- ^ dialect of SQL to use -- ^ dialect of SQL to use
-> Text -> Text
-- ^ filename to use in error messages -- ^ filename to use in error messages
@ -194,7 +207,23 @@ lexSQL :: Dialect
-> Text -> Text
-- ^ the SQL source to lex -- ^ the SQL source to lex
-> Either ParseError [WithPos Token] -> Either ParseError [WithPos Token]
lexSQL dialect fn p src = myParse fn p (many (sqlToken dialect) <* (eof <?> "")) src lexSQLWithPositions dialect fn p src = myParse fn p (many (sqlToken dialect) <* (eof <?> "")) src
-- | Lex some SQL to a list of tokens.
lexSQL
:: Dialect
-- ^ dialect of SQL to use
-> Text
-- ^ filename to use in error messages
-> Maybe (Int,Int)
-- ^ line number and column number of the first character
-- in the source to use in error messages
-> Text
-- ^ the SQL source to lex
-> Either ParseError [Token]
lexSQL dialect fn p src =
fmap (map tokenVal) $ lexSQLWithPositions dialect fn p src
myParse :: Text -> (Maybe (Int,Int)) -> Parser a -> Text -> Either ParseError a myParse :: Text -> (Maybe (Int,Int)) -> Parser a -> Text -> Either ParseError a
myParse name sp' p s = myParse name sp' p s =
@ -823,3 +852,88 @@ TODO: not 100% on this always being bad
checkLastAChar f = case T.unsnoc prettya of checkLastAChar f = case T.unsnoc prettya of
Just (_,la) -> f la Just (_,la) -> f la
_ -> False _ -> False
------------------------------------------------------------------------------
-- megaparsec stream boilerplate
data MyStream = MyStream
{ myStreamInput :: String
, unMyStream :: [WithPos Token]
}
instance M.Stream MyStream where
type Token MyStream = WithPos Token
type Tokens MyStream = [WithPos Token]
tokenToChunk Proxy x = [x]
tokensToChunk Proxy xs = xs
chunkToTokens Proxy = id
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ (MyStream _ []) = Nothing
take1_ (MyStream str (t:ts)) = Just
( t
, MyStream (drop (tokensLength pxy (t NE.:|[])) str) ts
)
takeN_ n (MyStream str s)
| n <= 0 = Just ([], MyStream str s)
| null s = Nothing
| otherwise =
let (x, s') = splitAt n s
in case NE.nonEmpty x of
Nothing -> Just (x, MyStream str s')
Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s')
takeWhile_ f (MyStream str s) =
let (x, s') = DL.span f s
in case NE.nonEmpty x of
Nothing -> (x, MyStream str s')
Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s')
instance VisualStream MyStream where
showTokens Proxy = DL.intercalate " "
. NE.toList
. fmap (showMyToken . tokenVal)
tokensLength Proxy xs = sum (tokenLength <$> xs)
instance TraversableStream MyStream where
reachOffset o M.PosState {..} =
( Just (prefix ++ restOfLine)
, PosState
{ pstateInput = MyStream
{ myStreamInput = postStr
, unMyStream = post
}
, pstateOffset = max pstateOffset o
, pstateSourcePos = newSourcePos
, pstateTabWidth = pstateTabWidth
, pstateLinePrefix = prefix
}
)
where
prefix =
if sameLine
then pstateLinePrefix ++ preLine
else preLine
sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos
newSourcePos =
case post of
[] -> case unMyStream pstateInput of
[] -> pstateSourcePos
xs -> endPos (last xs)
(x:_) -> startPos x
(pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput)
(preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput)
preLine = reverse . takeWhile (/= '\n') . reverse $ preStr
tokensConsumed =
case NE.nonEmpty pre of
Nothing -> 0
Just nePre -> tokensLength pxy nePre
restOfLine = takeWhile (/= '\n') postStr
pxy :: Proxy MyStream
pxy = Proxy
showMyToken :: Token -> String
-- todo: how to do this properly?
showMyToken = T.unpack . prettyToken ansi2011

View file

@ -177,10 +177,7 @@ fixing them in the syntax but leaving them till the semantic checking
-} -}
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
-- | This is the module with the parser functions. -- | This is the module with the parser functions.
module Language.SQL.SimpleSQL.Parse module Language.SQL.SimpleSQL.Parse
(parseQueryExpr (parseQueryExpr
@ -195,14 +192,6 @@ import Text.Megaparsec
(ParsecT (ParsecT
,runParserT ,runParserT
,Stream(..)
,PosState(..)
,TraversableStream(..)
,VisualStream(..)
--,ErrorItem(Tokens)
,sourceLine
,ParseErrorBundle(..) ,ParseErrorBundle(..)
,errorBundlePretty ,errorBundlePretty
@ -229,10 +218,7 @@ import Control.Monad.Reader
,ask ,ask
) )
import qualified Data.List as DL
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as Set import qualified Data.Set as Set
import Data.Proxy (Proxy(..))
import Data.Void (Void) import Data.Void (Void)
import Control.Monad (guard, void) import Control.Monad (guard, void)
@ -315,7 +301,7 @@ parseScalarExpr = wrapParse scalarExpr
data ParseError data ParseError
= LexError L.ParseError = LexError L.ParseError
| ParseError (ParseErrorBundle MyStream Void) | ParseError (ParseErrorBundle L.MyStream Void)
prettyError :: ParseError -> Text prettyError :: ParseError -> Text
prettyError (LexError e) = T.pack $ errorBundlePretty e prettyError (LexError e) = T.pack $ errorBundlePretty e
@ -337,10 +323,10 @@ wrapParse :: Parser a
-> Text -> Text
-> Either ParseError a -> Either ParseError a
wrapParse parser d f p src = do wrapParse parser d f p src = do
lx <- either (Left . LexError) Right $ L.lexSQL d f p src lx <- either (Left . LexError) Right $ L.lexSQLWithPositions d f p src
either (Left . ParseError) Right $ either (Left . ParseError) Right $
runReader (runParserT (parser <* (eof <?> "")) (T.unpack f) runReader (runParserT (parser <* (eof <?> "")) (T.unpack f)
$ MyStream (T.unpack src) $ filter notSpace lx) d $ L.MyStream (T.unpack src) $ filter notSpace lx) d
where where
notSpace = notSpace' . L.tokenVal notSpace = notSpace' . L.tokenVal
notSpace' (L.Whitespace {}) = False notSpace' (L.Whitespace {}) = False
@ -352,7 +338,7 @@ wrapParse parser d f p src = do
-- parsing code -- parsing code
type Parser = ParsecT Void MyStream (Reader Dialect) type Parser = ParsecT Void L.MyStream (Reader Dialect)
{- {-
------------------------------------------------ ------------------------------------------------
@ -2384,87 +2370,3 @@ queryDialect f = do
d <- ask d <- ask
pure $ f d pure $ f d
------------------------------------------------------------------------------
-- parsec stream boilerplate
data MyStream = MyStream
{ myStreamInput :: String
, unMyStream :: [L.WithPos L.Token]
}
instance Stream MyStream where
type Token MyStream = L.WithPos L.Token
type Tokens MyStream = [L.WithPos L.Token]
tokenToChunk Proxy x = [x]
tokensToChunk Proxy xs = xs
chunkToTokens Proxy = id
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ (MyStream _ []) = Nothing
take1_ (MyStream str (t:ts)) = Just
( t
, MyStream (drop (tokensLength pxy (t NE.:|[])) str) ts
)
takeN_ n (MyStream str s)
| n <= 0 = Just ([], MyStream str s)
| null s = Nothing
| otherwise =
let (x, s') = splitAt n s
in case NE.nonEmpty x of
Nothing -> Just (x, MyStream str s')
Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s')
takeWhile_ f (MyStream str s) =
let (x, s') = DL.span f s
in case NE.nonEmpty x of
Nothing -> (x, MyStream str s')
Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s')
instance VisualStream MyStream where
showTokens Proxy = DL.intercalate " "
. NE.toList
. fmap (showMyToken . L.tokenVal)
tokensLength Proxy xs = sum (L.tokenLength <$> xs)
instance TraversableStream MyStream where
reachOffset o PosState {..} =
( Just (prefix ++ restOfLine)
, PosState
{ pstateInput = MyStream
{ myStreamInput = postStr
, unMyStream = post
}
, pstateOffset = max pstateOffset o
, pstateSourcePos = newSourcePos
, pstateTabWidth = pstateTabWidth
, pstateLinePrefix = prefix
}
)
where
prefix =
if sameLine
then pstateLinePrefix ++ preLine
else preLine
sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos
newSourcePos =
case post of
[] -> case unMyStream pstateInput of
[] -> pstateSourcePos
xs -> L.endPos (last xs)
(x:_) -> L.startPos x
(pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput)
(preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput)
preLine = reverse . takeWhile (/= '\n') . reverse $ preStr
tokensConsumed =
case NE.nonEmpty pre of
Nothing -> 0
Just nePre -> tokensLength pxy nePre
restOfLine = takeWhile (/= '\n') postStr
pxy :: Proxy MyStream
pxy = Proxy
showMyToken :: L.Token -> String
-- todo: how to do this properly?
showMyToken = T.unpack . L.prettyToken ansi2011

View file

@ -82,7 +82,7 @@ bootstrapTests = Group "bootstrap tests" $
] ++ map (\a -> (a, [Symbol a])) ( ] ++ map (\a -> (a, [Symbol a])) (
["!=", "<>", ">=", "<=", "||"] ["!=", "<>", ">=", "<=", "||"]
++ map T.singleton ("(),-+*/<>=." :: String))) ++ map T.singleton ("(),-+*/<>=." :: [Char])))
ansiLexerTable :: [(Text,[Token])] ansiLexerTable :: [(Text,[Token])]

View file

@ -110,8 +110,7 @@ itemToTest (LexFails d s) = makeLexingFailsTest d s
makeLexerTest :: Dialect -> Text -> [Lex.Token] -> T.TestTree makeLexerTest :: Dialect -> Text -> [Lex.Token] -> T.TestTree
makeLexerTest d s ts = H.testCase (T.unpack s) $ do makeLexerTest d s ts = H.testCase (T.unpack s) $ do
let lx = either (error . T.unpack . Lex.prettyError) id $ Lex.lexSQL d "" Nothing s let ts1 = either (error . T.unpack . Lex.prettyError) id $ Lex.lexSQL d "" Nothing s
ts1 = map Lex.tokenVal lx
H.assertEqual "" ts ts1 H.assertEqual "" ts ts1
let s' = Lex.prettyTokens d $ ts1 let s' = Lex.prettyTokens d $ ts1
H.assertEqual "pretty print" s s' H.assertEqual "pretty print" s s'