move the megaparsec stream stuff to the lexer, restore the old lex return without source positions, add a new lex function that does return source positions
This commit is contained in:
parent
e76aa2818b
commit
de121d1fd6
|
@ -73,16 +73,21 @@ try again to add annotation to the ast
|
||||||
-- | Lexer for SQL.
|
-- | Lexer for SQL.
|
||||||
{-# LANGUAGE TupleSections #-}
|
{-# LANGUAGE TupleSections #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
{-# LANGUAGE TypeFamilies #-}
|
||||||
|
|
||||||
module Language.SQL.SimpleSQL.Lex
|
module Language.SQL.SimpleSQL.Lex
|
||||||
(Token(..)
|
(Token(..)
|
||||||
,WithPos(..)
|
,WithPos(..)
|
||||||
,lexSQL
|
,lexSQL
|
||||||
|
,lexSQLWithPositions
|
||||||
,prettyToken
|
,prettyToken
|
||||||
,prettyTokens
|
,prettyTokens
|
||||||
,ParseError
|
,ParseError
|
||||||
,prettyError
|
,prettyError
|
||||||
,tokenListWillPrintAndLex
|
,tokenListWillPrintAndLex
|
||||||
,ansi2011
|
,ansi2011
|
||||||
|
,MyStream(..)
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Language.SQL.SimpleSQL.Dialect
|
import Language.SQL.SimpleSQL.Dialect
|
||||||
|
@ -94,6 +99,10 @@ import Text.Megaparsec
|
||||||
(Parsec
|
(Parsec
|
||||||
,runParser'
|
,runParser'
|
||||||
|
|
||||||
|
,PosState(..)
|
||||||
|
,TraversableStream(..)
|
||||||
|
,VisualStream(..)
|
||||||
|
|
||||||
,ParseErrorBundle(..)
|
,ParseErrorBundle(..)
|
||||||
,errorBundlePretty
|
,errorBundlePretty
|
||||||
|
|
||||||
|
@ -125,10 +134,13 @@ import Text.Megaparsec.Char
|
||||||
,char
|
,char
|
||||||
)
|
)
|
||||||
import Text.Megaparsec.State (initialState)
|
import Text.Megaparsec.State (initialState)
|
||||||
import Control.Applicative ((<**>))
|
|
||||||
|
|
||||||
|
import qualified Data.List as DL
|
||||||
|
import qualified Data.List.NonEmpty as NE
|
||||||
|
import Data.Proxy (Proxy(..))
|
||||||
import Data.Void (Void)
|
import Data.Void (Void)
|
||||||
|
|
||||||
|
import Control.Applicative ((<**>))
|
||||||
import Data.Char
|
import Data.Char
|
||||||
(isAlphaNum
|
(isAlphaNum
|
||||||
,isAlpha
|
,isAlpha
|
||||||
|
@ -184,17 +196,34 @@ data Token
|
||||||
-- main api functions
|
-- main api functions
|
||||||
|
|
||||||
-- | Lex some SQL to a list of tokens.
|
-- | Lex some SQL to a list of tokens.
|
||||||
lexSQL :: Dialect
|
lexSQLWithPositions
|
||||||
-- ^ dialect of SQL to use
|
:: Dialect
|
||||||
-> Text
|
-- ^ dialect of SQL to use
|
||||||
-- ^ filename to use in error messages
|
-> Text
|
||||||
-> Maybe (Int,Int)
|
-- ^ filename to use in error messages
|
||||||
-- ^ line number and column number of the first character
|
-> Maybe (Int,Int)
|
||||||
-- in the source to use in error messages
|
-- ^ line number and column number of the first character
|
||||||
-> Text
|
-- in the source to use in error messages
|
||||||
-- ^ the SQL source to lex
|
-> Text
|
||||||
-> Either ParseError [WithPos Token]
|
-- ^ the SQL source to lex
|
||||||
lexSQL dialect fn p src = myParse fn p (many (sqlToken dialect) <* (eof <?> "")) src
|
-> Either ParseError [WithPos Token]
|
||||||
|
lexSQLWithPositions dialect fn p src = myParse fn p (many (sqlToken dialect) <* (eof <?> "")) src
|
||||||
|
|
||||||
|
|
||||||
|
-- | Lex some SQL to a list of tokens.
|
||||||
|
lexSQL
|
||||||
|
:: Dialect
|
||||||
|
-- ^ dialect of SQL to use
|
||||||
|
-> Text
|
||||||
|
-- ^ filename to use in error messages
|
||||||
|
-> Maybe (Int,Int)
|
||||||
|
-- ^ line number and column number of the first character
|
||||||
|
-- in the source to use in error messages
|
||||||
|
-> Text
|
||||||
|
-- ^ the SQL source to lex
|
||||||
|
-> Either ParseError [Token]
|
||||||
|
lexSQL dialect fn p src =
|
||||||
|
fmap (map tokenVal) $ lexSQLWithPositions dialect fn p src
|
||||||
|
|
||||||
myParse :: Text -> (Maybe (Int,Int)) -> Parser a -> Text -> Either ParseError a
|
myParse :: Text -> (Maybe (Int,Int)) -> Parser a -> Text -> Either ParseError a
|
||||||
myParse name sp' p s =
|
myParse name sp' p s =
|
||||||
|
@ -823,3 +852,88 @@ TODO: not 100% on this always being bad
|
||||||
checkLastAChar f = case T.unsnoc prettya of
|
checkLastAChar f = case T.unsnoc prettya of
|
||||||
Just (_,la) -> f la
|
Just (_,la) -> f la
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- megaparsec stream boilerplate
|
||||||
|
|
||||||
|
data MyStream = MyStream
|
||||||
|
{ myStreamInput :: String
|
||||||
|
, unMyStream :: [WithPos Token]
|
||||||
|
}
|
||||||
|
|
||||||
|
instance M.Stream MyStream where
|
||||||
|
type Token MyStream = WithPos Token
|
||||||
|
type Tokens MyStream = [WithPos Token]
|
||||||
|
|
||||||
|
tokenToChunk Proxy x = [x]
|
||||||
|
tokensToChunk Proxy xs = xs
|
||||||
|
chunkToTokens Proxy = id
|
||||||
|
chunkLength Proxy = length
|
||||||
|
chunkEmpty Proxy = null
|
||||||
|
take1_ (MyStream _ []) = Nothing
|
||||||
|
take1_ (MyStream str (t:ts)) = Just
|
||||||
|
( t
|
||||||
|
, MyStream (drop (tokensLength pxy (t NE.:|[])) str) ts
|
||||||
|
)
|
||||||
|
takeN_ n (MyStream str s)
|
||||||
|
| n <= 0 = Just ([], MyStream str s)
|
||||||
|
| null s = Nothing
|
||||||
|
| otherwise =
|
||||||
|
let (x, s') = splitAt n s
|
||||||
|
in case NE.nonEmpty x of
|
||||||
|
Nothing -> Just (x, MyStream str s')
|
||||||
|
Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s')
|
||||||
|
takeWhile_ f (MyStream str s) =
|
||||||
|
let (x, s') = DL.span f s
|
||||||
|
in case NE.nonEmpty x of
|
||||||
|
Nothing -> (x, MyStream str s')
|
||||||
|
Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s')
|
||||||
|
|
||||||
|
instance VisualStream MyStream where
|
||||||
|
showTokens Proxy = DL.intercalate " "
|
||||||
|
. NE.toList
|
||||||
|
. fmap (showMyToken . tokenVal)
|
||||||
|
tokensLength Proxy xs = sum (tokenLength <$> xs)
|
||||||
|
|
||||||
|
instance TraversableStream MyStream where
|
||||||
|
reachOffset o M.PosState {..} =
|
||||||
|
( Just (prefix ++ restOfLine)
|
||||||
|
, PosState
|
||||||
|
{ pstateInput = MyStream
|
||||||
|
{ myStreamInput = postStr
|
||||||
|
, unMyStream = post
|
||||||
|
}
|
||||||
|
, pstateOffset = max pstateOffset o
|
||||||
|
, pstateSourcePos = newSourcePos
|
||||||
|
, pstateTabWidth = pstateTabWidth
|
||||||
|
, pstateLinePrefix = prefix
|
||||||
|
}
|
||||||
|
)
|
||||||
|
where
|
||||||
|
prefix =
|
||||||
|
if sameLine
|
||||||
|
then pstateLinePrefix ++ preLine
|
||||||
|
else preLine
|
||||||
|
sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos
|
||||||
|
newSourcePos =
|
||||||
|
case post of
|
||||||
|
[] -> case unMyStream pstateInput of
|
||||||
|
[] -> pstateSourcePos
|
||||||
|
xs -> endPos (last xs)
|
||||||
|
(x:_) -> startPos x
|
||||||
|
(pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput)
|
||||||
|
(preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput)
|
||||||
|
preLine = reverse . takeWhile (/= '\n') . reverse $ preStr
|
||||||
|
tokensConsumed =
|
||||||
|
case NE.nonEmpty pre of
|
||||||
|
Nothing -> 0
|
||||||
|
Just nePre -> tokensLength pxy nePre
|
||||||
|
restOfLine = takeWhile (/= '\n') postStr
|
||||||
|
|
||||||
|
pxy :: Proxy MyStream
|
||||||
|
pxy = Proxy
|
||||||
|
|
||||||
|
showMyToken :: Token -> String
|
||||||
|
-- todo: how to do this properly?
|
||||||
|
showMyToken = T.unpack . prettyToken ansi2011
|
||||||
|
|
|
@ -177,10 +177,7 @@ fixing them in the syntax but leaving them till the semantic checking
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE TupleSections #-}
|
{-# LANGUAGE TupleSections #-}
|
||||||
{-# LANGUAGE LambdaCase #-}
|
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE RecordWildCards #-}
|
|
||||||
{-# LANGUAGE TypeFamilies #-}
|
|
||||||
-- | This is the module with the parser functions.
|
-- | This is the module with the parser functions.
|
||||||
module Language.SQL.SimpleSQL.Parse
|
module Language.SQL.SimpleSQL.Parse
|
||||||
(parseQueryExpr
|
(parseQueryExpr
|
||||||
|
@ -195,14 +192,6 @@ import Text.Megaparsec
|
||||||
(ParsecT
|
(ParsecT
|
||||||
,runParserT
|
,runParserT
|
||||||
|
|
||||||
,Stream(..)
|
|
||||||
,PosState(..)
|
|
||||||
,TraversableStream(..)
|
|
||||||
,VisualStream(..)
|
|
||||||
--,ErrorItem(Tokens)
|
|
||||||
|
|
||||||
,sourceLine
|
|
||||||
|
|
||||||
,ParseErrorBundle(..)
|
,ParseErrorBundle(..)
|
||||||
,errorBundlePretty
|
,errorBundlePretty
|
||||||
|
|
||||||
|
@ -229,10 +218,7 @@ import Control.Monad.Reader
|
||||||
,ask
|
,ask
|
||||||
)
|
)
|
||||||
|
|
||||||
import qualified Data.List as DL
|
|
||||||
import qualified Data.List.NonEmpty as NE
|
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import Data.Proxy (Proxy(..))
|
|
||||||
import Data.Void (Void)
|
import Data.Void (Void)
|
||||||
|
|
||||||
import Control.Monad (guard, void)
|
import Control.Monad (guard, void)
|
||||||
|
@ -315,7 +301,7 @@ parseScalarExpr = wrapParse scalarExpr
|
||||||
|
|
||||||
data ParseError
|
data ParseError
|
||||||
= LexError L.ParseError
|
= LexError L.ParseError
|
||||||
| ParseError (ParseErrorBundle MyStream Void)
|
| ParseError (ParseErrorBundle L.MyStream Void)
|
||||||
|
|
||||||
prettyError :: ParseError -> Text
|
prettyError :: ParseError -> Text
|
||||||
prettyError (LexError e) = T.pack $ errorBundlePretty e
|
prettyError (LexError e) = T.pack $ errorBundlePretty e
|
||||||
|
@ -337,10 +323,10 @@ wrapParse :: Parser a
|
||||||
-> Text
|
-> Text
|
||||||
-> Either ParseError a
|
-> Either ParseError a
|
||||||
wrapParse parser d f p src = do
|
wrapParse parser d f p src = do
|
||||||
lx <- either (Left . LexError) Right $ L.lexSQL d f p src
|
lx <- either (Left . LexError) Right $ L.lexSQLWithPositions d f p src
|
||||||
either (Left . ParseError) Right $
|
either (Left . ParseError) Right $
|
||||||
runReader (runParserT (parser <* (eof <?> "")) (T.unpack f)
|
runReader (runParserT (parser <* (eof <?> "")) (T.unpack f)
|
||||||
$ MyStream (T.unpack src) $ filter notSpace lx) d
|
$ L.MyStream (T.unpack src) $ filter notSpace lx) d
|
||||||
where
|
where
|
||||||
notSpace = notSpace' . L.tokenVal
|
notSpace = notSpace' . L.tokenVal
|
||||||
notSpace' (L.Whitespace {}) = False
|
notSpace' (L.Whitespace {}) = False
|
||||||
|
@ -352,7 +338,7 @@ wrapParse parser d f p src = do
|
||||||
|
|
||||||
-- parsing code
|
-- parsing code
|
||||||
|
|
||||||
type Parser = ParsecT Void MyStream (Reader Dialect)
|
type Parser = ParsecT Void L.MyStream (Reader Dialect)
|
||||||
|
|
||||||
{-
|
{-
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
@ -2384,87 +2370,3 @@ queryDialect f = do
|
||||||
d <- ask
|
d <- ask
|
||||||
pure $ f d
|
pure $ f d
|
||||||
|
|
||||||
------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
-- parsec stream boilerplate
|
|
||||||
|
|
||||||
data MyStream = MyStream
|
|
||||||
{ myStreamInput :: String
|
|
||||||
, unMyStream :: [L.WithPos L.Token]
|
|
||||||
}
|
|
||||||
|
|
||||||
instance Stream MyStream where
|
|
||||||
type Token MyStream = L.WithPos L.Token
|
|
||||||
type Tokens MyStream = [L.WithPos L.Token]
|
|
||||||
|
|
||||||
tokenToChunk Proxy x = [x]
|
|
||||||
tokensToChunk Proxy xs = xs
|
|
||||||
chunkToTokens Proxy = id
|
|
||||||
chunkLength Proxy = length
|
|
||||||
chunkEmpty Proxy = null
|
|
||||||
take1_ (MyStream _ []) = Nothing
|
|
||||||
take1_ (MyStream str (t:ts)) = Just
|
|
||||||
( t
|
|
||||||
, MyStream (drop (tokensLength pxy (t NE.:|[])) str) ts
|
|
||||||
)
|
|
||||||
takeN_ n (MyStream str s)
|
|
||||||
| n <= 0 = Just ([], MyStream str s)
|
|
||||||
| null s = Nothing
|
|
||||||
| otherwise =
|
|
||||||
let (x, s') = splitAt n s
|
|
||||||
in case NE.nonEmpty x of
|
|
||||||
Nothing -> Just (x, MyStream str s')
|
|
||||||
Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s')
|
|
||||||
takeWhile_ f (MyStream str s) =
|
|
||||||
let (x, s') = DL.span f s
|
|
||||||
in case NE.nonEmpty x of
|
|
||||||
Nothing -> (x, MyStream str s')
|
|
||||||
Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s')
|
|
||||||
|
|
||||||
instance VisualStream MyStream where
|
|
||||||
showTokens Proxy = DL.intercalate " "
|
|
||||||
. NE.toList
|
|
||||||
. fmap (showMyToken . L.tokenVal)
|
|
||||||
tokensLength Proxy xs = sum (L.tokenLength <$> xs)
|
|
||||||
|
|
||||||
instance TraversableStream MyStream where
|
|
||||||
reachOffset o PosState {..} =
|
|
||||||
( Just (prefix ++ restOfLine)
|
|
||||||
, PosState
|
|
||||||
{ pstateInput = MyStream
|
|
||||||
{ myStreamInput = postStr
|
|
||||||
, unMyStream = post
|
|
||||||
}
|
|
||||||
, pstateOffset = max pstateOffset o
|
|
||||||
, pstateSourcePos = newSourcePos
|
|
||||||
, pstateTabWidth = pstateTabWidth
|
|
||||||
, pstateLinePrefix = prefix
|
|
||||||
}
|
|
||||||
)
|
|
||||||
where
|
|
||||||
prefix =
|
|
||||||
if sameLine
|
|
||||||
then pstateLinePrefix ++ preLine
|
|
||||||
else preLine
|
|
||||||
sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos
|
|
||||||
newSourcePos =
|
|
||||||
case post of
|
|
||||||
[] -> case unMyStream pstateInput of
|
|
||||||
[] -> pstateSourcePos
|
|
||||||
xs -> L.endPos (last xs)
|
|
||||||
(x:_) -> L.startPos x
|
|
||||||
(pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput)
|
|
||||||
(preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput)
|
|
||||||
preLine = reverse . takeWhile (/= '\n') . reverse $ preStr
|
|
||||||
tokensConsumed =
|
|
||||||
case NE.nonEmpty pre of
|
|
||||||
Nothing -> 0
|
|
||||||
Just nePre -> tokensLength pxy nePre
|
|
||||||
restOfLine = takeWhile (/= '\n') postStr
|
|
||||||
|
|
||||||
pxy :: Proxy MyStream
|
|
||||||
pxy = Proxy
|
|
||||||
|
|
||||||
showMyToken :: L.Token -> String
|
|
||||||
-- todo: how to do this properly?
|
|
||||||
showMyToken = T.unpack . L.prettyToken ansi2011
|
|
||||||
|
|
|
@ -82,7 +82,7 @@ bootstrapTests = Group "bootstrap tests" $
|
||||||
|
|
||||||
] ++ map (\a -> (a, [Symbol a])) (
|
] ++ map (\a -> (a, [Symbol a])) (
|
||||||
["!=", "<>", ">=", "<=", "||"]
|
["!=", "<>", ">=", "<=", "||"]
|
||||||
++ map T.singleton ("(),-+*/<>=." :: String)))
|
++ map T.singleton ("(),-+*/<>=." :: [Char])))
|
||||||
|
|
||||||
|
|
||||||
ansiLexerTable :: [(Text,[Token])]
|
ansiLexerTable :: [(Text,[Token])]
|
||||||
|
|
|
@ -110,8 +110,7 @@ itemToTest (LexFails d s) = makeLexingFailsTest d s
|
||||||
|
|
||||||
makeLexerTest :: Dialect -> Text -> [Lex.Token] -> T.TestTree
|
makeLexerTest :: Dialect -> Text -> [Lex.Token] -> T.TestTree
|
||||||
makeLexerTest d s ts = H.testCase (T.unpack s) $ do
|
makeLexerTest d s ts = H.testCase (T.unpack s) $ do
|
||||||
let lx = either (error . T.unpack . Lex.prettyError) id $ Lex.lexSQL d "" Nothing s
|
let ts1 = either (error . T.unpack . Lex.prettyError) id $ Lex.lexSQL d "" Nothing s
|
||||||
ts1 = map Lex.tokenVal lx
|
|
||||||
H.assertEqual "" ts ts1
|
H.assertEqual "" ts ts1
|
||||||
let s' = Lex.prettyTokens d $ ts1
|
let s' = Lex.prettyTokens d $ ts1
|
||||||
H.assertEqual "pretty print" s s'
|
H.assertEqual "pretty print" s s'
|
||||||
|
|
Loading…
Reference in a new issue