From 913fce068bf3ac33111aa3a4c4592615ec903f7a Mon Sep 17 00:00:00 2001 From: Jake Wheat Date: Sat, 1 Aug 2015 12:13:53 +0300 Subject: [PATCH] small fixes work on the haddock remove the old attoparsec position stuff from the lexer change the lexer to accept position info in the same way as the parser replace sqlindent with new test exe which can parse, lex and indent --- Language/SQL/SimpleSQL/Lexer.lhs | 79 +++++++++++----------- Language/SQL/SimpleSQL/Parser.lhs | 18 ++--- TODO | 18 +++++ simple-sql-parser.cabal | 25 ++----- tools/Language/SQL/SimpleSQL/Tests.lhs | 2 +- tools/SQLIndent.lhs | 16 ----- tools/SimpleSqlParserTool.lhs | 92 ++++++++++++++++++++++++++ 7 files changed, 161 insertions(+), 89 deletions(-) delete mode 100644 tools/SQLIndent.lhs create mode 100644 tools/SimpleSqlParserTool.lhs diff --git a/Language/SQL/SimpleSQL/Lexer.lhs b/Language/SQL/SimpleSQL/Lexer.lhs index 9f019ea..1409e1c 100644 --- a/Language/SQL/SimpleSQL/Lexer.lhs +++ b/Language/SQL/SimpleSQL/Lexer.lhs @@ -3,14 +3,15 @@ Lexer TODO: left factor to get rid of trys +add some notes on why there is a separate lexer. + > -- | This is the module contains a Lexer for SQL. > {-# LANGUAGE TupleSections #-} > module Language.SQL.SimpleSQL.Lexer -> (lexSQL -> ,Token(..) +> (Token(..) +> ,lexSQL > ,prettyToken > ,prettyTokens -> ,Position > ,ParseError(..) > ,Dialect(..)) where @@ -18,7 +19,10 @@ left factor to get rid of trys > import Text.Parsec (option,string,manyTill,anyChar > ,try,string,many1,oneOf,digit,(<|>),choice,char,eof -> ,many,runParser,lookAhead,satisfy) +> ,many,runParser,lookAhead,satisfy +> ,setPosition,getPosition +> ,setSourceColumn,setSourceLine +> ,sourceLine, sourceColumn) > import Language.SQL.SimpleSQL.Combinators > import Language.SQL.SimpleSQL.Errors > import Control.Applicative hiding ((<|>), many) @@ -26,11 +30,13 @@ left factor to get rid of trys > import Control.Monad > import Prelude hiding (takeWhile) > import Text.Parsec.String (Parser) +> import Data.Maybe > -- | Represents a lexed token > data Token -> -- | a symbol is one of the following +> -- | A symbol is one of the following +> -- > -- * multi char symbols <> <= >= != || > -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( ) > -- @@ -47,7 +53,7 @@ left factor to get rid of trys > -- | This is a dialect specific quoted identifier with the quote > -- characters explicit. The first and second fields are the -> -- starting and ending quote characters.n +> -- starting and ending quote characters. > | DQIdentifier String String String > > -- | This is a host param symbol, e.g. :param @@ -57,25 +63,23 @@ left factor to get rid of trys > | SqlString String > > -- | This is a character set string literal. The first field is -> -- the charatecter set (one of nNbBxX). +> -- the character set (one of nNbBxX, or u&, U&). > | CSSqlString String String > -> -- | a number literal (integral or otherwise), stored in original format +> -- | A number literal (integral or otherwise), stored in original format > -- unchanged > | SqlNumber String > -> -- | non-significant whitespace (space, tab, newline) (strictly speaking, -> -- it is up to the client to decide whether the whitespace is significant -> -- or not) +> -- | Whitespace, one or more of space, tab or newline. > | Whitespace String > -> -- | a commented line using --, contains every character starting with the +> -- | A commented line using --, contains every character starting with the > -- \'--\' and including the terminating newline character if there is one > -- - this will be missing if the last line in the source is a line comment > -- with no trailing newline > | LineComment String > -> -- | a block comment, \/* stuff *\/, includes the comment delimiters +> -- | A block comment, \/* stuff *\/, includes the comment delimiters > | BlockComment String > > deriving (Eq,Show) @@ -112,38 +116,29 @@ left factor to get rid of trys TODO: try to make all parsers applicative only -> type Position = (String,Int,Int) - -> addPosition :: Position -> String -> Position -> addPosition = addPosition' - -> addPosition' :: Position -> String -> Position -> addPosition' (f,l,c) [] = (f,l,c) -> addPosition' (f,l,_) ('\n':xs) = addPosition' (f,l+1,0) xs -> addPosition' (f,l,c) (_:xs) = addPosition' (f,l,c+1) xs - - - -> lexSQL :: Dialect -> Position -> String -> Either ParseError [(Position,Token)] -> lexSQL dialect pos@(fn,_,_) txt = -> either (Left . convParseError fn) Right -> $ runParser (many_p pos <* eof) () "" txt +> lexSQL :: Dialect +> -- ^ dialect of SQL to use +> -> FilePath +> -- ^ filename to use in error messages +> -> Maybe (Int,Int) +> -- ^ line number and column number of the first character +> -- in the source to use in error messages +> -> String +> -- ^ the SQL source to lex +> -> Either ParseError [((String,Int,Int),Token)] +> lexSQL dialect fn p src = +> let (l,c) = fromMaybe (1,0) p +> in either (Left . convParseError src) Right +> $ runParser (setPos (l,c) *> many (sqlToken dialect) <* eof) () fn src > where - -> many_p pos' = some_p pos' `mplus` return [] -> some_p pos' = do -> tok <- sqlToken dialect pos' -> let pos'' = advancePos dialect pos' (snd tok) -> (tok:) <$> many_p pos'' - -> advancePos :: Dialect -> Position -> Token -> Position -> advancePos dialect pos tok = -> let pt = prettyToken dialect tok -> in addPosition pos pt +> setPos (l,c) = fmap up getPosition >>= setPosition +> where up = flip setSourceColumn c . flip setSourceLine l > -- | parser for a sql token -> sqlToken :: Dialect -> Position -> Parser (Position,Token) -> sqlToken d p = +> sqlToken :: Dialect -> Parser ((String,Int,Int),Token) +> sqlToken d = do +> p' <- getPosition +> let p = ("",sourceLine p', sourceColumn p') > (p,) <$> choice [sqlString d > ,identifier d > ,hostParam d diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index cf55c47..5ad8fc2 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -262,7 +262,7 @@ converts the error return to the nice wrapper > -> Either ParseError a > wrapParse parser d f p src = do > let (l,c) = fromMaybe (1,0) p -> lx <- L.lexSQL d (f,l,c) src +> lx <- L.lexSQL d f (Just (l,c)) src > either (Left . convParseError src) Right > $ runParser (setPos p *> parser <* eof) > d f $ filter keep lx @@ -998,17 +998,12 @@ for the escape now there is a separate lexer ... > c <- escapeChar > pure $ \v -> ctor v c > where -> escapeChar = escapeIden <|> escapeSym -> escapeIden = do -> c <- identifierTok -> case c of +> escapeChar :: Parser Char +> escapeChar = (identifierTok <|> symbolTok) >>= oneOnly +> oneOnly :: String -> Parser Char +> oneOnly c = case c of > [c'] -> return c' > _ -> fail "escape char must be single char" -> escapeSym = do -> c <- symbolTok -> case c of -> [c'] -> return c' -> _ -> fail "escape char must be single char" === collate @@ -1988,9 +1983,8 @@ different parsers can be used for different dialects > type ParseState = Dialect -> type Token = (L.Position,L.Token) +> type Token = ((String,Int,Int),L.Token) -> --type Parser = Parsec String ParseState > type Parser = GenParser Token ParseState > guardDialect :: [Dialect] -> Parser () diff --git a/TODO b/TODO index a8266ed..c0f39ed 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,21 @@ + +What will make this library nice and complete: +List of all the SQL that it doesn't support +annotation, with positions coming from the parser +dml +ddl +procedural sql +dialects: reasonable support for sql server and oracle, and maybe also + postgres, mysql, teradata, redshift, sqlite, db2, sap stuff, etc. +good work on error messages +fixity code + get it right +review names of syntax +defaults handled better (use default/nothing instead of substituting + in the default) +evaluate uu parsing lib -> could at least remove need to do left + factoring, and maybe help make better error messages also +----- + work on reasonable subset of sql which is similar to the current subset and smaller than the complete 2011 target: describe the exact target set for the next release diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal index eeaf701..a29f82f 100644 --- a/simple-sql-parser.cabal +++ b/simple-sql-parser.cabal @@ -22,8 +22,8 @@ source-repository head type: git location: https://github.com/JakeWheat/simple-sql-parser.git -Flag sqlindent - Description: Build SQLIndent exe +Flag parserexe + Description: Build SimpleSqlParserTool exe Default: False library @@ -80,29 +80,18 @@ Test-Suite Tests default-language: Haskell2010 ghc-options: -Wall -threaded -executable SQLIndent - main-is: SQLIndent.lhs +executable SimpleSqlParserTool + main-is: SimpleSqlParserTool.lhs hs-source-dirs: .,tools Build-Depends: base >=4.5 && <4.9, parsec >=3.1 && <3.2, mtl >=2.1 && <2.3, - pretty >= 1.1 && < 1.2 + pretty >= 1.1 && < 1.2, + pretty-show >= 1.6 && < 1.7 other-extensions: TupleSections,DeriveDataTypeable default-language: Haskell2010 ghc-options: -Wall - if flag(sqlindent) + if flag(parserexe) buildable: True else buildable: False - -executable TestLex - main-is: TestLex.lhs - hs-source-dirs: .,tools - Build-Depends: base >=4.5 && <4.9, - parsec >=3.1 && <3.2, - mtl >=2.1 && <2.3, - pretty >= 1.1 && < 1.2 - other-extensions: TupleSections,DeriveDataTypeable - default-language: Haskell2010 - ghc-options: -Wall - buildable: False diff --git a/tools/Language/SQL/SimpleSQL/Tests.lhs b/tools/Language/SQL/SimpleSQL/Tests.lhs index d9fa97f..3e50d78 100644 --- a/tools/Language/SQL/SimpleSQL/Tests.lhs +++ b/tools/Language/SQL/SimpleSQL/Tests.lhs @@ -80,7 +80,7 @@ order on the generated documentation. > makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree > makeLexerTest d s ts = H.testCase s $ do -> let lx = either (error . show) id $ lexSQL d ("", 1, 1) s +> let lx = either (error . show) id $ lexSQL d "" Nothing s > H.assertEqual "" ts $ map snd lx > let s' = prettyTokens d $ map snd lx > H.assertEqual "pretty print" s s' diff --git a/tools/SQLIndent.lhs b/tools/SQLIndent.lhs deleted file mode 100644 index f34ed75..0000000 --- a/tools/SQLIndent.lhs +++ /dev/null @@ -1,16 +0,0 @@ - -> import System.Environment - -> import Language.SQL.SimpleSQL.Pretty -> import Language.SQL.SimpleSQL.Parser - -> main :: IO () -> main = do -> args <- getArgs -> case args of -> [f] -> do -> src <- readFile f -> either (error . peFormattedError) -> (putStrLn . prettyQueryExprs) -> $ parseQueryExprs f Nothing src -> _ -> error "please pass filename to indent" diff --git a/tools/SimpleSqlParserTool.lhs b/tools/SimpleSqlParserTool.lhs new file mode 100644 index 0000000..1ef6c36 --- /dev/null +++ b/tools/SimpleSqlParserTool.lhs @@ -0,0 +1,92 @@ + +Simple command line tool to experiment with simple-sql-parser + +Commands: + +parse: parse sql from file, stdin or from command line +lex: lex sql same +indent: parse then pretty print sql + +> {-# LANGUAGE TupleSections #-} +> import System.Environment +> import Control.Monad +> import Data.Maybe +> import System.Exit +> import Data.List + +> import Language.SQL.SimpleSQL.Pretty +> import Language.SQL.SimpleSQL.Parser +> import Language.SQL.SimpleSQL.Syntax +> import Language.SQL.SimpleSQL.Lexer + + +> main :: IO () +> main = do +> args <- getArgs +> case args of +> [] -> do +> showHelp $ Just "no command given" +> (c:as) -> do +> let cmd = lookup c commands +> maybe (showHelp (Just "command not recognised")) +> (\(_,cmd') -> cmd' as) +> cmd + +> commands :: [(String, (String,[String] -> IO ()))] +> commands = +> [("help", helpCommand) +> ,("parse", parseCommand) +> ,("lex", lexCommand) +> ,("indent", indentCommand)] + +> showHelp :: Maybe String -> IO () +> showHelp msg = do +> maybe (return ()) (\e -> putStrLn $ "Error: " ++ e) msg +> putStrLn "Usage:\n SimpleSqlParserTool command args" +> forM_ commands $ \(c, (h,_)) -> do +> putStrLn $ c ++ "\t" ++ h +> when (isJust msg) $ exitFailure + +> helpCommand :: (String,[String] -> IO ()) +> helpCommand = +> ("show help for this progam", \_ -> showHelp Nothing) + +> getInput :: [String] -> IO (FilePath,String) +> getInput as = +> case as of +> ["-"] -> error "read stdin" +> ("-c":as') -> return ("-", unwords as') +> [filename] -> (filename,) <$> readFile filename +> _ -> showHelp (Just "arguments not recognised") >> error "" + +> parseCommand :: (String,[String] -> IO ()) +> parseCommand = +> ("parse SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . prettyQueryExprs SQL2011) +> $ parseQueryExprs SQL2011 f Nothing src +> ) + +> lexCommand :: (String,[String] -> IO ()) +> lexCommand = +> ("lex SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . intercalate ",\n" . map show) +> $ lexSQL SQL2011 f Nothing src +> ) + + +> indentCommand :: (String,[String] -> IO ()) +> indentCommand = +> ("parse then pretty print SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . prettyQueryExprs SQL2011) +> $ parseQueryExprs SQL2011 f Nothing src + +> )