diff --git a/Language/SQL/SimpleSQL/Lexer.lhs b/Language/SQL/SimpleSQL/Lexer.lhs index 9f019ea..1409e1c 100644 --- a/Language/SQL/SimpleSQL/Lexer.lhs +++ b/Language/SQL/SimpleSQL/Lexer.lhs @@ -3,14 +3,15 @@ Lexer TODO: left factor to get rid of trys +add some notes on why there is a separate lexer. + > -- | This is the module contains a Lexer for SQL. > {-# LANGUAGE TupleSections #-} > module Language.SQL.SimpleSQL.Lexer -> (lexSQL -> ,Token(..) +> (Token(..) +> ,lexSQL > ,prettyToken > ,prettyTokens -> ,Position > ,ParseError(..) > ,Dialect(..)) where @@ -18,7 +19,10 @@ left factor to get rid of trys > import Text.Parsec (option,string,manyTill,anyChar > ,try,string,many1,oneOf,digit,(<|>),choice,char,eof -> ,many,runParser,lookAhead,satisfy) +> ,many,runParser,lookAhead,satisfy +> ,setPosition,getPosition +> ,setSourceColumn,setSourceLine +> ,sourceLine, sourceColumn) > import Language.SQL.SimpleSQL.Combinators > import Language.SQL.SimpleSQL.Errors > import Control.Applicative hiding ((<|>), many) @@ -26,11 +30,13 @@ left factor to get rid of trys > import Control.Monad > import Prelude hiding (takeWhile) > import Text.Parsec.String (Parser) +> import Data.Maybe > -- | Represents a lexed token > data Token -> -- | a symbol is one of the following +> -- | A symbol is one of the following +> -- > -- * multi char symbols <> <= >= != || > -- * single char symbols: * + - < > ^ / % ~ & | ? ( ) [ ] , ; ( ) > -- @@ -47,7 +53,7 @@ left factor to get rid of trys > -- | This is a dialect specific quoted identifier with the quote > -- characters explicit. The first and second fields are the -> -- starting and ending quote characters.n +> -- starting and ending quote characters. > | DQIdentifier String String String > > -- | This is a host param symbol, e.g. :param @@ -57,25 +63,23 @@ left factor to get rid of trys > | SqlString String > > -- | This is a character set string literal. The first field is -> -- the charatecter set (one of nNbBxX). +> -- the character set (one of nNbBxX, or u&, U&). > | CSSqlString String String > -> -- | a number literal (integral or otherwise), stored in original format +> -- | A number literal (integral or otherwise), stored in original format > -- unchanged > | SqlNumber String > -> -- | non-significant whitespace (space, tab, newline) (strictly speaking, -> -- it is up to the client to decide whether the whitespace is significant -> -- or not) +> -- | Whitespace, one or more of space, tab or newline. > | Whitespace String > -> -- | a commented line using --, contains every character starting with the +> -- | A commented line using --, contains every character starting with the > -- \'--\' and including the terminating newline character if there is one > -- - this will be missing if the last line in the source is a line comment > -- with no trailing newline > | LineComment String > -> -- | a block comment, \/* stuff *\/, includes the comment delimiters +> -- | A block comment, \/* stuff *\/, includes the comment delimiters > | BlockComment String > > deriving (Eq,Show) @@ -112,38 +116,29 @@ left factor to get rid of trys TODO: try to make all parsers applicative only -> type Position = (String,Int,Int) - -> addPosition :: Position -> String -> Position -> addPosition = addPosition' - -> addPosition' :: Position -> String -> Position -> addPosition' (f,l,c) [] = (f,l,c) -> addPosition' (f,l,_) ('\n':xs) = addPosition' (f,l+1,0) xs -> addPosition' (f,l,c) (_:xs) = addPosition' (f,l,c+1) xs - - - -> lexSQL :: Dialect -> Position -> String -> Either ParseError [(Position,Token)] -> lexSQL dialect pos@(fn,_,_) txt = -> either (Left . convParseError fn) Right -> $ runParser (many_p pos <* eof) () "" txt +> lexSQL :: Dialect +> -- ^ dialect of SQL to use +> -> FilePath +> -- ^ filename to use in error messages +> -> Maybe (Int,Int) +> -- ^ line number and column number of the first character +> -- in the source to use in error messages +> -> String +> -- ^ the SQL source to lex +> -> Either ParseError [((String,Int,Int),Token)] +> lexSQL dialect fn p src = +> let (l,c) = fromMaybe (1,0) p +> in either (Left . convParseError src) Right +> $ runParser (setPos (l,c) *> many (sqlToken dialect) <* eof) () fn src > where - -> many_p pos' = some_p pos' `mplus` return [] -> some_p pos' = do -> tok <- sqlToken dialect pos' -> let pos'' = advancePos dialect pos' (snd tok) -> (tok:) <$> many_p pos'' - -> advancePos :: Dialect -> Position -> Token -> Position -> advancePos dialect pos tok = -> let pt = prettyToken dialect tok -> in addPosition pos pt +> setPos (l,c) = fmap up getPosition >>= setPosition +> where up = flip setSourceColumn c . flip setSourceLine l > -- | parser for a sql token -> sqlToken :: Dialect -> Position -> Parser (Position,Token) -> sqlToken d p = +> sqlToken :: Dialect -> Parser ((String,Int,Int),Token) +> sqlToken d = do +> p' <- getPosition +> let p = ("",sourceLine p', sourceColumn p') > (p,) <$> choice [sqlString d > ,identifier d > ,hostParam d diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index cf55c47..5ad8fc2 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -262,7 +262,7 @@ converts the error return to the nice wrapper > -> Either ParseError a > wrapParse parser d f p src = do > let (l,c) = fromMaybe (1,0) p -> lx <- L.lexSQL d (f,l,c) src +> lx <- L.lexSQL d f (Just (l,c)) src > either (Left . convParseError src) Right > $ runParser (setPos p *> parser <* eof) > d f $ filter keep lx @@ -998,17 +998,12 @@ for the escape now there is a separate lexer ... > c <- escapeChar > pure $ \v -> ctor v c > where -> escapeChar = escapeIden <|> escapeSym -> escapeIden = do -> c <- identifierTok -> case c of +> escapeChar :: Parser Char +> escapeChar = (identifierTok <|> symbolTok) >>= oneOnly +> oneOnly :: String -> Parser Char +> oneOnly c = case c of > [c'] -> return c' > _ -> fail "escape char must be single char" -> escapeSym = do -> c <- symbolTok -> case c of -> [c'] -> return c' -> _ -> fail "escape char must be single char" === collate @@ -1988,9 +1983,8 @@ different parsers can be used for different dialects > type ParseState = Dialect -> type Token = (L.Position,L.Token) +> type Token = ((String,Int,Int),L.Token) -> --type Parser = Parsec String ParseState > type Parser = GenParser Token ParseState > guardDialect :: [Dialect] -> Parser () diff --git a/TODO b/TODO index a8266ed..c0f39ed 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,21 @@ + +What will make this library nice and complete: +List of all the SQL that it doesn't support +annotation, with positions coming from the parser +dml +ddl +procedural sql +dialects: reasonable support for sql server and oracle, and maybe also + postgres, mysql, teradata, redshift, sqlite, db2, sap stuff, etc. +good work on error messages +fixity code + get it right +review names of syntax +defaults handled better (use default/nothing instead of substituting + in the default) +evaluate uu parsing lib -> could at least remove need to do left + factoring, and maybe help make better error messages also +----- + work on reasonable subset of sql which is similar to the current subset and smaller than the complete 2011 target: describe the exact target set for the next release diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal index eeaf701..a29f82f 100644 --- a/simple-sql-parser.cabal +++ b/simple-sql-parser.cabal @@ -22,8 +22,8 @@ source-repository head type: git location: https://github.com/JakeWheat/simple-sql-parser.git -Flag sqlindent - Description: Build SQLIndent exe +Flag parserexe + Description: Build SimpleSqlParserTool exe Default: False library @@ -80,29 +80,18 @@ Test-Suite Tests default-language: Haskell2010 ghc-options: -Wall -threaded -executable SQLIndent - main-is: SQLIndent.lhs +executable SimpleSqlParserTool + main-is: SimpleSqlParserTool.lhs hs-source-dirs: .,tools Build-Depends: base >=4.5 && <4.9, parsec >=3.1 && <3.2, mtl >=2.1 && <2.3, - pretty >= 1.1 && < 1.2 + pretty >= 1.1 && < 1.2, + pretty-show >= 1.6 && < 1.7 other-extensions: TupleSections,DeriveDataTypeable default-language: Haskell2010 ghc-options: -Wall - if flag(sqlindent) + if flag(parserexe) buildable: True else buildable: False - -executable TestLex - main-is: TestLex.lhs - hs-source-dirs: .,tools - Build-Depends: base >=4.5 && <4.9, - parsec >=3.1 && <3.2, - mtl >=2.1 && <2.3, - pretty >= 1.1 && < 1.2 - other-extensions: TupleSections,DeriveDataTypeable - default-language: Haskell2010 - ghc-options: -Wall - buildable: False diff --git a/tools/Language/SQL/SimpleSQL/Tests.lhs b/tools/Language/SQL/SimpleSQL/Tests.lhs index d9fa97f..3e50d78 100644 --- a/tools/Language/SQL/SimpleSQL/Tests.lhs +++ b/tools/Language/SQL/SimpleSQL/Tests.lhs @@ -80,7 +80,7 @@ order on the generated documentation. > makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree > makeLexerTest d s ts = H.testCase s $ do -> let lx = either (error . show) id $ lexSQL d ("", 1, 1) s +> let lx = either (error . show) id $ lexSQL d "" Nothing s > H.assertEqual "" ts $ map snd lx > let s' = prettyTokens d $ map snd lx > H.assertEqual "pretty print" s s' diff --git a/tools/SQLIndent.lhs b/tools/SQLIndent.lhs deleted file mode 100644 index f34ed75..0000000 --- a/tools/SQLIndent.lhs +++ /dev/null @@ -1,16 +0,0 @@ - -> import System.Environment - -> import Language.SQL.SimpleSQL.Pretty -> import Language.SQL.SimpleSQL.Parser - -> main :: IO () -> main = do -> args <- getArgs -> case args of -> [f] -> do -> src <- readFile f -> either (error . peFormattedError) -> (putStrLn . prettyQueryExprs) -> $ parseQueryExprs f Nothing src -> _ -> error "please pass filename to indent" diff --git a/tools/SimpleSqlParserTool.lhs b/tools/SimpleSqlParserTool.lhs new file mode 100644 index 0000000..1ef6c36 --- /dev/null +++ b/tools/SimpleSqlParserTool.lhs @@ -0,0 +1,92 @@ + +Simple command line tool to experiment with simple-sql-parser + +Commands: + +parse: parse sql from file, stdin or from command line +lex: lex sql same +indent: parse then pretty print sql + +> {-# LANGUAGE TupleSections #-} +> import System.Environment +> import Control.Monad +> import Data.Maybe +> import System.Exit +> import Data.List + +> import Language.SQL.SimpleSQL.Pretty +> import Language.SQL.SimpleSQL.Parser +> import Language.SQL.SimpleSQL.Syntax +> import Language.SQL.SimpleSQL.Lexer + + +> main :: IO () +> main = do +> args <- getArgs +> case args of +> [] -> do +> showHelp $ Just "no command given" +> (c:as) -> do +> let cmd = lookup c commands +> maybe (showHelp (Just "command not recognised")) +> (\(_,cmd') -> cmd' as) +> cmd + +> commands :: [(String, (String,[String] -> IO ()))] +> commands = +> [("help", helpCommand) +> ,("parse", parseCommand) +> ,("lex", lexCommand) +> ,("indent", indentCommand)] + +> showHelp :: Maybe String -> IO () +> showHelp msg = do +> maybe (return ()) (\e -> putStrLn $ "Error: " ++ e) msg +> putStrLn "Usage:\n SimpleSqlParserTool command args" +> forM_ commands $ \(c, (h,_)) -> do +> putStrLn $ c ++ "\t" ++ h +> when (isJust msg) $ exitFailure + +> helpCommand :: (String,[String] -> IO ()) +> helpCommand = +> ("show help for this progam", \_ -> showHelp Nothing) + +> getInput :: [String] -> IO (FilePath,String) +> getInput as = +> case as of +> ["-"] -> error "read stdin" +> ("-c":as') -> return ("-", unwords as') +> [filename] -> (filename,) <$> readFile filename +> _ -> showHelp (Just "arguments not recognised") >> error "" + +> parseCommand :: (String,[String] -> IO ()) +> parseCommand = +> ("parse SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . prettyQueryExprs SQL2011) +> $ parseQueryExprs SQL2011 f Nothing src +> ) + +> lexCommand :: (String,[String] -> IO ()) +> lexCommand = +> ("lex SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . intercalate ",\n" . map show) +> $ lexSQL SQL2011 f Nothing src +> ) + + +> indentCommand :: (String,[String] -> IO ()) +> indentCommand = +> ("parse then pretty print SQL from file/stdin/command line (use -c to parse from command line)" +> ,\args -> do +> (f,src) <- getInput args +> either (error . peFormattedError) +> (putStrLn . prettyQueryExprs SQL2011) +> $ parseQueryExprs SQL2011 f Nothing src + +> )