From 913fce068bf3ac33111aa3a4c4592615ec903f7a Mon Sep 17 00:00:00 2001
From: Jake Wheat <jakewheatmail@gmail.com>
Date: Sat, 1 Aug 2015 12:13:53 +0300
Subject: [PATCH] small fixes

work on the haddock
remove the old attoparsec position stuff from the lexer
change the lexer to accept position info in the same way as the parser
replace sqlindent with new test exe which can parse, lex and indent
---
 Language/SQL/SimpleSQL/Lexer.lhs       | 79 +++++++++++-----------
 Language/SQL/SimpleSQL/Parser.lhs      | 18 ++---
 TODO                                   | 18 +++++
 simple-sql-parser.cabal                | 25 ++-----
 tools/Language/SQL/SimpleSQL/Tests.lhs |  2 +-
 tools/SQLIndent.lhs                    | 16 -----
 tools/SimpleSqlParserTool.lhs          | 92 ++++++++++++++++++++++++++
 7 files changed, 161 insertions(+), 89 deletions(-)
 delete mode 100644 tools/SQLIndent.lhs
 create mode 100644 tools/SimpleSqlParserTool.lhs

diff --git a/Language/SQL/SimpleSQL/Lexer.lhs b/Language/SQL/SimpleSQL/Lexer.lhs
index 9f019ea..1409e1c 100644
--- a/Language/SQL/SimpleSQL/Lexer.lhs
+++ b/Language/SQL/SimpleSQL/Lexer.lhs
@@ -3,14 +3,15 @@ Lexer TODO:
 
 left factor to get rid of trys
 
+add some notes on why there is a separate lexer.
+
 > -- | This is the module contains a Lexer for SQL.
 > {-# LANGUAGE TupleSections #-}
 > module Language.SQL.SimpleSQL.Lexer
->     (lexSQL
->     ,Token(..)
+>     (Token(..)
+>     ,lexSQL
 >     ,prettyToken
 >     ,prettyTokens
->     ,Position
 >     ,ParseError(..)
 >     ,Dialect(..)) where
 
@@ -18,7 +19,10 @@ left factor to get rid of trys
 
 > import Text.Parsec (option,string,manyTill,anyChar
 >                    ,try,string,many1,oneOf,digit,(<|>),choice,char,eof
->                    ,many,runParser,lookAhead,satisfy)
+>                    ,many,runParser,lookAhead,satisfy
+>                    ,setPosition,getPosition
+>                    ,setSourceColumn,setSourceLine
+>                    ,sourceLine, sourceColumn)
 > import Language.SQL.SimpleSQL.Combinators
 > import Language.SQL.SimpleSQL.Errors
 > import Control.Applicative hiding ((<|>), many)
@@ -26,11 +30,13 @@ left factor to get rid of trys
 > import Control.Monad
 > import Prelude hiding (takeWhile)
 > import Text.Parsec.String (Parser)
+> import Data.Maybe
 
 
 > -- | Represents a lexed token
 > data Token
->     -- | a symbol is one of the following
+>     -- | A symbol is one of the following
+>     --
 >     -- * multi char symbols <> <= >= != ||
 >     -- * single char symbols: * + -  < >  ^ / %  ~ & | ? ( ) [ ] , ; ( )
 >     --
@@ -47,7 +53,7 @@ left factor to get rid of trys
 
 >     -- | This is a dialect specific quoted identifier with the quote
 >     -- characters explicit. The first and second fields are the
->     -- starting and ending quote characters.n
+>     -- starting and ending quote characters.
 >     | DQIdentifier String String String
 >
 >     -- | This is a host param symbol, e.g. :param
@@ -57,25 +63,23 @@ left factor to get rid of trys
 >     | SqlString String
 >
 >     -- | This is a character set string literal. The first field is
->     -- the charatecter set (one of nNbBxX).
+>     -- the character set (one of nNbBxX, or u&, U&).
 >     | CSSqlString String String
 >
->     -- | a number literal (integral or otherwise), stored in original format
+>     -- | A number literal (integral or otherwise), stored in original format
 >     -- unchanged
 >     | SqlNumber String
 >
->     -- | non-significant whitespace (space, tab, newline) (strictly speaking,
->     -- it is up to the client to decide whether the whitespace is significant
->     -- or not)
+>     -- | Whitespace, one or more of space, tab or newline.
 >     | Whitespace String
 >
->     -- | a commented line using --, contains every character starting with the
+>     -- | A commented line using --, contains every character starting with the
 >     -- \'--\' and including the terminating newline character if there is one
 >     -- - this will be missing if the last line in the source is a line comment
 >     -- with no trailing newline
 >     | LineComment String
 >
->     -- | a block comment, \/* stuff *\/, includes the comment delimiters
+>     -- | A block comment, \/* stuff *\/, includes the comment delimiters
 >     | BlockComment String
 >
 >       deriving (Eq,Show)
@@ -112,38 +116,29 @@ left factor to get rid of trys
 
 TODO: try to make all parsers applicative only
 
-> type Position = (String,Int,Int)
-
-> addPosition :: Position -> String -> Position
-> addPosition = addPosition'
-
-> addPosition' :: Position -> String -> Position
-> addPosition' (f,l,c) [] = (f,l,c)
-> addPosition' (f,l,_) ('\n':xs) = addPosition' (f,l+1,0) xs
-> addPosition' (f,l,c) (_:xs) = addPosition' (f,l,c+1) xs
-
-
-
-> lexSQL :: Dialect -> Position -> String -> Either ParseError [(Position,Token)]
-> lexSQL dialect pos@(fn,_,_) txt =
->     either (Left . convParseError fn) Right
->     $ runParser (many_p pos <* eof) () "" txt
+> lexSQL :: Dialect
+>                   -- ^ dialect of SQL to use
+>                -> FilePath
+>                   -- ^ filename to use in error messages
+>                -> Maybe (Int,Int)
+>                   -- ^ line number and column number of the first character
+>                   -- in the source to use in error messages
+>                -> String
+>                   -- ^ the SQL source to lex
+>                -> Either ParseError [((String,Int,Int),Token)]
+> lexSQL dialect fn p src =
+>     let (l,c) = fromMaybe (1,0) p
+>     in either (Left . convParseError src) Right
+>        $ runParser (setPos (l,c) *> many (sqlToken dialect) <* eof) () fn src
 >   where
-
->      many_p pos' = some_p pos' `mplus` return []
->      some_p pos' = do
->        tok <- sqlToken dialect pos'
->        let pos'' = advancePos dialect pos' (snd tok)
->        (tok:) <$> many_p pos''
-
-> advancePos :: Dialect -> Position -> Token -> Position
-> advancePos dialect pos tok =
->     let pt = prettyToken dialect tok
->     in addPosition pos pt
+>     setPos (l,c) = fmap up getPosition >>= setPosition
+>        where up = flip setSourceColumn c . flip setSourceLine l
 
 > -- | parser for a sql token
-> sqlToken :: Dialect -> Position -> Parser (Position,Token)
-> sqlToken d p =
+> sqlToken :: Dialect -> Parser ((String,Int,Int),Token)
+> sqlToken d = do
+>     p' <- getPosition
+>     let p = ("",sourceLine p', sourceColumn p')
 >     (p,) <$> choice [sqlString d
 >                     ,identifier d
 >                     ,hostParam d
diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs
index cf55c47..5ad8fc2 100644
--- a/Language/SQL/SimpleSQL/Parser.lhs
+++ b/Language/SQL/SimpleSQL/Parser.lhs
@@ -262,7 +262,7 @@ converts the error return to the nice wrapper
 >           -> Either ParseError a
 > wrapParse parser d f p src = do
 >     let (l,c) = fromMaybe (1,0) p
->     lx <- L.lexSQL d (f,l,c) src
+>     lx <- L.lexSQL d f (Just (l,c)) src
 >     either (Left . convParseError src) Right
 >       $ runParser (setPos p *> parser <* eof)
 >                   d f $ filter keep lx
@@ -998,17 +998,12 @@ for the escape now there is a separate lexer ...
 >     c <- escapeChar
 >     pure $ \v -> ctor v c
 >   where
->     escapeChar = escapeIden <|> escapeSym
->     escapeIden = do
->                  c <- identifierTok
->                  case c of
+>     escapeChar :: Parser Char
+>     escapeChar = (identifierTok <|> symbolTok) >>= oneOnly
+>     oneOnly :: String -> Parser Char
+>     oneOnly c = case c of
 >                    [c'] -> return c'
 >                    _ -> fail "escape char must be single char"
->     escapeSym = do
->                c <- symbolTok
->                case c of
->                  [c'] -> return c'
->                  _ -> fail "escape char must be single char"
 
 === collate
 
@@ -1988,9 +1983,8 @@ different parsers can be used for different dialects
 
 > type ParseState = Dialect
 
-> type Token = (L.Position,L.Token)
+> type Token = ((String,Int,Int),L.Token)
 
-> --type Parser = Parsec String ParseState
 > type Parser = GenParser Token ParseState
 
 > guardDialect :: [Dialect] -> Parser ()
diff --git a/TODO b/TODO
index a8266ed..c0f39ed 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,21 @@
+
+What will make this library nice and complete:
+List of all the SQL that it doesn't support
+annotation, with positions coming from the parser
+dml
+ddl
+procedural sql
+dialects: reasonable support for sql server and oracle, and maybe also
+   postgres, mysql, teradata, redshift, sqlite, db2, sap stuff, etc.
+good work on error messages
+fixity code + get it right
+review names of syntax
+defaults handled better (use default/nothing instead of substituting
+   in the default)
+evaluate uu parsing lib -> could at least remove need to do left
+   factoring, and maybe help make better error messages also
+-----
+
 work on reasonable subset of sql which is similar to the current
    subset and smaller than the complete 2011 target: describe the
    exact target set for the next release
diff --git a/simple-sql-parser.cabal b/simple-sql-parser.cabal
index eeaf701..a29f82f 100644
--- a/simple-sql-parser.cabal
+++ b/simple-sql-parser.cabal
@@ -22,8 +22,8 @@ source-repository head
   type:                git
   location:            https://github.com/JakeWheat/simple-sql-parser.git
 
-Flag sqlindent
-  Description: Build SQLIndent exe
+Flag parserexe
+  Description: Build SimpleSqlParserTool exe
   Default:     False
 
 library
@@ -80,29 +80,18 @@ Test-Suite Tests
   default-language:    Haskell2010
   ghc-options:         -Wall -threaded
 
-executable SQLIndent
-  main-is:             SQLIndent.lhs
+executable SimpleSqlParserTool
+  main-is:             SimpleSqlParserTool.lhs
   hs-source-dirs:      .,tools
   Build-Depends:       base >=4.5 && <4.9,
                        parsec >=3.1 && <3.2,
                        mtl >=2.1 && <2.3,
-                       pretty >= 1.1 && < 1.2
+                       pretty >= 1.1 && < 1.2,
+                       pretty-show >= 1.6 && < 1.7
   other-extensions:    TupleSections,DeriveDataTypeable
   default-language:    Haskell2010
   ghc-options:         -Wall
-  if flag(sqlindent)
+  if flag(parserexe)
     buildable:         True
   else
     buildable:         False
-
-executable TestLex
-  main-is:             TestLex.lhs
-  hs-source-dirs:      .,tools
-  Build-Depends:       base >=4.5 && <4.9,
-                       parsec >=3.1 && <3.2,
-                       mtl >=2.1 && <2.3,
-                       pretty >= 1.1 && < 1.2
-  other-extensions:    TupleSections,DeriveDataTypeable
-  default-language:    Haskell2010
-  ghc-options:         -Wall
-  buildable:           False
diff --git a/tools/Language/SQL/SimpleSQL/Tests.lhs b/tools/Language/SQL/SimpleSQL/Tests.lhs
index d9fa97f..3e50d78 100644
--- a/tools/Language/SQL/SimpleSQL/Tests.lhs
+++ b/tools/Language/SQL/SimpleSQL/Tests.lhs
@@ -80,7 +80,7 @@ order on the generated documentation.
 
 > makeLexerTest :: Dialect -> String -> [Token] -> T.TestTree
 > makeLexerTest d s ts = H.testCase s $ do
->     let lx = either (error . show) id $ lexSQL d ("", 1, 1) s
+>     let lx = either (error . show) id $ lexSQL d "" Nothing s
 >     H.assertEqual "" ts $ map snd lx
 >     let s' = prettyTokens d $ map snd lx
 >     H.assertEqual "pretty print" s s'
diff --git a/tools/SQLIndent.lhs b/tools/SQLIndent.lhs
deleted file mode 100644
index f34ed75..0000000
--- a/tools/SQLIndent.lhs
+++ /dev/null
@@ -1,16 +0,0 @@
-
-> import System.Environment
-
-> import Language.SQL.SimpleSQL.Pretty
-> import Language.SQL.SimpleSQL.Parser
-
-> main :: IO ()
-> main = do
->     args <- getArgs
->     case args of
->       [f] -> do
->              src <- readFile f
->              either (error . peFormattedError)
->                     (putStrLn . prettyQueryExprs)
->                     $ parseQueryExprs f Nothing src
->       _ -> error "please pass filename to indent"
diff --git a/tools/SimpleSqlParserTool.lhs b/tools/SimpleSqlParserTool.lhs
new file mode 100644
index 0000000..1ef6c36
--- /dev/null
+++ b/tools/SimpleSqlParserTool.lhs
@@ -0,0 +1,92 @@
+
+Simple command line tool to experiment with simple-sql-parser
+
+Commands:
+
+parse: parse sql from file, stdin or from command line
+lex: lex sql same
+indent: parse then pretty print sql
+
+> {-# LANGUAGE TupleSections #-}
+> import System.Environment
+> import Control.Monad
+> import Data.Maybe
+> import System.Exit
+> import Data.List
+
+> import Language.SQL.SimpleSQL.Pretty
+> import Language.SQL.SimpleSQL.Parser
+> import Language.SQL.SimpleSQL.Syntax
+> import Language.SQL.SimpleSQL.Lexer
+
+
+> main :: IO ()
+> main = do
+>     args <- getArgs
+>     case args of
+>         [] -> do
+>               showHelp $ Just "no command given"
+>         (c:as) -> do
+>              let cmd = lookup c commands
+>              maybe (showHelp (Just "command not recognised"))
+>                    (\(_,cmd') -> cmd' as)
+>                    cmd
+
+> commands :: [(String, (String,[String] -> IO ()))]
+> commands =
+>     [("help", helpCommand)
+>     ,("parse", parseCommand)
+>     ,("lex", lexCommand)
+>     ,("indent", indentCommand)]
+
+> showHelp :: Maybe String -> IO ()
+> showHelp msg = do
+>           maybe (return ()) (\e -> putStrLn $ "Error: " ++ e) msg
+>           putStrLn "Usage:\n SimpleSqlParserTool command args"
+>           forM_ commands $ \(c, (h,_)) -> do
+>                putStrLn $ c ++ "\t" ++ h
+>           when (isJust msg) $ exitFailure
+
+> helpCommand :: (String,[String] -> IO ())
+> helpCommand =
+>     ("show help for this progam", \_ -> showHelp Nothing)
+
+> getInput :: [String] -> IO (FilePath,String)
+> getInput as =
+>     case as of
+>       ["-"] -> error "read stdin"
+>       ("-c":as') -> return ("-", unwords as')
+>       [filename] -> (filename,) <$> readFile filename
+>       _ -> showHelp (Just "arguments not recognised") >> error ""
+
+> parseCommand :: (String,[String] -> IO ())
+> parseCommand =
+>   ("parse SQL from file/stdin/command line (use -c to parse from command line)"
+>   ,\args -> do
+>       (f,src) <- getInput args
+>       either (error . peFormattedError)
+>           (putStrLn . prettyQueryExprs SQL2011)
+>           $ parseQueryExprs SQL2011 f Nothing src
+>   )
+
+> lexCommand :: (String,[String] -> IO ())
+> lexCommand =
+>   ("lex SQL from file/stdin/command line (use -c to parse from command line)"
+>   ,\args -> do
+>       (f,src) <- getInput args
+>       either (error . peFormattedError)
+>              (putStrLn . intercalate ",\n" . map show)
+>              $ lexSQL SQL2011 f Nothing src
+>   )
+
+
+> indentCommand :: (String,[String] -> IO ())
+> indentCommand =
+>   ("parse then pretty print SQL from file/stdin/command line (use -c to parse from command line)"
+>   ,\args -> do
+>       (f,src) <- getInput args
+>       either (error . peFormattedError)
+>           (putStrLn . prettyQueryExprs SQL2011)
+>           $ parseQueryExprs SQL2011 f Nothing src
+
+>   )