440 lines
14 KiB
Plaintext
440 lines
14 KiB
Plaintext
|
|
:toc: right
|
|
:sectnums:
|
|
:toclevels: 10
|
|
:source-highlighter: pygments
|
|
|
|
= simple-sql-parser
|
|
|
|
== Overview
|
|
|
|
A parser for SQL in Haskell. Also includes a pretty printer which
|
|
formats output nicely. Current target is to parse most SQL:2011
|
|
queries, plus a good subset of DDL, non-query DML, transaction
|
|
management, access control and session management.
|
|
|
|
This is the documentation for version 0.7.0. Documentation for other
|
|
versions is available here:
|
|
http://jakewheat.github.io/simple-sql-parser/.
|
|
|
|
Status: covers a lot of queries already, but the public API is
|
|
probably not very stable, since adding support for all the
|
|
not-yet-supported ANSI SQL syntax, then other dialects of SQL is
|
|
likely to change the abstract syntax types considerably.
|
|
|
|
Tested with GHC 9.8.1, 9.6.4, and 9.4.8.
|
|
|
|
== Feature support
|
|
|
|
* query expressions
|
|
** select lists
|
|
** from clause
|
|
** where clause
|
|
** group by clause
|
|
** having clause
|
|
** order by clause
|
|
** offset and fetch
|
|
** set operators
|
|
** common table expressions
|
|
** wide range of scalar expressions
|
|
* DDL (ansi dialect)
|
|
** create,drop schema
|
|
** create, alter, drop table
|
|
** create, drop view
|
|
** create, alter, drop domain
|
|
** create, drop assertion
|
|
** create, alter, drop sequence
|
|
* non-query DML
|
|
** delete
|
|
** truncate
|
|
** insert
|
|
** update
|
|
* Access control
|
|
** grant, revoke - permissions and roles
|
|
** create, drop role
|
|
* Transaction management
|
|
** begin, commit, rollback, savepoints
|
|
|
|
See the link:supported_sql.html[] page for details on
|
|
the supported SQL.
|
|
|
|
Here is all the link:test_cases.html[simple-sql-parser test cases]
|
|
rendered in a webpage so you can get an idea of what it supports.
|
|
|
|
== Examples
|
|
|
|
Simple expression:
|
|
|
|
[source,sql]
|
|
----
|
|
select a + b * c
|
|
----
|
|
|
|
Parsed AST:
|
|
|
|
[source,haskell]
|
|
----
|
|
Select
|
|
{ qeSetQuantifier = SQDefault
|
|
, qeSelectList =
|
|
[ ( BinOp
|
|
(Iden [ Name Nothing "a" ])
|
|
[ Name Nothing "+" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "b" ])
|
|
[ Name Nothing "*" ]
|
|
(Iden [ Name Nothing "c" ]))
|
|
, Nothing
|
|
)
|
|
]
|
|
, qeFrom = []
|
|
, qeWhere = Nothing
|
|
, qeGroupBy = []
|
|
, qeHaving = Nothing
|
|
, qeOrderBy = []
|
|
, qeOffset = Nothing
|
|
, qeFetchFirst = Nothing
|
|
}
|
|
----
|
|
|
|
TPC-H query 21:
|
|
|
|
[source,sql]
|
|
----
|
|
select
|
|
s_name,
|
|
count(*) as numwait
|
|
from
|
|
supplier,
|
|
lineitem l1,
|
|
orders,
|
|
nation
|
|
where
|
|
s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l2
|
|
where
|
|
l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey
|
|
)
|
|
and not exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l3
|
|
where
|
|
l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate
|
|
)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'INDIA'
|
|
group by
|
|
s_name
|
|
order by
|
|
numwait desc,
|
|
s_name
|
|
fetch first 100 rows only;
|
|
----
|
|
|
|
Parsed:
|
|
|
|
[source,haskell]
|
|
----
|
|
Select
|
|
{ qeSetQuantifier = SQDefault
|
|
, qeSelectList =
|
|
[ ( Iden [ Name Nothing "s_name" ] , Nothing )
|
|
, ( App [ Name Nothing "count" ] [ Star ]
|
|
, Just (Name Nothing "numwait")
|
|
)
|
|
]
|
|
, qeFrom =
|
|
[ TRSimple [ Name Nothing "supplier" ]
|
|
, TRAlias
|
|
(TRSimple [ Name Nothing "lineitem" ])
|
|
(Alias (Name Nothing "l1") Nothing)
|
|
, TRSimple [ Name Nothing "orders" ]
|
|
, TRSimple [ Name Nothing "nation" ]
|
|
]
|
|
, qeWhere =
|
|
Just
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(Iden [ Name Nothing "s_suppkey" ])
|
|
[ Name Nothing "=" ]
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_suppkey" ]))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "o_orderkey" ])
|
|
[ Name Nothing "=" ]
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_orderkey" ])))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "o_orderstatus" ])
|
|
[ Name Nothing "=" ]
|
|
(StringLit "'" "'" "F")))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_receiptdate" ])
|
|
[ Name Nothing ">" ]
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_commitdate" ])))
|
|
[ Name Nothing "and" ]
|
|
(SubQueryExpr
|
|
SqExists
|
|
Select
|
|
{ qeSetQuantifier = SQDefault
|
|
, qeSelectList = [ ( Star , Nothing ) ]
|
|
, qeFrom =
|
|
[ TRAlias
|
|
(TRSimple [ Name Nothing "lineitem" ])
|
|
(Alias (Name Nothing "l2") Nothing)
|
|
]
|
|
, qeWhere =
|
|
Just
|
|
(BinOp
|
|
(BinOp
|
|
(Iden [ Name Nothing "l2" , Name Nothing "l_orderkey" ])
|
|
[ Name Nothing "=" ]
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_orderkey" ]))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "l2" , Name Nothing "l_suppkey" ])
|
|
[ Name Nothing "<>" ]
|
|
(Iden [ Name Nothing "l1" , Name Nothing "l_suppkey" ])))
|
|
, qeGroupBy = []
|
|
, qeHaving = Nothing
|
|
, qeOrderBy = []
|
|
, qeOffset = Nothing
|
|
, qeFetchFirst = Nothing
|
|
}))
|
|
[ Name Nothing "and" ]
|
|
(PrefixOp
|
|
[ Name Nothing "not" ]
|
|
(SubQueryExpr
|
|
SqExists
|
|
Select
|
|
{ qeSetQuantifier = SQDefault
|
|
, qeSelectList = [ ( Star , Nothing ) ]
|
|
, qeFrom =
|
|
[ TRAlias
|
|
(TRSimple [ Name Nothing "lineitem" ])
|
|
(Alias (Name Nothing "l3") Nothing)
|
|
]
|
|
, qeWhere =
|
|
Just
|
|
(BinOp
|
|
(BinOp
|
|
(BinOp
|
|
(Iden [ Name Nothing "l3" , Name Nothing "l_orderkey" ])
|
|
[ Name Nothing "=" ]
|
|
(Iden
|
|
[ Name Nothing "l1" , Name Nothing "l_orderkey" ]))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "l3" , Name Nothing "l_suppkey" ])
|
|
[ Name Nothing "<>" ]
|
|
(Iden
|
|
[ Name Nothing "l1" , Name Nothing "l_suppkey" ])))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "l3" , Name Nothing "l_receiptdate" ])
|
|
[ Name Nothing ">" ]
|
|
(Iden
|
|
[ Name Nothing "l3" , Name Nothing "l_commitdate" ])))
|
|
, qeGroupBy = []
|
|
, qeHaving = Nothing
|
|
, qeOrderBy = []
|
|
, qeOffset = Nothing
|
|
, qeFetchFirst = Nothing
|
|
})))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "s_nationkey" ])
|
|
[ Name Nothing "=" ]
|
|
(Iden [ Name Nothing "n_nationkey" ])))
|
|
[ Name Nothing "and" ]
|
|
(BinOp
|
|
(Iden [ Name Nothing "n_name" ])
|
|
[ Name Nothing "=" ]
|
|
(StringLit "'" "'" "INDIA")))
|
|
, qeGroupBy = [ SimpleGroup (Iden [ Name Nothing "s_name" ]) ]
|
|
, qeHaving = Nothing
|
|
, qeOrderBy =
|
|
[ SortSpec (Iden [ Name Nothing "numwait" ]) Desc NullsOrderDefault
|
|
, SortSpec
|
|
(Iden [ Name Nothing "s_name" ]) DirDefault NullsOrderDefault
|
|
]
|
|
, qeOffset = Nothing
|
|
, qeFetchFirst = Just (NumLit "100")
|
|
}
|
|
----
|
|
|
|
|
|
Output from the simple-sql-parser pretty printer:
|
|
|
|
[source,sql]
|
|
----
|
|
select s_name, count(*) as numwait
|
|
from supplier,
|
|
lineitem as l1,
|
|
orders,
|
|
nation
|
|
where s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (select *
|
|
from lineitem as l2
|
|
where l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey)
|
|
and not exists (select *
|
|
from lineitem as l3
|
|
where l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'INDIA'
|
|
group by s_name
|
|
order by numwait desc, s_name
|
|
fetch first 100 rows only;
|
|
----
|
|
|
|
Parsing some SQL and printing the AST:
|
|
|
|
[source,haskell]
|
|
----
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
import System.Environment
|
|
import Text.Show.Pretty
|
|
import System.IO
|
|
|
|
import Language.SQL.SimpleSQL.Parse
|
|
(parseStatements
|
|
,ParseError
|
|
,prettyError
|
|
,ansi2011)
|
|
|
|
import Language.SQL.SimpleSQL.Syntax (Statement)
|
|
import qualified Data.Text as T
|
|
|
|
main :: IO ()
|
|
main = do
|
|
args <- getArgs
|
|
case args of
|
|
[] -> do
|
|
-- read from stdin
|
|
c <- getContents
|
|
doIt c
|
|
["-s", sql] -> do
|
|
-- parse arg given
|
|
doIt sql
|
|
[f] ->
|
|
-- read file
|
|
withFile f ReadMode (\h -> do
|
|
x <- hGetContents h
|
|
doIt x)
|
|
_ -> do
|
|
putStrLn "use no arguments to stream sql from stdin, e.g.:\n\
|
|
\ cat some.sql | SimpleSQLParserExample\n\
|
|
\n\
|
|
\use -s to parse sql on command line, e.g.:\n\
|
|
\ SimpleSQLParserExample -s \"select * from t\"\n\
|
|
\use a single arg to parse a file, e.g.\n\
|
|
\ SimpleSQLParserExample some.sql"
|
|
|
|
doIt :: String -> IO ()
|
|
doIt src = do
|
|
let parsed :: Either ParseError [Statement]
|
|
parsed = parseStatements ansi2011 "" Nothing (T.pack src)
|
|
either (error . T.unpack . prettyError)
|
|
(putStrLn . ppShow)
|
|
parsed
|
|
----
|
|
|
|
|
|
|
|
== Installation
|
|
|
|
Use cabal, stack or your usual system to work with the released package.
|
|
|
|
Working with the latest development version:
|
|
|
|
----
|
|
git clone https://github.com/JakeWheat/simple-sql-parser.git
|
|
cd simple-sql-parser
|
|
cabal build
|
|
----
|
|
|
|
=== Running the tests
|
|
|
|
Get the source using 'cabal unpack' or 'git clone', then change to the
|
|
source directory.
|
|
|
|
You can run the tests using cabal:
|
|
|
|
----
|
|
cabal test
|
|
----
|
|
|
|
Or you can run them directly which gives more options. The tests use
|
|
tasty, which provides the command line options. --hide-successes
|
|
with --ansi-tricks=false so it works is a good option to use:
|
|
|
|
|
|
----
|
|
cabal run test:Tests -- --hide-successes --ansi-tricks=false
|
|
----
|
|
|
|
== Reporting bugs
|
|
|
|
Please report bugs here:
|
|
|
|
https://github.com/JakeWheat/simple-sql-parser/issues
|
|
|
|
A good bug report (or feature request) should have an example of the
|
|
SQL which is failing.
|
|
|
|
Feature requests are welcome, but please note that there is no-one
|
|
generally available to work on these, so you should either make a pull
|
|
request, or find someone willing to write the fixes and make a pull
|
|
request.
|
|
|
|
There is a related tutorial on implementing a SQL parser here:
|
|
http://jakewheat.github.io/intro_to_parsing/ (TODO: this is out of date, hopefully it will be updated at some point)
|
|
|
|
== Contributing
|
|
|
|
Contributions are welcome, there are some notes on these pages: link:contributing.html[], link:release_checklist.html[].
|
|
|
|
== Links
|
|
|
|
* Haddock: link:haddock/index.html[]
|
|
* Supported SQL: link:supported_sql.html[]
|
|
* Test cases: link:test_cases.html[simple-sql-parser test cases]
|
|
* Homepage: http://jakewheat.github.io/simple-sql-parser/latest
|
|
* Hackage: http://hackage.haskell.org/package/simple-sql-parser
|
|
* Repository: https://github.com/JakeWheat/simple-sql-parser
|
|
* Bug tracker: https://github.com/JakeWheat/simple-sql-parser/issues
|
|
* Changes: https://github.com/JakeWheat/simple-sql-parser/blob/master/changelog
|
|
* Other versions: http://jakewheat.github.io/simple-sql-parser/
|
|
* Contact: +++jakewheat@tutanota.com+++
|
|
|
|
The simple-sql-parser is a lot less simple than it used to be. If you
|
|
just need to parse much simpler SQL than this, or want to start with a
|
|
simpler parser and modify it slightly, you could also look at the
|
|
basic query parser in the intro_to_parsing project, the code is here:
|
|
link:https://github.com/JakeWheat/intro_to_parsing/blob/master/SimpleSQLQueryParser0.lhs[SimpleSQLQueryParser].
|