checkpoint during parser conversion to megaparsec

2024-01-10 07:40:24 +00:00 · 2024-01-10 07:40:24 +00:00 · ab687318fb
commit ab687318fb
parent 9396aa8cba
31 changed files with 633 additions and 1186 deletions
--- a/Language/SQL/SimpleSQL/Dialect.hs
+++ b/Language/SQL/SimpleSQL/Dialect.hs
@ -3,6 +3,7 @@
 -- Data types to represent different dialect options

 {-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
 module Language.SQL.SimpleSQL.Dialect
    (Dialect(..)
    ,ansi2011
@ -12,6 +13,7 @@ module Language.SQL.SimpleSQL.Dialect
    ,sqlserver
    ) where

+import Data.Text (Text)
 import Data.Data

 -- | Used to set the dialect used for parsing and pretty printing,
@ -55,14 +57,14 @@ import Data.Data

 data Dialect = Dialect
    { -- | reserved keywords
-     diKeywords :: [String]
+     diKeywords :: [Text]
      -- | keywords with identifier exception
-    ,diIdentifierKeywords :: [String]
+    ,diIdentifierKeywords :: [Text]
      -- | keywords with app exception
-    ,diAppKeywords :: [String]
+    ,diAppKeywords :: [Text]
     -- | keywords with type exception plus all the type names which
     -- are multiple words
-    ,diSpecialTypeNames :: [String]
+    ,diSpecialTypeNames :: [Text]
     -- | allow ansi fetch first syntax
    ,diFetchFirst :: Bool
     -- | allow limit keyword (mysql, postgres,
@ -179,7 +181,7 @@ quoted. If you want to match one of these dialects exactly with this
 parser, I think it will be a lot of work.
 -}

-ansi2011ReservedKeywords :: [String]
+ansi2011ReservedKeywords :: [Text]
 ansi2011ReservedKeywords =
    [--"abs" -- function
     "all" -- keyword only?
@ -508,7 +510,7 @@ ansi2011ReservedKeywords =
    ]


-ansi2011TypeNames :: [String]
+ansi2011TypeNames :: [Text]
 ansi2011TypeNames =
    ["double precision"
    ,"character varying"
--- a/Language/SQL/SimpleSQL/Errors.hs
+++ b/Language/SQL/SimpleSQL/Errors.hs
@ -1,53 +0,0 @@
-
-- | helpers to work with parsec errors more nicely
-module Language.SQL.SimpleSQL.Errors
-    (ParseError(..)
-    --,formatError
-    ,convParseError
-    ) where
-
-import Text.Parsec (sourceColumn,sourceLine,sourceName,errorPos)
-import qualified Text.Parsec as P (ParseError)
-
-- | Type to represent parse errors.
-data ParseError = ParseError
-                  {peErrorString :: String
-                   -- ^ contains the error message
-                  ,peFilename :: FilePath
-                   -- ^ filename location for the error
-                  ,pePosition :: (Int,Int)
-                   -- ^ line number and column number location for the error
-                  ,peFormattedError :: String
-                   -- ^ formatted error with the position, error
-                   -- message and source context
-                  } deriving (Eq,Show)
-
-convParseError :: String -> P.ParseError -> ParseError
-convParseError src e =
-    ParseError
-    {peErrorString = show e
-    ,peFilename = sourceName p
-    ,pePosition = (sourceLine p, sourceColumn p)
-    ,peFormattedError = formatError src e}
-  where
-    p = errorPos e
-
-{-
-format the error more nicely: emacs format for positioning, plus
-context
-}
-
-formatError :: String -> P.ParseError -> String
-formatError src e =
-    sourceName p ++ ":" ++ show (sourceLine p)
-    ++ ":" ++ show (sourceColumn p) ++ ":"
-    ++ context
-    ++ show e
-  where
-    context =
-        let lns = take 1 $ drop (sourceLine p - 1) $ lines src
-        in case lns of
-             [x] -> "\n" ++ x ++ "\n"
-                    ++ replicate (sourceColumn p - 1) ' ' ++ "^\n"
-             _ -> ""
-    p = errorPos e
--- a/Language/SQL/SimpleSQL/Lex.hs
+++ b/Language/SQL/SimpleSQL/Lex.hs
@ -155,7 +155,7 @@ data Token
    | LineComment Text
    -- | A block comment, \/* stuff *\/, includes the comment delimiters
    | BlockComment Text
-      deriving (Eq,Show)
+      deriving (Eq,Show,Ord)

 ------------------------------------------------------------------------------

--- a/Language/SQL/SimpleSQL/Parse.hs
+++ b/Language/SQL/SimpleSQL/Parse.hs
--- a/Language/SQL/SimpleSQL/Pretty.hs
+++ b/Language/SQL/SimpleSQL/Pretty.hs
@ -15,6 +15,9 @@ TODO: there should be more comments in this file, especially the bits
 which have been changed to try to improve the layout of the output.
 -}

+import Prelude hiding (show)
+import qualified Prelude as P
+
 import Prettyprinter (Doc
                     ,parens
                     ,nest
@ -31,24 +34,24 @@ import Prettyprinter (Doc
                     )
 import qualified Prettyprinter as P

-import Prettyprinter.Render.Text (renderLazy)
+import Prettyprinter.Render.Text (renderStrict)

 import Data.Maybe (maybeToList, catMaybes)
 import Data.List (intercalate)

-import qualified Data.Text.Lazy as L
 import qualified Data.Text as T
+import Data.Text (Text)

 import Language.SQL.SimpleSQL.Syntax
 import Language.SQL.SimpleSQL.Dialect


 -- | Convert a query expr ast to concrete syntax.
-prettyQueryExpr :: Dialect -> QueryExpr -> String
+prettyQueryExpr :: Dialect -> QueryExpr -> Text
 prettyQueryExpr d = render . queryExpr d

 -- | Convert a value expr ast to concrete syntax.
-prettyScalarExpr :: Dialect -> ScalarExpr -> String
+prettyScalarExpr :: Dialect -> ScalarExpr -> Text
 prettyScalarExpr d = render . scalarExpr d

 -- | A terminating semicolon.
@ -56,20 +59,20 @@ terminator :: Doc a
 terminator = pretty ";\n"

 -- | Convert a statement ast to concrete syntax.
-prettyStatement :: Dialect -> Statement -> String
+prettyStatement :: Dialect -> Statement -> Text
 prettyStatement _ EmptyStatement = render terminator
 prettyStatement d s = render (statement d s)

 -- | Convert a list of statements to concrete syntax. A semicolon
 -- is inserted after each statement.
-prettyStatements :: Dialect -> [Statement] -> String
+prettyStatements :: Dialect -> [Statement] -> Text
 prettyStatements d = render . vsep . map prettyStatementWithSemicolon
  where
    prettyStatementWithSemicolon :: Statement -> Doc a
    prettyStatementWithSemicolon s = statement d s <> terminator

-render :: Doc a -> String -- L.Text
-render = L.unpack . renderLazy . layoutPretty defaultLayoutOptions
+render :: Doc a -> Text
+render = renderStrict . layoutPretty defaultLayoutOptions

 -- = scalar expressions

@ -88,7 +91,7 @@ scalarExpr _ (IntervalLit s v f t) =
 scalarExpr _ (Iden i) = names i
 scalarExpr _ Star = pretty "*"
 scalarExpr _ Parameter = pretty "?"
-scalarExpr _ (PositionalArg n) = pretty $ "$" ++ show n
+scalarExpr _ (PositionalArg n) = pretty $ T.cons '$' $ show n
 scalarExpr _ (HostParameter p i) =
    pretty p
    <+> me (\i' -> pretty "indicator" <+> pretty i') i
@ -140,7 +143,7 @@ scalarExpr dia (SpecialOp nm [a,b,c]) | nm `elem` [[Name Nothing "between"]
                                                 ,[Name Nothing "not between"]] =
  sep [scalarExpr dia a
      ,names nm <+> scalarExpr dia b
-      ,nest (length (unnames nm) + 1) $ pretty "and" <+> scalarExpr dia c]
+      ,nest (T.length (unnames nm) + 1) $ pretty "and" <+> scalarExpr dia c]

 scalarExpr d (SpecialOp [Name Nothing "rowctor"] as) =
    parens $ commaSep $ map (scalarExpr d) as
@ -164,7 +167,7 @@ scalarExpr d e@(BinOp _ op _) | op `elem` [[Name Nothing "and"]
                       : map ((names op <+>) . scalarExpr d) es)
      [] -> mempty -- shouldn't be possible
  where
-    ands (BinOp a op' b) | op == op' = ands a ++ ands b
+    ands (BinOp a op' b) | op == op' = ands a <> ands b
    ands x = [x]
 -- special case for . we don't use whitespace
 scalarExpr d (BinOp e0 [Name Nothing "."] e1) =
@ -174,9 +177,9 @@ scalarExpr d (BinOp e0 f e1) =

 scalarExpr dia (Case t ws els) =
    sep $ [pretty "case" <+> me (scalarExpr dia) t]
-          ++ map w ws
-          ++ maybeToList (fmap e els)
-          ++ [pretty "end"]
+          <> map w ws
+          <> maybeToList (fmap e els)
+          <> [pretty "end"]
  where
    w (t0,t1) =
      pretty "when" <+> nest 5 (commaSep $ map (scalarExpr dia) t0)
@ -261,7 +264,7 @@ scalarExpr _ (NextValueFor ns) =
    pretty "next value for" <+> names ns

 scalarExpr d (VEComment cmt v) =
-    vsep $ map comment cmt ++ [scalarExpr d v]
+    vsep $ map comment cmt <> [scalarExpr d v]

 scalarExpr _ (OdbcLiteral t s) =
    pretty "{" <> lt t <+> squotes (pretty s) <> pretty "}"
@ -278,13 +281,13 @@ scalarExpr d (Convert t e Nothing) =
 scalarExpr d (Convert t e (Just i)) =
    pretty "convert(" <> typeName t <> pretty "," <+> scalarExpr d e <> pretty "," <+> pretty (show i) <> pretty ")"

-unname :: Name -> String
+unname :: Name -> Text
 unname (Name Nothing n) = n
 unname (Name (Just (s,e)) n) =
-    s ++ n ++ e
+    s <> n <> e

-unnames :: [Name] -> String
-unnames ns = intercalate "." $ map unname ns
+unnames :: [Name] -> Text
+unnames ns = T.intercalate "." $ map unname ns


 name :: Name -> Doc a
@ -404,7 +407,7 @@ queryExpr d (Values vs) =
    <+> nest 7 (commaSep (map (parens . commaSep . map (scalarExpr d)) vs))
 queryExpr _ (Table t) = pretty "table" <+> names t
 queryExpr d (QEComment cmt v) =
-    vsep $ map comment cmt ++ [queryExpr d v]
+    vsep $ map comment cmt <> [queryExpr d v]


 alias :: Alias -> Doc a
@ -450,10 +453,10 @@ from d ts =
        pretty "using" <+> parens (commaSep $ map name es)
    joinCond Nothing = mempty

-maybeScalarExpr :: Dialect -> String -> Maybe ScalarExpr -> Doc a
+maybeScalarExpr :: Dialect -> Text -> Maybe ScalarExpr -> Doc a
 maybeScalarExpr d k = me
      (\e -> sep [pretty k
-                 ,nest (length k + 1) $ scalarExpr d e])
+                 ,nest (T.length k + 1) $ scalarExpr d e])

 grpBy :: Dialect -> [GroupingExpr] -> Doc a
 grpBy _ [] = mempty
@ -725,7 +728,7 @@ columnDef d (ColumnDef n t mdef cons) =
    pcon ColNullableConstraint = texts ["null"]
    pcon ColUniqueConstraint = pretty "unique"
    pcon (ColPrimaryKeyConstraint autoincrement) = 
-      texts $ ["primary","key"] ++ ["autoincrement"|autoincrement]
+      texts $ ["primary","key"] <> ["autoincrement"|autoincrement]
    --pcon ColPrimaryKeyConstraint = texts ["primary","key"]
    pcon (ColCheckConstraint v) = pretty "check" <+> parens (scalarExpr d v)
    pcon (ColReferencesConstraint tb c m u del) =
@ -757,7 +760,7 @@ refMatch m = case m of
                     MatchPartial -> texts ["match","partial"]
                     MatchSimple -> texts ["match", "simple"]

-refAct :: String -> ReferentialAction -> Doc a
+refAct :: Text -> ReferentialAction -> Doc a
 refAct t a = case a of
                     DefaultReferentialAction -> mempty
                     RefCascade -> texts ["on", t, "cascade"]
@ -863,11 +866,14 @@ me = maybe mempty
 comment :: Comment -> Doc a
 comment (BlockComment str) = pretty "/*" <+> pretty str <+> pretty "*/"

-texts :: [String] -> Doc a
+texts :: [Text] -> Doc a
 texts ts = sep $ map pretty ts

 -- regular pretty completely defeats the type checker when you want
 -- to change the ast and get type errors, instead it just produces
 -- incorrect code.
-pretty :: String -> Doc a
-pretty = P.pretty . T.pack
+pretty :: Text -> Doc a
+pretty = P.pretty
+
+show :: Show a => a -> Text
+show = T.pack . P.show
--- a/Language/SQL/SimpleSQL/Syntax.hs
+++ b/Language/SQL/SimpleSQL/Syntax.hs
@ -62,6 +62,8 @@ module Language.SQL.SimpleSQL.Syntax
    ,Comment(..)
    ) where

+import Data.Text (Text)
+
 import Data.Data

 -- | Represents a value expression. This is used for the expressions
@ -82,21 +84,21 @@ data ScalarExpr
      -- * 1e5
      --
      -- * 12.34e-6
-      NumLit String
+      NumLit Text
      -- | string literal, with the start and end quote
-      -- e.g. 'test' -> StringLit "'" "'" "test"
-    | StringLit String String String
+      -- e.g. 'test' -> TextLit "'" "'" "test"
+    | StringLit Text Text Text
      -- | text of interval literal, units of interval precision,
      -- e.g. interval 3 days (3)
    | IntervalLit
      {ilSign :: Maybe Sign -- ^ if + or - used
-      ,ilLiteral :: String -- ^ literal text
+      ,ilLiteral :: Text -- ^ literal text
      ,ilFrom :: IntervalTypeField
      ,ilTo :: Maybe IntervalTypeField
      }

      -- | prefix 'typed literal', e.g. int '42'
-    | TypedLit TypeName String
+    | TypedLit TypeName Text

      -- | identifier with parts separated by dots
    | Iden [Name]
@ -105,9 +107,9 @@ data ScalarExpr

    | Parameter -- ^ Represents a ? in a parameterized query
    | PositionalArg Int -- ^ Represents an e.g. $1 in a parameterized query
-    | HostParameter String (Maybe String) -- ^ represents a host
+    | HostParameter Text (Maybe Text) -- ^ represents a host
                                          -- parameter, e.g. :a. The
-                                          -- Maybe String is for the
+                                          -- Maybe Text is for the
                                          -- indicator, e.g. :var
                                          -- indicator :nl

@ -163,7 +165,7 @@ data ScalarExpr
      -- of commas. The maybe is for the first unnamed argument
      -- if it is present, and the list is for the keyword argument
      -- pairs.
-    | SpecialOpK [Name] (Maybe ScalarExpr) [(String,ScalarExpr)]
+    | SpecialOpK [Name] (Maybe ScalarExpr) [(Text,ScalarExpr)]

      -- | cast(a as typename)
    | Cast ScalarExpr TypeName
@ -215,7 +217,7 @@ in other places
    | MultisetQueryCtor QueryExpr
    | NextValueFor [Name]
    | VEComment [Comment] ScalarExpr
-    | OdbcLiteral OdbcLiteralType String
+    | OdbcLiteral OdbcLiteralType Text
      -- ^ an odbc literal e.g. {d '2000-01-01'}
    | OdbcFunc ScalarExpr
      -- ^ an odbc function call e.g. {fn CHARACTER_LENGTH('test')}
@ -228,7 +230,7 @@ in other places
 -- * "test" -> Name (Just "\"","\"") "test"
 -- * `something` -> Name (Just ("`","`") "something"
 -- * [ms] -> Name (Just ("[","]") "ms"
-data Name = Name (Maybe (String,String)) String
+data Name = Name (Maybe (Text,Text)) Text
            deriving (Eq,Show,Read,Data,Typeable)

 -- | Represents a type name, used in casts.
@ -246,7 +248,7 @@ data TypeName
    | MultisetTypeName TypeName
      deriving (Eq,Show,Read,Data,Typeable)

-data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer))
+data IntervalTypeField = Itf Text (Maybe (Integer, Maybe Integer))
                         deriving (Eq,Show,Read,Data,Typeable)

 data Sign = Plus | Minus
@ -739,6 +741,6 @@ data PrivilegeAction =

 -- | Comment. Useful when generating SQL code programmatically. The
 -- parser doesn't produce these.
-newtype Comment = BlockComment String
+newtype Comment = BlockComment Text
               deriving (Eq,Show,Read,Data,Typeable)