diff --git a/Language/SQL/SimpleSQL/Parser.lhs b/Language/SQL/SimpleSQL/Parser.lhs index 769856b..a44eb52 100644 --- a/Language/SQL/SimpleSQL/Parser.lhs +++ b/Language/SQL/SimpleSQL/Parser.lhs @@ -136,7 +136,7 @@ which parses as a typed literal > ] <* lookAhead quote > literal :: Parser ValueExpr -> literal = number <|> stringValue <|> interval <|> try characterSetLiteral +> literal = number <|> stringValue <|> try interval <|> try characterSetLiteral == Names @@ -512,15 +512,93 @@ a match (select a from t) typename: used in casts. Special cases for the multi keyword typenames that SQL supports. +TODO: this need heavy refactoring + > typeName :: Parser TypeName > typeName = -> (choice [try multiWordParsers -> ,mktn <$> identifierBlacklist blacklist] -> >>= optionSuffix precision -> ) <?> "typename" +> (rowTypeName <|> intervalTypeName <|> otherTypeName) +> >>= tnSuffix +> <?> "typename" > where -> mktn = TypeName . (:[]) . Name -> multiWordParsers = mktn . unwords <$> makeKeywordTree +> -- row type names - a little like create table +> rowTypeName = +> RowTypeName <$> (keyword_ "row" *> parens (commaSep1 rowField)) +> rowField = (,) <$> name <*> typeName +> -- interval type names: interval a [to b] +> intervalTypeName = +> keyword_ "interval" *> +> choice +> [IntervalTypeName <$> intervalField +> <*> optionMaybe (keyword_ "to" *> intervalField) +> ,return $ TypeName [Name "interval"]] +> intervalField = +> Itf +> <$> identifierBlacklist blacklist +> <*> optionMaybe +> (parens ((,) <$> unsignedInteger +> <*> optionMaybe (comma *> unsignedInteger))) +> -- other type names, which includes: +> -- precision, scale, lob scale and units, timezone, character +> -- set and collations +> otherTypeName = do +> tn <- (try multiWordParsers <|> names) +> choice [try $ timezone tn +> ,try (precscale tn) >>= optionSuffix charSuffix +> ,try $ lob tn +> ,optionSuffix charSuffix $ TypeName tn] +> timezone tn = do +> TimeTypeName tn +> <$> optionMaybe prec +> <*> choice [True <$ keywords_ ["with", "time","zone"] +> ,False <$ keywords_ ["without", "time","zone"]] +> charSuffix (PrecTypeName t p) = chars t (Just p) +> charSuffix (TypeName t) = chars t Nothing +> charSuffix _ = fail "" +> chars tn p = +> ((,) <$> option [] charSet +> <*> optionMaybe tcollate) +> >>= uncurry mkit +> where +> mkit [] Nothing = fail "" +> mkit a b = return $ CharTypeName tn p a b +> lob tn = parens $ do +> (x,y) <- lobPrecToken +> z <- optionMaybe lobUnits +> return $ LobTypeName tn x y z +> precscale tn = parens (commaSep unsignedInteger) >>= makeWrap +> where +> makeWrap [a] = return $ PrecTypeName tn a +> makeWrap [a,b] = return $ PrecScaleTypeName tn a b +> makeWrap _ = fail "there must be one or two precision components" +> prec = parens unsignedInteger +> charSet = keywords_ ["character", "set"] *> names +> tcollate = keyword_ "collate" *> name +> lobPrecToken = lexeme $ do +> p <- read <$> many1 digit <?> "unsigned integer" +> x <- choice [Just LobK <$ keyword_ "k" +> ,Just LobM <$ keyword_ "m" +> ,Just LobG <$ keyword_ "g" +> ,return Nothing] +> return (p,x) +> lobUnits = choice [LobCharacters <$ keyword_ "characters" +> ,LobCodeUnits <$ keyword_ "code_units" +> ,LobOctets <$ keyword_ "octets"] +> -- deal with multiset and array suffixes +> tnSuffix x = +> multisetSuffix x <|> arraySuffix x <|> return x +> multisetSuffix x = +> (MultisetTypeName x <$ keyword_ "multiset") >>= tnSuffix +> arraySuffix x = +> (keyword_ "array" >> ArrayTypeName x +> <$> optionMaybe (brackets unsignedInteger) +> ) >>= tnSuffix +> -- special cases: there are a fixed set of multi word +> -- sql types, they all have to be listed here +> -- if it isn't in this list, the base of the +> -- typename must parse as a regular dotted identifier chain +> -- schema/etc. qualifiers are not supported for the multi word +> -- typenames +> multiWordParsers = (:[]) . Name . unwords <$> makeKeywordTree > ["double precision" > ,"character varying" > ,"char varying" @@ -534,19 +612,9 @@ that SQL supports. > ,"nchar large object" > ,"nchar varying" > ,"bit varying" +> ,"binary large object" > ] -todo: timestamp types: - - | TIME [ <left paren> <time precision> <right paren> ] [ WITH TIME ZONE ] - | TIMESTAMParser [ <left paren> <timestamp precision> <right paren> ] [ WITH TIME ZONE ] - - -> precision t = parens (commaSep unsignedInteger) >>= makeWrap t -> makeWrap (TypeName t) [a] = return $ PrecTypeName t a -> makeWrap (TypeName t) [a,b] = return $ PrecScaleTypeName t a b -> makeWrap _ _ = fail "there must be one or two precision components" - == value expression parens, row ctor and scalar subquery @@ -582,17 +650,21 @@ thick. > parseTrees :: [[String]] -> Parser [String] > parseTrees ws = do > let gs :: [[[String]]] -> gs = groupBy ((==) `on` head) ws +> gs = groupBy ((==) `on` safeHead) ws > choice $ map parseGroup gs > parseGroup :: [[String]] -> Parser [String] > parseGroup l@((k:_):_) = do > keyword_ k -> let tls = map tail l +> let tls = catMaybes $ map safeTail l > pr = (k:) <$> parseTrees tls > if (or $ map null tls) > then pr <|> return [k] > else pr > parseGroup _ = guard False >> error "impossible" +> safeHead (x:_) = Just x +> safeHead [] = Nothing +> safeTail (_:x) = Just x +> safeTail [] = Nothing == operator parsing diff --git a/Language/SQL/SimpleSQL/Pretty.lhs b/Language/SQL/SimpleSQL/Pretty.lhs index a31ac7e..e2c9c0d 100644 --- a/Language/SQL/SimpleSQL/Pretty.lhs +++ b/Language/SQL/SimpleSQL/Pretty.lhs @@ -215,14 +215,14 @@ which have been changed to try to improve the layout of the output. > typeName (LobTypeName t i m u) = > names t > <> parens (text (show i) -> <> me (\x -> case x of +> <> me (\x -> case x of > LobK -> text "K" > LobM -> text "M" -> LobG -> text "G") m) -> <+> me (\x -> case x of -> LobCharacters -> text "CHARACTERS" -> LobCodeUnits -> text "CODE_UNITS" -> LobOctets -> text "OCTETS") u +> LobG -> text "G") m +> <+> me (\x -> case x of +> LobCharacters -> text "CHARACTERS" +> LobCodeUnits -> text "CODE_UNITS" +> LobOctets -> text "OCTETS") u) > typeName (CharTypeName t i cs col) = > names t > <> me (\x -> parens (text $ show x)) i @@ -250,10 +250,10 @@ which have been changed to try to improve the layout of the output. > <+> me (\(x,x1) -> parens (text (show x) > <+> me (\y -> (sep [comma,text (show y)])) x1)) p -> typeName (ArrayType tn sz) = -> typeName tn <+> text "array" <+> me (text . show) sz +> typeName (ArrayTypeName tn sz) = +> typeName tn <+> text "array" <+> me (brackets . text . show) sz -> typeName (MultisetType tn) = +> typeName (MultisetTypeName tn) = > typeName tn <+> text "multiset" diff --git a/Language/SQL/SimpleSQL/Syntax.lhs b/Language/SQL/SimpleSQL/Syntax.lhs index 5c22ad9..620b472 100644 --- a/Language/SQL/SimpleSQL/Syntax.lhs +++ b/Language/SQL/SimpleSQL/Syntax.lhs @@ -161,17 +161,17 @@ TODO: add ref and scope, any others? > = TypeName [Name] > | PrecTypeName [Name] Integer > | PrecScaleTypeName [Name] Integer Integer -> | LobTypeName [Name] Int (Maybe LobMultiplier) (Maybe LobUnits) +> | LobTypeName [Name] Integer (Maybe LobMultiplier) (Maybe LobUnits) > -- precision, characterset, collate > | CharTypeName [Name] (Maybe Integer) [Name] (Maybe Name) > | TimeTypeName [Name] (Maybe Integer) Bool -- true == with time zone > | RowTypeName [(Name,TypeName)] > | IntervalTypeName IntervalTypeField (Maybe IntervalTypeField) -> | ArrayType TypeName (Maybe Integer) -> | MultisetType TypeName +> | ArrayTypeName TypeName (Maybe Integer) +> | MultisetTypeName TypeName > deriving (Eq,Show,Read,Data,Typeable) -> data IntervalTypeField = Itf String (Maybe (Int, Maybe Int)) +> data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer)) > deriving (Eq,Show,Read,Data,Typeable) > data LobMultiplier = LobK | LobM | LobG diff --git a/tools/Language/SQL/SimpleSQL/SQL2003.lhs b/tools/Language/SQL/SimpleSQL/SQL2003.lhs index a59335f..c720c76 100644 --- a/tools/Language/SQL/SimpleSQL/SQL2003.lhs +++ b/tools/Language/SQL/SimpleSQL/SQL2003.lhs @@ -511,7 +511,7 @@ TODO: all the stuff with character set representations. <Unicode representation> ::= <character representation> | <Unicode escape value> > unicodeStringLiterals :: TestItem -> unicodeStringLiterals = Group "national character string literals" $ map (uncurry TestValueExpr) +> unicodeStringLiterals = Group "unicode string literals" $ map (uncurry TestValueExpr) > [("U&'something'", CSStringLit "U&" "something") > ,("u&'something' escape =" > ,Escape (CSStringLit "u&" "something") '=') @@ -929,20 +929,157 @@ create a list of type name variations: > typeNames :: [(String,TypeName)] > typeNames = -> [-- example of every standard type name -> ("character", TypeName "character") -> -- 1 single prec + 1 with multiname -> -- 1 scale + with multiname -> -- lob prec + with multiname -> -- 1 with and without tz -> -- chars: (single/multiname) x prec x charset x collate -> -- single row field, two row field -> -- interval each type raw -> -- one type with single suff -> -- one type with double suff -> -- a to b with raw -> -- a to b with single suff -> ] +> basicTypes +> ++ concatMap makeArray basicTypes +> ++ map makeMultiset basicTypes +> where +> makeArray (s,t) = [(s ++ " array", ArrayTypeName t Nothing) +> ,(s ++ " array[5]", ArrayTypeName t (Just 5))] +> makeMultiset (s,t) = (s ++ " multiset", MultisetTypeName t) +> basicTypes = +> -- example of every standard type name +> map (\t -> (t,TypeName [Name t])) +> ["character" +> ,"char" +> ,"character varying" +> ,"char varying" +> ,"varchar" +> ,"character large object" +> ,"char large object" +> ,"clob" +> ,"national character" +> ,"national char" +> ,"nchar" +> ,"national character varying" +> ,"national char varying" +> ,"nchar varying" +> ,"national character large object" +> ,"nchar large object" +> ,"nclob" +> ,"binary large object" +> ,"blob" +> ,"numeric" +> ,"decimal" +> ,"dec" +> ,"smallint" +> ,"integer" +> ,"int" +> ,"bigint" +> ,"float" +> ,"real" +> ,"double precision" +> ,"boolean" +> ,"date" +> ,"time" +> ,"timestamp"] +> --interval -- not allowed without interval qualifier +> --row -- not allowed without row type body +> --ref -- todo +> --scope -- todo +> -- array -- not allowed on own +> -- multiset -- not allowed on own + +> ++ +> [-- 1 single prec + 1 with multiname +> ("char(5)", PrecTypeName [Name "char"] 5) +> ,("char varying(5)", PrecTypeName [Name "char varying"] 5) +> -- 1 scale +> ,("decimal(15,2)", PrecScaleTypeName [Name "decimal"] 15 2) +> -- lob prec + with multiname +> ,("blob(3M)", LobTypeName [Name "blob"] 3 (Just LobM) Nothing) +> ,("blob(4M characters) " +> ,LobTypeName [Name "blob"] 4 (Just LobM) (Just LobCharacters)) +> ,("blob(5 code_units) " +> ,LobTypeName [Name "blob"] 5 Nothing (Just LobCodeUnits)) +> ,("blob(6G octets) " +> ,LobTypeName [Name "blob"] 6 (Just LobG) (Just LobOctets)) +> ,("national character large object(7K) " +> ,LobTypeName [Name "national character large object"] 7 (Just LobK) Nothing) +> -- 1 with and without tz +> ,("time with time zone" +> ,TimeTypeName [Name "time"] Nothing True) +> ,("datetime(3) without time zone" +> ,TimeTypeName [Name "datetime"] (Just 3) False) +> -- chars: (single/multiname) x prec x charset x collate +> -- 1111 +> ,("char varying(5) character set something collate something_insensitive" +> ,CharTypeName [Name "char varying"] (Just 5) +> [Name "something"] (Just "something_insensitive")) +> -- 0111 +> ,("char(5) character set something collate something_insensitive" +> ,CharTypeName [Name "char"] (Just 5) +> [Name "something"] (Just "something_insensitive")) + +> -- 1011 +> ,("char varying character set something collate something_insensitive" +> ,CharTypeName [Name "char varying"] Nothing +> [Name "something"] (Just "something_insensitive")) +> -- 0011 +> ,("char character set something collate something_insensitive" +> ,CharTypeName [Name "char"] Nothing +> [Name "something"] (Just "something_insensitive")) + +> -- 1101 +> ,("char varying(5) collate something_insensitive" +> ,CharTypeName [Name "char varying"] (Just 5) +> [] (Just "something_insensitive")) +> -- 0101 +> ,("char(5) collate something_insensitive" +> ,CharTypeName [Name "char"] (Just 5) +> [] (Just "something_insensitive")) +> -- 1001 +> ,("char varying collate something_insensitive" +> ,CharTypeName [Name "char varying"] Nothing +> [] (Just "something_insensitive")) +> -- 0001 +> ,("char collate something_insensitive" +> ,CharTypeName [Name "char"] Nothing +> [] (Just "something_insensitive")) + +> -- 1110 +> ,("char varying(5) character set something" +> ,CharTypeName [Name "char varying"] (Just 5) +> [Name "something"] Nothing) +> -- 0110 +> ,("char(5) character set something" +> ,CharTypeName [Name "char"] (Just 5) +> [Name "something"] Nothing) +> -- 1010 +> ,("char varying character set something" +> ,CharTypeName [Name "char varying"] Nothing +> [Name "something"] Nothing) +> -- 0010 +> ,("char character set something" +> ,CharTypeName [Name "char"] Nothing +> [Name "something"] Nothing) +> -- 1100 +> ,("char varying character set something" +> ,CharTypeName [Name "char varying"] Nothing +> [Name "something"] Nothing) + +> -- single row field, two row field +> ,("row(a int)", RowTypeName [(Name "a", TypeName [Name "int"])]) +> ,("row(a int,b char)" +> ,RowTypeName [(Name "a", TypeName [Name "int"]) +> ,(Name "b", TypeName [Name "char"])]) +> -- interval each type raw +> ,("interval year" +> ,IntervalTypeName (Itf "year" Nothing) Nothing) +> -- one type with single suffix +> -- one type with double suffix +> ,("interval year(2)" +> ,IntervalTypeName (Itf "year" $ Just (2,Nothing)) Nothing) +> ,("interval second(2,5)" +> ,IntervalTypeName (Itf "second" $ Just (2,Just 5)) Nothing) +> -- a to b with raw +> -- a to b with single suffix +> ,("interval year to month" +> ,IntervalTypeName (Itf "year" Nothing) +> (Just $ Itf "month" Nothing)) +> ,("interval year(4) to second(2,3)" +> ,IntervalTypeName (Itf "year" $ Just (4,Nothing)) +> (Just $ Itf "second" $ Just (2, Just 3))) +> ] Now test each variation in both cast expression and typed literal expression