1
Fork 0

complete basic typename support

This commit is contained in:
Jake Wheat 2014-04-18 19:49:00 +03:00
parent fbdcacc604
commit 2ff8580dbf
4 changed files with 257 additions and 48 deletions

View file

@ -136,7 +136,7 @@ which parses as a typed literal
> ] <* lookAhead quote
> literal :: Parser ValueExpr
> literal = number <|> stringValue <|> interval <|> try characterSetLiteral
> literal = number <|> stringValue <|> try interval <|> try characterSetLiteral
== Names
@ -512,15 +512,93 @@ a match (select a from t)
typename: used in casts. Special cases for the multi keyword typenames
that SQL supports.
TODO: this need heavy refactoring
> typeName :: Parser TypeName
> typeName =
> (choice [try multiWordParsers
> ,mktn <$> identifierBlacklist blacklist]
> >>= optionSuffix precision
> ) <?> "typename"
> (rowTypeName <|> intervalTypeName <|> otherTypeName)
> >>= tnSuffix
> <?> "typename"
> where
> mktn = TypeName . (:[]) . Name
> multiWordParsers = mktn . unwords <$> makeKeywordTree
> -- row type names - a little like create table
> rowTypeName =
> RowTypeName <$> (keyword_ "row" *> parens (commaSep1 rowField))
> rowField = (,) <$> name <*> typeName
> -- interval type names: interval a [to b]
> intervalTypeName =
> keyword_ "interval" *>
> choice
> [IntervalTypeName <$> intervalField
> <*> optionMaybe (keyword_ "to" *> intervalField)
> ,return $ TypeName [Name "interval"]]
> intervalField =
> Itf
> <$> identifierBlacklist blacklist
> <*> optionMaybe
> (parens ((,) <$> unsignedInteger
> <*> optionMaybe (comma *> unsignedInteger)))
> -- other type names, which includes:
> -- precision, scale, lob scale and units, timezone, character
> -- set and collations
> otherTypeName = do
> tn <- (try multiWordParsers <|> names)
> choice [try $ timezone tn
> ,try (precscale tn) >>= optionSuffix charSuffix
> ,try $ lob tn
> ,optionSuffix charSuffix $ TypeName tn]
> timezone tn = do
> TimeTypeName tn
> <$> optionMaybe prec
> <*> choice [True <$ keywords_ ["with", "time","zone"]
> ,False <$ keywords_ ["without", "time","zone"]]
> charSuffix (PrecTypeName t p) = chars t (Just p)
> charSuffix (TypeName t) = chars t Nothing
> charSuffix _ = fail ""
> chars tn p =
> ((,) <$> option [] charSet
> <*> optionMaybe tcollate)
> >>= uncurry mkit
> where
> mkit [] Nothing = fail ""
> mkit a b = return $ CharTypeName tn p a b
> lob tn = parens $ do
> (x,y) <- lobPrecToken
> z <- optionMaybe lobUnits
> return $ LobTypeName tn x y z
> precscale tn = parens (commaSep unsignedInteger) >>= makeWrap
> where
> makeWrap [a] = return $ PrecTypeName tn a
> makeWrap [a,b] = return $ PrecScaleTypeName tn a b
> makeWrap _ = fail "there must be one or two precision components"
> prec = parens unsignedInteger
> charSet = keywords_ ["character", "set"] *> names
> tcollate = keyword_ "collate" *> name
> lobPrecToken = lexeme $ do
> p <- read <$> many1 digit <?> "unsigned integer"
> x <- choice [Just LobK <$ keyword_ "k"
> ,Just LobM <$ keyword_ "m"
> ,Just LobG <$ keyword_ "g"
> ,return Nothing]
> return (p,x)
> lobUnits = choice [LobCharacters <$ keyword_ "characters"
> ,LobCodeUnits <$ keyword_ "code_units"
> ,LobOctets <$ keyword_ "octets"]
> -- deal with multiset and array suffixes
> tnSuffix x =
> multisetSuffix x <|> arraySuffix x <|> return x
> multisetSuffix x =
> (MultisetTypeName x <$ keyword_ "multiset") >>= tnSuffix
> arraySuffix x =
> (keyword_ "array" >> ArrayTypeName x
> <$> optionMaybe (brackets unsignedInteger)
> ) >>= tnSuffix
> -- special cases: there are a fixed set of multi word
> -- sql types, they all have to be listed here
> -- if it isn't in this list, the base of the
> -- typename must parse as a regular dotted identifier chain
> -- schema/etc. qualifiers are not supported for the multi word
> -- typenames
> multiWordParsers = (:[]) . Name . unwords <$> makeKeywordTree
> ["double precision"
> ,"character varying"
> ,"char varying"
@ -534,19 +612,9 @@ that SQL supports.
> ,"nchar large object"
> ,"nchar varying"
> ,"bit varying"
> ,"binary large object"
> ]
todo: timestamp types:
| TIME [ <left paren> <time precision> <right paren> ] [ WITH TIME ZONE ]
| TIMESTAMParser [ <left paren> <timestamp precision> <right paren> ] [ WITH TIME ZONE ]
> precision t = parens (commaSep unsignedInteger) >>= makeWrap t
> makeWrap (TypeName t) [a] = return $ PrecTypeName t a
> makeWrap (TypeName t) [a,b] = return $ PrecScaleTypeName t a b
> makeWrap _ _ = fail "there must be one or two precision components"
== value expression parens, row ctor and scalar subquery
@ -582,17 +650,21 @@ thick.
> parseTrees :: [[String]] -> Parser [String]
> parseTrees ws = do
> let gs :: [[[String]]]
> gs = groupBy ((==) `on` head) ws
> gs = groupBy ((==) `on` safeHead) ws
> choice $ map parseGroup gs
> parseGroup :: [[String]] -> Parser [String]
> parseGroup l@((k:_):_) = do
> keyword_ k
> let tls = map tail l
> let tls = catMaybes $ map safeTail l
> pr = (k:) <$> parseTrees tls
> if (or $ map null tls)
> then pr <|> return [k]
> else pr
> parseGroup _ = guard False >> error "impossible"
> safeHead (x:_) = Just x
> safeHead [] = Nothing
> safeTail (_:x) = Just x
> safeTail [] = Nothing
== operator parsing

View file

@ -218,11 +218,11 @@ which have been changed to try to improve the layout of the output.
> <> me (\x -> case x of
> LobK -> text "K"
> LobM -> text "M"
> LobG -> text "G") m)
> LobG -> text "G") m
> <+> me (\x -> case x of
> LobCharacters -> text "CHARACTERS"
> LobCodeUnits -> text "CODE_UNITS"
> LobOctets -> text "OCTETS") u
> LobOctets -> text "OCTETS") u)
> typeName (CharTypeName t i cs col) =
> names t
> <> me (\x -> parens (text $ show x)) i
@ -250,10 +250,10 @@ which have been changed to try to improve the layout of the output.
> <+> me (\(x,x1) -> parens (text (show x)
> <+> me (\y -> (sep [comma,text (show y)])) x1)) p
> typeName (ArrayType tn sz) =
> typeName tn <+> text "array" <+> me (text . show) sz
> typeName (ArrayTypeName tn sz) =
> typeName tn <+> text "array" <+> me (brackets . text . show) sz
> typeName (MultisetType tn) =
> typeName (MultisetTypeName tn) =
> typeName tn <+> text "multiset"

View file

@ -161,17 +161,17 @@ TODO: add ref and scope, any others?
> = TypeName [Name]
> | PrecTypeName [Name] Integer
> | PrecScaleTypeName [Name] Integer Integer
> | LobTypeName [Name] Int (Maybe LobMultiplier) (Maybe LobUnits)
> | LobTypeName [Name] Integer (Maybe LobMultiplier) (Maybe LobUnits)
> -- precision, characterset, collate
> | CharTypeName [Name] (Maybe Integer) [Name] (Maybe Name)
> | TimeTypeName [Name] (Maybe Integer) Bool -- true == with time zone
> | RowTypeName [(Name,TypeName)]
> | IntervalTypeName IntervalTypeField (Maybe IntervalTypeField)
> | ArrayType TypeName (Maybe Integer)
> | MultisetType TypeName
> | ArrayTypeName TypeName (Maybe Integer)
> | MultisetTypeName TypeName
> deriving (Eq,Show,Read,Data,Typeable)
> data IntervalTypeField = Itf String (Maybe (Int, Maybe Int))
> data IntervalTypeField = Itf String (Maybe (Integer, Maybe Integer))
> deriving (Eq,Show,Read,Data,Typeable)
> data LobMultiplier = LobK | LobM | LobG

View file

@ -511,7 +511,7 @@ TODO: all the stuff with character set representations.
<Unicode representation> ::= <character representation> | <Unicode escape value>
> unicodeStringLiterals :: TestItem
> unicodeStringLiterals = Group "national character string literals" $ map (uncurry TestValueExpr)
> unicodeStringLiterals = Group "unicode string literals" $ map (uncurry TestValueExpr)
> [("U&'something'", CSStringLit "U&" "something")
> ,("u&'something' escape ="
> ,Escape (CSStringLit "u&" "something") '=')
@ -929,19 +929,156 @@ create a list of type name variations:
> typeNames :: [(String,TypeName)]
> typeNames =
> [-- example of every standard type name
> ("character", TypeName "character")
> -- 1 single prec + 1 with multiname
> -- 1 scale + with multiname
> basicTypes
> ++ concatMap makeArray basicTypes
> ++ map makeMultiset basicTypes
> where
> makeArray (s,t) = [(s ++ " array", ArrayTypeName t Nothing)
> ,(s ++ " array[5]", ArrayTypeName t (Just 5))]
> makeMultiset (s,t) = (s ++ " multiset", MultisetTypeName t)
> basicTypes =
> -- example of every standard type name
> map (\t -> (t,TypeName [Name t]))
> ["character"
> ,"char"
> ,"character varying"
> ,"char varying"
> ,"varchar"
> ,"character large object"
> ,"char large object"
> ,"clob"
> ,"national character"
> ,"national char"
> ,"nchar"
> ,"national character varying"
> ,"national char varying"
> ,"nchar varying"
> ,"national character large object"
> ,"nchar large object"
> ,"nclob"
> ,"binary large object"
> ,"blob"
> ,"numeric"
> ,"decimal"
> ,"dec"
> ,"smallint"
> ,"integer"
> ,"int"
> ,"bigint"
> ,"float"
> ,"real"
> ,"double precision"
> ,"boolean"
> ,"date"
> ,"time"
> ,"timestamp"]
> --interval -- not allowed without interval qualifier
> --row -- not allowed without row type body
> --ref -- todo
> --scope -- todo
> -- array -- not allowed on own
> -- multiset -- not allowed on own
> ++
> [-- 1 single prec + 1 with multiname
> ("char(5)", PrecTypeName [Name "char"] 5)
> ,("char varying(5)", PrecTypeName [Name "char varying"] 5)
> -- 1 scale
> ,("decimal(15,2)", PrecScaleTypeName [Name "decimal"] 15 2)
> -- lob prec + with multiname
> ,("blob(3M)", LobTypeName [Name "blob"] 3 (Just LobM) Nothing)
> ,("blob(4M characters) "
> ,LobTypeName [Name "blob"] 4 (Just LobM) (Just LobCharacters))
> ,("blob(5 code_units) "
> ,LobTypeName [Name "blob"] 5 Nothing (Just LobCodeUnits))
> ,("blob(6G octets) "
> ,LobTypeName [Name "blob"] 6 (Just LobG) (Just LobOctets))
> ,("national character large object(7K) "
> ,LobTypeName [Name "national character large object"] 7 (Just LobK) Nothing)
> -- 1 with and without tz
> ,("time with time zone"
> ,TimeTypeName [Name "time"] Nothing True)
> ,("datetime(3) without time zone"
> ,TimeTypeName [Name "datetime"] (Just 3) False)
> -- chars: (single/multiname) x prec x charset x collate
> -- 1111
> ,("char varying(5) character set something collate something_insensitive"
> ,CharTypeName [Name "char varying"] (Just 5)
> [Name "something"] (Just "something_insensitive"))
> -- 0111
> ,("char(5) character set something collate something_insensitive"
> ,CharTypeName [Name "char"] (Just 5)
> [Name "something"] (Just "something_insensitive"))
> -- 1011
> ,("char varying character set something collate something_insensitive"
> ,CharTypeName [Name "char varying"] Nothing
> [Name "something"] (Just "something_insensitive"))
> -- 0011
> ,("char character set something collate something_insensitive"
> ,CharTypeName [Name "char"] Nothing
> [Name "something"] (Just "something_insensitive"))
> -- 1101
> ,("char varying(5) collate something_insensitive"
> ,CharTypeName [Name "char varying"] (Just 5)
> [] (Just "something_insensitive"))
> -- 0101
> ,("char(5) collate something_insensitive"
> ,CharTypeName [Name "char"] (Just 5)
> [] (Just "something_insensitive"))
> -- 1001
> ,("char varying collate something_insensitive"
> ,CharTypeName [Name "char varying"] Nothing
> [] (Just "something_insensitive"))
> -- 0001
> ,("char collate something_insensitive"
> ,CharTypeName [Name "char"] Nothing
> [] (Just "something_insensitive"))
> -- 1110
> ,("char varying(5) character set something"
> ,CharTypeName [Name "char varying"] (Just 5)
> [Name "something"] Nothing)
> -- 0110
> ,("char(5) character set something"
> ,CharTypeName [Name "char"] (Just 5)
> [Name "something"] Nothing)
> -- 1010
> ,("char varying character set something"
> ,CharTypeName [Name "char varying"] Nothing
> [Name "something"] Nothing)
> -- 0010
> ,("char character set something"
> ,CharTypeName [Name "char"] Nothing
> [Name "something"] Nothing)
> -- 1100
> ,("char varying character set something"
> ,CharTypeName [Name "char varying"] Nothing
> [Name "something"] Nothing)
> -- single row field, two row field
> ,("row(a int)", RowTypeName [(Name "a", TypeName [Name "int"])])
> ,("row(a int,b char)"
> ,RowTypeName [(Name "a", TypeName [Name "int"])
> ,(Name "b", TypeName [Name "char"])])
> -- interval each type raw
> -- one type with single suff
> -- one type with double suff
> ,("interval year"
> ,IntervalTypeName (Itf "year" Nothing) Nothing)
> -- one type with single suffix
> -- one type with double suffix
> ,("interval year(2)"
> ,IntervalTypeName (Itf "year" $ Just (2,Nothing)) Nothing)
> ,("interval second(2,5)"
> ,IntervalTypeName (Itf "second" $ Just (2,Just 5)) Nothing)
> -- a to b with raw
> -- a to b with single suff
> -- a to b with single suffix
> ,("interval year to month"
> ,IntervalTypeName (Itf "year" Nothing)
> (Just $ Itf "month" Nothing))
> ,("interval year(4) to second(2,3)"
> ,IntervalTypeName (Itf "year" $ Just (4,Nothing))
> (Just $ Itf "second" $ Just (2, Just 3)))
> ]
Now test each variation in both cast expression and typed literal