Skip to content

Commit

Permalink
feat(io): descriptor implementation
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
  • Loading branch information
aarnphm committed Sep 13, 2022
1 parent f01a04c commit 5c7939e
Show file tree
Hide file tree
Showing 10 changed files with 885 additions and 218 deletions.
6 changes: 5 additions & 1 deletion bentoml/_internal/external_typing/__init__.py
Expand Up @@ -8,6 +8,8 @@

from pandas import Series as PdSeries
from pandas import DataFrame as PdDataFrame
from pandas._typing import Dtype as PdDType
from pandas._typing import DtypeArg as PdDTypeArg
from pyarrow.plasma import ObjectID
from pyarrow.plasma import PlasmaClient

Expand All @@ -17,7 +19,7 @@
# numpy is always required by bentoml
from numpy import generic as NpGeneric
from numpy.typing import NDArray as _NDArray
from numpy.typing import DTypeLike as NpDTypeLike # type: ignore (incomplete numpy types)
from numpy.typing import DTypeLike as NpDTypeLike

NpNDArray = _NDArray[t.Any]

Expand All @@ -36,6 +38,8 @@
__all__ = [
"PdSeries",
"PdDataFrame",
"PdDType",
"PdDTypeArg",
"DataFrameOrient",
"SeriesOrient",
"ObjectID",
Expand Down
106 changes: 98 additions & 8 deletions bentoml/_internal/io_descriptors/base.py
Expand Up @@ -3,14 +3,22 @@
import typing as t
from abc import ABC
from abc import abstractmethod
from typing import overload
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from types import UnionType

from google.protobuf import message
from google.protobuf import struct_pb2
from google.protobuf import wrappers_pb2
from typing_extensions import Self
from starlette.requests import Request
from starlette.responses import Response
from google.protobuf.internal.containers import MessageMap

from bentoml.grpc.types import ProtoField
from bentoml.grpc.v1alpha1 import service_pb2 as pb

from ..types import LazyType
from ..context import InferenceApiContext as Context
Expand Down Expand Up @@ -39,20 +47,31 @@ class IODescriptor(ABC, t.Generic[IOType]):

HTTP_METHODS = ["POST"]

_init_str: str = ""

_mime_type: str

def __new__(cls: t.Type[Self], *args: t.Any, **kwargs: t.Any) -> Self:
_rpc_content_type: str
_proto_field: str

# Allocate the descriptor instance and seed its per-instance defaults.
# ``*args`` / ``**kwargs`` are accepted but not read in this body (hence the
# pylint suppression); presumably they exist so subclasses can take
# constructor arguments -- confirm against subclasses.
def __new__( # pylint: disable=unused-argument
cls: t.Type[Self],
*args: t.Any,
**kwargs: t.Any,
) -> Self:
self = super().__new__(cls)
# HTTP side: the default mime type is application/json
self._mime_type = "application/json"
self._init_str = cls.__qualname__
# gRPC side: the default content type is application/grpc
self._rpc_content_type = "application/grpc"

return self

@property
def accepted_proto_fields(self) -> ProtoField:
    """
    The proto field that this IODescriptor can accept.

    The value is the descriptor's ``_proto_field`` string; ``t.cast`` only
    informs the type checker and performs no conversion at runtime.

    Note that every proto field will also accept _internal_bytes_contents.
    """
    proto_field_name = self._proto_field
    return t.cast("ProtoField", proto_field_name)

# Short textual representation of the descriptor.
# NOTE(review): two consecutive ``return`` statements follow, so the second
# one is unreachable as written. This looks like a removed/added pair from a
# diff rendering -- confirm which of the two is the intended implementation.
def __repr__(self) -> str:
return self._init_str
return self.__class__.__qualname__

@abstractmethod
def input_type(self) -> InputType:
Expand Down Expand Up @@ -83,3 +102,74 @@ async def to_http_response(
self, obj: IOType, ctx: Context | None = None
) -> Response:
...

# ``from_proto`` turns an incoming protobuf value into this descriptor's
# ``IOType``. The ``@overload`` stubs below exist purely for type checkers:
# one per accepted message type (StringValue, struct Value, a map of Parts,
# NDArray, File, DataFrame, Series). Except for the map variant, each also
# accepts a ``pb.Part`` wrapper or raw ``bytes``.
#
# ``_use_internal_bytes_contents`` is keyword-only; it is required in the
# overloads and defaults to ``False`` in the final abstract signature.
# NOTE(review): presumably it tells the implementation that ``field`` is the
# raw bytes payload rather than a message -- confirm against the concrete
# descriptors.
@overload
@abstractmethod
async def from_proto(
self,
field: wrappers_pb2.StringValue | pb.Part | bytes,
*,
_use_internal_bytes_contents: bool,
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self,
field: struct_pb2.Value | pb.Part | bytes,
*,
_use_internal_bytes_contents: bool,
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self, field: MessageMap[str, pb.Part], *, _use_internal_bytes_contents: bool
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self, field: pb.NDArray | pb.Part | bytes, *, _use_internal_bytes_contents: bool
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self, field: pb.File | pb.Part | bytes, *, _use_internal_bytes_contents: bool
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self,
field: pb.DataFrame | pb.Part | bytes,
*,
_use_internal_bytes_contents: bool,
) -> IOType:
...

@overload
@abstractmethod
async def from_proto(
self, field: pb.Series | pb.Part | bytes, *, _use_internal_bytes_contents: bool
) -> IOType:
...

# Abstract signature that every concrete IODescriptor must implement; it is
# the union of the overloads above.
@abstractmethod
async def from_proto(
self,
field: message.Message | bytes | MessageMap[str, pb.Part],
*,
_use_internal_bytes_contents: bool = False,
) -> IOType:
...

# Abstract counterpart of ``from_proto``: convert a Python-side ``IOType``
# value into the protobuf representation, either a single message or a map
# of ``pb.Part`` entries keyed by string.
@abstractmethod
async def to_proto(self, obj: IOType) -> MessageMap[str, pb.Part] | message.Message:
...
84 changes: 76 additions & 8 deletions bentoml/_internal/io_descriptors/file.py
Expand Up @@ -13,6 +13,7 @@
from .base import IODescriptor
from ..types import FileLike
from ..utils.http import set_cookies
from ...exceptions import BadInput
from ...exceptions import BentoMLException
from ..service.openapi import SUCCESS_DESCRIPTION
from ..service.openapi.specification import Schema
Expand All @@ -23,10 +24,17 @@
logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from bentoml.grpc.v1alpha1 import service_pb2 as pb

from ..context import InferenceApiContext as Context

FileKind: t.TypeAlias = t.Literal["binaryio", "textio"]
FileType: t.TypeAlias = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]]
else:
from bentoml.grpc.utils import import_generated_stubs

pb, _ = import_generated_stubs()

FileType = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]]


class File(IODescriptor[FileType]):
Expand Down Expand Up @@ -100,16 +108,16 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]:
"""

_proto_field: str = "file"

def __new__(  # pylint: disable=arguments-differ # returning subclass from new
    cls, kind: FileKind = "binaryio", mime_type: str | None = None
) -> File:
    """
    Create the concrete ``File`` descriptor for the requested ``kind``.

    Args:
        kind: Which file flavour to build. Only ``"binaryio"`` is handled
            here, and it yields a ``BytesIOFile`` instance.
        mime_type: Mime type of the file payload. Falls back to
            ``"application/octet-stream"`` when ``None``.

    Returns:
        A ``BytesIOFile`` instance with its content types initialised.

    Raises:
        ValueError: If ``kind`` is anything other than ``"binaryio"``.
    """
    if kind != "binaryio":
        raise ValueError(f"invalid File kind '{kind}'")

    # object.__new__ deliberately bypasses IODescriptor.__new__ so that a
    # subclass instance can be returned. As a consequence, none of the
    # per-instance defaults assigned by the base __new__ are applied, and
    # each of them has to be set explicitly below.
    res = object.__new__(BytesIOFile)
    res._mime_type = (
        mime_type if mime_type is not None else "application/octet-stream"
    )
    # Same default the base descriptor uses; without it, reading
    # `_rpc_content_type` on a File instance raises AttributeError.
    res._rpc_content_type = "application/grpc"
    return res

Expand All @@ -134,11 +142,7 @@ def openapi_responses(self) -> OpenAPIResponse:
content={self._mime_type: MediaType(schema=self.openapi_schema())},
)

async def to_http_response(
self,
obj: FileType,
ctx: Context | None = None,
):
async def to_http_response(self, obj: FileType, ctx: Context | None = None):
if isinstance(obj, bytes):
body = obj
else:
Expand All @@ -155,6 +159,36 @@ async def to_http_response(
res = Response(body)
return res

async def to_proto(self, obj: FileType) -> pb.File:
    """
    Serialize ``obj`` into a ``pb.File`` message.

    Args:
        obj: Either raw ``bytes`` or an object exposing ``read()``, whose
            result becomes the message content.

    Returns:
        A ``pb.File`` whose ``kind`` is looked up from this descriptor's
        mime type and whose ``content`` is the payload.

    Raises:
        BadInput: If the descriptor's mime type has no entry in the
            mime-type-to-file-kind mapping.
    """
    from bentoml.grpc.utils import mimetype_to_filetype_pb_map

    # Materialise the payload first, before the mime type is resolved.
    payload = obj if isinstance(obj, bytes) else obj.read()

    try:
        file_kind = mimetype_to_filetype_pb_map()[self._mime_type]
    except KeyError:
        raise BadInput(
            f"{self._mime_type} doesn't have a corresponding File 'kind'"
        ) from None
    else:
        return pb.File(kind=file_kind, content=payload)

# Typing-only declarations: these stubs are evaluated by static type checkers
# only (TYPE_CHECKING is False at runtime), so they give `File` the
# `from_proto` / `from_http_request` signatures without defining behavior.
# The runtime implementations are provided by the concrete subclass.
if TYPE_CHECKING:

async def from_proto(
self,
field: pb.File | pb.Part | bytes,
*,
_use_internal_bytes_contents: bool = False,
) -> FileLike[bytes]:
...

async def from_http_request(self, request: Request) -> t.IO[bytes]:
...


class BytesIOFile(File):
async def from_http_request(self, request: Request) -> t.IO[bytes]:
Expand Down Expand Up @@ -183,3 +217,37 @@ async def from_http_request(self, request: Request) -> t.IO[bytes]:
raise BentoMLException(
f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got {content_type} instead"
)

async def from_proto(
    self,
    field: pb.File | pb.Part | bytes,
    *,
    _use_internal_bytes_contents: bool = False,
) -> FileLike[bytes]:
    """
    Build a ``FileLike[bytes]`` from an incoming protobuf field.

    Args:
        field: A ``pb.File`` message, a ``pb.Part`` wrapping one (its
            ``file`` member is used), or raw ``bytes`` when
            ``_use_internal_bytes_contents`` is set.
        _use_internal_bytes_contents: When ``True``, ``field`` is treated
            as the raw payload and no message inspection happens.

    Returns:
        A ``FileLike[bytes]`` wrapping an in-memory ``io.BytesIO`` of the
        payload, named ``"<content>"``.

    Raises:
        BadInput: If ``field`` has an unexpected type, if its ``kind`` is
            unknown or maps to a mime type different from this descriptor's,
            or if the message carries no content.
    """
    from bentoml.grpc.utils import filetype_pb_to_mimetype_map

    mapping = filetype_pb_to_mimetype_map()

    if _use_internal_bytes_contents:
        # Request data must be validated with a real exception: `assert`
        # is stripped under `python -O` and would surface as AssertionError.
        if not isinstance(field, bytes):
            raise BadInput(
                f"Expected raw bytes, got '{type(field).__name__}' instead."
            )
        content = field
    else:
        # check if the request message has the correct field
        if isinstance(field, pb.Part):
            field = field.file
        if not isinstance(field, pb.File):
            raise BadInput(
                f"Expected a 'File' message, got '{type(field).__name__}' instead."
            )

        # An unset (falsy) kind skips the mime type cross-check entirely.
        if field.kind:
            # Keep the try body to the lookup alone so that only an unknown
            # kind is translated by the KeyError handler.
            try:
                mime_type = mapping[field.kind]
            except KeyError:
                raise BadInput(
                    f"{field.kind} is not a valid File kind. Accepted file kind: {[name for name, _ in pb.File.FileType.items()]}",
                ) from None
            if mime_type != self._mime_type:
                raise BadInput(
                    f"Inferred mime_type from 'kind' is '{mime_type}', while '{self!r}' is expecting '{self._mime_type}'",
                )

        content = field.content
        if not content:
            raise BadInput("Content is empty!")

    return FileLike[bytes](io.BytesIO(content), "<content>")

0 comments on commit 5c7939e

Please sign in to comment.