Skip to content

Commit

Permalink
feat(io): descriptor implementation
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
  • Loading branch information
aarnphm committed Sep 16, 2022
1 parent 3e9b65d commit 4bdb054
Show file tree
Hide file tree
Showing 10 changed files with 718 additions and 220 deletions.
6 changes: 5 additions & 1 deletion bentoml/_internal/external_typing/__init__.py
Expand Up @@ -8,6 +8,8 @@

from pandas import Series as PdSeries
from pandas import DataFrame as PdDataFrame
from pandas._typing import Dtype as PdDType
from pandas._typing import DtypeArg as PdDTypeArg
from pyarrow.plasma import ObjectID
from pyarrow.plasma import PlasmaClient

Expand All @@ -17,7 +19,7 @@
# numpy is always required by bentoml
from numpy import generic as NpGeneric
from numpy.typing import NDArray as _NDArray
from numpy.typing import DTypeLike as NpDTypeLike # type: ignore (incomplete numpy types)
from numpy.typing import DTypeLike as NpDTypeLike

NpNDArray = _NDArray[t.Any]

Expand All @@ -36,6 +38,8 @@
__all__ = [
"PdSeries",
"PdDataFrame",
"PdDType",
"PdDTypeArg",
"DataFrameOrient",
"SeriesOrient",
"ObjectID",
Expand Down
25 changes: 13 additions & 12 deletions bentoml/_internal/io_descriptors/base.py
Expand Up @@ -8,10 +8,11 @@
if TYPE_CHECKING:
from types import UnionType

from typing_extensions import Self
from starlette.requests import Request
from starlette.responses import Response

from bentoml.grpc.types import ProtoField

from ..types import LazyType
from ..context import InferenceApiContext as Context
from ..service.openapi.specification import Schema
Expand Down Expand Up @@ -39,20 +40,12 @@ class IODescriptor(ABC, t.Generic[IOType]):

HTTP_METHODS = ["POST"]

_init_str: str = ""

_mime_type: str

def __new__(cls: t.Type[Self], *args: t.Any, **kwargs: t.Any) -> Self:
self = super().__new__(cls)
# default mime type is application/json
self._mime_type = "application/json"
self._init_str = cls.__qualname__

return self
_rpc_content_type: str = "application/grpc"
_proto_fields: tuple[ProtoField]

def __repr__(self) -> str:
return self._init_str
return self.__class__.__qualname__

@abstractmethod
def input_type(self) -> InputType:
Expand Down Expand Up @@ -83,3 +76,11 @@ async def to_http_response(
self, obj: IOType, ctx: Context | None = None
) -> Response:
...

@abstractmethod
async def from_proto(self, field: t.Any) -> IOType:
...

@abstractmethod
async def to_proto(self, obj: IOType) -> t.Any:
...
71 changes: 63 additions & 8 deletions bentoml/_internal/io_descriptors/file.py
Expand Up @@ -13,6 +13,7 @@
from .base import IODescriptor
from ..types import FileLike
from ..utils.http import set_cookies
from ...exceptions import BadInput
from ...exceptions import BentoMLException
from ..service.openapi import SUCCESS_DESCRIPTION
from ..service.openapi.specification import Schema
Expand All @@ -23,10 +24,17 @@
logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from bentoml.grpc.v1alpha1 import service_pb2 as pb

from ..context import InferenceApiContext as Context

FileKind: t.TypeAlias = t.Literal["binaryio", "textio"]
FileType: t.TypeAlias = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]]
else:
from bentoml.grpc.utils import import_generated_stubs

pb, _ = import_generated_stubs()

FileType = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]]


class File(IODescriptor[FileType]):
Expand Down Expand Up @@ -100,16 +108,16 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]:
"""

_proto_fields = ("file",)

def __new__(  # pylint: disable=arguments-differ # returning subclass from new
    cls, kind: FileKind = "binaryio", mime_type: str | None = None
) -> File:
    """Create the concrete descriptor instance for the requested ``kind``.

    Only ``"binaryio"`` is handled, producing a ``BytesIOFile``; any other
    ``kind`` raises ``ValueError``. When ``mime_type`` is ``None`` the
    instance is given ``"application/octet-stream"``.
    """
    # Reject unsupported kinds up front so the happy path reads linearly.
    if kind != "binaryio":
        raise ValueError(f"invalid File kind '{kind}'")

    instance = object.__new__(BytesIOFile)
    instance._mime_type = (
        "application/octet-stream" if mime_type is None else mime_type
    )
    return instance

Expand All @@ -134,11 +142,7 @@ def openapi_responses(self) -> OpenAPIResponse:
content={self._mime_type: MediaType(schema=self.openapi_schema())},
)

async def to_http_response(
self,
obj: FileType,
ctx: Context | None = None,
):
async def to_http_response(self, obj: FileType, ctx: Context | None = None):
if isinstance(obj, bytes):
body = obj
else:
Expand All @@ -155,6 +159,31 @@ async def to_http_response(
res = Response(body)
return res

async def to_proto(self, obj: FileType) -> pb.File:
    """Serialize ``obj`` into a ``pb.File`` message.

    ``bytes`` input is used as the message content directly; any other
    object is consumed through its ``read()`` method. The message ``kind``
    is looked up from this descriptor's mime type.

    Raises:
        BadInput: if the mime type has no entry in the mime-type-to-kind map.
    """
    from bentoml.grpc.utils import mimetype_to_filetype_pb_map

    payload = obj if isinstance(obj, bytes) else obj.read()

    try:
        file_kind = mimetype_to_filetype_pb_map()[self._mime_type]
    except KeyError:
        raise BadInput(
            f"{self._mime_type} doesn't have a corresponding File 'kind'"
        ) from None

    return pb.File(kind=file_kind, content=payload)

if TYPE_CHECKING:

async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]:
...

async def from_http_request(self, request: Request) -> t.IO[bytes]:
...


class BytesIOFile(File):
async def from_http_request(self, request: Request) -> t.IO[bytes]:
Expand Down Expand Up @@ -183,3 +212,29 @@ async def from_http_request(self, request: Request) -> t.IO[bytes]:
raise BentoMLException(
f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got {content_type} instead"
)

async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]:
    """Build a file-like object from a gRPC ``File`` message or raw bytes.

    Args:
        field: either the raw content as ``bytes`` or a ``pb.File`` message.
            When the message carries a ``kind``, the mime type it maps to
            must equal this descriptor's mime type.

    Returns:
        A ``FileLike`` wrapping an in-memory ``io.BytesIO`` of the content.

    Raises:
        BadInput: if ``field`` is neither ``bytes`` nor ``pb.File``, if
            ``kind`` is unknown or maps to a different mime type, or if the
            content is empty.
    """
    from bentoml.grpc.utils import filetype_pb_to_mimetype_map

    mapping = filetype_pb_to_mimetype_map()
    if isinstance(field, bytes):
        content = field
    else:
        # Validate explicitly instead of asserting: asserts are stripped
        # under ``python -O`` and this value comes from a client request.
        if not isinstance(field, pb.File):
            raise BadInput(
                f"Expected a 'File' message or bytes, got '{type(field)}' instead."
            )
        if field.kind:
            # Keep the ``try`` limited to the lookup that can raise KeyError.
            try:
                mime_type = mapping[field.kind]
            except KeyError:
                raise BadInput(
                    f"{field.kind} is not a valid File kind. Accepted file kind: {[name for name, _ in pb.File.FileType.items()]}",
                ) from None
            if mime_type != self._mime_type:
                raise BadInput(
                    f"Inferred mime_type from 'kind' is '{mime_type}', while '{repr(self)}' is expecting '{self._mime_type}'",
                )
        content = field.content
    if not content:
        raise BadInput("Content is empty!")

    return FileLike[bytes](io.BytesIO(content), "<content>")
95 changes: 71 additions & 24 deletions bentoml/_internal/io_descriptors/image.py
Expand Up @@ -15,7 +15,6 @@
from ..utils.http import set_cookies
from ...exceptions import BadInput
from ...exceptions import InvalidArgument
from ...exceptions import InternalServerError
from ..service.openapi import SUCCESS_DESCRIPTION
from ..service.openapi.specification import Schema
from ..service.openapi.specification import Response as OpenAPIResponse
Expand All @@ -25,26 +24,33 @@
if TYPE_CHECKING:
from types import UnionType

import PIL
import PIL.Image

from bentoml.grpc.v1alpha1 import service_pb2 as pb

from .. import external_typing as ext
from ..context import InferenceApiContext as Context

_Mode = t.Literal[
"1", "CMYK", "F", "HSV", "I", "L", "LAB", "P", "RGB", "RGBA", "RGBX", "YCbCr"
]
else:
from bentoml.grpc.utils import import_generated_stubs

# NOTE: pillow-simd only benefits users who want to do preprocessing
# TODO: add options for users to choose between simd and native mode
_exc = f"'Pillow' is required to use {__name__}. Install with: 'pip install -U Pillow'."
_exc = "'Pillow' is required to use the Image IO descriptor. Install it with: 'pip install -U Pillow'."
PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=_exc)
PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=_exc)

pb, _ = import_generated_stubs()


# NOTE: we keep these types quoted to stay backward compatible
# with numpy < 1.20, since we use the latest stubs from the main branch of numpy,
# which enable a new way to type hint an ndarray.
ImageType: t.TypeAlias = t.Union["PIL.Image.Image", "ext.NpNDArray"]
ImageType = t.Union["PIL.Image.Image", "ext.NpNDArray"]

DEFAULT_PIL_MODE = "RGB"

Expand Down Expand Up @@ -135,32 +141,26 @@ async def predict_image(f: Image) -> NDArray[Any]:
:obj:`Image`: IO Descriptor that either a :code:`PIL.Image.Image` or a :code:`np.ndarray` representing an image.
"""

MIME_EXT_MAPPING: t.Dict[str, str] = {}
MIME_EXT_MAPPING: dict[str, str] = {}

_proto_fields = ("file",)

def __init__(
self,
pilmode: _Mode | None = DEFAULT_PIL_MODE,
mime_type: str = "image/jpeg",
):
try:
import PIL.Image
except ImportError:
raise InternalServerError(
"`Pillow` is required to use {__name__}\n Instructions: `pip install -U Pillow`"
)
PIL.Image.init()
self.MIME_EXT_MAPPING.update({v: k for k, v in PIL.Image.MIME.items()})

if mime_type.lower() not in self.MIME_EXT_MAPPING: # pragma: no cover
raise InvalidArgument(
f"Invalid Image mime_type '{mime_type}', "
f"Supported mime types are {', '.join(PIL.Image.MIME.values())} "
)
f"Invalid Image mime_type '{mime_type}'. Supported mime types are {', '.join(PIL.Image.MIME.values())}."
) from None
if pilmode is not None and pilmode not in PIL.Image.MODES: # pragma: no cover
raise InvalidArgument(
f"Invalid Image pilmode '{pilmode}', "
f"Supported PIL modes are {', '.join(PIL.Image.MODES)} "
)
f"Invalid Image pilmode '{pilmode}'. Supported PIL modes are {', '.join(PIL.Image.MODES)}."
) from None

self._mime_type = mime_type.lower()
self._pilmode: _Mode | None = pilmode
Expand Down Expand Up @@ -197,13 +197,12 @@ async def from_http_request(self, request: Request) -> ImageType:
bytes_ = await request.body()
else:
raise BadInput(
f"{self.__class__.__name__} should get `multipart/form-data`, "
f"`{self._mime_type}` or `image/*`, got {content_type} instead"
f"{self.__class__.__name__} should get 'multipart/form-data', '{self._mime_type}' or 'image/*', got '{content_type}' instead."
)
try:
return PIL.Image.open(io.BytesIO(bytes_))
except PIL.UnidentifiedImageError:
raise BadInput("Failed reading image file uploaded") from None
except PIL.UnidentifiedImageError as e:
raise BadInput(f"Failed reading image file uploaded: {e}") from None

async def to_http_response(
self, obj: ImageType, ctx: Context | None = None
Expand All @@ -213,10 +212,9 @@ async def to_http_response(
elif LazyType[PIL.Image.Image]("PIL.Image.Image").isinstance(obj):
image = obj
else:
raise InternalServerError(
f"Unsupported Image type received: {type(obj)}, `{self.__class__.__name__}`"
" only supports `np.ndarray` and `PIL.Image`"
)
raise BadInput(
f"Unsupported Image type received: '{type(obj)}', the Image IO descriptor only supports 'np.ndarray' and 'PIL.Image'."
) from None
filename = f"output.{self._format.lower()}"

ret = io.BytesIO()
Expand Down Expand Up @@ -248,3 +246,52 @@ async def to_http_response(
media_type=self._mime_type,
headers={"content-disposition": content_disposition},
)

async def from_proto(self, field: pb.File | bytes) -> ImageType:
    """Decode an image from a gRPC ``File`` message or raw bytes.

    Args:
        field: either the encoded image as ``bytes`` or a ``pb.File``
            message. When the message carries a ``kind``, the mime type it
            maps to must equal this descriptor's mime type.

    Returns:
        The image opened with ``PIL.Image.open``.

    Raises:
        BadInput: if ``field`` is neither ``bytes`` nor ``pb.File``, if
            ``kind`` is unknown or maps to a different mime type, if the
            content is empty, or if Pillow cannot identify the image.
    """
    from bentoml.grpc.utils import filetype_pb_to_mimetype_map

    mapping = filetype_pb_to_mimetype_map()
    if isinstance(field, bytes):
        content = field
    else:
        # Validate explicitly instead of asserting: asserts are stripped
        # under ``python -O`` and this value comes from a client request.
        if not isinstance(field, pb.File):
            raise BadInput(
                f"Expected a 'File' message or bytes, got '{type(field)}' instead."
            )
        if field.kind:
            # Keep the ``try`` limited to the lookup that can raise KeyError.
            try:
                mime_type = mapping[field.kind]
            except KeyError:
                raise BadInput(
                    f"{field.kind} is not a valid File kind. Accepted file kind: {[name for name, _ in pb.File.FileType.items()]}",
                ) from None
            if mime_type != self._mime_type:
                raise BadInput(
                    f"Inferred mime_type from 'kind' is '{mime_type}', while '{repr(self)}' is expecting '{self._mime_type}'",
                )
        content = field.content
    if not content:
        raise BadInput("Content is empty!")

    # Mirror the HTTP path: an undecodable payload is a client error, not a
    # server crash.
    try:
        return PIL.Image.open(io.BytesIO(content))
    except PIL.UnidentifiedImageError as e:
        raise BadInput(f"Failed reading image file uploaded: {e}") from None

async def to_proto(self, obj: ImageType) -> pb.File:
    """Encode ``obj`` into a ``pb.File`` message.

    A numpy array is first converted with ``PIL.Image.fromarray`` using the
    descriptor's ``_pilmode``; a PIL image is used as is. The image is saved
    into an in-memory buffer in the descriptor's ``_format`` and the message
    ``kind`` is looked up from the descriptor's mime type.

    Raises:
        BadInput: if ``obj`` is neither a numpy array nor a PIL image, or if
            the mime type has no entry in the mime-type-to-kind map.
    """
    from bentoml.grpc.utils import mimetype_to_filetype_pb_map

    # The PIL check only runs when the ndarray check fails (short-circuit),
    # matching an if/elif chain.
    is_ndarray = LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(obj)
    if not is_ndarray and not LazyType["PIL.Image.Image"](
        "PIL.Image.Image"
    ).isinstance(obj):
        raise BadInput(
            f"Unsupported Image type received: '{type(obj)}', the Image IO descriptor only supports 'np.ndarray' and 'PIL.Image'.",
        ) from None

    pil_image = (
        PIL.Image.fromarray(obj, mode=self._pilmode) if is_ndarray else obj
    )

    buffer = io.BytesIO()
    pil_image.save(buffer, format=self._format)

    try:
        file_kind = mimetype_to_filetype_pb_map()[self._mime_type]
    except KeyError:
        raise BadInput(
            f"{self._mime_type} doesn't have a corresponding File 'kind'",
        ) from None

    return pb.File(kind=file_kind, content=buffer.getvalue())

0 comments on commit 4bdb054

Please sign in to comment.