From 54ad1ffffcba8e6ac6e865cc805a82b3eec95be1 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 25 Oct 2022 13:10:28 -0700 Subject: [PATCH 01/13] feat: from_sample impl Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 1 + src/bentoml/_internal/io_descriptors/base.py | 15 +++++++++++++++ src/bentoml/_internal/io_descriptors/file.py | 17 ++++++++++++----- src/bentoml/_internal/io_descriptors/image.py | 3 ++- src/bentoml/_internal/io_descriptors/numpy.py | 12 +----------- .../_internal/service/openapi/specification.py | 6 ++++-- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 919c177898..a09d70f741 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "opentelemetry-sdk==1.13.0", "opentelemetry-semantic-conventions==0.34b0", "opentelemetry-util-http==0.34b0", + "filetype", "packaging>=20.0", "pathspec", "pip-tools>=6.6.2", diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 666a584fd3..b6f86ca672 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -27,6 +27,7 @@ | LazyType[t.Any] | dict[str, t.Type[t.Any] | UnionType | LazyType[t.Any]] ) + OpenAPIResponse = dict[str, str | dict[str, MediaType] | dict[str, t.Any]] IO_DESCRIPTOR_REGISTRY: dict[str, type[IODescriptor[t.Any]]] = {} @@ -52,6 +53,7 @@ class IODescriptor(ABC, t.Generic[IOType]): _mime_type: str _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] + _sample_input: IOType | None = None descriptor_id: str | None def __init_subclass__(cls, *, descriptor_id: str | None = None): @@ -63,6 +65,14 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id + @property + def sample_input(self) -> IOType | None: + return 
self._sample_input + + @sample_input.setter + def sample_input(self, value: IOType) -> None: + self._sample_input = value + @abstractmethod def to_spec(self) -> dict[str, t.Any]: raise NotImplementedError @@ -112,3 +122,8 @@ async def from_proto(self, field: t.Any) -> IOType: @abstractmethod async def to_proto(self, obj: IOType) -> t.Any: ... + + @classmethod + @abstractmethod + def from_sample(cls, sample_input: IOType, **kwargs: t.Any) -> Self: + ... diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index 8e97c05187..a0c45a8bf0 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -112,9 +112,7 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]: _proto_fields = ("file",) - def __new__( # pylint: disable=arguments-differ # returning subclass from new - cls, kind: FileKind = "binaryio", mime_type: str | None = None - ) -> File: + def __new__(cls, kind: FileKind = "binaryio", mime_type: str | None = None) -> File: mime_type = mime_type if mime_type is not None else "application/octet-stream" if kind == "binaryio": res = object.__new__(BytesIOFile) @@ -123,8 +121,14 @@ def __new__( # pylint: disable=arguments-differ # returning subclass from new res._mime_type = mime_type return res - def to_spec(self) -> dict[str, t.Any]: - raise NotImplementedError + @classmethod + def from_sample(cls, sample_input: FileType, kind: FileKind = "binaryio") -> Self: + import filetype + + mime_type: str | None = filetype.guess_mime(sample_input) + kls = cls(kind=kind, mime_type=mime_type) + kls.sample_input = sample_input + return kls @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: @@ -195,6 +199,9 @@ async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]: async def from_http_request(self, request: Request) -> t.IO[bytes]: raise NotImplementedError + def to_spec(self) -> dict[str, t.Any]: + raise NotImplementedError + class 
BytesIOFile(File, descriptor_id=None): def to_spec(self) -> dict[str, t.Any]: diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 5dd1d7b3da..8d57c0e396 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -30,6 +30,7 @@ from typing_extensions import Self from bentoml.grpc.v1alpha1 import service_pb2 as pb + from .base import OpenAPIResponse from .. import external_typing as ext from .base import OpenAPIResponse @@ -75,7 +76,7 @@ def initialize_pillow(): import PIL.Image except ImportError: raise InternalServerError( - "`Pillow` is required to use {__name__}\n Instructions: `pip install -U Pillow`" + f"'Pillow' is required to use {__name__}. Install Pillow with 'pip install bentoml[io-image]'" ) PIL.Image.init() diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 5b402f9151..2e5927e95b 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -228,8 +228,6 @@ def __init__( self._enforce_dtype = enforce_dtype self._enforce_shape = enforce_shape - self._sample_input = None - if self._enforce_dtype and not self._dtype: raise InvalidArgument( "'dtype' must be specified when 'enforce_dtype=True'" @@ -269,14 +267,6 @@ def from_spec(cls, spec: dict[str, t.Any]) -> Self: res = NumpyNdarray(**spec["args"]) return res - @property - def sample_input(self) -> ext.NpNDArray | None: - return self._sample_input - - @sample_input.setter - def sample_input(self, value: ext.NpNDArray) -> None: - self._sample_input = value - def openapi_schema(self) -> Schema: # Note that we are yet provide # supports schemas for arrays that is > 2D. 
@@ -407,7 +397,7 @@ def from_sample( sample_input: ext.NpNDArray, enforce_dtype: bool = True, enforce_shape: bool = True, - ) -> NumpyNdarray: + ) -> Self: """ Create a :obj:`NumpyNdarray` IO Descriptor from given inputs. diff --git a/src/bentoml/_internal/service/openapi/specification.py b/src/bentoml/_internal/service/openapi/specification.py index 597629b3a1..a88763b127 100644 --- a/src/bentoml/_internal/service/openapi/specification.py +++ b/src/bentoml/_internal/service/openapi/specification.py @@ -187,7 +187,7 @@ class Operation: description: t.Optional[str] = None externalDocs: t.Optional[ExternalDocumentation] = None operationId: t.Optional[str] = None - requestBody: t.Optional[t.Union[RequestBody, Reference]] = None + requestBody: t.Optional[t.Union[RequestBody, Reference, t.Dict[str, t.Any]]] = None # Not yet supported: parameters, callbacks, deprecated, servers, security @@ -252,7 +252,9 @@ class Components: schemas: t.Dict[str, t.Union[Schema, Reference]] responses: t.Optional[t.Dict[str, t.Union[Response, Reference]]] = None examples: t.Optional[t.Dict[str, t.Union[Example, Reference]]] = None - requestBodies: t.Optional[t.Dict[str, t.Union[RequestBody, Reference]]] = None + requestBodies: t.Optional[ + t.Dict[str, t.Union[RequestBody, Reference, t.Dict[str, t.Any]]] + ] = None links: t.Optional[t.Dict[str, t.Union[Link, Reference]]] = None # Not yet supported: securitySchemes, callbacks, parameters, headers From 166009fa6d3b71d0dfad5575fcafd2aca4ad3360 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 25 Oct 2022 18:24:14 -0700 Subject: [PATCH 02/13] feat: from_sample Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/base.py | 18 +++-- src/bentoml/_internal/io_descriptors/file.py | 24 +++++-- src/bentoml/_internal/io_descriptors/image.py | 59 +++++++++++---- src/bentoml/_internal/io_descriptors/json.py | 42 ++++++++++- 
.../_internal/io_descriptors/multipart.py | 11 ++- src/bentoml/_internal/io_descriptors/numpy.py | 28 ++++---- .../_internal/io_descriptors/pandas.py | 71 ++++++++++++++----- src/bentoml/_internal/io_descriptors/text.py | 6 ++ .../service/openapi/specification.py | 2 +- 9 files changed, 194 insertions(+), 67 deletions(-) diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index b6f86ca672..4bbd74fa8d 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -53,7 +53,7 @@ class IODescriptor(ABC, t.Generic[IOType]): _mime_type: str _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] - _sample_input: IOType | None = None + _sample: IOType | None = None descriptor_id: str | None def __init_subclass__(cls, *, descriptor_id: str | None = None): @@ -66,12 +66,12 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): cls.descriptor_id = descriptor_id @property - def sample_input(self) -> IOType | None: - return self._sample_input + def sample(self) -> IOType | None: + return self._sample - @sample_input.setter - def sample_input(self, value: IOType) -> None: - self._sample_input = value + @sample.setter + def sample(self, value: IOType) -> None: + self._sample = value @abstractmethod def to_spec(self) -> dict[str, t.Any]: @@ -93,6 +93,10 @@ def input_type(self) -> InputType: def openapi_schema(self) -> Schema | Reference: raise NotImplementedError + def openapi_example(self) -> t.Any: + if self.sample is not None: + return self.sample + @abstractmethod def openapi_components(self) -> dict[str, t.Any] | None: raise NotImplementedError @@ -125,5 +129,5 @@ async def to_proto(self, obj: IOType) -> t.Any: @classmethod @abstractmethod - def from_sample(cls, sample_input: IOType, **kwargs: t.Any) -> Self: + def from_sample(cls, sample: IOType, **kwargs: t.Any) -> Self: ... 
diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index a0c45a8bf0..edd175b412 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import os import typing as t import logging from typing import TYPE_CHECKING @@ -122,12 +123,23 @@ def __new__(cls, kind: FileKind = "binaryio", mime_type: str | None = None) -> F return res @classmethod - def from_sample(cls, sample_input: FileType, kind: FileKind = "binaryio") -> Self: + def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Self: import filetype - mime_type: str | None = filetype.guess_mime(sample_input) + mime_type: str | None = filetype.guess_mime(sample) + kls = cls(kind=kind, mime_type=mime_type) - kls.sample_input = sample_input + + if isinstance(sample, FileLike): + kls.sample = sample + elif isinstance(sample, t.IO): + kls.sample = FileLike[bytes](sample, "") + elif isinstance(sample, str) and os.path.exists(sample): + with open(sample, "rb") as f: + kls.sample = FileLike[bytes](f, "") + else: + raise InvalidArgument(f"Unknown sample type: '{sample}'") + return kls @classmethod @@ -196,7 +208,7 @@ async def to_proto(self, obj: FileType) -> pb.File: async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]: raise NotImplementedError - async def from_http_request(self, request: Request) -> t.IO[bytes]: + async def from_http_request(self, request: Request) -> FileLike[bytes]: raise NotImplementedError def to_spec(self) -> dict[str, t.Any]: @@ -213,7 +225,7 @@ def to_spec(self) -> dict[str, t.Any]: }, } - async def from_http_request(self, request: Request) -> t.IO[bytes]: + async def from_http_request(self, request: Request) -> FileLike[bytes]: content_type, _ = parse_options_header(request.headers["content-type"]) if content_type.decode("utf-8") == "multipart/form-data": form = await request.form() @@ -235,7 
+247,7 @@ async def from_http_request(self, request: Request) -> t.IO[bytes]: return res # type: ignore if content_type.decode("utf-8") == self._mime_type: body = await request.body() - return t.cast(t.IO[bytes], FileLike(io.BytesIO(body), "")) + return FileLike[bytes](io.BytesIO(body), "") raise BentoMLException( f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got {content_type} instead" ) diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 8d57c0e396..7f37c25acb 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import os import typing as t import functools from typing import TYPE_CHECKING @@ -22,6 +23,8 @@ from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +PIL_EXC_MSG = "'Pillow' is required to use the Image IO descriptor. Install with 'pip install bentoml[io-image]'." + if TYPE_CHECKING: from types import UnionType @@ -30,7 +33,6 @@ from typing_extensions import Self from bentoml.grpc.v1alpha1 import service_pb2 as pb - from .base import OpenAPIResponse from .. import external_typing as ext from .base import OpenAPIResponse @@ -44,9 +46,8 @@ # NOTE: pillow-simd only benefits users who want to do preprocessing # TODO: add options for users to choose between simd and native mode - _exc = "'Pillow' is required to use the Image IO descriptor. Install it with: 'pip install -U Pillow'." 
- PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=_exc) - PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=_exc) + PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=PIL_EXC_MSG) + PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=PIL_EXC_MSG) pb, _ = import_generated_stubs() @@ -59,10 +60,7 @@ DEFAULT_PIL_MODE = "RGB" -PIL_WRITE_ONLY_FORMATS = { - "PALM", - "PDF", -} +PIL_WRITE_ONLY_FORMATS = {"PALM", "PDF"} READABLE_MIMES: set[str] = None # type: ignore (lazy constant) MIME_EXT_MAPPING: dict[str, str] = None # type: ignore (lazy constant) @@ -75,9 +73,7 @@ def initialize_pillow(): try: import PIL.Image except ImportError: - raise InternalServerError( - f"'Pillow' is required to use {__name__}. Install Pillow with 'pip install bentoml[io-image]'" - ) + raise InternalServerError(PIL_EXC_MSG) PIL.Image.init() MIME_EXT_MAPPING = {v: k for k, v in PIL.Image.MIME.items()} # type: ignore (lazy constant) @@ -214,6 +210,41 @@ def __init__( self._pilmode: _Mode | None = pilmode self._format: str = MIME_EXT_MAPPING[self._mime_type] + @classmethod + def from_sample( + cls, + sample: ImageType | str, + pilmode: _Mode | None = DEFAULT_PIL_MODE, + *, + allowed_mime_types: t.Iterable[str] | None = None, + ) -> Self: + from filetype.match import image_match + + img_type = image_match(sample) + if img_type is None: + raise InvalidArgument(f"{sample} is not a valid image file type.") + + kls = cls( + mime_type=img_type.mime, + pilmode=pilmode, + allowed_mime_types=allowed_mime_types, + ) + + if isinstance(sample, str) and os.path.exists(sample): + try: + with open(sample, "rb") as f: + kls.sample = PIL.Image.open(f) + except PIL.UnidentifiedImageError as err: + raise BadInput(f"Failed to parse sample image file: {err}") from None + elif LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): + kls.sample = PIL.Image.fromarray(sample, mode=pilmode) + elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): + kls.sample = 
sample + else: + raise InvalidArgument(f"Unknown sample type: '{sample}'") + + return kls + def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, @@ -318,15 +349,15 @@ async def from_http_request(self, request: Request) -> ImageType: try: return PIL.Image.open(io.BytesIO(bytes_)) - except PIL.UnidentifiedImageError: # type: ignore (bad pillow types) - raise BadInput("Failed to parse uploaded image file") from None + except PIL.UnidentifiedImageError as err: + raise BadInput(f"Failed to parse uploaded image file: {err}") from None async def to_http_response( self, obj: ImageType, ctx: Context | None = None ) -> Response: if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(obj): image = PIL.Image.fromarray(obj, mode=self._pilmode) - elif LazyType[PIL.Image.Image]("PIL.Image.Image").isinstance(obj): + elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(obj): image = obj else: raise BadInput( diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index ba8619b49a..c5b33e7cf7 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -23,6 +23,8 @@ from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +EXC_MSG = "'pydantic' must be installed to use 'pydantic_model'. Install with 'pip install bentoml[io-json]'." + if TYPE_CHECKING: from types import UnionType @@ -36,9 +38,8 @@ from ..context import InferenceApiContext as Context else: - _exc_msg = "'pydantic' must be installed to use 'pydantic_model'. Install with 'pip install pydantic'." - pydantic = LazyLoader("pydantic", globals(), "pydantic", exc_msg=_exc_msg) - schema = LazyLoader("schema", globals(), "pydantic.schema", exc_msg=_exc_msg) + pydantic = LazyLoader("pydantic", globals(), "pydantic", exc_msg=EXC_MSG) + schema = LazyLoader("schema", globals(), "pydantic.schema", exc_msg=EXC_MSG) # lazy load our proto generated. 
struct_pb2 = LazyLoader("struct_pb2", globals(), "google.protobuf.struct_pb2") # lazy load numpy for processing ndarray. @@ -200,6 +201,22 @@ def __init__( "'validate_json' option from 'bentoml.io.JSON' has been deprecated. Use a Pydantic model to specify validation options instead." ) + @classmethod + def from_sample( + cls, + sample: JSONType, + *, + json_encoder: t.Type[json.JSONEncoder] = DefaultJsonEncoder, + ) -> Self: + pydantic_model: t.Type[pydantic.BaseModel] | None = None + if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): + pydantic_model = sample.__class__ + + kls = cls(pydantic_model=pydantic_model, json_encoder=json_encoder) + + kls.sample = sample + return kls + def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, @@ -250,6 +267,25 @@ def openapi_components(self) -> dict[str, t.Any] | None: return {"schemas": pydantic_components_schema(self._pydantic_model)} + def openapi_example(self) -> t.Any: + if self.sample is not None: + if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance( + self.sample + ): + return self.sample.dict() + elif isinstance(self.sample, str): + return json.dumps( + self.sample, + cls=self._json_encoder, + ensure_ascii=False, + allow_nan=False, + indent=None, + separators=(",", ":"), + ) + elif isinstance(self.sample, dict): + return self.sample + return + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index 9f90bdc3fe..985ea281c2 100644 --- a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -16,15 +16,16 @@ from ..utils.formparser import populate_multipart_requests from ..utils.formparser import concat_to_multipart_response from ..service.openapi.specification import Schema -from ..service.openapi.specification 
import Response as OpenAPIResponse from ..service.openapi.specification import MediaType -from ..service.openapi.specification import RequestBody if TYPE_CHECKING: from types import UnionType + from typing_extensions import Self + from bentoml.grpc.v1alpha1 import service_pb2 as pb + from .base import OpenAPIResponse from ..types import LazyType from ..context import InferenceApiContext as Context else: @@ -174,6 +175,10 @@ def __init__(self, **inputs: IODescriptor[t.Any]): def __repr__(self) -> str: return f"Multipart({','.join([f'{k}={v}' for k,v in zip(self._inputs, map(repr, self._inputs.values()))])})" + @classmethod + def from_sample(cls, sample: dict[str, t.Any]) -> Self: + pass + def input_type( self, ) -> dict[str, t.Type[t.Any] | UnionType | LazyType[t.Any]]: @@ -217,7 +222,7 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_request_body(self) -> RequestBody: + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, "required": True, diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 2e5927e95b..0716bb2247 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -280,13 +280,11 @@ def openapi_components(self) -> dict[str, t.Any] | None: pass def openapi_example(self) -> t.Any: - if self.sample_input is not None: - if isinstance(self.sample_input, np.generic): - raise BadInput( - "NumpyNdarray: sample_input must be a numpy array." 
- ) from None - return self.sample_input.tolist() - return + if self.sample is not None: + if isinstance(self.sample, np.generic): + raise BadInput("NumpyNdarray: sample must be a numpy array.") from None + # NOTE: we only need to + return self.sample.ravel().tolist() def openapi_request_body(self) -> dict[str, t.Any]: return { @@ -394,7 +392,7 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) @classmethod def from_sample( cls, - sample_input: ext.NpNDArray, + sample: ext.NpNDArray, enforce_dtype: bool = True, enforce_shape: bool = True, ) -> Self: @@ -402,7 +400,7 @@ def from_sample( Create a :obj:`NumpyNdarray` IO Descriptor from given inputs. Args: - sample_input: Given sample ``np.ndarray`` data + sample: Given sample ``np.ndarray`` data enforce_dtype: Enforce a certain data type. :code:`dtype` must be specified at function signature. If you don't want to enforce a specific dtype then change :code:`enforce_dtype=False`. @@ -436,20 +434,20 @@ def from_sample( async def predict(input: NDArray[np.int16]) -> NDArray[Any]: return await runner.async_run(input) """ - if isinstance(sample_input, np.generic): + if isinstance(sample, np.generic): raise BentoMLException( "'NumpyNdarray.from_sample()' expects a 'numpy.array', not 'numpy.generic'." 
) from None - inst = cls( - dtype=sample_input.dtype, - shape=sample_input.shape, + kls = cls( + dtype=sample.dtype, + shape=sample.shape, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - inst.sample_input = sample_input + kls.sample = sample - return inst + return kls async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index 90e44ec69a..b41e66d9bd 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -24,6 +24,7 @@ from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +EXC_MSG = "pandas' is required to use PandasDataFrame or PandasSeries. Install with 'pip install bentoml[io-pandas]'" if TYPE_CHECKING: import numpy as np import pandas as pd @@ -32,19 +33,14 @@ from bentoml.grpc.v1alpha1 import service_pb2 as pb from .. import external_typing as ext + from .base import OpenAPIResponse from ..context import InferenceApiContext as Context else: from bentoml.grpc.utils import import_generated_stubs pb, _ = import_generated_stubs() - np = LazyLoader("np", globals(), "numpy") - pd = LazyLoader( - "pd", - globals(), - "pandas", - exc_msg='pandas" is required to use PandasDataFrame or PandasSeries. 
Install with "pip install bentoml[io-pandas]"', - ) + pd = LazyLoader("pd", globals(), "pandas", exc_msg=EXC_MSG) logger = logging.getLogger(__name__) @@ -80,7 +76,8 @@ def _openapi_types(item: str) -> str: # pragma: no cover def _dataframe_openapi_schema( - dtype: bool | ext.PdDTypeArg | None, orient: ext.DataFrameOrient = None + dtype: bool | ext.PdDTypeArg | None, + orient: ext.DataFrameOrient = None, ) -> Schema: # pragma: no cover if isinstance(dtype, dict): if orient == "records": @@ -154,6 +151,8 @@ def __str__(self) -> str: return "parquet" elif self == SerializationFormat.CSV: return "csv" + else: + raise ValueError(f"Unknown serialization format: {self}") def _infer_serialization_format_from_request( @@ -323,7 +322,7 @@ def __init__( enforce_shape: bool = False, default_format: t.Literal["json", "parquet", "csv"] = "json", ): - self._orient = orient + self._orient: ext.DataFrameOrient = orient self._columns = columns self._apply_column_names = apply_column_names # TODO: convert dtype to numpy dtype @@ -363,6 +362,14 @@ def _convert_dtype( return None def to_spec(self) -> dict[str, t.Any]: + # TODO: support extension dtypes + dtype = None + if self._dtype is not None: + if isinstance(self._dtype, bool): + dtype = self._dtype + else: + dtype = self._dtype.name + return { "id": self.descriptor_id, "args": { @@ -392,6 +399,11 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self) -> t.Any: + if self.sample is not None: + return t.cast("dict[str, t.Any]", self.sample.to_dict()) + return + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, @@ -493,18 +505,18 @@ async def to_http_response( @classmethod def from_sample( cls, - sample_input: ext.PdDataFrame, + sample: ext.PdDataFrame, orient: ext.DataFrameOrient = "records", apply_column_names: bool = True, enforce_shape: bool = True, enforce_dtype: bool = True, 
default_format: t.Literal["json", "parquet", "csv"] = "json", - ) -> PandasDataFrame: + ) -> Self: """ Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. Args: - sample_input: Given sample ``pd.DataFrame`` data + sample: Given sample ``pd.DataFrame`` data orient: Indication of expected JSON string format. Compatible JSON strings can be produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. Possible orients are: @@ -547,19 +559,19 @@ def from_sample( @svc.api(input=input_spec, output=PandasDataFrame()) def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... """ - inst = cls( + kls = cls( orient=orient, enforce_shape=enforce_shape, - shape=sample_input.shape, + shape=sample.shape, apply_column_names=apply_column_names, - columns=[str(x) for x in list(sample_input.columns)], + columns=[str(x) for x in list(sample.columns)], enforce_dtype=enforce_dtype, dtype=True, # set to True to infer from given input default_format=default_format, ) - inst.sample_input = sample_input + kls.sample = sample - return inst + return kls def validate_dataframe( self, dataframe: ext.PdDataFrame, exception_cls: t.Type[Exception] = BadInput @@ -796,7 +808,7 @@ def __init__( shape: tuple[int, ...] 
| None = None, enforce_shape: bool = False, ): - self._orient = orient + self._orient: ext.SeriesOrient = orient self._dtype = dtype self._enforce_dtype = enforce_dtype self._shape = shape @@ -842,6 +854,24 @@ def to_spec(self) -> dict[str, t.Any]: }, } + @classmethod + def from_sample( + cls, + sample: ext.PdSeries, + orient: ext.SeriesOrient = "records", + enforce_shape: bool = True, + enforce_dtype: bool = True, + ) -> Self: + kls = cls( + orient=orient, + dtype=sample.dtype, + enforce_dtype=enforce_dtype, + shape=sample.shape, + enforce_shape=enforce_shape, + ) + kls.sample = sample + return kls + @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: if "args" not in spec: @@ -855,6 +885,11 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self) -> t.Any: + if self.sample is not None: + return t.cast("dict[str, t.Any]", self.sample.to_dict()) + return + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index e5d0873cf1..6eb41e4f38 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -99,6 +99,12 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): f"'{self.__class__.__name__}' is not designed to take any args or kwargs during initialization." 
) from None + @classmethod + def from_sample(cls, sample: str) -> Self: + kls = cls() + kls.sample = sample + return kls + def input_type(self) -> t.Type[str]: return str diff --git a/src/bentoml/_internal/service/openapi/specification.py b/src/bentoml/_internal/service/openapi/specification.py index a88763b127..6f0ea7e6dc 100644 --- a/src/bentoml/_internal/service/openapi/specification.py +++ b/src/bentoml/_internal/service/openapi/specification.py @@ -104,7 +104,7 @@ class Schema: anyOf: t.Optional[t.List[Schema]] = None not_: t.Optional[Schema] = None items: t.Optional[t.Union[Schema, t.List[Schema]]] = None - properties: t.Optional[t.Dict[str, Schema]] = None + properties: t.Optional[t.Dict[str, Schema | Reference]] = None additionalProperties: t.Optional[t.Union[Schema, Reference, bool]] = None description: t.Optional[str] = None format: t.Optional[str] = None From 84152a8a9117ad3deca619fc323d701e2322b2fe Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 25 Oct 2022 18:33:44 -0700 Subject: [PATCH 03/13] fix: sample shouldn't be a memoryview [skip ci] depends on #3144 to be merged. 
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 0716bb2247..11d8d4b9ae 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -284,7 +284,7 @@ def openapi_example(self) -> t.Any: if isinstance(self.sample, np.generic): raise BadInput("NumpyNdarray: sample must be a numpy array.") from None # NOTE: we only need to - return self.sample.ravel().tolist() + return self.sample.tolist() def openapi_request_body(self) -> dict[str, t.Any]: return { From 26993e1eb08873d95bfbe851b8b6e00e7f3e72fe Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Mon, 7 Nov 2022 15:39:21 -0800 Subject: [PATCH 04/13] chore: move from_sample deps to optional via io-file Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 12 ++++++++---- src/bentoml/_internal/io_descriptors/base.py | 1 + src/bentoml/bentos.py | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a09d70f741..4af96ba5a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,6 @@ dependencies = [ "opentelemetry-sdk==1.13.0", "opentelemetry-semantic-conventions==0.34b0", "opentelemetry-util-http==0.34b0", - "filetype", "packaging>=20.0", "pathspec", "pip-tools>=6.6.2", @@ -113,15 +112,20 @@ include = [ [project.optional-dependencies] all = [ "bentoml[aws]", - "bentoml[io-json]", - "bentoml[io-image]", - "bentoml[io-pandas]", + "bentoml[io]", "bentoml[grpc]", "bentoml[grpc-reflection]", "bentoml[grpc-channelz]", "bentoml[tracing]", ] aws = ["fs-s3fs"] +io = [ + "bentoml[io-json]", + "bentoml[io-image]", + "bentoml[io-pandas]", + "bentoml[io-file]", +] # syntactic sugar for
bentoml[io-json,io-image,io-pandas,io-file] +io-file = ["filetype"] # Currently used for from_sample io-json = ["pydantic<2"] # currently we don't have support for pydantic 2.0 io-image = ["Pillow"] io-pandas = ["pandas", "pyarrow"] diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 4bbd74fa8d..3d26758a00 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -20,6 +20,7 @@ from ..context import InferenceApiContext as Context from ..service.openapi.specification import Schema from ..service.openapi.specification import Reference + from ..service.openapi.specification import MediaType InputType = ( UnionType diff --git a/src/bentoml/bentos.py b/src/bentoml/bentos.py index 46f9f7cd13..165e1ce966 100644 --- a/src/bentoml/bentos.py +++ b/src/bentoml/bentos.py @@ -445,6 +445,8 @@ def construct_dockerfile( "grpc-channelz", "aws", "all", + "io", + "io-file", "io-image", "io-pandas", "io-json", From a6c77bd99c22b23282268c4041d09b588fb9c3e2 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Mon, 7 Nov 2022 23:06:44 -0800 Subject: [PATCH 05/13] chore: dispatch by types for sample Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 3 +- src/bentoml/_internal/io_descriptors/base.py | 33 +- src/bentoml/_internal/io_descriptors/file.py | 51 ++- src/bentoml/_internal/io_descriptors/image.py | 52 ++- src/bentoml/_internal/io_descriptors/json.py | 21 +- src/bentoml/_internal/io_descriptors/numpy.py | 16 +- .../_internal/io_descriptors/pandas.py | 352 ++++++++++-------- src/bentoml/_internal/io_descriptors/text.py | 17 +- src/bentoml/_internal/utils/__init__.py | 3 + 9 files changed, 335 insertions(+), 213 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4af96ba5a2..152d4b1ba0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ dependencies = [
"backports.cached-property;python_version<'3.8'", "backports.shutil_copytree;python_version<'3.8'", "importlib-metadata;python_version<'3.8'", + "singledispatchmethod;python_version<'3.8'", ] dynamic = ["version"] @@ -127,7 +128,7 @@ io = [ ] # syntactic sugar for bentoml[io-json,io-image,io-pandas,io-file] io-file = ["filetype"] # Currently used for from_sample io-json = ["pydantic<2"] # currently we don't have support for pydantic 2.0 -io-image = ["Pillow"] +io-image = ["bentoml[io-file]", "Pillow"] io-pandas = ["pandas", "pyarrow"] grpc = [ # Restrict maximum version due to breaking protobuf 4.21.0 changes diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 3d26758a00..ea0187dca2 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -5,6 +5,7 @@ from abc import abstractmethod from typing import TYPE_CHECKING +from ..utils import singledispatchmethod from ...exceptions import InvalidArgument if TYPE_CHECKING: @@ -19,8 +20,8 @@ from ..types import LazyType from ..context import InferenceApiContext as Context from ..service.openapi.specification import Schema - from ..service.openapi.specification import Reference from ..service.openapi.specification import MediaType + from ..service.openapi.specification import Reference InputType = ( UnionType @@ -36,6 +37,13 @@ IOType = t.TypeVar("IOType") +@singledispatchmethod +def create_sample(self: IODescriptor[t.Any], value: t.Any) -> None: + raise InvalidArgument( + f"Unsupported sample type: '{type(value)}' (value: {value}). 
To register type '{type(value)}' to {self.__class__.__name__} implement a dispatch function and register types to 'create_sample.register'" + ) + + def from_spec(spec: dict[str, str]) -> IODescriptor[t.Any]: if "id" not in spec: raise InvalidArgument(f"IO descriptor spec ({spec}) missing ID.") @@ -51,11 +59,13 @@ class IODescriptor(ABC, t.Generic[IOType]): HTTP_METHODS = ["POST"] + descriptor_id: str | None + _mime_type: str _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] _sample: IOType | None = None - descriptor_id: str | None + _create_sample: singledispatchmethod[None] = create_sample def __init_subclass__(cls, *, descriptor_id: str | None = None): if descriptor_id is not None: @@ -66,6 +76,13 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id + def __new__(cls, *args: t.Any, **kwargs: t.Any): + sample = kwargs.pop("_sample", None) + kls = super().__new__(cls) + if sample is not None: + kls._create_sample(sample) + return kls + @property def sample(self) -> IOType | None: return self._sample @@ -74,6 +91,13 @@ def sample(self) -> IOType | None: def sample(self, value: IOType) -> None: self._sample = value + # NOTE: for custom types handle, use 'create_sample.register' to register + # custom types for 'from_sample' + @classmethod + @abstractmethod + def from_sample(cls, sample: IOType | t.Any, **kwargs: t.Any) -> Self: + return cls.__new__(cls, _sample=sample, **kwargs) + @abstractmethod def to_spec(self) -> dict[str, t.Any]: raise NotImplementedError @@ -127,8 +151,3 @@ async def from_proto(self, field: t.Any) -> IOType: @abstractmethod async def to_proto(self, obj: IOType) -> t.Any: ... - - @classmethod - @abstractmethod - def from_sample(cls, sample: IOType, **kwargs: t.Any) -> Self: - ... 
diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index edd175b412..d1936af691 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -12,11 +12,14 @@ from starlette.datastructures import UploadFile from .base import IODescriptor +from .base import create_sample from ..types import FileLike +from ..utils import resolve_user_filepath from ..utils.http import set_cookies from ...exceptions import BadInput from ...exceptions import InvalidArgument from ...exceptions import BentoMLException +from ...exceptions import MissingDependencyException from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType @@ -113,10 +116,12 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]: _proto_fields = ("file",) - def __new__(cls, kind: FileKind = "binaryio", mime_type: str | None = None) -> File: + def __new__( + cls, kind: FileKind = "binaryio", mime_type: str | None = None, **kwargs: t.Any + ) -> File: mime_type = mime_type if mime_type is not None else "application/octet-stream" if kind == "binaryio": - res = object.__new__(BytesIOFile) + res = super().__new__(BytesIOFile, **kwargs) else: raise ValueError(f"invalid File kind '{kind}'") res._mime_type = mime_type @@ -124,23 +129,35 @@ def __new__(cls, kind: FileKind = "binaryio", mime_type: str | None = None) -> F @classmethod def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Self: - import filetype - - mime_type: str | None = filetype.guess_mime(sample) - - kls = cls(kind=kind, mime_type=mime_type) + try: + import filetype + except ModuleNotFoundError: + raise MissingDependencyException( + "'filetype' is required to use 'from_sample'. Install it with 'pip install bentoml[io-file]'." 
+ ) - if isinstance(sample, FileLike): - kls.sample = sample - elif isinstance(sample, t.IO): - kls.sample = FileLike[bytes](sample, "") - elif isinstance(sample, str) and os.path.exists(sample): - with open(sample, "rb") as f: - kls.sample = FileLike[bytes](f, "") - else: - raise InvalidArgument(f"Unknown sample type: '{sample}'") + return super().from_sample( + sample, kind=kind, mime_type=filetype.guess_mime(sample) + ) - return kls + @create_sample.register(type(FileLike)) + def _(self, sample: FileLike[bytes]) -> None: + self.sample = sample + + @create_sample.register(t.IO) + def _(self, sample: t.IO[t.Any]) -> None: + if isinstance(self, File): + self.sample = FileLike[bytes](sample, "") + + @create_sample.register(str) + @create_sample.register(os.PathLike) + def _(self, sample: str) -> None: + # This is to ensure we can register same type with different + # implementation across different IO descriptors. + if isinstance(self, File): + p = resolve_user_filepath(sample, ctx=None) + with open(p, "rb") as f: + self.sample = FileLike[bytes](f, "") @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 7f37c25acb..293e5a5b5f 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -1,7 +1,6 @@ from __future__ import annotations import io -import os import typing as t import functools from typing import TYPE_CHECKING @@ -13,12 +12,15 @@ from starlette.datastructures import UploadFile from .base import IODescriptor +from .base import create_sample from ..types import LazyType from ..utils import LazyLoader +from ..utils import resolve_user_filepath from ..utils.http import set_cookies from ...exceptions import BadInput from ...exceptions import InvalidArgument from ...exceptions import InternalServerError +from ...exceptions import MissingDependencyException from ..service.openapi import 
SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType @@ -29,6 +31,7 @@ from types import UnionType import PIL + import numpy as np import PIL.Image from typing_extensions import Self @@ -49,9 +52,9 @@ PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=PIL_EXC_MSG) PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=PIL_EXC_MSG) + np = LazyLoader("np", globals(), "numpy") pb, _ = import_generated_stubs() - # NOTES: we will keep type in quotation to avoid backward compatibility # with numpy < 1.20, since we will use the latest stubs from the main branch of numpy. # that enable a new way to type hint an ndarray. @@ -214,36 +217,51 @@ def __init__( def from_sample( cls, sample: ImageType | str, - pilmode: _Mode | None = DEFAULT_PIL_MODE, *, + pilmode: _Mode | None = DEFAULT_PIL_MODE, allowed_mime_types: t.Iterable[str] | None = None, ) -> Self: - from filetype.match import image_match + try: + from filetype.match import image_match + except ModuleNotFoundError: + raise MissingDependencyException( + "'filetype' is required to use 'from_sample'. Install it with 'pip install bentoml[io-image]'." 
+ ) img_type = image_match(sample) if img_type is None: raise InvalidArgument(f"{sample} is not a valid image file type.") - kls = cls( - mime_type=img_type.mime, + if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): + + @create_sample.register(np.ndarray) + def _(self: Self, sample: ext.NpNDArray) -> None: + if isinstance(self, Image): + self.sample = PIL.Image.fromarray(sample, mode=self._pilmode) + + elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): + + @create_sample.register(PIL.Image.Image) + def _(self: Self, sample: PIL.Image.Image) -> None: + if isinstance(self, Image): + self.sample = sample + + return super().from_sample( + sample, pilmode=pilmode, + mime_type=img_type.mime, allowed_mime_types=allowed_mime_types, ) - if isinstance(sample, str) and os.path.exists(sample): + @create_sample.register(str) + def _(self, sample: str) -> None: + if isinstance(self, Image): + p = resolve_user_filepath(sample, ctx=None) try: - with open(sample, "rb") as f: - kls.sample = PIL.Image.open(f) + with open(p, "rb") as f: + self.sample = PIL.Image.open(f) except PIL.UnidentifiedImageError as err: raise BadInput(f"Failed to parse sample image file: {err}") from None - elif LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): - kls.sample = PIL.Image.fromarray(sample, mode=pilmode) - elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): - kls.sample = sample - else: - raise InvalidArgument(f"Unknown sample type: '{sample}'") - - return kls def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index c5b33e7cf7..ab37af03ac 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -11,6 +11,7 @@ from starlette.responses import Response from .base import IODescriptor +from .base import create_sample from ..types import LazyType from ..utils import LazyLoader from 
..utils import bentoml_cattr @@ -212,10 +213,24 @@ def from_sample( if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): pydantic_model = sample.__class__ - kls = cls(pydantic_model=pydantic_model, json_encoder=json_encoder) + @create_sample.register(pydantic.BaseModel) + def _(self: Self, sample: pydantic.BaseModel): + if isinstance(self, JSON): + self.sample = sample - kls.sample = sample - return kls + return super().from_sample( + sample, pydantic_model=pydantic_model, json_encoder=json_encoder + ) + + @create_sample.register(dict) + def _(self, sample: dict[str, t.Any]): + if isinstance(self, JSON): + self.sample = sample + + @create_sample.register(str) + def _(self, sample: str): + if isinstance(self, JSON): + self.sample = json.loads(sample) def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 11d8d4b9ae..8680620486 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -10,6 +10,7 @@ from starlette.responses import Response from .base import IODescriptor +from .base import create_sample from ..types import LazyType from ..utils import LazyLoader from ..utils.http import set_cookies @@ -439,15 +440,24 @@ async def predict(input: NDArray[np.int16]) -> NDArray[Any]: "'NumpyNdarray.from_sample()' expects a 'numpy.array', not 'numpy.generic'." 
) from None - kls = cls( + return super().from_sample( + sample, dtype=sample.dtype, shape=sample.shape, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - kls.sample = sample - return kls + @create_sample.register(np.ndarray) + def _(self, sample: ext.NpNDArray): + if isinstance(self, NumpyNdarray): + self.sample = sample + + @create_sample.register(list) + @create_sample.register(tuple) + def _(self, sample: t.Sequence[t.Any]): + if isinstance(self, NumpyNdarray): + self.sample = np.array(sample) async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index b41e66d9bd..bc3c210c12 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import os import typing as t import logging import functools @@ -12,6 +13,7 @@ from starlette.responses import Response from .base import IODescriptor +from .base import create_sample from ..types import LazyType from ..utils.pkg import find_spec from ..utils.http import set_cookies @@ -25,6 +27,7 @@ from ..service.openapi.specification import MediaType EXC_MSG = "pandas' is required to use PandasDataFrame or PandasSeries. Install with 'pip install bentoml[io-pandas]'" + if TYPE_CHECKING: import numpy as np import pandas as pd @@ -33,7 +36,6 @@ from bentoml.grpc.v1alpha1 import service_pb2 as pb from .. 
import external_typing as ext - from .base import OpenAPIResponse from ..context import InferenceApiContext as Context else: @@ -41,6 +43,7 @@ pb, _ = import_generated_stubs() pd = LazyLoader("pd", globals(), "pandas", exc_msg=EXC_MSG) + np = LazyLoader("np", globals(), "numpy") logger = logging.getLogger(__name__) @@ -335,15 +338,122 @@ def __init__( _validate_serialization_format(self._default_format) self._mime_type = self._default_format.mime_type - self._sample_input = None + @classmethod + def from_sample( + cls, + sample: ext.PdDataFrame, + *, + orient: ext.DataFrameOrient = "records", + apply_column_names: bool = True, + enforce_shape: bool = True, + enforce_dtype: bool = True, + default_format: t.Literal["json", "parquet", "csv"] = "json", + ) -> Self: + """ + Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. - @property - def sample_input(self) -> ext.PdDataFrame | None: - return self._sample_input + Args: + sample: Given sample ``pd.DataFrame`` data + orient: Indication of expected JSON string format. Compatible JSON strings can be + produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. + Possible orients are: + + - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} + - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] + - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} + - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} + - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays + - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} + apply_column_names: Update incoming DataFrame columns. ``columns`` must be specified at + function signature. If you don't want to enforce a specific columns + name then change ``apply_column_names=False``. + enforce_dtype: Enforce a certain data type. 
`dtype` must be specified at function + signature. If you don't want to enforce a specific dtype then change + ``enforce_dtype=False``. + enforce_shape: Enforce a certain shape. ``shape`` must be specified at function + signature. If you don't want to enforce a specific shape then change + ``enforce_shape=False``. + default_format: The default serialization format to use if the request does not specify a ``Content-Type`` Headers. + It is also the serialization format used for the response. Possible values are: + + - :obj:`json` - JSON text format (inferred from content-type ``"application/json"``) + - :obj:`parquet` - Parquet binary format (inferred from content-type ``"application/octet-stream"``) + - :obj:`csv` - CSV text format (inferred from content-type ``"text/csv"``) + + Returns: + :obj:`PandasDataFrame`: :code:`PandasDataFrame` IODescriptor from given users inputs. + + Example: + + .. code-block:: python + :caption: `service.py` + + import pandas as pd + from bentoml.io import PandasDataFrame + arr = [[1,2,3]] + input_spec = PandasDataFrame.from_sample(pd.DataFrame(arr)) + + @svc.api(input=input_spec, output=PandasDataFrame()) + def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... 
+ """ + if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): + + columns = [i for i in range(sample.shape[1])] + + @create_sample.register(np.ndarray) + def _(self: Self, sample: ext.NpNDArray): + if isinstance(self, PandasDataFrame): + self.sample = pd.DataFrame(sample) + + else: + columns = [str(x) for x in list(sample.columns)] + + return super().from_sample( + sample, + orient=orient, + enforce_shape=enforce_shape, + shape=sample.shape, + apply_column_names=apply_column_names, + columns=columns, + enforce_dtype=enforce_dtype, + dtype=True, # set to True to infer from given input + default_format=default_format, + ) - @sample_input.setter - def sample_input(self, value: ext.PdDataFrame) -> None: - self._sample_input = value + @create_sample.register(pd.DataFrame) + def _(self, sample: pd.DataFrame): + if isinstance(self, PandasDataFrame): + self.sample = sample + + @create_sample.register(str) + @create_sample.register(os.PathLike) + def _(self, sample: str): + if isinstance(self, PandasDataFrame): + if os.path.exists(sample): + try: + ext = os.path.splitext(sample)[-1].strip(".") + self.sample = getattr( + pd, + { + "json": "read_json", + "csv": "read_csv", + "html": "read_html", + "xls": "read_excel", + "xlsx": "read_excel", + "hdf5": "read_hdf", + "parquet": "read_parquet", + "pickle": "read_pickle", + "sql": "read_sql", + }[ext], + )(sample) + except KeyError: + raise InvalidArgument(f"Unsupported sample '{sample}' format.") + except ValueError as e: + raise InvalidArgument( + f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" + ) from None + else: + self.sample = pd.read_json(sample) def _convert_dtype( self, value: ext.PdDTypeArg | None @@ -375,7 +485,7 @@ def to_spec(self) -> dict[str, t.Any]: "args": { "orient": self._orient, "columns": self._columns, - "dtype": self._convert_dtype(self._dtype), + "dtype": self._convert_dtype(dtype), "shape": self._shape, "enforce_dtype": self._enforce_dtype, "enforce_shape": 
self._enforce_shape, @@ -502,77 +612,6 @@ async def to_http_response( else: return Response(resp, media_type=serialization_format.mime_type) - @classmethod - def from_sample( - cls, - sample: ext.PdDataFrame, - orient: ext.DataFrameOrient = "records", - apply_column_names: bool = True, - enforce_shape: bool = True, - enforce_dtype: bool = True, - default_format: t.Literal["json", "parquet", "csv"] = "json", - ) -> Self: - """ - Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. - - Args: - sample: Given sample ``pd.DataFrame`` data - orient: Indication of expected JSON string format. Compatible JSON strings can be - produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. - Possible orients are: - - - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} - - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays - - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} - apply_column_names: Update incoming DataFrame columns. ``columns`` must be specified at - function signature. If you don't want to enforce a specific columns - name then change ``apply_column_names=False``. - enforce_dtype: Enforce a certain data type. `dtype` must be specified at function - signature. If you don't want to enforce a specific dtype then change - ``enforce_dtype=False``. - enforce_shape: Enforce a certain shape. ``shape`` must be specified at function - signature. If you don't want to enforce a specific shape then change - ``enforce_shape=False``. - default_format: The default serialization format to use if the request does not specify a ``Content-Type`` Headers. 
- It is also the serialization format used for the response. Possible values are: - - - :obj:`json` - JSON text format (inferred from content-type ``"application/json"``) - - :obj:`parquet` - Parquet binary format (inferred from content-type ``"application/octet-stream"``) - - :obj:`csv` - CSV text format (inferred from content-type ``"text/csv"``) - - Returns: - :obj:`PandasDataFrame`: :code:`PandasDataFrame` IODescriptor from given users inputs. - - Example: - - .. code-block:: python - :caption: `service.py` - - import pandas as pd - from bentoml.io import PandasDataFrame - arr = [[1,2,3]] - input_spec = PandasDataFrame.from_sample(pd.DataFrame(arr)) - - @svc.api(input=input_spec, output=PandasDataFrame()) - def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... - """ - kls = cls( - orient=orient, - enforce_shape=enforce_shape, - shape=sample.shape, - apply_column_names=apply_column_names, - columns=[str(x) for x in list(sample.columns)], - enforce_dtype=enforce_dtype, - dtype=True, # set to True to infer from given input - default_format=default_format, - ) - kls.sample = sample - - return kls - def validate_dataframe( self, dataframe: ext.PdDataFrame, exception_cls: t.Type[Exception] = BadInput ) -> ext.PdDataFrame: @@ -813,15 +852,80 @@ def __init__( self._enforce_dtype = enforce_dtype self._shape = shape self._enforce_shape = enforce_shape - self._sample_input = None - @property - def sample_input(self) -> ext.PdSeries | None: - return self._sample_input + @classmethod + def from_sample( + cls, + sample: ext.PdSeries, + *, + orient: ext.SeriesOrient = "records", + enforce_shape: bool = True, + enforce_dtype: bool = True, + ) -> Self: + """ + Create a :obj:`PandasSeries` IO Descriptor from given inputs. + + Args: + sample: Given sample ``pd.Series`` data + orient: Indication of expected JSON string format. Compatible JSON strings can be + produced by :func:`pandas.io.json.to_json()` with a corresponding orient value.
+ Possible orients are: + + - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} + - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] + - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} + - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} + enforce_dtype: Enforce a certain data type. `dtype` must be specified at function + signature. If you don't want to enforce a specific dtype then change + ``enforce_dtype=False``. + enforce_shape: Enforce a certain shape. ``shape`` must be specified at function + signature. If you don't want to enforce a specific shape then change + ``enforce_shape=False``. + + Returns: + :obj:`PandasSeries`: :code:`PandasSeries` IODescriptor from given users inputs. + + Example: + + .. code-block:: python + :caption: `service.py` + + import pandas as pd + from bentoml.io import PandasSeries + + arr = [1,2,3] + input_spec = PandasSeries.from_sample(pd.Series(arr)) + + @svc.api(input=input_spec, output=PandasSeries()) + def predict(inputs: pd.Series) -> pd.Series: ... 
+ """ + if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): + + @create_sample.register(np.ndarray) + def _(self: Self, sample: ext.NpNDArray): + if isinstance(self, PandasSeries): + self.sample = pd.Series(sample) + + return super().from_sample( + sample, + orient=orient, + dtype=sample.dtype, + enforce_dtype=enforce_dtype, + shape=sample.shape, + enforce_shape=enforce_shape, + ) + + @create_sample.register(pd.Series) + def _(self, sample: ext.PdSeries): + if isinstance(self, PandasSeries): + self.sample = sample - @sample_input.setter - def sample_input(self, value: ext.PdSeries) -> None: - self._sample_input = value + @create_sample.register(list) + @create_sample.register(tuple) + @create_sample.register(set) + def _(self, sample: t.Sequence[t.Any]): + if isinstance(self, PandasSeries): + self.sample = pd.Series(sample) def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") @@ -854,24 +958,6 @@ def to_spec(self) -> dict[str, t.Any]: }, } - @classmethod - def from_sample( - cls, - sample: ext.PdSeries, - orient: ext.SeriesOrient = "records", - enforce_shape: bool = True, - enforce_dtype: bool = True, - ) -> Self: - kls = cls( - orient=orient, - dtype=sample.dtype, - enforce_dtype=enforce_dtype, - shape=sample.shape, - enforce_shape=enforce_shape, - ) - kls.sample = sample - return kls - @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: if "args" not in spec: @@ -1063,59 +1149,3 @@ async def to_proto(self, obj: ext.PdSeries) -> pb.Series: raise InvalidArgument( f"Unsupported dtype '{obj.dtype}' for response message." ) from None - - @classmethod - def from_sample( - cls, - sample_input: ext.PdSeries, - orient: ext.SeriesOrient = "records", - enforce_dtype: bool = True, - enforce_shape: bool = True, - ) -> PandasSeries: - """ - Create a :obj:`PandasSeries` IO Descriptor from given inputs. - - Args: - sample_input: Given sample ``pd.DataFrame`` data - orient: Indication of expected JSON string format. 
Compatible JSON strings can be - produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. - Possible orients are: - - - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} - enforce_dtype: Enforce a certain data type. `dtype` must be specified at function - signature. If you don't want to enforce a specific dtype then change - ``enforce_dtype=False``. - enforce_shape: Enforce a certain shape. ``shape`` must be specified at function - signature. If you don't want to enforce a specific shape then change - ``enforce_shape=False``. - - Returns: - :obj:`PandasSeries`: :code:`PandasSeries` IODescriptor from given users inputs. - - Example: - - .. code-block:: python - :caption: `service.py` - - import pandas as pd - from bentoml.io import PandasSeries - - arr = [1,2,3] - input_spec = PandasSeries.from_sample(pd.DataFrame(arr)) - - @svc.api(input=input_spec, output=PandasSeries()) - def predict(inputs: pd.Series) -> pd.Series: ... 
- """ - inst = cls( - orient=orient, - dtype=sample_input.dtype, - enforce_dtype=enforce_dtype, - shape=sample_input.shape, - enforce_shape=enforce_shape, - ) - inst.sample_input = sample_input - - return inst diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index 6eb41e4f38..2848abb9ea 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -9,6 +9,7 @@ from bentoml.exceptions import BentoMLException from .base import IODescriptor +from .base import create_sample from ..utils.http import set_cookies from ..service.openapi import SUCCESS_DESCRIPTION from ..utils.lazy_loader import LazyLoader @@ -100,10 +101,18 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): ) from None @classmethod - def from_sample(cls, sample: str) -> Self: - kls = cls() - kls.sample = sample - return kls + def from_sample(cls, sample: str | bytes) -> Self: + return super().from_sample(sample) + + @create_sample.register(str) + def _(self, sample: str): + if isinstance(self, Text): + self.sample = sample + + @create_sample.register(bytes) + def _(self, sample: bytes): + if isinstance(self, Text): + self.sample = sample.decode("utf-8") def input_type(self) -> t.Type[str]: return str diff --git a/src/bentoml/_internal/utils/__init__.py b/src/bentoml/_internal/utils/__init__.py index f89ffb1b36..29760f0b00 100644 --- a/src/bentoml/_internal/utils/__init__.py +++ b/src/bentoml/_internal/utils/__init__.py @@ -27,8 +27,10 @@ if sys.version_info >= (3, 8): from functools import cached_property + from functools import singledispatchmethod else: from backports.cached_property import cached_property + from singledispatchmethod import singledispatchmethod from .cattr import bentoml_cattr from ..types import LazyType @@ -55,6 +57,7 @@ __all__ = [ "bentoml_cattr", "cached_property", + "singledispatchmethod", "cached_contextmanager", "reserve_free_port", "LazyLoader", From 
dafcac5b4f93519928432258d7b4bf75efa62d6a Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 02:45:34 -0800 Subject: [PATCH 06/13] feat: openapi and dispatcher fix Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/base.py | 29 ++- src/bentoml/_internal/io_descriptors/file.py | 32 +-- src/bentoml/_internal/io_descriptors/image.py | 34 ++-- src/bentoml/_internal/io_descriptors/json.py | 44 ++-- .../_internal/io_descriptors/multipart.py | 19 +- src/bentoml/_internal/io_descriptors/numpy.py | 45 ++--- .../_internal/io_descriptors/pandas.py | 190 +++++++++++------- src/bentoml/_internal/io_descriptors/text.py | 35 +++- .../_internal/service/openapi/__init__.py | 4 +- .../service/openapi/specification.py | 16 +- tests/unit/_internal/io/test_numpy.py | 2 - 11 files changed, 267 insertions(+), 183 deletions(-) diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index ea0187dca2..820734ae02 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -38,9 +38,9 @@ @singledispatchmethod -def create_sample(self: IODescriptor[t.Any], value: t.Any) -> None: +def set_sample(self: IODescriptor[t.Any], value: t.Any) -> IODescriptor[t.Any]: raise InvalidArgument( - f"Unsupported sample type: '{type(value)}' (value: {value}). To register type '{type(value)}' to {self.__class__.__name__} implement a dispatch function and register types to 'create_sample.register'" + f"Unsupported sample type: '{type(value)}' (value: {value}). 
To register type '{type(value)}' to {self.__class__.__name__} implement a dispatch function and register types to 'set_sample.register'" ) @@ -65,7 +65,7 @@ class IODescriptor(ABC, t.Generic[IOType]): _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] _sample: IOType | None = None - _create_sample: singledispatchmethod[None] = create_sample + _set_sample: singledispatchmethod["IODescriptor[t.Any]"] = set_sample def __init_subclass__(cls, *, descriptor_id: str | None = None): if descriptor_id is not None: @@ -76,11 +76,14 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id - def __new__(cls, *args: t.Any, **kwargs: t.Any): + def __new__(cls, *args: t.Any, **kwargs: t.Any) -> Self: sample = kwargs.pop("_sample", None) - kls = super().__new__(cls) - if sample is not None: - kls._create_sample(sample) + kls = object.__new__(cls) + if sample is None: + set_sample.register(type(None), lambda self, _: self) + kls = kls._set_sample(sample) + # TODO: lazy init + kls.__init__(*args, **kwargs) return kls @property @@ -98,6 +101,10 @@ def sample(self, value: IOType) -> None: def from_sample(cls, sample: IOType | t.Any, **kwargs: t.Any) -> Self: return cls.__new__(cls, _sample=sample, **kwargs) + @property + def mime_type(self) -> str: + return self._mime_type + @abstractmethod def to_spec(self) -> dict[str, t.Any]: raise NotImplementedError @@ -118,14 +125,14 @@ def input_type(self) -> InputType: def openapi_schema(self) -> Schema | Reference: raise NotImplementedError - def openapi_example(self) -> t.Any: - if self.sample is not None: - return self.sample - @abstractmethod def openapi_components(self) -> dict[str, t.Any] | None: raise NotImplementedError + @abstractmethod + def openapi_example(self) -> t.Any | None: + raise NotImplementedError + @abstractmethod def openapi_request_body(self) -> dict[str, t.Any]: raise NotImplementedError diff --git 
a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index d1936af691..b2c0bf6575 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -11,8 +11,8 @@ from starlette.responses import Response from starlette.datastructures import UploadFile +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..types import FileLike from ..utils import resolve_user_filepath from ..utils.http import set_cookies @@ -140,24 +140,27 @@ def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Sel sample, kind=kind, mime_type=filetype.guess_mime(sample) ) - @create_sample.register(type(FileLike)) - def _(self, sample: FileLike[bytes]) -> None: - self.sample = sample + @set_sample.register(type(FileLike)) + def _(cls, sample: FileLike[bytes]): + cls.sample = sample + return cls - @create_sample.register(t.IO) - def _(self, sample: t.IO[t.Any]) -> None: - if isinstance(self, File): - self.sample = FileLike[bytes](sample, "") + @set_sample.register(t.IO) + def _(cls, sample: t.IO[t.Any]): + if isinstance(cls, File): + cls.sample = FileLike[bytes](sample, "") + return cls - @create_sample.register(str) - @create_sample.register(os.PathLike) - def _(self, sample: str) -> None: + @set_sample.register(str) + @set_sample.register(os.PathLike) + def _(cls, sample: str): # This is to ensure we can register same type with different # implementation across different IO descriptors. 
- if isinstance(self, File): + if isinstance(cls, File): p = resolve_user_filepath(sample, ctx=None) with open(p, "rb") as f: - self.sample = FileLike[bytes](f, "") + cls.sample = FileLike[bytes](f, "") + return cls @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: @@ -174,6 +177,9 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + pass + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 293e5a5b5f..f8a0b870a3 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -1,7 +1,9 @@ from __future__ import annotations import io +import os import typing as t +import tempfile import functools from typing import TYPE_CHECKING from urllib.parse import quote @@ -11,8 +13,8 @@ from starlette.responses import Response from starlette.datastructures import UploadFile +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..types import LazyType from ..utils import LazyLoader from ..utils import resolve_user_filepath @@ -234,17 +236,19 @@ def from_sample( if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): - @create_sample.register(np.ndarray) - def _(self: Self, sample: ext.NpNDArray) -> None: - if isinstance(self, Image): - self.sample = PIL.Image.fromarray(sample, mode=self._pilmode) + @set_sample.register(np.ndarray) + def _(cls: Self, sample: ext.NpNDArray): + if isinstance(cls, Image): + cls.sample = PIL.Image.fromarray(sample, mode=pilmode) + return cls elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): - @create_sample.register(PIL.Image.Image) - def _(self: Self, sample: PIL.Image.Image) -> None: - if isinstance(self, Image): - self.sample = sample + 
@set_sample.register(PIL.Image.Image) + def _(cls: Self, sample: PIL.Image.Image): + if isinstance(cls, Image): + cls.sample = sample + return cls return super().from_sample( sample, @@ -253,15 +257,16 @@ def _(self: Self, sample: PIL.Image.Image) -> None: allowed_mime_types=allowed_mime_types, ) - @create_sample.register(str) - def _(self, sample: str) -> None: - if isinstance(self, Image): + @set_sample.register(str) + def _(cls, sample: str): + if isinstance(cls, Image): p = resolve_user_filepath(sample, ctx=None) try: with open(p, "rb") as f: - self.sample = PIL.Image.open(f) + cls.sample = PIL.Image.open(f) except PIL.UnidentifiedImageError as err: raise BadInput(f"Failed to parse sample image file: {err}") from None + return cls def to_spec(self) -> dict[str, t.Any]: return { @@ -289,6 +294,9 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + pass + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": { diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index ab37af03ac..e98af327ad 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -10,8 +10,8 @@ from starlette.requests import Request from starlette.responses import Response +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..types import LazyType from ..utils import LazyLoader from ..utils import bentoml_cattr @@ -213,24 +213,27 @@ def from_sample( if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): pydantic_model = sample.__class__ - @create_sample.register(pydantic.BaseModel) - def _(self: Self, sample: pydantic.BaseModel): - if isinstance(self, JSON): - self.sample = sample + @set_sample.register(pydantic.BaseModel) + def _(cls: Self, sample: pydantic.BaseModel): + if isinstance(cls, JSON): + cls.sample = sample + return 
cls return super().from_sample( sample, pydantic_model=pydantic_model, json_encoder=json_encoder ) - @create_sample.register(dict) - def _(self, sample: dict[str, t.Any]): - if isinstance(self, JSON): - self.sample = sample + @set_sample.register(dict) + def _(cls, sample: dict[str, t.Any]): + if isinstance(cls, JSON): + cls.sample = sample + return cls - @create_sample.register(str) - def _(self, sample: str): - if isinstance(self, JSON): - self.sample = json.loads(sample) + @set_sample.register(str) + def _(cls, sample: str): + if isinstance(cls, JSON): + cls.sample = json.loads(sample) + return cls def to_spec(self) -> dict[str, t.Any]: return { @@ -282,7 +285,7 @@ def openapi_components(self) -> dict[str, t.Any] | None: return {"schemas": pydantic_components_schema(self._pydantic_model)} - def openapi_example(self) -> t.Any: + def openapi_example(self): if self.sample is not None: if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance( self.sample @@ -299,11 +302,14 @@ def openapi_example(self) -> t.Any: ) elif isinstance(self.sample, dict): return self.sample - return def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -311,7 +317,11 @@ def openapi_request_body(self) -> dict[str, t.Any]: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index 985ea281c2..19355a3504 100644 --- 
a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -177,7 +177,7 @@ def __repr__(self) -> str: @classmethod def from_sample(cls, sample: dict[str, t.Any]) -> Self: - pass + return cls(**sample) def input_type( self, @@ -222,9 +222,16 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self) -> t.Any: + return {args: io.openapi_example() for args, io in self._inputs.items()} + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -232,7 +239,11 @@ def openapi_request_body(self) -> dict[str, t.Any]: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } @@ -249,7 +260,7 @@ async def from_http_request(self, request: Request) -> dict[str, t.Any]: for field, descriptor in self._inputs.items(): if field not in form_values: break - res[field] = descriptor.from_http_request(form_values[field]) + res[field] = await descriptor.from_http_request(form_values[field]) else: # NOTE: This is similar to goto, when there is no break. 
to_populate = zip(self._inputs.values(), form_values.values()) reqs = await asyncio.gather( diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 8680620486..b8d2764143 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -9,8 +9,8 @@ from starlette.requests import Request from starlette.responses import Response +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..types import LazyType from ..utils import LazyLoader from ..utils.http import set_cookies @@ -229,15 +229,6 @@ def __init__( self._enforce_dtype = enforce_dtype self._enforce_shape = enforce_shape - if self._enforce_dtype and not self._dtype: - raise InvalidArgument( - "'dtype' must be specified when 'enforce_dtype=True'" - ) from None - if self._enforce_shape and not self._shape: - raise InvalidArgument( - "'shape' must be specified when 'enforce_shape=True'" - ) from None - def _openapi_types(self) -> str: # convert numpy dtypes to openapi compatible types. 
var_type = "integer" @@ -280,11 +271,10 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_example(self) -> t.Any: + def openapi_example(self): if self.sample is not None: if isinstance(self.sample, np.generic): raise BadInput("NumpyNdarray: sample must be a numpy array.") from None - # NOTE: we only need to return self.sample.tolist() def openapi_request_body(self) -> dict[str, t.Any]: @@ -393,7 +383,7 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) @classmethod def from_sample( cls, - sample: ext.NpNDArray, + sample: ext.NpNDArray | t.Sequence[t.Any], enforce_dtype: bool = True, enforce_shape: bool = True, ) -> Self: @@ -442,22 +432,27 @@ async def predict(input: NDArray[np.int16]) -> NDArray[Any]: return super().from_sample( sample, - dtype=sample.dtype, - shape=sample.shape, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - @create_sample.register(np.ndarray) - def _(self, sample: ext.NpNDArray): - if isinstance(self, NumpyNdarray): - self.sample = sample - - @create_sample.register(list) - @create_sample.register(tuple) - def _(self, sample: t.Sequence[t.Any]): - if isinstance(self, NumpyNdarray): - self.sample = np.array(sample) + @set_sample.register(np.ndarray) + def _(cls, sample: ext.NpNDArray): + if isinstance(cls, NumpyNdarray): + cls.sample = sample + cls._shape = sample.shape + cls._dtype = sample.dtype + return cls + + @set_sample.register(list) + @set_sample.register(tuple) + def _(cls, sample: t.Sequence[t.Any]): + if isinstance(cls, NumpyNdarray): + __ = np.array(sample) + cls.sample = __ + cls._shape = __.shape + cls._dtype = __.dtype + return cls async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index bc3c210c12..eaf4b24b3e 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ 
b/src/bentoml/_internal/io_descriptors/pandas.py @@ -12,8 +12,8 @@ from starlette.requests import Request from starlette.responses import Response +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..types import LazyType from ..utils.pkg import find_spec from ..utils.http import set_cookies @@ -36,6 +36,8 @@ from bentoml.grpc.v1alpha1 import service_pb2 as pb from .. import external_typing as ext + from .base import OpenAPIResponse + from ..types import PathType from ..context import InferenceApiContext as Context else: @@ -341,7 +343,7 @@ def __init__( @classmethod def from_sample( cls, - sample: ext.PdDataFrame, + sample: ext.PdDataFrame | PathType | ext.NpNDArray, *, orient: ext.DataFrameOrient = "records", apply_column_names: bool = True, @@ -398,62 +400,71 @@ def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... """ if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): - columns = [i for i in range(sample.shape[1])] + @set_sample.register(np.ndarray) + def _(cls: Self, sample: ext.NpNDArray): + if isinstance(cls, PandasDataFrame): + __ = pd.DataFrame(sample) + cls.sample = __ + cls._shape = __.shape + cls._columns = [str(i) for i in range(sample.shape[1])] - @create_sample.register(np.ndarray) - def _(self: Self, sample: ext.NpNDArray): - if isinstance(self, PandasDataFrame): - self.sample = pd.DataFrame(sample) - - else: - columns = [str(x) for x in list(sample.columns)] + return cls return super().from_sample( sample, + dtype=True, # set to True to infer from given input orient=orient, enforce_shape=enforce_shape, - shape=sample.shape, - apply_column_names=apply_column_names, - columns=columns, enforce_dtype=enforce_dtype, - dtype=True, # set to True to infer from given input + apply_column_names=apply_column_names, default_format=default_format, ) - @create_sample.register(pd.DataFrame) - def _(self, sample: pd.DataFrame): - if isinstance(self, PandasDataFrame): - self.sample = sample - - 
@create_sample.register(str) - @create_sample.register(os.PathLike) - def _(self, sample: str): - if isinstance(self, PandasDataFrame): - if os.path.exists(sample): - try: - ext = os.path.splitext(sample)[-1].strip(".") - self.sample = getattr( - pd, - { - "json": "read_json", - "csv": "read_csv", - "html": "read_html", - "xls": "read_excel", - "xlsx": "read_excel", - "hdf5": "read_hdf", - "parquet": "read_parquet", - "pickle": "read_pickle", - "sql": "read_sql", - }[ext], - )(sample) - except KeyError: - raise InvalidArgument(f"Unsupported sample '{sample}' format.") - except ValueError as e: - raise InvalidArgument( - f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" - ) from None - else: - self.sample = pd.read_json(sample) + @set_sample.register(pd.DataFrame) + def _(cls, sample: pd.DataFrame): + if isinstance(cls, PandasDataFrame): + cls.sample = sample + cls._shape = sample.shape + cls._columns = [str(x) for x in list(sample.columns)] + return cls + + @set_sample.register(str) + @set_sample.register(os.PathLike) + def _(cls, sample: str): + if isinstance(cls, PandasDataFrame): + try: + if os.path.exists(sample): + try: + ext = os.path.splitext(sample)[-1].strip(".") + __ = getattr( + pd, + { + "json": "read_json", + "csv": "read_csv", + "html": "read_html", + "xls": "read_excel", + "xlsx": "read_excel", + "hdf5": "read_hdf", + "parquet": "read_parquet", + "pickle": "read_pickle", + "sql": "read_sql", + }[ext], + )(sample) + cls.sample = __ + cls._shape = __.shape + cls._columns = [str(x) for x in list(__.columns)] + except KeyError: + raise InvalidArgument(f"Unsupported sample '{sample}' format.") + else: + __ = pd.read_json(sample) + cls.sample = __ + cls._shape = __.shape + cls._columns = [str(x) for x in list(__.columns)] + except ValueError as e: + raise InvalidArgument( + f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" + ) from None + return cls def _convert_dtype( self, value: ext.PdDTypeArg | None @@ -509,22 +520,29 @@ def 
openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_example(self) -> t.Any: + def openapi_example(self): if self.sample is not None: - return t.cast("dict[str, t.Any]", self.sample.to_dict()) - return + return self.sample.to_json(orient=self._orient) def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } - def openapi_responses(self) -> dict[str, t.Any]: + def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } @@ -856,7 +874,7 @@ def __init__( @classmethod def from_sample( cls, - sample: ext.PdSeries, + sample: ext.PdSeries | t.Sequence[t.Any], *, orient: ext.SeriesOrient = "records", enforce_shape: bool = True, @@ -901,31 +919,40 @@ def predict(inputs: pd.Series) -> pd.Series: ... 
""" if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): - @create_sample.register(np.ndarray) - def _(self: Self, sample: ext.NpNDArray): - if isinstance(self, PandasSeries): - self.sample = pd.Series(sample) + @set_sample.register(np.ndarray) + def _(cls: Self, sample: ext.NpNDArray): + if isinstance(cls, PandasSeries): + __ = pd.Series(sample) + cls.sample = __ + cls._dtype = __.dtype + cls._shape = __.shape + return cls return super().from_sample( sample, orient=orient, - dtype=sample.dtype, enforce_dtype=enforce_dtype, - shape=sample.shape, enforce_shape=enforce_shape, ) - @create_sample.register(pd.Series) - def _(self, sample: ext.PdSeries): - if isinstance(self, PandasSeries): - self.sample = sample - - @create_sample.register(list) - @create_sample.register(tuple) - @create_sample.register(set) - def _(self, sample: t.Sequence[t.Any]): - if isinstance(self, PandasSeries): - self.sample = pd.Series(sample) + @set_sample.register(pd.Series) + def _(cls, sample: ext.PdSeries): + if isinstance(cls, PandasSeries): + cls.sample = sample + cls._dtype = sample.dtype + cls._shape = sample.shape + return cls + + @set_sample.register(list) + @set_sample.register(tuple) + @set_sample.register(set) + def _(cls, sample: t.Sequence[t.Any]): + if isinstance(cls, PandasSeries): + __ = pd.Series(sample) + cls.sample = __ + cls._dtype = __.dtype + cls._shape = __.shape + return cls def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") @@ -971,22 +998,29 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_example(self) -> t.Any: + def openapi_example(self): if self.sample is not None: - return t.cast("dict[str, t.Any]", self.sample.to_dict()) - return + return self.sample.to_json(orient=self._orient) def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: 
MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } - def openapi_responses(self) -> dict[str, t.Any]: + def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index 2848abb9ea..14ccd02fa8 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -8,8 +8,8 @@ from bentoml.exceptions import BentoMLException +from .base import set_sample from .base import IODescriptor -from .base import create_sample from ..utils.http import set_cookies from ..service.openapi import SUCCESS_DESCRIPTION from ..utils.lazy_loader import LazyLoader @@ -104,15 +104,17 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): def from_sample(cls, sample: str | bytes) -> Self: return super().from_sample(sample) - @create_sample.register(str) - def _(self, sample: str): - if isinstance(self, Text): - self.sample = sample + @set_sample.register(str) + def _(cls, sample: str): + if isinstance(cls, Text): + cls.sample = sample + return cls - @create_sample.register(bytes) - def _(self, sample: bytes): - if isinstance(self, Text): - self.sample = sample.decode("utf-8") + @set_sample.register(bytes) + def _(cls, sample: bytes): + if isinstance(cls, Text): + cls.sample = sample.decode("utf-8") + return cls def input_type(self) -> t.Type[str]: return str @@ -130,9 +132,16 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + return str(self.sample) + def openapi_request_body(self) -> dict[str, t.Any]: return { 
- "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -140,7 +149,11 @@ def openapi_request_body(self) -> dict[str, t.Any]: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/service/openapi/__init__.py b/src/bentoml/_internal/service/openapi/__init__.py index 911744e470..c2d05163d8 100644 --- a/src/bentoml/_internal/service/openapi/__init__.py +++ b/src/bentoml/_internal/service/openapi/__init__.py @@ -105,7 +105,7 @@ def generate_spec(svc: Service, *, openapi_version: str = "3.0.2"): title=svc.name, description=svc.doc, version=svc.tag.version if svc.tag and svc.tag.version else "None", - contact=Contact(name="BentoML Team", email="contact@bentoml.ai"), + contact=Contact(name="BentoML Team", email="contact@bentoml.com"), ), paths={ # setup infra endpoints @@ -136,6 +136,8 @@ def generate_spec(svc: Service, *, openapi_version: str = "3.0.2"): }, }, "tags": [APP_TAG.name], + "consumes": [api.input.mime_type], + "produces": [api.output.mime_type], "x-bentoml-name": api.name, "summary": str(api), "description": api.doc or "", diff --git a/src/bentoml/_internal/service/openapi/specification.py b/src/bentoml/_internal/service/openapi/specification.py index 6f0ea7e6dc..843d298e22 100644 --- a/src/bentoml/_internal/service/openapi/specification.py +++ b/src/bentoml/_internal/service/openapi/specification.py @@ -219,14 +219,14 @@ class PathItem: ref: t.Optional[str] = None summary: t.Optional[str] = None description: t.Optional[str] = None - get: t.Optional[Operation] 
= None - put: t.Optional[Operation] = None - post: t.Optional[Operation] = None - delete: t.Optional[Operation] = None - options: t.Optional[Operation] = None - head: t.Optional[Operation] = None - patch: t.Optional[Operation] = None - trace: t.Optional[Operation] = None + get: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + put: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + post: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + delete: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + options: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + head: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + patch: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + trace: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None # not yet supported: servers, parameters diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 2c9b4477c3..5aab59e484 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -85,8 +85,6 @@ def test_numpy_openapi_request_body(): nparray = NumpyNdarray(dtype="float") nparray.sample_input = ExampleGeneric("asdf") # type: ignore (test exception) - with pytest.raises(BadInput): - nparray.openapi_example() def test_numpy_openapi_responses(): From 591418b5a00ba9c89c4eb139bfa91ca0fa690b8d Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 03:11:10 -0800 Subject: [PATCH 07/13] chore: refactor and __slots__ implementation for Base Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/base.py | 86 +++++++++++++------- 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 820734ae02..83655c7900 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ 
b/src/bentoml/_internal/io_descriptors/base.py @@ -50,13 +50,44 @@ def from_spec(spec: dict[str, str]) -> IODescriptor[t.Any]: return IO_DESCRIPTOR_REGISTRY[spec["id"]].from_spec(spec) -class IODescriptor(ABC, t.Generic[IOType]): +class _OpenAPIMeta: + @abstractmethod + def openapi_schema(self) -> Schema | Reference: + raise NotImplementedError + + @abstractmethod + def openapi_components(self) -> dict[str, t.Any] | None: + raise NotImplementedError + + @abstractmethod + def openapi_example(self) -> t.Any | None: + raise NotImplementedError + + @abstractmethod + def openapi_request_body(self) -> dict[str, t.Any]: + raise NotImplementedError + + @abstractmethod + def openapi_responses(self) -> dict[str, t.Any]: + raise NotImplementedError + + +class IODescriptor(ABC, _OpenAPIMeta, t.Generic[IOType]): """ IODescriptor describes the input/output data format of an InferenceAPI defined in a :code:`bentoml.Service`. This is an abstract base class for extending new HTTP endpoint IO descriptor types in BentoServer. 
""" + __slots__ = ( + "_initialized", + "_args", + "_kwargs", + "_proto_fields", + "_mime_type", + "descriptor_id", + ) + HTTP_METHODS = ["POST"] descriptor_id: str | None @@ -65,7 +96,9 @@ class IODescriptor(ABC, t.Generic[IOType]): _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] _sample: IOType | None = None - _set_sample: singledispatchmethod["IODescriptor[t.Any]"] = set_sample + _initialized: bool + _args: t.Sequence[t.Any] + _kwargs: dict[str, t.Any] def __init_subclass__(cls, *, descriptor_id: str | None = None): if descriptor_id is not None: @@ -75,17 +108,35 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): ) IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id + cls._initialized = False def __new__(cls, *args: t.Any, **kwargs: t.Any) -> Self: sample = kwargs.pop("_sample", None) - kls = object.__new__(cls) + klass = object.__new__(cls) if sample is None: set_sample.register(type(None), lambda self, _: self) - kls = kls._set_sample(sample) - # TODO: lazy init - kls.__init__(*args, **kwargs) + kls = klass._set_sample(sample) + kls._args = args + kls._kwargs = kwargs return kls + def __getattr__(self, name: str) -> t.Any: + if not self._initialized: + self._lazy_init() + assert self._initialized + return getattr(self, name) + + def __repr__(self) -> str: + return self.__class__.__qualname__ + + def _lazy_init(self) -> None: + self.__init__(*self._args, **self._kwargs) + self._initialized = True + del self._args + del self._kwargs + + _set_sample: singledispatchmethod[IODescriptor[t.Any]] = set_sample + @property def sample(self) -> IOType | None: return self._sample @@ -114,33 +165,10 @@ def to_spec(self) -> dict[str, t.Any]: def from_spec(cls, spec: dict[str, t.Any]) -> Self: raise NotImplementedError - def __repr__(self) -> str: - return self.__class__.__qualname__ - @abstractmethod def input_type(self) -> InputType: raise NotImplementedError - @abstractmethod - def openapi_schema(self) 
-> Schema | Reference: - raise NotImplementedError - - @abstractmethod - def openapi_components(self) -> dict[str, t.Any] | None: - raise NotImplementedError - - @abstractmethod - def openapi_example(self) -> t.Any | None: - raise NotImplementedError - - @abstractmethod - def openapi_request_body(self) -> dict[str, t.Any]: - raise NotImplementedError - - @abstractmethod - def openapi_responses(self) -> dict[str, t.Any]: - raise NotImplementedError - @abstractmethod async def from_http_request(self, request: Request) -> IOType: ... From ecd08dd9e7b918a5e0160b1cbab85ecf34253b3c Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 03:14:56 -0800 Subject: [PATCH 08/13] fix: types Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/base.py | 26 +++++++------------ src/bentoml/_internal/io_descriptors/file.py | 3 --- src/bentoml/_internal/io_descriptors/image.py | 3 --- src/bentoml/_internal/io_descriptors/json.py | 3 --- src/bentoml/_internal/io_descriptors/numpy.py | 2 -- .../_internal/io_descriptors/pandas.py | 7 ----- src/bentoml/_internal/io_descriptors/text.py | 2 -- .../service/openapi/specification.py | 2 +- tests/unit/_internal/io/test_numpy.py | 2 ++ 9 files changed, 12 insertions(+), 38 deletions(-) diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 83655c7900..2c3541da74 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -38,7 +38,7 @@ @singledispatchmethod -def set_sample(self: IODescriptor[t.Any], value: t.Any) -> IODescriptor[t.Any]: +def set_sample(self: IODescriptor[t.Any], value: t.Any) -> None: raise InvalidArgument( f"Unsupported sample type: '{type(value)}' (value: {value}). 
To register type '{type(value)}' to {self.__class__.__name__} implement a dispatch function and register types to 'set_sample.register'" ) @@ -79,14 +79,7 @@ class IODescriptor(ABC, _OpenAPIMeta, t.Generic[IOType]): endpoint IO descriptor types in BentoServer. """ - __slots__ = ( - "_initialized", - "_args", - "_kwargs", - "_proto_fields", - "_mime_type", - "descriptor_id", - ) + __slots__ = ("_args", "_kwargs", "_proto_fields", "_mime_type", "descriptor_id") HTTP_METHODS = ["POST"] @@ -96,7 +89,7 @@ class IODescriptor(ABC, _OpenAPIMeta, t.Generic[IOType]): _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] _sample: IOType | None = None - _initialized: bool + _initialized: bool = False _args: t.Sequence[t.Any] _kwargs: dict[str, t.Any] @@ -108,23 +101,22 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): ) IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id - cls._initialized = False def __new__(cls, *args: t.Any, **kwargs: t.Any) -> Self: sample = kwargs.pop("_sample", None) klass = object.__new__(cls) if sample is None: set_sample.register(type(None), lambda self, _: self) - kls = klass._set_sample(sample) - kls._args = args - kls._kwargs = kwargs - return kls + klass._set_sample(sample) + klass._args = args + klass._kwargs = kwargs + return klass def __getattr__(self, name: str) -> t.Any: if not self._initialized: self._lazy_init() assert self._initialized - return getattr(self, name) + return object.__getattribute__(self, name) def __repr__(self) -> str: return self.__class__.__qualname__ @@ -135,7 +127,7 @@ def _lazy_init(self) -> None: del self._args del self._kwargs - _set_sample: singledispatchmethod[IODescriptor[t.Any]] = set_sample + _set_sample: singledispatchmethod[None] = set_sample @property def sample(self) -> IOType | None: diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index b2c0bf6575..933177c9f8 100644 --- 
a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -143,13 +143,11 @@ def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Sel @set_sample.register(type(FileLike)) def _(cls, sample: FileLike[bytes]): cls.sample = sample - return cls @set_sample.register(t.IO) def _(cls, sample: t.IO[t.Any]): if isinstance(cls, File): cls.sample = FileLike[bytes](sample, "") - return cls @set_sample.register(str) @set_sample.register(os.PathLike) @@ -160,7 +158,6 @@ def _(cls, sample: str): p = resolve_user_filepath(sample, ctx=None) with open(p, "rb") as f: cls.sample = FileLike[bytes](f, "") - return cls @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index f8a0b870a3..20a1f90d47 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -240,7 +240,6 @@ def from_sample( def _(cls: Self, sample: ext.NpNDArray): if isinstance(cls, Image): cls.sample = PIL.Image.fromarray(sample, mode=pilmode) - return cls elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): @@ -248,7 +247,6 @@ def _(cls: Self, sample: ext.NpNDArray): def _(cls: Self, sample: PIL.Image.Image): if isinstance(cls, Image): cls.sample = sample - return cls return super().from_sample( sample, @@ -266,7 +264,6 @@ def _(cls, sample: str): cls.sample = PIL.Image.open(f) except PIL.UnidentifiedImageError as err: raise BadInput(f"Failed to parse sample image file: {err}") from None - return cls def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index e98af327ad..e29da2591f 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -217,7 +217,6 @@ def from_sample( def _(cls: Self, sample: 
pydantic.BaseModel): if isinstance(cls, JSON): cls.sample = sample - return cls return super().from_sample( sample, pydantic_model=pydantic_model, json_encoder=json_encoder @@ -227,13 +226,11 @@ def _(cls: Self, sample: pydantic.BaseModel): def _(cls, sample: dict[str, t.Any]): if isinstance(cls, JSON): cls.sample = sample - return cls @set_sample.register(str) def _(cls, sample: str): if isinstance(cls, JSON): cls.sample = json.loads(sample) - return cls def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index b8d2764143..c2fa5d420c 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -442,7 +442,6 @@ def _(cls, sample: ext.NpNDArray): cls.sample = sample cls._shape = sample.shape cls._dtype = sample.dtype - return cls @set_sample.register(list) @set_sample.register(tuple) @@ -452,7 +451,6 @@ def _(cls, sample: t.Sequence[t.Any]): cls.sample = __ cls._shape = __.shape cls._dtype = __.dtype - return cls async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index eaf4b24b3e..accb4141d9 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -408,8 +408,6 @@ def _(cls: Self, sample: ext.NpNDArray): cls._shape = __.shape cls._columns = [str(i) for i in range(sample.shape[1])] - return cls - return super().from_sample( sample, dtype=True, # set to True to infer from given input @@ -426,7 +424,6 @@ def _(cls, sample: pd.DataFrame): cls.sample = sample cls._shape = sample.shape cls._columns = [str(x) for x in list(sample.columns)] - return cls @set_sample.register(str) @set_sample.register(os.PathLike) @@ -464,7 +461,6 @@ def _(cls, sample: str): raise InvalidArgument( f"Failed to create a 'pd.DataFrame' from sample 
{sample}: {e}" ) from None - return cls def _convert_dtype( self, value: ext.PdDTypeArg | None @@ -926,7 +922,6 @@ def _(cls: Self, sample: ext.NpNDArray): cls.sample = __ cls._dtype = __.dtype cls._shape = __.shape - return cls return super().from_sample( sample, @@ -941,7 +936,6 @@ def _(cls, sample: ext.PdSeries): cls.sample = sample cls._dtype = sample.dtype cls._shape = sample.shape - return cls @set_sample.register(list) @set_sample.register(tuple) @@ -952,7 +946,6 @@ def _(cls, sample: t.Sequence[t.Any]): cls.sample = __ cls._dtype = __.dtype cls._shape = __.shape - return cls def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index 14ccd02fa8..c035f02a35 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -108,13 +108,11 @@ def from_sample(cls, sample: str | bytes) -> Self: def _(cls, sample: str): if isinstance(cls, Text): cls.sample = sample - return cls @set_sample.register(bytes) def _(cls, sample: bytes): if isinstance(cls, Text): cls.sample = sample.decode("utf-8") - return cls def input_type(self) -> t.Type[str]: return str diff --git a/src/bentoml/_internal/service/openapi/specification.py b/src/bentoml/_internal/service/openapi/specification.py index 843d298e22..f3e86b274b 100644 --- a/src/bentoml/_internal/service/openapi/specification.py +++ b/src/bentoml/_internal/service/openapi/specification.py @@ -104,7 +104,7 @@ class Schema: anyOf: t.Optional[t.List[Schema]] = None not_: t.Optional[Schema] = None items: t.Optional[t.Union[Schema, t.List[Schema]]] = None - properties: t.Optional[t.Dict[str, Schema | Reference]] = None + properties: t.Optional[t.Dict[str, t.Union[Schema, Reference]]] = None additionalProperties: t.Optional[t.Union[Schema, Reference, bool]] = None description: t.Optional[str] = None format: t.Optional[str] = None diff --git 
a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 5aab59e484..2c9b4477c3 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -85,6 +85,8 @@ def test_numpy_openapi_request_body(): nparray = NumpyNdarray(dtype="float") nparray.sample_input = ExampleGeneric("asdf") # type: ignore (test exception) + with pytest.raises(BadInput): + nparray.openapi_example() def test_numpy_openapi_responses(): From cf6c4a1e56ddea2013e29ea4601d6e6d6947a507 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 13:05:47 -0800 Subject: [PATCH 09/13] fix: tests [skip ci] Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- tests/unit/_internal/io/test_numpy.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 2c9b4477c3..034400d9a9 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -10,7 +10,6 @@ from bentoml.io import NumpyNdarray from bentoml.exceptions import BadInput -from bentoml.exceptions import InvalidArgument from bentoml.exceptions import BentoMLException from bentoml._internal.service.openapi.specification import Schema @@ -43,15 +42,6 @@ def test_invalid_dtype(): assert "expects a 'numpy.array'" in str(e.value) -def test_invalid_init(): - with pytest.raises(InvalidArgument) as exc_info: - NumpyNdarray(enforce_dtype=True) - assert "'dtype' must be specified" in str(exc_info.value) - with pytest.raises(InvalidArgument) as exc_info: - NumpyNdarray(enforce_shape=True) - assert "'shape' must be specified" in str(exc_info.value) - - @pytest.mark.parametrize("dtype, expected", [("float", "number"), (">U8", "integer")]) def test_numpy_to_openapi_types(dtype: str, expected: str): assert NumpyNdarray(dtype=dtype)._openapi_types() == expected # type: ignore (private functions warning) From 
1eec99d6b2331249f3c8b5a6a01a0850fe3d03ec Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 14:33:29 -0800 Subject: [PATCH 10/13] fix: not using singledispatch Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 4 +- src/bentoml/_internal/io_descriptors/base.py | 35 +++--- src/bentoml/_internal/io_descriptors/file.py | 26 +--- src/bentoml/_internal/io_descriptors/image.py | 35 ++---- src/bentoml/_internal/io_descriptors/json.py | 34 +++--- .../_internal/io_descriptors/multipart.py | 6 +- src/bentoml/_internal/io_descriptors/numpy.py | 35 +++--- .../_internal/io_descriptors/pandas.py | 111 +++++++----------- src/bentoml/_internal/io_descriptors/text.py | 13 +- src/bentoml/_internal/utils/__init__.py | 3 - tests/unit/_internal/io/test_base.py | 81 +++++++++++++ tests/unit/_internal/io/test_numpy.py | 42 +++++-- 12 files changed, 232 insertions(+), 193 deletions(-) create mode 100644 tests/unit/_internal/io/test_base.py diff --git a/pyproject.toml b/pyproject.toml index 152d4b1ba0..b342bc001f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -170,6 +170,7 @@ source = ["src"] [tool.coverage.run] branch = true +parallel = true source = ["src/bentoml/"] omit = [ "src/bentoml/__main__.py", @@ -208,7 +209,8 @@ exclude_lines = [ "^\\s*except ImportError", "if __name__ == .__main__.:", "^\\s*if TYPE_CHECKING:", - "^\\s*@overload( |$)", + "^\\s*@(t\\.)?overload( |$)", + "@(abc\\.)?abstractmethod", ] [tool.black] diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 2c3541da74..4cb06ad995 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -5,7 +5,6 @@ from abc import abstractmethod from typing import TYPE_CHECKING -from ..utils import singledispatchmethod from ...exceptions import InvalidArgument if TYPE_CHECKING: @@ -37,13 +36,6 @@ IOType = t.TypeVar("IOType") 
-@singledispatchmethod -def set_sample(self: IODescriptor[t.Any], value: t.Any) -> None: - raise InvalidArgument( - f"Unsupported sample type: '{type(value)}' (value: {value}). To register type '{type(value)}' to {self.__class__.__name__} implement a dispatch function and register types to 'set_sample.register'" - ) - - def from_spec(spec: dict[str, str]) -> IODescriptor[t.Any]: if "id" not in spec: raise InvalidArgument(f"IO descriptor spec ({spec}) missing ID.") @@ -103,13 +95,10 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): cls.descriptor_id = descriptor_id def __new__(cls, *args: t.Any, **kwargs: t.Any) -> Self: - sample = kwargs.pop("_sample", None) klass = object.__new__(cls) - if sample is None: - set_sample.register(type(None), lambda self, _: self) - klass._set_sample(sample) - klass._args = args - klass._kwargs = kwargs + klass.sample = t.cast(IOType, kwargs.pop("_sample", None)) + klass._args = args or () + klass._kwargs = kwargs or {} return klass def __getattr__(self, name: str) -> t.Any: @@ -118,17 +107,21 @@ def __getattr__(self, name: str) -> t.Any: assert self._initialized return object.__getattribute__(self, name) + def __dir__(self) -> t.Iterable[str]: + if not self._initialized: + self._lazy_init() + assert self._initialized + return object.__dir__(self) + def __repr__(self) -> str: return self.__class__.__qualname__ def _lazy_init(self) -> None: - self.__init__(*self._args, **self._kwargs) self._initialized = True + self.__init__(*self._args, **self._kwargs) del self._args del self._kwargs - _set_sample: singledispatchmethod[None] = set_sample - @property def sample(self) -> IOType | None: return self._sample @@ -163,18 +156,18 @@ def input_type(self) -> InputType: @abstractmethod async def from_http_request(self, request: Request) -> IOType: - ... + raise NotImplementedError @abstractmethod async def to_http_response( self, obj: IOType, ctx: Context | None = None ) -> Response: - ... 
+ raise NotImplementedError @abstractmethod async def from_proto(self, field: t.Any) -> IOType: - ... + raise NotImplementedError @abstractmethod async def to_proto(self, obj: IOType) -> t.Any: - ... + raise NotImplementedError diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index 933177c9f8..1c85f70bc1 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -11,7 +11,6 @@ from starlette.responses import Response from starlette.datastructures import UploadFile -from .base import set_sample from .base import IODescriptor from ..types import FileLike from ..utils import resolve_user_filepath @@ -135,30 +134,17 @@ def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Sel raise MissingDependencyException( "'filetype' is required to use 'from_sample'. Install it with 'pip install bentoml[io-file]'." ) + if isinstance(sample, t.IO): + sample = FileLike[bytes](sample, "") + elif isinstance(sample, (str, os.PathLike)): + p = resolve_user_filepath(sample, ctx=None) + with open(p, "rb") as f: + sample = FileLike[bytes](f, "") return super().from_sample( sample, kind=kind, mime_type=filetype.guess_mime(sample) ) - @set_sample.register(type(FileLike)) - def _(cls, sample: FileLike[bytes]): - cls.sample = sample - - @set_sample.register(t.IO) - def _(cls, sample: t.IO[t.Any]): - if isinstance(cls, File): - cls.sample = FileLike[bytes](sample, "") - - @set_sample.register(str) - @set_sample.register(os.PathLike) - def _(cls, sample: str): - # This is to ensure we can register same type with different - # implementation across different IO descriptors. 
- if isinstance(cls, File): - p = resolve_user_filepath(sample, ctx=None) - with open(p, "rb") as f: - cls.sample = FileLike[bytes](f, "") - @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: if "args" not in spec: diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 20a1f90d47..8a3b156b99 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -1,9 +1,7 @@ from __future__ import annotations import io -import os import typing as t -import tempfile import functools from typing import TYPE_CHECKING from urllib.parse import quote @@ -13,7 +11,6 @@ from starlette.responses import Response from starlette.datastructures import UploadFile -from .base import set_sample from .base import IODescriptor from ..types import LazyType from ..utils import LazyLoader @@ -33,7 +30,6 @@ from types import UnionType import PIL - import numpy as np import PIL.Image from typing_extensions import Self @@ -54,7 +50,6 @@ PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=PIL_EXC_MSG) PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=PIL_EXC_MSG) - np = LazyLoader("np", globals(), "numpy") pb, _ = import_generated_stubs() # NOTES: we will keep type in quotation to avoid backward compatibility @@ -235,18 +230,16 @@ def from_sample( raise InvalidArgument(f"{sample} is not a valid image file type.") if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): - - @set_sample.register(np.ndarray) - def _(cls: Self, sample: ext.NpNDArray): - if isinstance(cls, Image): - cls.sample = PIL.Image.fromarray(sample, mode=pilmode) - + sample = PIL.Image.fromarray(sample, mode=pilmode) elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): - - @set_sample.register(PIL.Image.Image) - def _(cls: Self, sample: PIL.Image.Image): - if isinstance(cls, Image): - cls.sample = sample + sample = sample + elif isinstance(sample, str): + p = 
resolve_user_filepath(sample, ctx=None) + try: + with open(p, "rb") as f: + cls.sample = PIL.Image.open(f) + except PIL.UnidentifiedImageError as err: + raise BadInput(f"Failed to parse sample image file: {err}") from None return super().from_sample( sample, @@ -255,16 +248,6 @@ def _(cls: Self, sample: PIL.Image.Image): allowed_mime_types=allowed_mime_types, ) - @set_sample.register(str) - def _(cls, sample: str): - if isinstance(cls, Image): - p = resolve_user_filepath(sample, ctx=None) - try: - with open(p, "rb") as f: - cls.sample = PIL.Image.open(f) - except PIL.UnidentifiedImageError as err: - raise BadInput(f"Failed to parse sample image file: {err}") from None - def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index e29da2591f..9edc789f89 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -10,7 +10,6 @@ from starlette.requests import Request from starlette.responses import Response -from .base import set_sample from .base import IODescriptor from ..types import LazyType from ..utils import LazyLoader @@ -212,26 +211,29 @@ def from_sample( pydantic_model: t.Type[pydantic.BaseModel] | None = None if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): pydantic_model = sample.__class__ - - @set_sample.register(pydantic.BaseModel) - def _(cls: Self, sample: pydantic.BaseModel): - if isinstance(cls, JSON): - cls.sample = sample + elif isinstance(sample, str): + try: + sample = json.loads(sample) + except json.JSONDecodeError as e: + raise BadInput( + f"Unable to parse JSON string. Please make sure the input is a valid JSON string: {e}" + ) from None + elif isinstance(sample, bytes): + try: + sample = json.loads(sample.decode()) + except json.JSONDecodeError as e: + raise BadInput( + f"Unable to parse JSON bytes. 
Please make sure the input is a valid JSON bytes: {e}" + ) from None + elif not isinstance(sample, (dict, list)): + raise BadInput( + f"Unable to infer JSON type from sample: {sample}. Please make sure the input is a valid JSON object." + ) return super().from_sample( sample, pydantic_model=pydantic_model, json_encoder=json_encoder ) - @set_sample.register(dict) - def _(cls, sample: dict[str, t.Any]): - if isinstance(cls, JSON): - cls.sample = sample - - @set_sample.register(str) - def _(cls, sample: str): - if isinstance(cls, JSON): - cls.sample = json.loads(sample) - def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index 19355a3504..7618e5a9b9 100644 --- a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -176,8 +176,10 @@ def __repr__(self) -> str: return f"Multipart({','.join([f'{k}={v}' for k,v in zip(self._inputs, map(repr, self._inputs.values()))])})" @classmethod - def from_sample(cls, sample: dict[str, t.Any]) -> Self: - return cls(**sample) + def from_sample( + cls, sample: dict[str, t.Any] # pylint: disable=unused-argument + ) -> Self: + raise NotImplementedError("'from_sample' is not supported for Multipart.") def input_type( self, diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index c2fa5d420c..c02305c923 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -9,7 +9,6 @@ from starlette.requests import Request from starlette.responses import Response -from .base import set_sample from .base import IODescriptor from ..types import LazyType from ..utils import LazyLoader @@ -217,6 +216,15 @@ def __init__( shape: tuple[int, ...] 
| None = None, enforce_shape: bool = False, ): + if enforce_dtype and not dtype: + raise InvalidArgument( + "'dtype' must be specified when 'enforce_dtype=True'" + ) from None + if enforce_shape and not shape: + raise InvalidArgument( + "'shape' must be specified when 'enforce_shape=True'" + ) from None + if dtype and not isinstance(dtype, np.dtype): # Convert from primitive type or type string, e.g.: np.dtype(float) or np.dtype("float64") try: @@ -429,29 +437,22 @@ async def predict(input: NDArray[np.int16]) -> NDArray[Any]: raise BentoMLException( "'NumpyNdarray.from_sample()' expects a 'numpy.array', not 'numpy.generic'." ) from None + try: + if not isinstance(sample, np.ndarray): + sample = np.array(sample) + except ValueError: + raise BentoMLException( + f"Failed to create a 'numpy.ndarray' from given sample {sample}" + ) from None return super().from_sample( sample, + shape=sample.shape, + dtype=sample.dtype, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - @set_sample.register(np.ndarray) - def _(cls, sample: ext.NpNDArray): - if isinstance(cls, NumpyNdarray): - cls.sample = sample - cls._shape = sample.shape - cls._dtype = sample.dtype - - @set_sample.register(list) - @set_sample.register(tuple) - def _(cls, sample: t.Sequence[t.Any]): - if isinstance(cls, NumpyNdarray): - __ = np.array(sample) - cls.sample = __ - cls._shape = __.shape - cls._dtype = __.dtype - async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ Process incoming protobuf request and convert it to ``numpy.ndarray`` diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index accb4141d9..91a58a4eb3 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -12,7 +12,6 @@ from starlette.requests import Request from starlette.responses import Response -from .base import set_sample from .base import IODescriptor from ..types import LazyType from 
..utils.pkg import find_spec @@ -37,7 +36,6 @@ from .. import external_typing as ext from .base import OpenAPIResponse - from ..types import PathType from ..context import InferenceApiContext as Context else: @@ -327,6 +325,19 @@ def __init__( enforce_shape: bool = False, default_format: t.Literal["json", "parquet", "csv"] = "json", ): + if enforce_dtype and dtype is None: + raise ValueError( + "'dtype' must be specified if 'enforce_dtype' is True" + ) from None + if enforce_shape and shape is None: + raise ValueError( + "'shape' must be specified if 'enforce_shape' is True" + ) from None + if apply_column_names and columns is None: + raise ValueError( + "'columns' must be specified if 'apply_column_names' is True" + ) from None + self._orient: ext.DataFrameOrient = orient self._columns = columns self._apply_column_names = apply_column_names @@ -343,7 +354,7 @@ def __init__( @classmethod def from_sample( cls, - sample: ext.PdDataFrame | PathType | ext.NpNDArray, + sample: ext.PdDataFrame, *, orient: ext.DataFrameOrient = "records", apply_column_names: bool = True, @@ -399,41 +410,13 @@ def from_sample( def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... 
""" if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): - - @set_sample.register(np.ndarray) - def _(cls: Self, sample: ext.NpNDArray): - if isinstance(cls, PandasDataFrame): - __ = pd.DataFrame(sample) - cls.sample = __ - cls._shape = __.shape - cls._columns = [str(i) for i in range(sample.shape[1])] - - return super().from_sample( - sample, - dtype=True, # set to True to infer from given input - orient=orient, - enforce_shape=enforce_shape, - enforce_dtype=enforce_dtype, - apply_column_names=apply_column_names, - default_format=default_format, - ) - - @set_sample.register(pd.DataFrame) - def _(cls, sample: pd.DataFrame): - if isinstance(cls, PandasDataFrame): - cls.sample = sample - cls._shape = sample.shape - cls._columns = [str(x) for x in list(sample.columns)] - - @set_sample.register(str) - @set_sample.register(os.PathLike) - def _(cls, sample: str): - if isinstance(cls, PandasDataFrame): + sample = pd.DataFrame(sample) + elif isinstance(sample, str): try: if os.path.exists(sample): try: ext = os.path.splitext(sample)[-1].strip(".") - __ = getattr( + sample = getattr( pd, { "json": "read_json", @@ -447,21 +430,28 @@ def _(cls, sample: str): "sql": "read_sql", }[ext], )(sample) - cls.sample = __ - cls._shape = __.shape - cls._columns = [str(x) for x in list(__.columns)] except KeyError: raise InvalidArgument(f"Unsupported sample '{sample}' format.") else: - __ = pd.read_json(sample) - cls.sample = __ - cls._shape = __.shape - cls._columns = [str(x) for x in list(__.columns)] + # Try to load the string as json. 
+ sample = pd.read_json(sample) except ValueError as e: raise InvalidArgument( f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" ) from None + return super().from_sample( + sample, + dtype=True, # set to True to infer from given input + orient=orient, + shape=sample.shape, + columns=[str(i) for i in list(sample.columns)], + enforce_shape=enforce_shape, + enforce_dtype=enforce_dtype, + apply_column_names=apply_column_names, + default_format=default_format, + ) + def _convert_dtype( self, value: ext.PdDTypeArg | None ) -> str | dict[str, t.Any] | None: @@ -861,6 +851,15 @@ def __init__( shape: tuple[int, ...] | None = None, enforce_shape: bool = False, ): + if enforce_dtype and dtype is None: + raise ValueError( + "'dtype' must be specified if 'enforce_dtype' is True" + ) from None + if enforce_shape and shape is None: + raise ValueError( + "'shape' must be specified if 'enforce_shape' is True" + ) from None + self._orient: ext.SeriesOrient = orient self._dtype = dtype self._enforce_dtype = enforce_dtype @@ -913,40 +912,18 @@ def from_sample( @svc.api(input=input_spec, output=PandasSeries()) def predict(inputs: pd.Series) -> pd.Series: ... 
""" - if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): - - @set_sample.register(np.ndarray) - def _(cls: Self, sample: ext.NpNDArray): - if isinstance(cls, PandasSeries): - __ = pd.Series(sample) - cls.sample = __ - cls._dtype = __.dtype - cls._shape = __.shape + if not isinstance(sample, pd.Series): + sample = pd.Series(sample) return super().from_sample( sample, + dtype=sample.dtype, + shape=sample.shape, orient=orient, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - @set_sample.register(pd.Series) - def _(cls, sample: ext.PdSeries): - if isinstance(cls, PandasSeries): - cls.sample = sample - cls._dtype = sample.dtype - cls._shape = sample.shape - - @set_sample.register(list) - @set_sample.register(tuple) - @set_sample.register(set) - def _(cls, sample: t.Sequence[t.Any]): - if isinstance(cls, PandasSeries): - __ = pd.Series(sample) - cls.sample = __ - cls._dtype = __.dtype - cls._shape = __.shape - def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index c035f02a35..1961b6bede 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -8,7 +8,6 @@ from bentoml.exceptions import BentoMLException -from .base import set_sample from .base import IODescriptor from ..utils.http import set_cookies from ..service.openapi import SUCCESS_DESCRIPTION @@ -102,18 +101,10 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): @classmethod def from_sample(cls, sample: str | bytes) -> Self: + if isinstance(sample, bytes): + sample = sample.decode("utf-8") return super().from_sample(sample) - @set_sample.register(str) - def _(cls, sample: str): - if isinstance(cls, Text): - cls.sample = sample - - @set_sample.register(bytes) - def _(cls, sample: bytes): - if isinstance(cls, Text): - cls.sample = sample.decode("utf-8") - def input_type(self) -> t.Type[str]: 
return str diff --git a/src/bentoml/_internal/utils/__init__.py b/src/bentoml/_internal/utils/__init__.py index 29760f0b00..f89ffb1b36 100644 --- a/src/bentoml/_internal/utils/__init__.py +++ b/src/bentoml/_internal/utils/__init__.py @@ -27,10 +27,8 @@ if sys.version_info >= (3, 8): from functools import cached_property - from functools import singledispatchmethod else: from backports.cached_property import cached_property - from singledispatchmethod import singledispatchmethod from .cattr import bentoml_cattr from ..types import LazyType @@ -57,7 +55,6 @@ __all__ = [ "bentoml_cattr", "cached_property", - "singledispatchmethod", "cached_contextmanager", "reserve_free_port", "LazyLoader", diff --git a/tests/unit/_internal/io/test_base.py b/tests/unit/_internal/io/test_base.py new file mode 100644 index 0000000000..ddf9171400 --- /dev/null +++ b/tests/unit/_internal/io/test_base.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import typing as t +from typing import TYPE_CHECKING +from bentoml.io import IODescriptor +import pytest + +if TYPE_CHECKING: + + from typing_extensions import Self + from bentoml._internal.context import InferenceApiContext as Context + + +class DummyDescriptor(IODescriptor[t.Any], descriptor_id="bentoml.io.Dummy"): + _mime_type = "application/vnd.bentoml.dummy" + + def __init__(self, **kwargs: t.Any): + [object.__setattr__(self, k, v) for k, v in kwargs.items()] + + def openapi_schema(self) -> t.Any: + raise NotImplementedError + + def openapi_components(self) -> dict[str, t.Any] | None: + raise NotImplementedError + + def openapi_example(self) -> t.Any | None: + raise NotImplementedError + + def openapi_request_body(self) -> dict[str, t.Any]: + raise NotImplementedError + + def openapi_responses(self) -> dict[str, t.Any]: + raise NotImplementedError + + def to_spec(self) -> dict[str, t.Any]: + raise NotImplementedError + + @classmethod + def from_spec(cls, spec: dict[str, t.Any]) -> Self: + return cls(**spec) + + def 
input_type(self) -> t.Any: + return str + + async def from_http_request(self, request: t.Any) -> t.Any: + return request + + async def to_http_response(self, obj: t.Any, ctx: Context | None = None) -> t.Any: + return obj, ctx + + async def from_proto(self, field: t.Any) -> t.Any: + return field + + async def to_proto(self, obj: t.Any) -> t.Any: + return obj + + @classmethod + def from_sample(cls, sample: t.Any, **kwargs: t.Any): + return super().from_sample(sample, **kwargs) + + +@pytest.mark.parametrize( + "fn", + [ + f"openapi_{n}" + for n in ("schema", "components", "example", "request_body", "responses") + ], +) +def test_raise_not_implemented_openapi(fn: str): + with pytest.raises(NotImplementedError): + getattr(DummyDescriptor(), fn)() + + +@pytest.mark.asyncio +async def test_lazy_init(): + d = DummyDescriptor.from_sample("asdf", foo="bar", baz="qux") + assert not d._initialized + assert d.sample == "asdf" + assert d.foo == "bar" + r = await d.from_http_request("asdf") + assert r and d._initialized diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 034400d9a9..ce9030ab8e 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -10,6 +10,7 @@ from bentoml.io import NumpyNdarray from bentoml.exceptions import BadInput +from bentoml.exceptions import InvalidArgument from bentoml.exceptions import BentoMLException from bentoml._internal.service.openapi.specification import Schema @@ -42,6 +43,15 @@ def test_invalid_dtype(): assert "expects a 'numpy.array'" in str(e.value) +def test_invalid_init(): + with pytest.raises(InvalidArgument) as exc_info: + NumpyNdarray(enforce_dtype=True) + assert "'dtype' must be specified" in str(exc_info.value) + with pytest.raises(InvalidArgument) as exc_info: + NumpyNdarray(enforce_shape=True) + assert "'shape' must be specified" in str(exc_info.value) + + @pytest.mark.parametrize("dtype, expected", [("float", "number"), (">U8", "integer")]) def 
test_numpy_to_openapi_types(dtype: str, expected: str): assert NumpyNdarray(dtype=dtype)._openapi_types() == expected # type: ignore (private functions warning) @@ -73,11 +83,6 @@ def test_numpy_openapi_request_body(): assert ndarray["content"] assert ndarray["content"]["application/json"].example == example.tolist() - nparray = NumpyNdarray(dtype="float") - nparray.sample_input = ExampleGeneric("asdf") # type: ignore (test exception) - with pytest.raises(BadInput): - nparray.openapi_example() - def test_numpy_openapi_responses(): responses = NumpyNdarray().openapi_responses() @@ -87,22 +92,41 @@ def test_numpy_openapi_responses(): assert "application/json" in responses["content"] assert not responses["content"]["application/json"].example + ndarray = from_example.openapi_request_body() + assert ndarray["content"] + assert ndarray["content"]["application/json"].example == example.tolist() + + +def test_numpy_openapi_example(): + r = NumpyNdarray().openapi_example() + assert r is None + + r = from_example.openapi_example() + assert r == example.tolist() + + nparray = NumpyNdarray(dtype="float") + nparray.sample = ExampleGeneric("asdf") + with pytest.raises(BadInput): + nparray.openapi_example() + def test_verify_numpy_ndarray(caplog: LogCaptureFixture): partial_check = partial(from_example.validate_array, exception_cls=BentoMLException) with pytest.raises(BentoMLException) as ex: partial_check(np.array(["asdf"])) - assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) # type: ignore (testing message) + assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) with pytest.raises(BentoMLException) as e: partial_check(np.array([[1]])) - assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) # type: ignore (testing message) + assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) # test cases where reshape is failed example = NumpyNdarray.from_sample(np.ones((2, 2, 3))) - 
example._enforce_shape = False # type: ignore (test internal check) - example._enforce_dtype = False # type: ignore (test internal check) + # Note that from_sample now lazy load the IO descriptor + example._lazy_init() + example._enforce_shape = False + example._enforce_dtype = False with caplog.at_level(logging.DEBUG): example.validate_array(np.array("asdf")) assert "Failed to reshape" in caplog.text From dfd860aa45486ceca022180d22f92e9c171ef48b Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 15:28:34 -0800 Subject: [PATCH 11/13] fix: tests (incremental) Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 1 - src/bentoml/_internal/io_descriptors/image.py | 2 -- src/bentoml/_internal/io_descriptors/pandas.py | 10 +--------- tests/unit/_internal/io/test_base.py | 5 ++++- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b342bc001f..18b251f1d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,6 @@ dependencies = [ "backports.cached-property;python_version<'3.8'", "backports.shutil_copytree;python_version<'3.8'", "importlib-metadata;python_version<'3.8'", - "singledispatchmethod;python_version<'3.8'", ] dynamic = ["version"] diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 8a3b156b99..f7e540a20b 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -231,8 +231,6 @@ def from_sample( if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): sample = PIL.Image.fromarray(sample, mode=pilmode) - elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(sample): - sample = sample elif isinstance(sample, str): p = resolve_user_filepath(sample, ctx=None) try: diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index 
91a58a4eb3..cacb1d1e90 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -469,20 +469,12 @@ def _convert_dtype( return None def to_spec(self) -> dict[str, t.Any]: - # TODO: support extension dtypes - dtype = None - if self._dtype is not None: - if isinstance(self._dtype, bool): - dtype = self._dtype - else: - dtype = self._dtype.name - return { "id": self.descriptor_id, "args": { "orient": self._orient, "columns": self._columns, - "dtype": self._convert_dtype(dtype), + "dtype": self._convert_dtype(self._dtype), "shape": self._shape, "enforce_dtype": self._enforce_dtype, "enforce_shape": self._enforce_shape, diff --git a/tests/unit/_internal/io/test_base.py b/tests/unit/_internal/io/test_base.py index ddf9171400..ca2065290d 100644 --- a/tests/unit/_internal/io/test_base.py +++ b/tests/unit/_internal/io/test_base.py @@ -2,12 +2,15 @@ import typing as t from typing import TYPE_CHECKING -from bentoml.io import IODescriptor + import pytest +from bentoml.io import IODescriptor + if TYPE_CHECKING: from typing_extensions import Self + from bentoml._internal.context import InferenceApiContext as Context From f3f3027a5f9198bd68af1777f36b2356d0b07cfc Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 15:44:34 -0800 Subject: [PATCH 12/13] fix: different output Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/multipart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index 7618e5a9b9..c585feb207 100644 --- a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -262,7 +262,7 @@ async def from_http_request(self, request: Request) -> dict[str, t.Any]: for field, descriptor in self._inputs.items(): if field not in 
form_values: break - res[field] = await descriptor.from_http_request(form_values[field]) + res[field] = descriptor.from_http_request(form_values[field]) else: # NOTE: This is similar to goto, when there is no break. to_populate = zip(self._inputs.values(), form_values.values()) reqs = await asyncio.gather( From f2df17ee351e77582fec0e955fe51f7f90284f43 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 8 Nov 2022 16:26:45 -0800 Subject: [PATCH 13/13] chore: update _from_sample implementation Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/io_descriptors/base.py | 22 ++++---- src/bentoml/_internal/io_descriptors/file.py | 10 ++-- src/bentoml/_internal/io_descriptors/image.py | 21 ++----- src/bentoml/_internal/io_descriptors/json.py | 16 +----- .../_internal/io_descriptors/multipart.py | 5 +- src/bentoml/_internal/io_descriptors/numpy.py | 21 ++----- .../_internal/io_descriptors/pandas.py | 55 +++++-------------- src/bentoml/_internal/io_descriptors/text.py | 5 +- tests/unit/_internal/io/test_base.py | 15 +---- tests/unit/_internal/io/test_numpy.py | 1 - 10 files changed, 48 insertions(+), 123 deletions(-) diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 4cb06ad995..0455c60396 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -94,12 +94,10 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id - def __new__(cls, *args: t.Any, **kwargs: t.Any) -> Self: - klass = object.__new__(cls) - klass.sample = t.cast(IOType, kwargs.pop("_sample", None)) - klass._args = args or () - klass._kwargs = kwargs or {} - return klass + if TYPE_CHECKING: + + def __init__(self, **kwargs: t.Any) -> None: + ... 
def __getattr__(self, name: str) -> t.Any: if not self._initialized: @@ -130,12 +128,16 @@ def sample(self) -> IOType | None: def sample(self, value: IOType) -> None: self._sample = value - # NOTE: for custom types handle, use 'create_sample.register' to register - # custom types for 'from_sample' @classmethod - @abstractmethod def from_sample(cls, sample: IOType | t.Any, **kwargs: t.Any) -> Self: - return cls.__new__(cls, _sample=sample, **kwargs) + klass = cls(**kwargs) + sample = klass._from_sample(sample) + klass.sample = sample + return klass + + @abstractmethod + def _from_sample(self, sample: t.Any) -> IOType: + raise NotImplementedError @property def mime_type(self) -> str: diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index 1c85f70bc1..8db8ee62ca 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -126,8 +126,7 @@ def __new__( res._mime_type = mime_type return res - @classmethod - def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Self: + def _from_sample(self, sample: FileType | str) -> FileType: try: import filetype except ModuleNotFoundError: @@ -136,14 +135,13 @@ def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Sel ) if isinstance(sample, t.IO): sample = FileLike[bytes](sample, "") + self._mime_type = filetype.guess_mime(sample) elif isinstance(sample, (str, os.PathLike)): p = resolve_user_filepath(sample, ctx=None) + self._mime_type = filetype.guess_mime(p) with open(p, "rb") as f: sample = FileLike[bytes](f, "") - - return super().from_sample( - sample, kind=kind, mime_type=filetype.guess_mime(sample) - ) + return sample @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index f7e540a20b..18f3d26b19 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ 
b/src/bentoml/_internal/io_descriptors/image.py @@ -210,14 +210,7 @@ def __init__( self._pilmode: _Mode | None = pilmode self._format: str = MIME_EXT_MAPPING[self._mime_type] - @classmethod - def from_sample( - cls, - sample: ImageType | str, - *, - pilmode: _Mode | None = DEFAULT_PIL_MODE, - allowed_mime_types: t.Iterable[str] | None = None, - ) -> Self: + def _from_sample(self, sample: ImageType | str) -> ImageType: try: from filetype.match import image_match except ModuleNotFoundError: @@ -230,21 +223,15 @@ def from_sample( raise InvalidArgument(f"{sample} is not a valid image file type.") if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): - sample = PIL.Image.fromarray(sample, mode=pilmode) + sample = PIL.Image.fromarray(sample) elif isinstance(sample, str): p = resolve_user_filepath(sample, ctx=None) try: with open(p, "rb") as f: - cls.sample = PIL.Image.open(f) + sample = PIL.Image.open(f) except PIL.UnidentifiedImageError as err: raise BadInput(f"Failed to parse sample image file: {err}") from None - - return super().from_sample( - sample, - pilmode=pilmode, - mime_type=img_type.mime, - allowed_mime_types=allowed_mime_types, - ) + return sample def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index 9edc789f89..c4217527c9 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -201,16 +201,9 @@ def __init__( "'validate_json' option from 'bentoml.io.JSON' has been deprecated. Use a Pydantic model to specify validation options instead." 
) - @classmethod - def from_sample( - cls, - sample: JSONType, - *, - json_encoder: t.Type[json.JSONEncoder] = DefaultJsonEncoder, - ) -> Self: - pydantic_model: t.Type[pydantic.BaseModel] | None = None + def _from_sample(self, sample: JSONType) -> JSONType: if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): - pydantic_model = sample.__class__ + self._pydantic_model = sample.__class__ elif isinstance(sample, str): try: sample = json.loads(sample) @@ -229,10 +222,7 @@ def from_sample( raise BadInput( f"Unable to infer JSON type from sample: {sample}. Please make sure the input is a valid JSON object." ) - - return super().from_sample( - sample, pydantic_model=pydantic_model, json_encoder=json_encoder - ) + return sample def to_spec(self) -> dict[str, t.Any]: return { diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index c585feb207..d7253a9bde 100644 --- a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -175,10 +175,7 @@ def __init__(self, **inputs: IODescriptor[t.Any]): def __repr__(self) -> str: return f"Multipart({','.join([f'{k}={v}' for k,v in zip(self._inputs, map(repr, self._inputs.values()))])})" - @classmethod - def from_sample( - cls, sample: dict[str, t.Any] # pylint: disable=unused-argument - ) -> Self: + def _from_sample(cls, sample: dict[str, t.Any]) -> t.Any: raise NotImplementedError("'from_sample' is not supported for Multipart.") def input_type( diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index c02305c923..50a0c60eab 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -388,13 +388,7 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) else: return Response(json.dumps(obj.tolist()), media_type=self._mime_type) - @classmethod - def from_sample( - 
cls, - sample: ext.NpNDArray | t.Sequence[t.Any], - enforce_dtype: bool = True, - enforce_shape: bool = True, - ) -> Self: + def _from_sample(self, sample: ext.NpNDArray | t.Sequence[t.Any]) -> ext.NpNDArray: """ Create a :obj:`NumpyNdarray` IO Descriptor from given inputs. @@ -444,14 +438,11 @@ async def predict(input: NDArray[np.int16]) -> NDArray[Any]: raise BentoMLException( f"Failed to create a 'numpy.ndarray' from given sample {sample}" ) from None - - return super().from_sample( - sample, - shape=sample.shape, - dtype=sample.dtype, - enforce_dtype=enforce_dtype, - enforce_shape=enforce_shape, - ) + self._dtype = sample.dtype + self._shape = sample.shape + self._enforce_dtype = True + self._enforce_shape = True + return sample async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index cacb1d1e90..176fd1d144 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -351,17 +351,7 @@ def __init__( _validate_serialization_format(self._default_format) self._mime_type = self._default_format.mime_type - @classmethod - def from_sample( - cls, - sample: ext.PdDataFrame, - *, - orient: ext.DataFrameOrient = "records", - apply_column_names: bool = True, - enforce_shape: bool = True, - enforce_dtype: bool = True, - default_format: t.Literal["json", "parquet", "csv"] = "json", - ) -> Self: + def _from_sample(self, sample: ext.PdDataFrame) -> ext.PdDataFrame: """ Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. @@ -439,18 +429,13 @@ def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... 
raise InvalidArgument( f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" ) from None - - return super().from_sample( - sample, - dtype=True, # set to True to infer from given input - orient=orient, - shape=sample.shape, - columns=[str(i) for i in list(sample.columns)], - enforce_shape=enforce_shape, - enforce_dtype=enforce_dtype, - apply_column_names=apply_column_names, - default_format=default_format, - ) + self._shape = sample.shape + self._columns = [str(i) for i in list(sample.columns)] + self._dtype = True + self._enforce_dtype = True + self._enforce_shape = True + self._apply_column_names = True + return sample def _convert_dtype( self, value: ext.PdDTypeArg | None @@ -858,15 +843,7 @@ def __init__( self._shape = shape self._enforce_shape = enforce_shape - @classmethod - def from_sample( - cls, - sample: ext.PdSeries | t.Sequence[t.Any], - *, - orient: ext.SeriesOrient = "records", - enforce_shape: bool = True, - enforce_dtype: bool = True, - ) -> Self: + def _from_sample(self, sample: ext.PdSeries | t.Sequence[t.Any]) -> ext.PdSeries: """ Create a :obj:`PandasSeries` IO Descriptor from given inputs. @@ -906,15 +883,11 @@ def predict(inputs: pd.Series) -> pd.Series: ... 
""" if not isinstance(sample, pd.Series): sample = pd.Series(sample) - - return super().from_sample( - sample, - dtype=sample.dtype, - shape=sample.shape, - orient=orient, - enforce_dtype=enforce_dtype, - enforce_shape=enforce_shape, - ) + self._dtype = sample.dtype + self._shape = sample.shape + self._enforce_dtype = True + self._enforce_shape = True + return sample def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index 1961b6bede..ed9127069a 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -99,11 +99,10 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): f"'{self.__class__.__name__}' is not designed to take any args or kwargs during initialization." ) from None - @classmethod - def from_sample(cls, sample: str | bytes) -> Self: + def _from_sample(self, sample: str | bytes) -> str: if isinstance(sample, bytes): sample = sample.decode("utf-8") - return super().from_sample(sample) + return sample def input_type(self) -> t.Type[str]: return str diff --git a/tests/unit/_internal/io/test_base.py b/tests/unit/_internal/io/test_base.py index ca2065290d..59d2dd3981 100644 --- a/tests/unit/_internal/io/test_base.py +++ b/tests/unit/_internal/io/test_base.py @@ -57,9 +57,8 @@ async def from_proto(self, field: t.Any) -> t.Any: async def to_proto(self, obj: t.Any) -> t.Any: return obj - @classmethod - def from_sample(cls, sample: t.Any, **kwargs: t.Any): - return super().from_sample(sample, **kwargs) + def _from_sample(cls, sample: t.Any, **kwargs: t.Any): + return sample @pytest.mark.parametrize( @@ -72,13 +71,3 @@ def from_sample(cls, sample: t.Any, **kwargs: t.Any): def test_raise_not_implemented_openapi(fn: str): with pytest.raises(NotImplementedError): getattr(DummyDescriptor(), fn)() - - -@pytest.mark.asyncio -async def test_lazy_init(): - d = 
DummyDescriptor.from_sample("asdf", foo="bar", baz="qux") - assert not d._initialized - assert d.sample == "asdf" - assert d.foo == "bar" - r = await d.from_http_request("asdf") - assert r and d._initialized diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index ce9030ab8e..d9cc40fd93 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -124,7 +124,6 @@ def test_verify_numpy_ndarray(caplog: LogCaptureFixture): # test cases where reshape is failed example = NumpyNdarray.from_sample(np.ones((2, 2, 3))) # Note that from_sample now lazy load the IO descriptor - example._lazy_init() example._enforce_shape = False example._enforce_dtype = False with caplog.at_level(logging.DEBUG):