diff --git a/pyproject.toml b/pyproject.toml index 919c177898..18b251f1d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,17 +112,22 @@ include = [ [project.optional-dependencies] all = [ "bentoml[aws]", - "bentoml[io-json]", - "bentoml[io-image]", - "bentoml[io-pandas]", + "bentoml[io]", "bentoml[grpc]", "bentoml[grpc-reflection]", "bentoml[grpc-channelz]", "bentoml[tracing]", ] aws = ["fs-s3fs"] +io = [ + "bentoml[io-json]", + "bentoml[io-image]", + "bentoml[io-pandas]", + "bentoml[io-file]", +] # syntatic sugar for bentoml[io-json,io-image,io-pandas,io-file] +io-file = ["filetype"] # Currently use for from_sample io-json = ["pydantic<2"] # currently we don't have support for pydantic 2.0 -io-image = ["Pillow"] +io-image = ["bentoml[io-file]", "Pillow"] io-pandas = ["pandas", "pyarrow"] grpc = [ # Restrict maximum version due to breaking protobuf 4.21.0 changes @@ -164,6 +169,7 @@ source = ["src"] [tool.coverage.run] branch = true +parallel = true source = ["src/bentoml/"] omit = [ "src/bentoml/__main__.py", @@ -202,7 +208,8 @@ exclude_lines = [ "^\\s*except ImportError", "if __name__ == .__main__.:", "^\\s*if TYPE_CHECKING:", - "^\\s*@overload( |$)", + "^\\s*@(t\\.)?overload( |$)", + "@(abc\\.)?abstractmethod", ] [tool.black] diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 666a584fd3..0455c60396 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -19,6 +19,7 @@ from ..types import LazyType from ..context import InferenceApiContext as Context from ..service.openapi.specification import Schema + from ..service.openapi.specification import MediaType from ..service.openapi.specification import Reference InputType = ( @@ -27,6 +28,7 @@ | LazyType[t.Any] | dict[str, t.Type[t.Any] | UnionType | LazyType[t.Any]] ) + OpenAPIResponse = dict[str, str | dict[str, MediaType] | dict[str, t.Any]] IO_DESCRIPTOR_REGISTRY: dict[str, type[IODescriptor[t.Any]]] = {} @@ -40,19 +42,48 @@ def from_spec(spec: dict[str, str]) -> IODescriptor[t.Any]: return IO_DESCRIPTOR_REGISTRY[spec["id"]].from_spec(spec) -class IODescriptor(ABC, t.Generic[IOType]): +class _OpenAPIMeta: + @abstractmethod + def openapi_schema(self) -> Schema | Reference: + raise NotImplementedError + + @abstractmethod + def openapi_components(self) -> dict[str, t.Any] | None: + raise NotImplementedError + + @abstractmethod + def openapi_example(self) -> t.Any | None: + raise NotImplementedError + + @abstractmethod + def openapi_request_body(self) -> dict[str, t.Any]: + raise NotImplementedError + + @abstractmethod + def openapi_responses(self) -> dict[str, t.Any]: + raise NotImplementedError + + +class IODescriptor(ABC, _OpenAPIMeta, t.Generic[IOType]): """ IODescriptor describes the input/output data format of an InferenceAPI defined in a :code:`bentoml.Service`. This is an abstract base class for extending new HTTP endpoint IO descriptor types in BentoServer. """ + __slots__ = ("_args", "_kwargs", "_proto_fields", "_mime_type", "descriptor_id") + HTTP_METHODS = ["POST"] + descriptor_id: str | None + _mime_type: str _rpc_content_type: str = "application/grpc" _proto_fields: tuple[ProtoField] - descriptor_id: str | None + _sample: IOType | None = None + _initialized: bool = False + _args: t.Sequence[t.Any] + _kwargs: dict[str, t.Any] def __init_subclass__(cls, *, descriptor_id: str | None = None): if descriptor_id is not None: @@ -63,52 +94,82 @@ def __init_subclass__(cls, *, descriptor_id: str | None = None): IO_DESCRIPTOR_REGISTRY[descriptor_id] = cls cls.descriptor_id = descriptor_id - @abstractmethod - def to_spec(self) -> dict[str, t.Any]: - raise NotImplementedError + if TYPE_CHECKING: - @classmethod - @abstractmethod - def from_spec(cls, spec: dict[str, t.Any]) -> Self: - raise NotImplementedError + def __init__(self, **kwargs: t.Any) -> None: + ... + + def __getattr__(self, name: str) -> t.Any: + if not self._initialized: + self._lazy_init() + assert self._initialized + return object.__getattribute__(self, name) + + def __dir__(self) -> t.Iterable[str]: + if not self._initialized: + self._lazy_init() + assert self._initialized + return object.__dir__(self) def __repr__(self) -> str: return self.__class__.__qualname__ - @abstractmethod - def input_type(self) -> InputType: - raise NotImplementedError + def _lazy_init(self) -> None: + self._initialized = True + self.__init__(*self._args, **self._kwargs) + del self._args + del self._kwargs + + @property + def sample(self) -> IOType | None: + return self._sample + + @sample.setter + def sample(self, value: IOType) -> None: + self._sample = value + + @classmethod + def from_sample(cls, sample: IOType | t.Any, **kwargs: t.Any) -> Self: + klass = cls(**kwargs) + sample = klass._from_sample(sample) + klass.sample = sample + return klass @abstractmethod - def openapi_schema(self) -> Schema | Reference: + def _from_sample(self, sample: t.Any) -> IOType: raise NotImplementedError + @property + def mime_type(self) -> str: + return self._mime_type + @abstractmethod - def openapi_components(self) -> dict[str, t.Any] | None: + def to_spec(self) -> dict[str, t.Any]: raise NotImplementedError + @classmethod @abstractmethod - def openapi_request_body(self) -> dict[str, t.Any]: + def from_spec(cls, spec: dict[str, t.Any]) -> Self: raise NotImplementedError @abstractmethod - def openapi_responses(self) -> dict[str, t.Any]: + def input_type(self) -> InputType: raise NotImplementedError @abstractmethod async def from_http_request(self, request: Request) -> IOType: - ... + raise NotImplementedError @abstractmethod async def to_http_response( self, obj: IOType, ctx: Context | None = None ) -> Response: - ... + raise NotImplementedError @abstractmethod async def from_proto(self, field: t.Any) -> IOType: - ... + raise NotImplementedError @abstractmethod async def to_proto(self, obj: IOType) -> t.Any: - ... + raise NotImplementedError diff --git a/src/bentoml/_internal/io_descriptors/file.py b/src/bentoml/_internal/io_descriptors/file.py index 8e97c05187..8db8ee62ca 100644 --- a/src/bentoml/_internal/io_descriptors/file.py +++ b/src/bentoml/_internal/io_descriptors/file.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import os import typing as t import logging from typing import TYPE_CHECKING @@ -12,10 +13,12 @@ from .base import IODescriptor from ..types import FileLike +from ..utils import resolve_user_filepath from ..utils.http import set_cookies from ...exceptions import BadInput from ...exceptions import InvalidArgument from ...exceptions import BentoMLException +from ...exceptions import MissingDependencyException from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType @@ -112,19 +115,33 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]: _proto_fields = ("file",) - def __new__( # pylint: disable=arguments-differ # returning subclass from new - cls, kind: FileKind = "binaryio", mime_type: str | None = None + def __new__( + cls, kind: FileKind = "binaryio", mime_type: str | None = None, **kwargs: t.Any ) -> File: mime_type = mime_type if mime_type is not None else "application/octet-stream" if kind == "binaryio": - res = object.__new__(BytesIOFile) + res = super().__new__(BytesIOFile, **kwargs) else: raise ValueError(f"invalid File kind '{kind}'") res._mime_type = mime_type return res - def to_spec(self) -> dict[str, t.Any]: - raise NotImplementedError + def _from_sample(self, sample: FileType | str) -> FileType: + try: + import filetype + except ModuleNotFoundError: + raise MissingDependencyException( + "'filetype' is required to use 'from_sample'. Install it with 'pip install bentoml[io-file]'." + ) + if isinstance(sample, t.IO): + sample = FileLike[bytes](sample, "") + self._mime_type = filetype.guess_mime(sample) + elif isinstance(sample, (str, os.PathLike)): + p = resolve_user_filepath(sample, ctx=None) + self._mime_type = filetype.guess_mime(p) + with open(p, "rb") as f: + sample = FileLike[bytes](f, "") + return sample @classmethod def from_spec(cls, spec: dict[str, t.Any]) -> Self: @@ -141,6 +158,9 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + pass + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, @@ -192,7 +212,10 @@ async def to_proto(self, obj: FileType) -> pb.File: async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]: raise NotImplementedError - async def from_http_request(self, request: Request) -> t.IO[bytes]: + async def from_http_request(self, request: Request) -> FileLike[bytes]: + raise NotImplementedError + + def to_spec(self) -> dict[str, t.Any]: raise NotImplementedError @@ -206,7 +229,7 @@ def to_spec(self) -> dict[str, t.Any]: }, } - async def from_http_request(self, request: Request) -> t.IO[bytes]: + async def from_http_request(self, request: Request) -> FileLike[bytes]: content_type, _ = parse_options_header(request.headers["content-type"]) if content_type.decode("utf-8") == "multipart/form-data": form = await request.form() @@ -228,7 +251,7 @@ async def from_http_request(self, request: Request) -> t.IO[bytes]: return res # type: ignore if content_type.decode("utf-8") == self._mime_type: body = await request.body() - return t.cast(t.IO[bytes], FileLike(io.BytesIO(body), "")) + return FileLike[bytes](io.BytesIO(body), "") raise BentoMLException( f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got {content_type} instead" ) diff --git a/src/bentoml/_internal/io_descriptors/image.py b/src/bentoml/_internal/io_descriptors/image.py index 5dd1d7b3da..18f3d26b19 100644 --- a/src/bentoml/_internal/io_descriptors/image.py +++ b/src/bentoml/_internal/io_descriptors/image.py @@ -14,14 +14,18 @@ from .base import IODescriptor from ..types import LazyType from ..utils import LazyLoader +from ..utils import resolve_user_filepath from ..utils.http import set_cookies from ...exceptions import BadInput from ...exceptions import InvalidArgument from ...exceptions import InternalServerError +from ...exceptions import MissingDependencyException from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +PIL_EXC_MSG = "'Pillow' is required to use the Image IO descriptor. Install with 'pip install bentoml[io-image]'." + if TYPE_CHECKING: from types import UnionType @@ -43,13 +47,11 @@ # NOTE: pillow-simd only benefits users who want to do preprocessing # TODO: add options for users to choose between simd and native mode - _exc = "'Pillow' is required to use the Image IO descriptor. Install it with: 'pip install -U Pillow'." - PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=_exc) - PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=_exc) + PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=PIL_EXC_MSG) + PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=PIL_EXC_MSG) pb, _ = import_generated_stubs() - # NOTES: we will keep type in quotation to avoid backward compatibility # with numpy < 1.20, since we will use the latest stubs from the main branch of numpy. # that enable a new way to type hint an ndarray. @@ -58,10 +60,7 @@ DEFAULT_PIL_MODE = "RGB" -PIL_WRITE_ONLY_FORMATS = { - "PALM", - "PDF", -} +PIL_WRITE_ONLY_FORMATS = {"PALM", "PDF"} READABLE_MIMES: set[str] = None # type: ignore (lazy constant) MIME_EXT_MAPPING: dict[str, str] = None # type: ignore (lazy constant) @@ -74,9 +73,7 @@ def initialize_pillow(): try: import PIL.Image except ImportError: - raise InternalServerError( - "`Pillow` is required to use {__name__}\n Instructions: `pip install -U Pillow`" - ) + raise InternalServerError(PIL_EXC_MSG) PIL.Image.init() MIME_EXT_MAPPING = {v: k for k, v in PIL.Image.MIME.items()} # type: ignore (lazy constant) @@ -213,6 +210,29 @@ def __init__( self._pilmode: _Mode | None = pilmode self._format: str = MIME_EXT_MAPPING[self._mime_type] + def _from_sample(self, sample: ImageType | str) -> ImageType: + try: + from filetype.match import image_match + except ModuleNotFoundError: + raise MissingDependencyException( + "'filetype' is required to use 'from_sample'. Install it with 'pip install bentoml[io-image]'." + ) + + img_type = image_match(sample) + if img_type is None: + raise InvalidArgument(f"{sample} is not a valid image file type.") + + if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(sample): + sample = PIL.Image.fromarray(sample) + elif isinstance(sample, str): + p = resolve_user_filepath(sample, ctx=None) + try: + with open(p, "rb") as f: + sample = PIL.Image.open(f) + except PIL.UnidentifiedImageError as err: + raise BadInput(f"Failed to parse sample image file: {err}") from None + return sample + def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, @@ -239,6 +259,9 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + pass + def openapi_request_body(self) -> dict[str, t.Any]: return { "content": { @@ -317,15 +340,15 @@ async def from_http_request(self, request: Request) -> ImageType: try: return PIL.Image.open(io.BytesIO(bytes_)) - except PIL.UnidentifiedImageError: # type: ignore (bad pillow types) - raise BadInput("Failed to parse uploaded image file") from None + except PIL.UnidentifiedImageError as err: + raise BadInput(f"Failed to parse uploaded image file: {err}") from None async def to_http_response( self, obj: ImageType, ctx: Context | None = None ) -> Response: if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(obj): image = PIL.Image.fromarray(obj, mode=self._pilmode) - elif LazyType[PIL.Image.Image]("PIL.Image.Image").isinstance(obj): + elif LazyType["PIL.Image.Image"]("PIL.Image.Image").isinstance(obj): image = obj else: raise BadInput( diff --git a/src/bentoml/_internal/io_descriptors/json.py b/src/bentoml/_internal/io_descriptors/json.py index ba8619b49a..c4217527c9 100644 --- a/src/bentoml/_internal/io_descriptors/json.py +++ b/src/bentoml/_internal/io_descriptors/json.py @@ -23,6 +23,8 @@ from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +EXC_MSG = "'pydantic' must be installed to use 'pydantic_model'. Install with 'pip install bentoml[io-json]'." + if TYPE_CHECKING: from types import UnionType @@ -36,9 +38,8 @@ from ..context import InferenceApiContext as Context else: - _exc_msg = "'pydantic' must be installed to use 'pydantic_model'. Install with 'pip install pydantic'." - pydantic = LazyLoader("pydantic", globals(), "pydantic", exc_msg=_exc_msg) - schema = LazyLoader("schema", globals(), "pydantic.schema", exc_msg=_exc_msg) + pydantic = LazyLoader("pydantic", globals(), "pydantic", exc_msg=EXC_MSG) + schema = LazyLoader("schema", globals(), "pydantic.schema", exc_msg=EXC_MSG) # lazy load our proto generated. struct_pb2 = LazyLoader("struct_pb2", globals(), "google.protobuf.struct_pb2") # lazy load numpy for processing ndarray. @@ -200,6 +201,29 @@ def __init__( "'validate_json' option from 'bentoml.io.JSON' has been deprecated. Use a Pydantic model to specify validation options instead." ) + def _from_sample(self, sample: JSONType) -> JSONType: + if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance(sample): + self._pydantic_model = sample.__class__ + elif isinstance(sample, str): + try: + sample = json.loads(sample) + except json.JSONDecodeError as e: + raise BadInput( + f"Unable to parse JSON string. Please make sure the input is a valid JSON string: {e}" + ) from None + elif isinstance(sample, bytes): + try: + sample = json.loads(sample.decode()) + except json.JSONDecodeError as e: + raise BadInput( + f"Unable to parse JSON bytes. Please make sure the input is a valid JSON bytes: {e}" + ) from None + elif not isinstance(sample, (dict, list)): + raise BadInput( + f"Unable to infer JSON type from sample: {sample}. Please make sure the input is a valid JSON object." + ) + return sample + def to_spec(self) -> dict[str, t.Any]: return { "id": self.descriptor_id, @@ -250,9 +274,31 @@ def openapi_components(self) -> dict[str, t.Any] | None: return {"schemas": pydantic_components_schema(self._pydantic_model)} + def openapi_example(self): + if self.sample is not None: + if LazyType["pydantic.BaseModel"]("pydantic.BaseModel").isinstance( + self.sample + ): + return self.sample.dict() + elif isinstance(self.sample, str): + return json.dumps( + self.sample, + cls=self._json_encoder, + ensure_ascii=False, + allow_nan=False, + indent=None, + separators=(",", ":"), + ) + elif isinstance(self.sample, dict): + return self.sample + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -260,7 +306,11 @@ def openapi_request_body(self) -> dict[str, t.Any]: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/io_descriptors/multipart.py b/src/bentoml/_internal/io_descriptors/multipart.py index 9f90bdc3fe..d7253a9bde 100644 --- a/src/bentoml/_internal/io_descriptors/multipart.py +++ b/src/bentoml/_internal/io_descriptors/multipart.py @@ -16,15 +16,16 @@ from ..utils.formparser import populate_multipart_requests from ..utils.formparser import concat_to_multipart_response from ..service.openapi.specification import Schema -from ..service.openapi.specification import Response as OpenAPIResponse from ..service.openapi.specification import MediaType -from ..service.openapi.specification import RequestBody if TYPE_CHECKING: from types import UnionType + from typing_extensions import Self + from bentoml.grpc.v1alpha1 import service_pb2 as pb + from .base import OpenAPIResponse from ..types import LazyType from ..context import InferenceApiContext as Context else: @@ -174,6 +175,9 @@ def __init__(self, **inputs: IODescriptor[t.Any]): def __repr__(self) -> str: return f"Multipart({','.join([f'{k}={v}' for k,v in zip(self._inputs, map(repr, self._inputs.values()))])})" + def _from_sample(cls, sample: dict[str, t.Any]) -> t.Any: + raise NotImplementedError("'from_sample' is not supported for Multipart.") + def input_type( self, ) -> dict[str, t.Type[t.Any] | UnionType | LazyType[t.Any]]: @@ -217,9 +221,16 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_request_body(self) -> RequestBody: + def openapi_example(self) -> t.Any: + return {args: io.openapi_example() for args, io in self._inputs.items()} + + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -227,7 +238,11 @@ def openapi_request_body(self) -> RequestBody: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/io_descriptors/numpy.py b/src/bentoml/_internal/io_descriptors/numpy.py index 5b402f9151..50a0c60eab 100644 --- a/src/bentoml/_internal/io_descriptors/numpy.py +++ b/src/bentoml/_internal/io_descriptors/numpy.py @@ -216,6 +216,15 @@ def __init__( shape: tuple[int, ...] | None = None, enforce_shape: bool = False, ): + if enforce_dtype and not dtype: + raise InvalidArgument( + "'dtype' must be specified when 'enforce_dtype=True'" + ) from None + if enforce_shape and not shape: + raise InvalidArgument( + "'shape' must be specified when 'enforce_shape=True'" + ) from None + if dtype and not isinstance(dtype, np.dtype): # Convert from primitive type or type string, e.g.: np.dtype(float) or np.dtype("float64") try: @@ -228,17 +237,6 @@ def __init__( self._enforce_dtype = enforce_dtype self._enforce_shape = enforce_shape - self._sample_input = None - - if self._enforce_dtype and not self._dtype: - raise InvalidArgument( - "'dtype' must be specified when 'enforce_dtype=True'" - ) from None - if self._enforce_shape and not self._shape: - raise InvalidArgument( - "'shape' must be specified when 'enforce_shape=True'" - ) from None - def _openapi_types(self) -> str: # convert numpy dtypes to openapi compatible types. var_type = "integer" @@ -269,14 +267,6 @@ def from_spec(cls, spec: dict[str, t.Any]) -> Self: res = NumpyNdarray(**spec["args"]) return res - @property - def sample_input(self) -> ext.NpNDArray | None: - return self._sample_input - - @sample_input.setter - def sample_input(self, value: ext.NpNDArray) -> None: - self._sample_input = value - def openapi_schema(self) -> Schema: # Note that we are yet provide # supports schemas for arrays that is > 2D. @@ -289,14 +279,11 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass - def openapi_example(self) -> t.Any: - if self.sample_input is not None: - if isinstance(self.sample_input, np.generic): - raise BadInput( - "NumpyNdarray: sample_input must be a numpy array." - ) from None - return self.sample_input.tolist() - return + def openapi_example(self): + if self.sample is not None: + if isinstance(self.sample, np.generic): + raise BadInput("NumpyNdarray: sample must be a numpy array.") from None + return self.sample.tolist() def openapi_request_body(self) -> dict[str, t.Any]: return { @@ -401,18 +388,12 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) else: return Response(json.dumps(obj.tolist()), media_type=self._mime_type) - @classmethod - def from_sample( - cls, - sample_input: ext.NpNDArray, - enforce_dtype: bool = True, - enforce_shape: bool = True, - ) -> NumpyNdarray: + def _from_sample(self, sample: ext.NpNDArray | t.Sequence[t.Any]) -> ext.NpNDArray: """ Create a :obj:`NumpyNdarray` IO Descriptor from given inputs. Args: - sample_input: Given sample ``np.ndarray`` data + sample: Given sample ``np.ndarray`` data enforce_dtype: Enforce a certain data type. :code:`dtype` must be specified at function signature. If you don't want to enforce a specific dtype then change :code:`enforce_dtype=False`. @@ -446,20 +427,22 @@ def from_sample( async def predict(input: NDArray[np.int16]) -> NDArray[Any]: return await runner.async_run(input) """ - if isinstance(sample_input, np.generic): + if isinstance(sample, np.generic): raise BentoMLException( "'NumpyNdarray.from_sample()' expects a 'numpy.array', not 'numpy.generic'." ) from None - - inst = cls( - dtype=sample_input.dtype, - shape=sample_input.shape, - enforce_dtype=enforce_dtype, - enforce_shape=enforce_shape, - ) - inst.sample_input = sample_input - - return inst + try: + if not isinstance(sample, np.ndarray): + sample = np.array(sample) + except ValueError: + raise BentoMLException( + f"Failed to create a 'numpy.ndarray' from given sample {sample}" + ) from None + self._dtype = sample.dtype + self._shape = sample.shape + self._enforce_dtype = True + self._enforce_shape = True + return sample async def from_proto(self, field: pb.NDArray | bytes) -> ext.NpNDArray: """ diff --git a/src/bentoml/_internal/io_descriptors/pandas.py b/src/bentoml/_internal/io_descriptors/pandas.py index 90e44ec69a..176fd1d144 100644 --- a/src/bentoml/_internal/io_descriptors/pandas.py +++ b/src/bentoml/_internal/io_descriptors/pandas.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import os import typing as t import logging import functools @@ -24,6 +25,8 @@ from ..service.openapi.specification import Schema from ..service.openapi.specification import MediaType +EXC_MSG = "pandas' is required to use PandasDataFrame or PandasSeries. Install with 'pip install bentoml[io-pandas]'" + if TYPE_CHECKING: import numpy as np import pandas as pd @@ -32,19 +35,15 @@ from bentoml.grpc.v1alpha1 import service_pb2 as pb from .. import external_typing as ext + from .base import OpenAPIResponse from ..context import InferenceApiContext as Context else: from bentoml.grpc.utils import import_generated_stubs pb, _ = import_generated_stubs() + pd = LazyLoader("pd", globals(), "pandas", exc_msg=EXC_MSG) np = LazyLoader("np", globals(), "numpy") - pd = LazyLoader( - "pd", - globals(), - "pandas", - exc_msg='pandas" is required to use PandasDataFrame or PandasSeries. Install with "pip install bentoml[io-pandas]"', - ) logger = logging.getLogger(__name__) @@ -80,7 +79,8 @@ def _openapi_types(item: str) -> str: # pragma: no cover def _dataframe_openapi_schema( - dtype: bool | ext.PdDTypeArg | None, orient: ext.DataFrameOrient = None + dtype: bool | ext.PdDTypeArg | None, + orient: ext.DataFrameOrient = None, ) -> Schema: # pragma: no cover if isinstance(dtype, dict): if orient == "records": @@ -154,6 +154,8 @@ def __str__(self) -> str: return "parquet" elif self == SerializationFormat.CSV: return "csv" + else: + raise ValueError(f"Unknown serialization format: {self}") def _infer_serialization_format_from_request( @@ -323,7 +325,20 @@ def __init__( enforce_shape: bool = False, default_format: t.Literal["json", "parquet", "csv"] = "json", ): - self._orient = orient + if enforce_dtype and dtype is None: + raise ValueError( + "'dtype' must be specified if 'enforce_dtype' is True" + ) from None + if enforce_shape and shape is None: + raise ValueError( + "'shape' must be specified if 'enforce_shape' is True" + ) from None + if apply_column_names and columns is None: + raise ValueError( + "'columns' must be specified if 'apply_column_names' is True" + ) from None + + self._orient: ext.DataFrameOrient = orient self._columns = columns self._apply_column_names = apply_column_names # TODO: convert dtype to numpy dtype @@ -336,15 +351,91 @@ def __init__( _validate_serialization_format(self._default_format) self._mime_type = self._default_format.mime_type - self._sample_input = None + def _from_sample(self, sample: ext.PdDataFrame) -> ext.PdDataFrame: + """ + Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. - @property - def sample_input(self) -> ext.PdDataFrame | None: - return self._sample_input + Args: + sample: Given sample ``pd.DataFrame`` data + orient: Indication of expected JSON string format. Compatible JSON strings can be + produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. + Possible orients are: - @sample_input.setter - def sample_input(self, value: ext.PdDataFrame) -> None: - self._sample_input = value + - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} + - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] + - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} + - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} + - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays + - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} + apply_column_names: Update incoming DataFrame columns. ``columns`` must be specified at + function signature. If you don't want to enforce a specific columns + name then change ``apply_column_names=False``. + enforce_dtype: Enforce a certain data type. `dtype` must be specified at function + signature. If you don't want to enforce a specific dtype then change + ``enforce_dtype=False``. + enforce_shape: Enforce a certain shape. ``shape`` must be specified at function + signature. If you don't want to enforce a specific shape then change + ``enforce_shape=False``. + default_format: The default serialization format to use if the request does not specify a ``Content-Type`` Headers. + It is also the serialization format used for the response. Possible values are: + + - :obj:`json` - JSON text format (inferred from content-type ``"application/json"``) + - :obj:`parquet` - Parquet binary format (inferred from content-type ``"application/octet-stream"``) + - :obj:`csv` - CSV text format (inferred from content-type ``"text/csv"``) + + Returns: + :obj:`PandasDataFrame`: :code:`PandasDataFrame` IODescriptor from given users inputs. + + Example: + + .. code-block:: python + :caption: `service.py` + + import pandas as pd + from bentoml.io import PandasDataFrame + arr = [[1,2,3]] + input_spec = PandasDataFrame.from_sample(pd.DataFrame(arr)) + + @svc.api(input=input_spec, output=PandasDataFrame()) + def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... + """ + if LazyType["ext.NpNDArray"]("numpy", "ndarray").isinstance(sample): + sample = pd.DataFrame(sample) + elif isinstance(sample, str): + try: + if os.path.exists(sample): + try: + ext = os.path.splitext(sample)[-1].strip(".") + sample = getattr( + pd, + { + "json": "read_json", + "csv": "read_csv", + "html": "read_html", + "xls": "read_excel", + "xlsx": "read_excel", + "hdf5": "read_hdf", + "parquet": "read_parquet", + "pickle": "read_pickle", + "sql": "read_sql", + }[ext], + )(sample) + except KeyError: + raise InvalidArgument(f"Unsupported sample '{sample}' format.") + else: + # Try to load the string as json. + sample = pd.read_json(sample) + except ValueError as e: + raise InvalidArgument( + f"Failed to create a 'pd.DataFrame' from sample {sample}: {e}" + ) from None + self._shape = sample.shape + self._columns = [str(i) for i in list(sample.columns)] + self._dtype = True + self._enforce_dtype = True + self._enforce_shape = True + self._apply_column_names = True + return sample def _convert_dtype( self, value: ext.PdDTypeArg | None @@ -392,17 +483,29 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + if self.sample is not None: + return self.sample.to_json(orient=self._orient) + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } - def openapi_responses(self) -> dict[str, t.Any]: + def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } @@ -490,77 +593,6 @@ async def to_http_response( else: return Response(resp, media_type=serialization_format.mime_type) - @classmethod - def from_sample( - cls, - sample_input: ext.PdDataFrame, - orient: ext.DataFrameOrient = "records", - apply_column_names: bool = True, - enforce_shape: bool = True, - enforce_dtype: bool = True, - default_format: t.Literal["json", "parquet", "csv"] = "json", - ) -> PandasDataFrame: - """ - Create a :obj:`PandasDataFrame` IO Descriptor from given inputs. - - Args: - sample_input: Given sample ``pd.DataFrame`` data - orient: Indication of expected JSON string format. Compatible JSON strings can be - produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. - Possible orients are: - - - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} - - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays - - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} - apply_column_names: Update incoming DataFrame columns. ``columns`` must be specified at - function signature. If you don't want to enforce a specific columns - name then change ``apply_column_names=False``. - enforce_dtype: Enforce a certain data type. `dtype` must be specified at function - signature. If you don't want to enforce a specific dtype then change - ``enforce_dtype=False``. - enforce_shape: Enforce a certain shape. ``shape`` must be specified at function - signature. If you don't want to enforce a specific shape then change - ``enforce_shape=False``. - default_format: The default serialization format to use if the request does not specify a ``Content-Type`` Headers. - It is also the serialization format used for the response. Possible values are: - - - :obj:`json` - JSON text format (inferred from content-type ``"application/json"``) - - :obj:`parquet` - Parquet binary format (inferred from content-type ``"application/octet-stream"``) - - :obj:`csv` - CSV text format (inferred from content-type ``"text/csv"``) - - Returns: - :obj:`PandasDataFrame`: :code:`PandasDataFrame` IODescriptor from given users inputs. - - Example: - - .. code-block:: python - :caption: `service.py` - - import pandas as pd - from bentoml.io import PandasDataFrame - arr = [[1,2,3]] - input_spec = PandasDataFrame.from_sample(pd.DataFrame(arr)) - - @svc.api(input=input_spec, output=PandasDataFrame()) - def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... - """ - inst = cls( - orient=orient, - enforce_shape=enforce_shape, - shape=sample_input.shape, - apply_column_names=apply_column_names, - columns=[str(x) for x in list(sample_input.columns)], - enforce_dtype=enforce_dtype, - dtype=True, # set to True to infer from given input - default_format=default_format, - ) - inst.sample_input = sample_input - - return inst - def validate_dataframe( self, dataframe: ext.PdDataFrame, exception_cls: t.Type[Exception] = BadInput ) -> ext.PdDataFrame: @@ -796,20 +828,66 @@ def __init__( shape: tuple[int, ...] | None = None, enforce_shape: bool = False, ): - self._orient = orient + if enforce_dtype and dtype is None: + raise ValueError( + "'dtype' must be specified if 'enforce_dtype' is True" + ) from None + if enforce_shape and shape is None: + raise ValueError( + "'shape' must be specified if 'enforce_shape' is True" + ) from None + + self._orient: ext.SeriesOrient = orient self._dtype = dtype self._enforce_dtype = enforce_dtype self._shape = shape self._enforce_shape = enforce_shape - self._sample_input = None - @property - def sample_input(self) -> ext.PdSeries | None: - return self._sample_input + def _from_sample(self, sample: ext.PdSeries | t.Sequence[t.Any]) -> ext.PdSeries: + """ + Create a :obj:`PandasSeries` IO Descriptor from given inputs. + + Args: + sample_input: Given sample ``pd.DataFrame`` data + orient: Indication of expected JSON string format. Compatible JSON strings can be + produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. + Possible orients are: - @sample_input.setter - def sample_input(self, value: ext.PdSeries) -> None: - self._sample_input = value + - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} + - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] + - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} + - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} + enforce_dtype: Enforce a certain data type. `dtype` must be specified at function + signature. If you don't want to enforce a specific dtype then change + ``enforce_dtype=False``. + enforce_shape: Enforce a certain shape. ``shape`` must be specified at function + signature. If you don't want to enforce a specific shape then change + ``enforce_shape=False``. + + Returns: + :obj:`PandasSeries`: :code:`PandasSeries` IODescriptor from given users inputs. + + Example: + + .. code-block:: python + :caption: `service.py` + + import pandas as pd + from bentoml.io import PandasSeries + + arr = [1,2,3] + input_spec = PandasSeries.from_sample(pd.DataFrame(arr)) + + @svc.api(input=input_spec, output=PandasSeries()) + def predict(inputs: pd.Series) -> pd.Series: ... + """ + if not isinstance(sample, pd.Series): + sample = pd.Series(sample) + self._dtype = sample.dtype + self._shape = sample.shape + self._enforce_dtype = True + self._enforce_shape = True + return sample def input_type(self) -> LazyType[ext.PdSeries]: return LazyType("pandas", "Series") @@ -855,17 +933,29 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + if self.sample is not None: + return self.sample.to_json(orient=self._orient) + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } - def openapi_responses(self) -> dict[str, t.Any]: + def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } @@ -1028,59 +1118,3 @@ async def to_proto(self, obj: ext.PdSeries) -> pb.Series: raise InvalidArgument( f"Unsupported dtype '{obj.dtype}' for response message." ) from None - - @classmethod - def from_sample( - cls, - sample_input: ext.PdSeries, - orient: ext.SeriesOrient = "records", - enforce_dtype: bool = True, - enforce_shape: bool = True, - ) -> PandasSeries: - """ - Create a :obj:`PandasSeries` IO Descriptor from given inputs. - - Args: - sample_input: Given sample ``pd.DataFrame`` data - orient: Indication of expected JSON string format. Compatible JSON strings can be - produced by :func:`pandas.io.json.to_json()` with a corresponding orient value. - Possible orients are: - - - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`table` - :code:`dict[str, Any]` ↦ {``schema``: { schema }, ``data``: { data }} - enforce_dtype: Enforce a certain data type. `dtype` must be specified at function - signature. If you don't want to enforce a specific dtype then change - ``enforce_dtype=False``. - enforce_shape: Enforce a certain shape. ``shape`` must be specified at function - signature. If you don't want to enforce a specific shape then change - ``enforce_shape=False``. - - Returns: - :obj:`PandasSeries`: :code:`PandasSeries` IODescriptor from given users inputs. - - Example: - - .. code-block:: python - :caption: `service.py` - - import pandas as pd - from bentoml.io import PandasSeries - - arr = [1,2,3] - input_spec = PandasSeries.from_sample(pd.DataFrame(arr)) - - @svc.api(input=input_spec, output=PandasSeries()) - def predict(inputs: pd.Series) -> pd.Series: ... - """ - inst = cls( - orient=orient, - dtype=sample_input.dtype, - enforce_dtype=enforce_dtype, - shape=sample_input.shape, - enforce_shape=enforce_shape, - ) - inst.sample_input = sample_input - - return inst diff --git a/src/bentoml/_internal/io_descriptors/text.py b/src/bentoml/_internal/io_descriptors/text.py index e5d0873cf1..ed9127069a 100644 --- a/src/bentoml/_internal/io_descriptors/text.py +++ b/src/bentoml/_internal/io_descriptors/text.py @@ -99,6 +99,11 @@ def __init__(self, *args: t.Any, **kwargs: t.Any): f"'{self.__class__.__name__}' is not designed to take any args or kwargs during initialization." ) from None + def _from_sample(self, sample: str | bytes) -> str: + if isinstance(sample, bytes): + sample = sample.decode("utf-8") + return sample + def input_type(self) -> t.Type[str]: return str @@ -115,9 +120,16 @@ def openapi_schema(self) -> Schema: def openapi_components(self) -> dict[str, t.Any] | None: pass + def openapi_example(self): + return str(self.sample) + def openapi_request_body(self) -> dict[str, t.Any]: return { - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "required": True, "x-bentoml-io-descriptor": self.to_spec(), } @@ -125,7 +137,11 @@ def openapi_request_body(self) -> dict[str, t.Any]: def openapi_responses(self) -> OpenAPIResponse: return { "description": SUCCESS_DESCRIPTION, - "content": {self._mime_type: MediaType(schema=self.openapi_schema())}, + "content": { + self._mime_type: MediaType( + schema=self.openapi_schema(), example=self.openapi_example() + ) + }, "x-bentoml-io-descriptor": self.to_spec(), } diff --git a/src/bentoml/_internal/service/openapi/__init__.py b/src/bentoml/_internal/service/openapi/__init__.py index 911744e470..c2d05163d8 100644 --- a/src/bentoml/_internal/service/openapi/__init__.py +++ b/src/bentoml/_internal/service/openapi/__init__.py @@ -105,7 +105,7 @@ def generate_spec(svc: Service, *, openapi_version: str = "3.0.2"): title=svc.name, description=svc.doc, version=svc.tag.version if svc.tag and svc.tag.version else "None", - contact=Contact(name="BentoML Team", email="contact@bentoml.ai"), + contact=Contact(name="BentoML Team", email="contact@bentoml.com"), ), paths={ # setup infra endpoints @@ -136,6 +136,8 @@ def generate_spec(svc: Service, *, openapi_version: str = "3.0.2"): }, }, "tags": [APP_TAG.name], + "consumes": [api.input.mime_type], + "produces": [api.output.mime_type], "x-bentoml-name": api.name, "summary": str(api), "description": api.doc or "", diff --git a/src/bentoml/_internal/service/openapi/specification.py b/src/bentoml/_internal/service/openapi/specification.py index 597629b3a1..f3e86b274b 100644 --- a/src/bentoml/_internal/service/openapi/specification.py +++ b/src/bentoml/_internal/service/openapi/specification.py @@ -104,7 +104,7 @@ class Schema: anyOf: t.Optional[t.List[Schema]] = None not_: t.Optional[Schema] = None items: t.Optional[t.Union[Schema, t.List[Schema]]] = None - properties: t.Optional[t.Dict[str, Schema]] = None + properties: t.Optional[t.Dict[str, t.Union[Schema, Reference]]] = None additionalProperties: t.Optional[t.Union[Schema, Reference, bool]] = None description: t.Optional[str] = None format: t.Optional[str] = None @@ -187,7 +187,7 @@ class Operation: description: t.Optional[str] = None externalDocs: t.Optional[ExternalDocumentation] = None operationId: t.Optional[str] = None - requestBody: t.Optional[t.Union[RequestBody, Reference]] = None + requestBody: t.Optional[t.Union[RequestBody, Reference, t.Dict[str, t.Any]]] = None # Not yet supported: parameters, callbacks, deprecated, servers, security @@ -219,14 +219,14 @@ class PathItem: ref: t.Optional[str] = None summary: t.Optional[str] = None description: t.Optional[str] = None - get: t.Optional[Operation] = None - put: t.Optional[Operation] = None - post: t.Optional[Operation] = None - delete: t.Optional[Operation] = None - options: t.Optional[Operation] = None - head: t.Optional[Operation] = None - patch: t.Optional[Operation] = None - trace: t.Optional[Operation] = None + get: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + put: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + post: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + delete: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + options: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + head: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + patch: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None + trace: t.Optional[t.Union[Operation, t.Dict[str, t.Any]]] = None # not yet supported: servers, parameters @@ -252,7 +252,9 @@ class Components: schemas: t.Dict[str, t.Union[Schema, Reference]] responses: t.Optional[t.Dict[str, t.Union[Response, Reference]]] = None examples: t.Optional[t.Dict[str, t.Union[Example, Reference]]] = None - requestBodies: t.Optional[t.Dict[str, t.Union[RequestBody, Reference]]] = None + requestBodies: t.Optional[ + t.Dict[str, t.Union[RequestBody, Reference, t.Dict[str, t.Any]]] + ] = None links: t.Optional[t.Dict[str, t.Union[Link, Reference]]] = None # Not yet supported: securitySchemes, callbacks, parameters, headers diff --git a/src/bentoml/bentos.py b/src/bentoml/bentos.py index 46f9f7cd13..165e1ce966 100644 --- a/src/bentoml/bentos.py +++ b/src/bentoml/bentos.py @@ -445,6 +445,8 @@ def construct_dockerfile( "grpc-channelz", "aws", "all", + "io", + "io-file", "io-image", "io-pandas", "io-json", diff --git a/tests/unit/_internal/io/test_base.py b/tests/unit/_internal/io/test_base.py new file mode 100644 index 0000000000..59d2dd3981 --- /dev/null +++ b/tests/unit/_internal/io/test_base.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import typing as t +from typing import TYPE_CHECKING + +import pytest + +from bentoml.io import IODescriptor + +if TYPE_CHECKING: + + from typing_extensions import Self + + from bentoml._internal.context import InferenceApiContext as Context + + +class DummyDescriptor(IODescriptor[t.Any], descriptor_id="bentoml.io.Dummy"): + _mime_type = "application/vnd.bentoml.dummy" + + def __init__(self, **kwargs: t.Any): + [object.__setattr__(self, k, v) for k, v in kwargs.items()] + + def openapi_schema(self) -> t.Any: + raise NotImplementedError + + def openapi_components(self) -> dict[str, t.Any] | None: + raise NotImplementedError + + def openapi_example(self) -> t.Any | None: + raise NotImplementedError + + def openapi_request_body(self) -> dict[str, t.Any]: + raise NotImplementedError + + def openapi_responses(self) -> dict[str, t.Any]: + raise NotImplementedError + + def to_spec(self) -> dict[str, t.Any]: + raise NotImplementedError + + @classmethod + def from_spec(cls, spec: dict[str, t.Any]) -> Self: + return cls(**spec) + + def input_type(self) -> t.Any: + return str + + async def from_http_request(self, request: t.Any) -> t.Any: + return request + + async def to_http_response(self, obj: t.Any, ctx: Context | None = None) -> t.Any: + return obj, ctx + + async def from_proto(self, field: t.Any) -> t.Any: + return field + + async def to_proto(self, obj: t.Any) -> t.Any: + return obj + + def _from_sample(cls, sample: t.Any, **kwargs: t.Any): + return sample + + +@pytest.mark.parametrize( + "fn", + [ + f"openapi_{n}" + for n in ("schema", "components", "example", "request_body", "responses") + ], +) +def test_raise_not_implemented_openapi(fn: str): + with pytest.raises(NotImplementedError): + getattr(DummyDescriptor(), fn)() diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 2c9b4477c3..d9cc40fd93 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -83,11 +83,6 @@ def test_numpy_openapi_request_body(): assert ndarray["content"] assert ndarray["content"]["application/json"].example == example.tolist() - nparray = NumpyNdarray(dtype="float") - nparray.sample_input = ExampleGeneric("asdf") # type: ignore (test exception) - with pytest.raises(BadInput): - nparray.openapi_example() - def test_numpy_openapi_responses(): responses = NumpyNdarray().openapi_responses() @@ -97,22 +92,40 @@ def test_numpy_openapi_responses(): assert "application/json" in responses["content"] assert not responses["content"]["application/json"].example + ndarray = from_example.openapi_request_body() + assert ndarray["content"] + assert ndarray["content"]["application/json"].example == example.tolist() + + +def test_numpy_openapi_example(): + r = NumpyNdarray().openapi_example() + assert r is None + + r = from_example.openapi_example() + assert r == example.tolist() + + nparray = NumpyNdarray(dtype="float") + nparray.sample = ExampleGeneric("asdf") + with pytest.raises(BadInput): + nparray.openapi_example() + def test_verify_numpy_ndarray(caplog: LogCaptureFixture): partial_check = partial(from_example.validate_array, exception_cls=BentoMLException) with pytest.raises(BentoMLException) as ex: partial_check(np.array(["asdf"])) - assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) # type: ignore (testing message) + assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) with pytest.raises(BentoMLException) as e: partial_check(np.array([[1]])) - assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) # type: ignore (testing message) + assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) # test cases where reshape is failed example = NumpyNdarray.from_sample(np.ones((2, 2, 3))) - example._enforce_shape = False # type: ignore (test internal check) - example._enforce_dtype = False # type: ignore (test internal check) + # Note that from_sample now lazy load the IO descriptor + example._enforce_shape = False + example._enforce_dtype = False with caplog.at_level(logging.DEBUG): example.validate_array(np.array("asdf")) assert "Failed to reshape" in caplog.text