From 7cba0ac0fb4e3845b0e969ba7d9a02147cd452e6 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Mon, 25 Jul 2022 16:43:03 -0700 Subject: [PATCH] grpc: serialization process (#2804) * chore: renaming for better coherency Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * refactor: cleanup files Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * chore: fix formats Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * chore: refactor exception to interceptor Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * chore: cleanup some impl Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * chore: cleanup note: codec are very much broken rn Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- .gitignore | 2 + MANIFEST.in | 1 + .../_internal/bento/build_dev_bentoml_whl.py | 5 +- bentoml/_internal/io_descriptors/base.py | 10 +- bentoml/_internal/io_descriptors/numpy.py | 57 +-- bentoml/_internal/server/__init__.py | 6 +- ...v_api_server.py => grpc_dev_api_server.py} | 0 bentoml/_internal/server/cli/http/__init__.py | 0 .../api_server.py => rest_api_server.py} | 2 +- ...v_api_server.py => rest_dev_api_server.py} | 0 .../server/grpc/interceptors/__init__.py | 230 +++++++++++ bentoml/_internal/server/grpc/server.py | 2 - bentoml/_internal/server/grpc/servicer.py | 16 +- bentoml/_internal/server/grpc/types.py | 98 +++++ bentoml/_internal/server/grpc/utils.py | 66 ---- bentoml/_internal/server/grpc_app.py | 13 +- bentoml/_internal/service/service.py | 6 + bentoml/_internal/utils/grpc/__init__.py | 58 +++ bentoml/_internal/utils/grpc/codec.py | 369 ++++++++++++++++++ bentoml/_internal/utils/grpc/serializer.py | 33 +- .../server/cli/grpc => grpc/v1}/__init__.py | 0 bentoml/grpc/v1/service.proto | 196 ++++++++-- bentoml/grpc/v1/service_pb2.pyi | 201 ---------- bentoml/grpc/v1/service_pb2_grpc.pyi | 69 ---- 
bentoml/grpc/v1/service_test.proto | 12 +- bentoml/grpc/v1/service_test_pb2.pyi | 114 ------ bentoml/grpc/v1/service_test_pb2_grpc.pyi | 69 ---- bentoml/grpc/v1/struct.proto | 116 +++--- bentoml/grpc/v1/struct_pb2.pyi | 284 -------------- bentoml/grpc/v1/types.proto | 61 --- bentoml/grpc/v1/types_pb2.pyi | 119 ------ bentoml/testing/server.py | 2 +- pyproject.toml | 1 + setup.cfg | 3 + setup.py | 21 +- 35 files changed, 1066 insertions(+), 1176 deletions(-) rename bentoml/_internal/server/cli/{grpc/dev_api_server.py => grpc_dev_api_server.py} (100%) delete mode 100644 bentoml/_internal/server/cli/http/__init__.py rename bentoml/_internal/server/cli/{http/api_server.py => rest_api_server.py} (98%) rename bentoml/_internal/server/cli/{http/dev_api_server.py => rest_dev_api_server.py} (100%) create mode 100644 bentoml/_internal/server/grpc/types.py delete mode 100644 bentoml/_internal/server/grpc/utils.py create mode 100644 bentoml/_internal/utils/grpc/codec.py rename bentoml/{_internal/server/cli/grpc => grpc/v1}/__init__.py (100%) delete mode 100644 bentoml/grpc/v1/service_pb2.pyi delete mode 100644 bentoml/grpc/v1/service_pb2_grpc.pyi delete mode 100644 bentoml/grpc/v1/service_test_pb2.pyi delete mode 100644 bentoml/grpc/v1/service_test_pb2_grpc.pyi delete mode 100644 bentoml/grpc/v1/struct_pb2.pyi delete mode 100644 bentoml/grpc/v1/types.proto delete mode 100644 bentoml/grpc/v1/types_pb2.pyi diff --git a/.gitignore b/.gitignore index 85e2e8aaa1d..38df946fcb1 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,5 @@ catboost_info # generated stub that is included in distribution *_pb2*.py +# also ignore generated stubs for typing +bentoml/grpc/**/*.pyi diff --git a/MANIFEST.in b/MANIFEST.in index 979de6e61c0..7b5d7154047 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ exclude CONTRIBUTING.md GOVERNANCE.md CODE_OF_CONDUCT.md DEVELOPMENT.md SECURITY exclude Makefile MANIFEST.in exclude *.yml exclude .git* +exclude bentoml/grpc/buf.yaml # Directories to 
exclude in PyPI package prune requirements diff --git a/bentoml/_internal/bento/build_dev_bentoml_whl.py b/bentoml/_internal/bento/build_dev_bentoml_whl.py index a4fd0e89864..ea03e6eb02f 100644 --- a/bentoml/_internal/bento/build_dev_bentoml_whl.py +++ b/bentoml/_internal/bento/build_dev_bentoml_whl.py @@ -43,15 +43,14 @@ def build_bentoml_editable_wheel(target_path: str) -> None: "BentoML is installed in `editable` mode; building BentoML distribution with the local BentoML code base. The built wheel file will be included in the target bento." ) try: - import build.env - from build import ProjectBuilder + from build.env import IsolatedEnvBuilder except ModuleNotFoundError: raise BentoMLException( f"Environment variable {BENTOML_DEV_BUILD}=True detected, which requires the `pypa/build` package. Make sure to install all dev dependencies via `pip install -r requirements/dev-requirements.txt` and try again." ) - with tempfile.TemporaryDirectory() as dist_dir, build.env.IsolatedEnvBuilder() as env: + with tempfile.TemporaryDirectory() as dist_dir, IsolatedEnvBuilder() as env: builder = ProjectBuilder(os.path.dirname(pyproject)) builder.python_executable = env.executable builder.scripts_dir = env.scripts_dir diff --git a/bentoml/_internal/io_descriptors/base.py b/bentoml/_internal/io_descriptors/base.py index 8385b21d9aa..09645c55e58 100644 --- a/bentoml/_internal/io_descriptors/base.py +++ b/bentoml/_internal/io_descriptors/base.py @@ -11,12 +11,12 @@ from starlette.requests import Request from starlette.responses import Response - from bentoml.grpc.v1.service_pb2 import CallRequest - from bentoml.grpc.v1.service_pb2 import CallResponse + from bentoml.grpc.v1.service_pb2 import InferenceRequest + from bentoml.grpc.v1.service_pb2 import InferenceResponse from ..types import LazyType from ..context import InferenceApiContext as Context - from ..server.grpc.utils import BentoServicerContext + from ..server.grpc.types import BentoServicerContext InputType = ( UnionType @@ 
-86,12 +86,12 @@ def generate_protobuf(self): @abstractmethod async def from_grpc_request( - self, request: CallRequest, context: BentoServicerContext + self, request: InferenceRequest, context: BentoServicerContext ) -> IOPyObj: ... @abstractmethod async def to_grpc_response( self, obj: IOPyObj, context: BentoServicerContext - ) -> CallResponse: + ) -> InferenceResponse: ... diff --git a/bentoml/_internal/io_descriptors/numpy.py b/bentoml/_internal/io_descriptors/numpy.py index 03265c729c6..dc011c3f9d5 100644 --- a/bentoml/_internal/io_descriptors/numpy.py +++ b/bentoml/_internal/io_descriptors/numpy.py @@ -18,13 +18,13 @@ from ...exceptions import InternalServerError if TYPE_CHECKING: - import grpc import numpy as np from bentoml.grpc.v1 import service_pb2 from .. import external_typing as ext from ..context import InferenceApiContext as Context + from ..server.grpc.types import BentoServicerContext else: np = LazyLoader("np", globals(), "numpy") @@ -251,36 +251,8 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) else: return Response(json.dumps(obj.tolist()), media_type=MIME_TYPE_JSON) - def proto_to_arr(self, proto_arr): - """ - Convert given protobuf array to python list - """ - from google.protobuf.duration_pb2 import Duration - from google.protobuf.timestamp_pb2 import Timestamp - - from bentoml.grpc import service_pb2 - - array_type = self.WhichArray(proto_arr) - if not array_type: - raise ValueError("Provided array is either empty or invalid.") - - return_arr = [i for i in getattr(proto_arr, array_type)] - - if array_type == "timestamp_value": - return_arr = [Timestamp.ToDatetime(dt) for dt in return_arr] - elif array_type == "duration_value": - return_arr = [Duration.ToTimedelta(td) for td in return_arr] - - for i, item in enumerate(return_arr): - if isinstance(item, service_pb2.Array): - return_arr[i] = self.proto_to_arr(item) - elif isinstance(item, service_pb2.Tuple): - return_arr[i] = self.handle_tuple(item) - - 
return return_arr - async def from_grpc_request( - self, request: service_pb2.Request, context: grpc.ServicerContext + self, request: service_pb2.InferenceRequest, context: BentoServicerContext ) -> ext.NpNDArray: """ Process incoming protobuf request and convert it to `numpy.ndarray` @@ -292,29 +264,28 @@ async def from_grpc_request( a `numpy.ndarray` object. This can then be used inside users defined logics. """ - res: "ext.NpNDArray" - try: - res = np.array(self.proto_to_arr(request), dtype=self._dtype) - except ValueError: - res = np.array(self.proto_to_arr(request)) - res = self._verify_ndarray(res, BadInput) - return res + from bentoml.grpc.v1 import struct_pb2 + + from ..utils.grpc import proto_to_dict + + logger.info([f for f in struct_pb2.ContentsProto.DESCRIPTOR.fields]) + contents = proto_to_dict(request.contents) + return np.frombuffer(contents) async def to_grpc_response( - self, obj: ext.NpNDArray, context: grpc.ServicerContext - ) -> service_pb2.Response: + self, obj: ext.NpNDArray, context: BentoServicerContext + ) -> service_pb2.InferenceResponse: """ Process given objects and convert it to grpc protobuf response. 
Args: - obj (`np.ndarray`): - `np.ndarray` that will be serialized to protobuf + obj: `np.ndarray` that will be serialized to protobuf + context: grpc.aio.ServicerContext from grpc.aio.Server Returns: `io_descriptor_pb2.Array`: Protobuf representation of given `np.ndarray` """ - obj = self._verify_ndarray(obj, InternalServerError) - return self.arr_to_proto(obj) + pass def generate_protobuf(self): pass diff --git a/bentoml/_internal/server/__init__.py b/bentoml/_internal/server/__init__.py index 704681bc93e..276cb062f2e 100644 --- a/bentoml/_internal/server/__init__.py +++ b/bentoml/_internal/server/__init__.py @@ -29,9 +29,9 @@ SCRIPT_RUNNER = "bentoml._internal.server.cli.runner" -SCRIPT_API_SERVER = "bentoml._internal.server.cli.http.api_server" -SCRIPT_DEV_API_SERVER = "bentoml._internal.server.cli.http.dev_api_server" -SCRIPT_GRPC_DEV_API_SERVER = "bentoml._internal.server.cli.grpc.dev_api_server" +SCRIPT_API_SERVER = "bentoml._internal.server.cli.rest_api_server" +SCRIPT_DEV_API_SERVER = "bentoml._internal.server.cli.rest_dev_api_server" +SCRIPT_GRPC_DEV_API_SERVER = "bentoml._internal.server.cli.grpc_dev_api_server" MAX_AF_UNIX_PATH_LENGTH = 103 diff --git a/bentoml/_internal/server/cli/grpc/dev_api_server.py b/bentoml/_internal/server/cli/grpc_dev_api_server.py similarity index 100% rename from bentoml/_internal/server/cli/grpc/dev_api_server.py rename to bentoml/_internal/server/cli/grpc_dev_api_server.py diff --git a/bentoml/_internal/server/cli/http/__init__.py b/bentoml/_internal/server/cli/http/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/bentoml/_internal/server/cli/http/api_server.py b/bentoml/_internal/server/cli/rest_api_server.py similarity index 98% rename from bentoml/_internal/server/cli/http/api_server.py rename to bentoml/_internal/server/cli/rest_api_server.py index 1e891dbc43a..809d2d48e4c 100644 --- a/bentoml/_internal/server/cli/http/api_server.py +++ b/bentoml/_internal/server/cli/rest_api_server.py @@ 
-92,7 +92,7 @@ def main( watcher = Watcher( name="bento_api_server", cmd=sys.executable, - args=["-m", "bentoml._internal.server.cli.http.api_server"] + args=["-m", "bentoml._internal.server.cli.rest_api_server"] + unparse_click_params(params, ctx.command.params, factory=str), copy_env=True, numprocesses=1, diff --git a/bentoml/_internal/server/cli/http/dev_api_server.py b/bentoml/_internal/server/cli/rest_dev_api_server.py similarity index 100% rename from bentoml/_internal/server/cli/http/dev_api_server.py rename to bentoml/_internal/server/cli/rest_dev_api_server.py diff --git a/bentoml/_internal/server/grpc/interceptors/__init__.py b/bentoml/_internal/server/grpc/interceptors/__init__.py index e69de29bb2d..15008d6b1c1 100644 --- a/bentoml/_internal/server/grpc/interceptors/__init__.py +++ b/bentoml/_internal/server/grpc/interceptors/__init__.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import sys +import typing as t +import asyncio +import logging +from abc import ABCMeta +from abc import abstractmethod +from typing import TYPE_CHECKING + +import grpc +from grpc import aio + +from bentoml.exceptions import BentoMLException + +from ....utils.grpc import grpc_status_code +from ....utils.grpc import get_factory_and_method + +if TYPE_CHECKING: + from ..types import RequestType + from ..types import ResponseType + from ..types import RpcMethodHandler + from ..types import HandlerCallDetails + from ..types import BentoServicerContext + +AsyncClientInterceptorReturn = type( + "AsyncClientInterceptorReturn", (aio.Call, grpc.Future), {} +) + +logger = logging.getLogger(__name__) + + +# TODO: test cases. +class AsyncClientInterceptor(aio.UnaryUnaryClientInterceptor, metaclass=ABCMeta): + """ + Base class for BentoService client-side interceptors. + To implement, subclass this class and override ``intercept`` method. 
+ """ + + @abstractmethod + async def intercept( + self, + continuation: t.Callable[ + [aio.ClientCallDetails, RequestType], + t.Awaitable[AsyncClientInterceptorReturn], + ], + client_call_details: aio.ClientCallDetails, + request: RequestType, + ) -> AsyncClientInterceptorReturn: + """ + Override this method to implement an async client interceptor. + This method is currently only support unary RPCs. + + The interceptor implementation should call ``method`` via ``grpc.aio.ClientCallDetails`` and + the ``request`` object as parameters. + + The `request` parameter may be type checked to determine if this is a singluar request + for unary RPCs or an iterator for client-streaming or client-server streaming RPCs. + + Args: + continuation: A coroutine that proceeds with the invocation by + executing the next interceptor in the chain or invoking the + actual RPC on the underlying Channel. It is the interceptor's + responsibility to call it if it decides to move the RPC forward. + The interceptor can use + ``call = await continuation(client_call_details, request)`` + to continue with the RPC. `continuation` returns the call to the + RPC. + client_call_details: A ClientCallDetails object describing the outgoing RPC. + request: The request value for the RPC. + + Returns: + The type returns should match the type of return value received by calling ``method``. + This is an object of both ``grpc.aio.Call`` for the RPC and a ``grpc.Future``. + The actual result from the RPC can be got by calling ``.result()`` on the value returned from ``method``. 
+ """ + return await continuation(client_call_details, request) + + async def intercept_unary_unary( + self, + continuation: t.Callable[ + [aio.ClientCallDetails, ResponseType], + aio.UnaryUnaryCall[RequestType, ResponseType], + ], + client_call_details: aio.ClientCallDetails, + request: t.Any, + ) -> aio.UnaryUnaryCall[RequestType, ResponseType] | ResponseType: + """Implementation of grpc.UnaryUnaryClientInterceptor.""" + return await self.intercept(continuation, client_call_details, request) # type: ignore (unable to infer from mro) + + +# Modification from https://github.com/d5h-foss/grpc-interceptor +# added better typing that fits with BentoService signatures. +class AsyncServerInterceptor(aio.ServerInterceptor, metaclass=ABCMeta): + """ + Base class for BentoService server-side interceptors. + + To implement, subclass this class and override ``intercept`` method. + """ + + @abstractmethod + async def intercept( + self, + method: t.Callable[ + [RequestType, BentoServicerContext], t.Awaitable[ResponseType] + ], + request: RequestType, + context: BentoServicerContext, + method_name: str, + ) -> t.Any: + response_or_iterator = method(request, context) + if hasattr(response_or_iterator, "__aiter__"): + return response_or_iterator + else: + return await response_or_iterator + + # Implementation of grpc.ServerInterceptor, do not override. + async def intercept_service( + self, + continuation: t.Callable[[HandlerCallDetails], t.Awaitable[RpcMethodHandler]], + handler_call_details: HandlerCallDetails, + ) -> RpcMethodHandler: + """Implementation of grpc.aio.ServerInterceptor. + This is not part of the grpc_interceptor.AsyncServerInterceptor API, but must + have a public name. Do not override it, unless you know what you're doing. 
+ """ + next_handler = await continuation(handler_call_details) + handler_factory, next_handler_method = get_factory_and_method(next_handler) + + if next_handler.response_streaming: + + async def invoke_intercept_method( # type: ignore (function redefinition) + request: RequestType, context: BentoServicerContext + ) -> t.AsyncGenerator[ResponseType, None]: + method_name = handler_call_details.method + coroutine_or_asyncgen = self.intercept( + next_handler_method, request, context, method_name + ) + + # Async server streaming handlers return async_generator, because they + # use the async def + yield syntax. However, this is NOT a coroutine + # and hence is not awaitable. This can be a problem if the interceptor + # ignores the individual streaming response items and simply returns the + # result of method(request, context). In that case the interceptor IS a + # coroutine, and hence should be awaited. In both cases, we need + # something we can iterate over so that THIS function is an + # async_generator like the actual RPC method. + if asyncio.iscoroutine(coroutine_or_asyncgen): + asyncgen_or_none = await coroutine_or_asyncgen + # If a handler is using the read/write API, it will return None. 
+ if not asyncgen_or_none: + return + asyncgen = asyncgen_or_none + else: + asyncgen = coroutine_or_asyncgen + + async for r in asyncgen: + yield r + + else: + + async def invoke_intercept_method( + request: RequestType, context: BentoServicerContext + ) -> t.Awaitable[ResponseType]: + method_name = handler_call_details.method + return await self.intercept( + next_handler_method, + request, + context, + method_name, + ) + + return handler_factory( + invoke_intercept_method, + request_deserializer=next_handler.request_deserializer, + response_serializer=next_handler.response_serializer, + ) + + +class ExceptionHandlerInterceptor(AsyncServerInterceptor): + """An async interceptor that handles exceptions raised via BentoService.""" + + async def handle_exception( + self, + ex: BentoMLException, + context: BentoServicerContext, + method_name: str, + ) -> None: + """Handle an exception raised by a method. + + Args: + ex: The exception raised by the method. + context: The context of the RPC. + method_name: The name of the method. 
+ """ + logger.error( + f"Error while invoking {method_name}: {ex}", exc_info=sys.exc_info() + ) + await context.abort(code=grpc_status_code(ex), details=str(ex)) + + async def generate_responses( + self, + context: BentoServicerContext, + method_name: str, + response_iterator: t.AsyncIterable[ResponseType], + ) -> t.AsyncGenerator[t.Any, None]: + """Yield all the responses, but check for errors along the way.""" + try: + async for r in response_iterator: + yield r + except BentoMLException as ex: + await self.handle_exception(ex, context, method_name) + + async def intercept( + self, + method: t.Callable[ + [RequestType, BentoServicerContext], t.Awaitable[ResponseType] + ], + request: RequestType, + context: BentoServicerContext, + method_name: str, + ) -> t.Any: + try: + response_or_iterator = method(request, context) + if not hasattr(response_or_iterator, "__aiter__"): + return await response_or_iterator + except BentoMLException as ex: + await self.handle_exception(ex, context, method_name) + + return self.generate_responses(context, method_name, response_or_iterator) # type: ignore (unknown variable warning) diff --git a/bentoml/_internal/server/grpc/server.py b/bentoml/_internal/server/grpc/server.py index 5aacb3556f4..1a4be0ed102 100644 --- a/bentoml/_internal/server/grpc/server.py +++ b/bentoml/_internal/server/grpc/server.py @@ -62,7 +62,6 @@ async def startup(self) -> None: handler() await self.server.start() - logger.debug("GRPC server started.") async def shutdown(self): # Running on_startup callback. 
@@ -73,7 +72,6 @@ async def shutdown(self): handler() await self.server.stop(grace=self._grace_period) - logger.debug("GRPC server stopped.") async def wait_for_termination(self, timeout: int | None = None) -> bool: return await self.server.wait_for_termination(timeout=timeout) diff --git a/bentoml/_internal/server/grpc/servicer.py b/bentoml/_internal/server/grpc/servicer.py index 328429c43e6..6cf61e77178 100644 --- a/bentoml/_internal/server/grpc/servicer.py +++ b/bentoml/_internal/server/grpc/servicer.py @@ -9,26 +9,28 @@ from bentoml.exceptions import MissingDependencyException from bentoml._internal.service.service import Service -from .utils import handle_grpc_error from ...configuration import get_debug_mode if TYPE_CHECKING: - from .utils import BentoServicerContext + from .types import BentoServicerContext def register_bento_servicer(service: Service, server: aio.Server) -> None: + """ + This is the actual implementation of BentoServicer. + Main inference entrypoint will be invoked via /bentoml.grpc..BentoService/Inference + """ from bentoml.grpc.v1 import service_pb2 as _service_pb2 from bentoml.grpc.v1 import service_pb2_grpc as _service_pb2_grpc class BentoServiceServicer(_service_pb2_grpc.BentoServiceServicer): """An asyncio implementation of BentoService servicer.""" - @handle_grpc_error - async def Call( + async def Inference( # type: ignore (no async types) self, - request: _service_pb2.CallRequest, + request: _service_pb2.InferenceRequest, context: BentoServicerContext, - ) -> _service_pb2.CallResponse | None: + ) -> _service_pb2.InferenceResponse | None: if request.api_name not in service.apis: raise UnprocessableEntity( f"given 'api_name' is not defined in {service.name}", @@ -46,7 +48,7 @@ async def Call( response = await api.output.to_grpc_response(output, context) return response - _service_pb2_grpc.add_BentoServiceServicer_to_server(BentoServiceServicer(), server) + _service_pb2_grpc.add_BentoServiceServicer_to_server(BentoServiceServicer(), 
server) # type: ignore (lack of asyncio types) async def register_health_servicer(server: aio.Server) -> None: diff --git a/bentoml/_internal/server/grpc/types.py b/bentoml/_internal/server/grpc/types.py new file mode 100644 index 00000000000..4a77a1760af --- /dev/null +++ b/bentoml/_internal/server/grpc/types.py @@ -0,0 +1,98 @@ +""" +Specific types for BentoService gRPC server. +""" +from __future__ import annotations + +from typing import Tuple +from typing import TypeVar +from typing import Callable +from typing import Optional +from typing import Awaitable +from typing import NamedTuple +from typing import TYPE_CHECKING + +import grpc + + +class HandlerCallDetails( + NamedTuple("HandlerCallDetails", method=str, invocation_metadata=Tuple[str, bytes]), + grpc.HandlerCallDetails, +): + """Describes an RPC that has just arrived for service. + + Attributes: + method: The method name of the RPC. + invocation_metadata: A sequence of metadatum, a key-value pair included in the HTTP header. 
+ An example is: ``('binary-metadata-bin', b'\\x00\\xFF')`` + """ + + method: str + invocation_metadata: Tuple[str, bytes] + + +if TYPE_CHECKING: + from typing import Protocol + + from grpc import aio + + from bentoml.grpc.v1.service_pb2 import InferenceRequest + from bentoml.grpc.v1.service_pb2 import InferenceResponse + from bentoml.grpc.v1.service_pb2 import ServerLiveRequest + from bentoml.grpc.v1.service_pb2 import ServerLiveResponse + from bentoml.grpc.v1.service_pb2 import ServerReadyRequest + from bentoml.grpc.v1.service_pb2 import ServerReadyResponse + from bentoml.grpc.v1.service_pb2_grpc import BentoServiceServicer + + P = TypeVar("P", contravariant=True) + + ResponseType = InferenceResponse | ServerLiveResponse | ServerReadyResponse + RequestType = InferenceRequest | ServerLiveRequest | ServerReadyRequest + BentoServicerContext = aio.ServicerContext[ResponseType, RequestType] + + RequestDeserializerFn = Optional[Callable[[RequestType], object]] + ResponseSerializerFn = Optional[Callable[[bytes], ResponseType]] + + class AsyncHandlerProtocol(Protocol[P]): + def __call__( + self, + behaviour: Callable[[RequestType, BentoServicerContext], Awaitable[P]], + request_deserializer: RequestDeserializerFn = None, + response_serializer: ResponseSerializerFn = None, + ) -> grpc.RpcMethodHandler: + ... 
+ + AsyncHandlerMethod = AsyncHandlerProtocol[ResponseType] + + class RpcMethodHandler( + NamedTuple( + "RpcMethodHandler", + request_streaming=bool, + response_streaming=bool, + request_deserializer=RequestDeserializerFn, + response_serializer=ResponseSerializerFn, + unary_unary=Optional[AsyncHandlerMethod], + unary_stream=Optional[AsyncHandlerMethod], + stream_unary=Optional[AsyncHandlerMethod], + stream_stream=Optional[AsyncHandlerMethod], + ), + grpc.RpcMethodHandler, + ): + """An implementation of a single RPC method.""" + + request_streaming: bool + response_streaming: bool + request_deserializer: RequestDeserializerFn + response_serializer: ResponseSerializerFn + unary_unary: Optional[AsyncHandlerMethod] + unary_stream: Optional[AsyncHandlerMethod] + stream_unary: Optional[AsyncHandlerMethod] + stream_stream: Optional[AsyncHandlerMethod] + + __all__ = [ + "RequestType", + "ResponseType", + "BentoServicerContext", + "BentoServiceServicer", + "HandlerCallDetails", + "RpcMethodHandler", + ] diff --git a/bentoml/_internal/server/grpc/utils.py b/bentoml/_internal/server/grpc/utils.py deleted file mode 100644 index 4d6afb5cc4e..00000000000 --- a/bentoml/_internal/server/grpc/utils.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import annotations - -import typing as t -import asyncio -import logging -from http import HTTPStatus -from typing import TYPE_CHECKING -from functools import wraps - -import grpc - -from bentoml.exceptions import BentoMLException - -logger = logging.getLogger(__name__) - -if TYPE_CHECKING: - from grpc import aio - - from bentoml.grpc.v1.service_pb2 import CallRequest - from bentoml.grpc.v1.service_pb2 import CallResponse - from bentoml.grpc.v1.service_pb2 import ServerLiveRequest - from bentoml.grpc.v1.service_pb2 import ServerLiveResponse - from bentoml.grpc.v1.service_pb2 import ServerReadyRequest - from bentoml.grpc.v1.service_pb2 import ServerReadyResponse - from bentoml.grpc.v1.service_pb2_grpc import BentoServiceServicer - - 
ResponseType = CallResponse | ServerLiveResponse | ServerReadyResponse - RequestType = CallRequest | ServerLiveRequest | ServerReadyRequest - BentoServicerContext = aio.ServicerContext[ResponseType, RequestType] - -_STATUS_CODE_MAPPING = { - HTTPStatus.BAD_REQUEST: grpc.StatusCode.INVALID_ARGUMENT, - HTTPStatus.INTERNAL_SERVER_ERROR: grpc.StatusCode.INTERNAL, - HTTPStatus.NOT_FOUND: grpc.StatusCode.NOT_FOUND, - HTTPStatus.UNPROCESSABLE_ENTITY: grpc.StatusCode.FAILED_PRECONDITION, -} - - -def grpc_status_code(err: BentoMLException) -> grpc.StatusCode: - """ - Convert BentoMLException.error_code to grpc.StatusCode. - """ - return _STATUS_CODE_MAPPING.get(err.error_code, grpc.StatusCode.UNKNOWN) - - -def handle_grpc_error(fn: t.Callable[..., t.Any]) -> t.Any: - """ - Decorator to handle grpc error. - """ - - @wraps(fn) - async def wrapper( - self: BentoServiceServicer, - request: RequestType, - context: BentoServicerContext, - ) -> ResponseType | None: - try: - if asyncio.iscoroutinefunction(fn): - return await fn(self, request, context) - else: - return fn(self, request, context) - except BentoMLException as e: - logger.error(e) - await context.abort(code=grpc_status_code(e), details=str(e)) - - return wrapper diff --git a/bentoml/_internal/server/grpc_app.py b/bentoml/_internal/server/grpc_app.py index 8c428851486..a224eba4f0d 100644 --- a/bentoml/_internal/server/grpc_app.py +++ b/bentoml/_internal/server/grpc_app.py @@ -53,7 +53,7 @@ def on_startup(self) -> OnStartup: on_startup: OnStartup = [ self.mark_as_ready, - self.bento_service.on_asgi_app_startup, + self.bento_service.on_grpc_server_startup, ] if BentoMLContainer.development_mode.get(): for runner in self.bento_service.runners: @@ -77,7 +77,7 @@ def on_startup(self) -> OnStartup: @property def on_shutdown(self) -> list[t.Callable[[], None]]: - on_shutdown = [self.bento_service.on_asgi_app_shutdown] + on_shutdown = [self.bento_service.on_grpc_server_shutdown] for runner in self.bento_service.runners: 
on_shutdown.append(runner.destroy) @@ -110,10 +110,13 @@ def options( @property def interceptors(self) -> list[aio.ServerInterceptor]: - interceptors: list[aio.ServerInterceptor] = [] + from .grpc.interceptors import ExceptionHandlerInterceptor + + # TODO: add access log, tracing, prometheus interceptors. + interceptors: list[aio.ServerInterceptor] = [ExceptionHandlerInterceptor()] + + # add users-defined interceptors. interceptors.extend( [interceptor() for interceptor in self.bento_service.interceptors] ) - - # TODO: add access log, tracing, prometheus interceptors. return interceptors diff --git a/bentoml/_internal/service/service.py b/bentoml/_internal/service/service.py index a1abe793196..24f8b439e64 100644 --- a/bentoml/_internal/service/service.py +++ b/bentoml/_internal/service/service.py @@ -210,6 +210,12 @@ def on_asgi_app_startup(self) -> None: def on_asgi_app_shutdown(self) -> None: pass + def on_grpc_server_startup(self) -> None: + pass + + def on_grpc_server_shutdown(self) -> None: + pass + @property def grpc_server(self) -> GRPCServer: from ..server.grpc_app import GRPCAppFactory diff --git a/bentoml/_internal/utils/grpc/__init__.py b/bentoml/_internal/utils/grpc/__init__.py index d231dd8f6c2..a5c940717e4 100644 --- a/bentoml/_internal/utils/grpc/__init__.py +++ b/bentoml/_internal/utils/grpc/__init__.py @@ -1,8 +1,48 @@ from __future__ import annotations import enum +import typing as t +import logging +from http import HTTPStatus +from typing import TYPE_CHECKING from dataclasses import dataclass +import grpc + +from bentoml.exceptions import BentoMLException + +from .serializer import proto_to_dict + +if TYPE_CHECKING: + from ...server.grpc.types import RequestType + from ...server.grpc.types import ResponseType + from ...server.grpc.types import RpcMethodHandler + from ...server.grpc.types import BentoServicerContext + +__all__ = [ + "grpc_status_code", + "parse_method_name", + "get_method_type", + "get_factory_and_method", + "proto_to_dict", +] + 
+logger = logging.getLogger(__name__) + +_STATUS_CODE_MAPPING = { + HTTPStatus.BAD_REQUEST: grpc.StatusCode.INVALID_ARGUMENT, + HTTPStatus.INTERNAL_SERVER_ERROR: grpc.StatusCode.INTERNAL, + HTTPStatus.NOT_FOUND: grpc.StatusCode.NOT_FOUND, + HTTPStatus.UNPROCESSABLE_ENTITY: grpc.StatusCode.FAILED_PRECONDITION, +} + + +def grpc_status_code(err: BentoMLException) -> grpc.StatusCode: + """ + Convert BentoMLException.error_code to grpc.StatusCode. + """ + return _STATUS_CODE_MAPPING.get(err.error_code, grpc.StatusCode.UNKNOWN) + class RpcMethodType(str, enum.Enum): UNARY = "UNARY" @@ -59,3 +99,21 @@ def get_method_type(request_streaming: bool, response_streaming: bool) -> str: return RpcMethodType.BIDI_STREAMING else: return RpcMethodType.UNKNOWN + + +def get_factory_and_method( + rpc_handler: RpcMethodHandler, +) -> tuple[ + t.Callable[..., t.Any], + t.Callable[[RequestType, BentoServicerContext], t.Awaitable[ResponseType]], +]: + if rpc_handler.unary_unary: + return grpc.unary_unary_rpc_method_handler, rpc_handler.unary_unary + elif rpc_handler.unary_stream: + return grpc.unary_stream_rpc_method_handler, rpc_handler.unary_stream + elif rpc_handler.stream_unary: + return grpc.stream_unary_rpc_method_handler, rpc_handler.stream_unary + elif rpc_handler.stream_stream: + return grpc.stream_stream_rpc_method_handler, rpc_handler.stream_stream + else: + raise BentoMLException(f"RPC method handler {rpc_handler} does not exist.") diff --git a/bentoml/_internal/utils/grpc/codec.py b/bentoml/_internal/utils/grpc/codec.py new file mode 100644 index 00000000000..44a9fc02c9f --- /dev/null +++ b/bentoml/_internal/utils/grpc/codec.py @@ -0,0 +1,369 @@ +from __future__ import annotations + +import typing as t +from abc import ABCMeta +from abc import abstractmethod +from http import HTTPStatus +from typing import TYPE_CHECKING +from functools import partial +from collections import namedtuple + +from google.protobuf.message import Message + +from bentoml.exceptions import 
BentoMLException + +from ..lazy_loader import LazyLoader + +if TYPE_CHECKING: + from bentoml.grpc.v1 import struct_pb2 + + CoderCallback = list[ + tuple[ + t.Callable[[t.Any], bool], + t.Callable[[t.Any], bytes] | t.Callable[[bytes], t.Any], + ] + ] +else: + struct_pb2 = LazyLoader("struct_pb2", globals(), "bentoml.grpc.v1.struct_pb2") + + +F = t.Callable[..., t.Any] +P = t.TypeVar("P") + +__all__ = ["can_encode", "encode_structure", "decode_proto", "register_codec"] + + +def register_codec(cdc: t.Type[Codec[t.Any]]) -> None: + # register a codec. + _codecs.append(cdc()) + + +def _get_encoders() -> CoderCallback: + return [(c.can_encode, c.do_encode) for c in _codecs] + + +def _get_decoders() -> CoderCallback: + return [(c.can_decode, c.do_decode) for c in _codecs] + + +def _map_structure(obj: t.Any, coders: CoderCallback) -> Message: + for can, do in coders: + if can(obj): + fn = partial(_map_structure, coders=coders) + return do(obj, fn) + raise EncodeError(f"No codec found for {str(obj)} of type {type(obj)}") + + +def can_encode(nested: t.Any) -> bool: + # Determines whether a nested structure can be encoded into a proto. + try: + encode_structure(nested) + except EncodeError: + return False + return True + + +def encode_structure(nested: t.Any) -> Message: + # encode given nested object to result protos. + return _map_structure(nested, _get_encoders()) + + +def decode_proto(proto: Message) -> t.Any: + # decode given proto to nested object. + return _map_structure(proto, _get_decoders()) + + +class EncodeError(BentoMLException): + """Error raised when a codec cannot encode an object or not implemented.""" + + error_code = HTTPStatus.NOT_IMPLEMENTED + + +class Codec(t.Generic[P], metaclass=ABCMeta): + @abstractmethod + def can_encode(self, obj: t.Any) -> bool: + """ + Check if the codec can encode the given object. + """ + + @abstractmethod + def do_encode(self, obj: P, encode_fn: F) -> struct_pb2.ContentsProto: + """ + Encode the given object. 
+ """ + + def can_decode(self, value: Message) -> bool: + """ + Check if the codec can decode the given bytes. + """ + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> P: + """ + Decode the given bytes. + """ + + +class _NoneCodec(Codec[None]): + def can_encode(self, obj: t.Any) -> bool: + return obj is None + + def do_encode(self, obj: None, encode_fn: F) -> struct_pb2.ContentsProto: + del obj, encode_fn + value = struct_pb2.ContentsProto() + value.none.CopyFrom(struct_pb2.NoneValue()) + return value + + def can_decode(self, value: Message) -> bool: + return value.HasField("none") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> None: + del value, decode_fn + return None + + +class _Float64Codec(Codec[float]): + def can_encode(self, obj: t.Any) -> bool: + return isinstance(obj, float) + + def do_encode(self, obj: float, encode_fn: F) -> struct_pb2.ContentsProto: + del encode_fn + val = struct_pb2.ContentsProto() + val.float64 = obj + return val + + def can_decode(self, value: Message) -> bool: + return value.HasField("float64") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> float: + del decode_fn + return float(value.float64) + + +class _Int64Codec(Codec[int]): + def can_encode(self, obj: t.Any) -> bool: + return not isinstance(obj, bool) and isinstance(obj, int) + + def do_encode(self, obj: int, encode_fn: F) -> struct_pb2.ContentsProto: + del encode_fn + value = struct_pb2.ContentsProto() + value.int64 = obj + return value + + def can_decode(self, value: Message) -> bool: + return value.HasField("int64") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> int: + del decode_fn + return int(value.int64) + + +class _StringCodec(Codec[str]): + def can_encode(self, obj: t.Any) -> bool: + return isinstance(obj, str) + + def do_encode(self, obj: str, encode_fn: F) -> struct_pb2.ContentsProto: + del encode_fn + val = struct_pb2.ContentsProto() + val.string = obj + return val 
+ + def can_decode(self, value: Message) -> bool: + return value.HasField("string") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> str: + del decode_fn + return str(value.string) + + +class _BoolCodec(Codec[bool]): + def can_encode(self, obj: t.Any) -> bool: + return isinstance(obj, bool) + + def do_encode(self, obj: bool, encode_fn: F) -> struct_pb2.ContentsProto: + del encode_fn + val = struct_pb2.ContentsProto() + val.bool = obj + return val + + def can_decode(self, value: Message) -> bool: + return value.HasField("bool") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> bool: + del decode_fn + return value.bool + + +class _ShapeCodec(Codec[t.Tuple[t.List[t.Tuple[int, str]], bool]]): + def can_encode(self, obj: t.Any) -> bool: + return _is_tuple(obj) + + def do_encode( + self, obj: t.Tuple[t.List[t.Tuple[int, str]], bool], encode_fn: F + ) -> struct_pb2.ContentsProto: + return struct_pb2.ContentsProto() + + def can_decode(self, value: Message) -> bool: + return value.HasField("shape") + + def do_decode( + self, value: struct_pb2.ContentsProto, decode_fn: F + ) -> t.Tuple[t.List[t.Tuple[int, str]], bool]: + return value + + +class _ListCodec(Codec[t.List[t.Any]]): + def can_encode(self, obj: t.Any) -> bool: + return isinstance(obj, list) + + def do_encode(self, obj: list[t.Any], encode_fn: F) -> struct_pb2.ContentsProto: + encoded = struct_pb2.ContentsProto() + encoded.list.CopyFrom(struct_pb2.ListValue()) + for element in obj: + encoded.list.values.add().CopyFrom(encode_fn(element)) + return encoded + + def can_decode(self, value: Message) -> bool: + return value.HasField("list") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> list[t.Any]: + return [decode_fn(element) for element in value.list.values] + + +class _TupleCodec(Codec[t.Tuple[t.Any, ...]]): + def can_encode(self, obj: t.Any) -> bool: + return _is_tuple(obj) + + def do_encode( + self, obj: tuple[t.Any, ...], encode_fn: F + ) 
-> struct_pb2.ContentsProto: + encoded = struct_pb2.ContentsProto() + encoded.tuple.CopyFrom(struct_pb2.TupleValue()) + for element in obj: + encoded.tuple.values.add().CopyFrom(encode_fn(element)) + return encoded + + def can_decode(self, value: Message) -> bool: + return value.HasField("tuple") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> tuple[t.Any]: + return tuple(decode_fn(element) for element in value.tuple.values) + + +def _is_tuple(obj: t.Any) -> bool: + return not _is_named_tuple(obj) and isinstance(obj, tuple) + + +def _is_named_tuple(instance: type) -> bool: + # Returns True iff `instance` is a `namedtuple`. + if not isinstance(instance, tuple): + return False + return ( + hasattr(instance, "_fields") + and isinstance(instance._fields, t.Sequence) + and all(isinstance(f, str) for f in instance._fields) # type: ignore + ) + + +class _DictCodec(Codec[t.Dict[str, t.Any]]): + def can_encode(self, obj: t.Any) -> bool: + return isinstance(obj, dict) + + def do_encode( + self, obj: dict[str, t.Any], encode_fn: F + ) -> struct_pb2.ContentsProto: + encoded = struct_pb2.ContentsProto() + encoded.dict.CopyFrom(struct_pb2.DictValue()) + for key, value in obj.items(): + encoded.dict.fields[key].CopyFrom(encode_fn(value)) + return encoded + + def can_decode(self, value: Message) -> bool: + return value.HasField("dict") + + def do_decode( + self, value: struct_pb2.ContentsProto, decode_fn: F + ) -> dict[str, t.Any]: + return {key: decode_fn(val) for key, val in value.dict.fields.items()} + + +class _NamedTupleCodec(Codec[t.NamedTuple]): + def can_encode(self, obj: t.Any) -> bool: + return _is_named_tuple(obj) + + def do_encode(self, obj: t.NamedTuple, encode_fn: F) -> struct_pb2.ContentsProto: + encoded = struct_pb2.ContentsProto() + encoded.namedtuple.CopyFrom(struct_pb2.NamedTupleValue()) + encoded.namedtuple.name = obj.__class__.__name__ + for key in obj._fields: + pair = encoded.namedtuple.values.add() + pair.key = key + 
pair.value.CopyFrom(encode_fn(obj._asdict()[key])) + return encoded + + def can_decode(self, value: Message) -> bool: + return value.HasField("namedtuple") + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> t.NamedTuple: + kv_pairs = value.namedtuple.values + items = [(pair.key, decode_fn(pair.value)) for pair in kv_pairs] + # TODO: makes this type safe by using t.NamedTuple + namedtuple_klass = namedtuple( # type: ignore (no type support yet) + value.namedtuple.name, list(map(lambda x: x[0], items)) + ) + return namedtuple_klass(**dict(items)) + + +class _DataFrameCodec(Codec[t.List[str]]): + _df_type: t.Literal["dataframe_columns", "dataframe_indices"] + + def __new__(cls): + assert cls._df_type in ["dataframe_columns", "dataframe_indices"] + if cls._df_type == "dataframe_columns": + res = object.__new__(_DataFrameColumnsCodec) + elif cls._df_type == "dataframe_indices": + res = object.__new__(_DataFrameIndicesCodec) + else: + raise EncodeError("Unsupported DataFrame type.") + return res + + def can_encode(self, obj: t.Iterable[t.Any]) -> bool: + return isinstance(obj, list) and all(isinstance(x, str) for x in obj) + + def do_encode(self, obj: list[t.Any], encode_fn: F) -> struct_pb2.ContentsProto: + del encode_fn + encoded = struct_pb2.ContentsProto() + df = getattr(encoded, self._df_type) + df.CopyFrom(struct_pb2.ListValue()) + for element in obj: + df.values.add().CopyFrom(str(element)) + return encoded + + def can_decode(self, value: Message) -> bool: + return value.HasField(self._df_type) + + def do_decode(self, value: struct_pb2.ContentsProto, decode_fn: F) -> list[str]: + del decode_fn + return [str(element) for element in getattr(value, self._df_type).values] + + +class _DataFrameColumnsCodec(_DataFrameCodec): + _df_type = "dataframe_columns" + + +class _DataFrameIndicesCodec(_DataFrameCodec): + _df_type = "dataframe_indices" + + +_codecs: list[Codec[t.Any]] = [ + _NoneCodec(), + _Float64Codec(), + _Int64Codec(), + _StringCodec(), + 
_BoolCodec(), + _ListCodec(), + _TupleCodec(), + _DictCodec(), + _NamedTupleCodec(), + _DataFrameColumnsCodec(), + _DataFrameIndicesCodec(), +] diff --git a/bentoml/_internal/utils/grpc/serializer.py b/bentoml/_internal/utils/grpc/serializer.py index 9e78c1558e1..ba8d37932f2 100644 --- a/bentoml/_internal/utils/grpc/serializer.py +++ b/bentoml/_internal/utils/grpc/serializer.py @@ -1,25 +1,16 @@ from __future__ import annotations -import builtins +import typing as t +from typing import TYPE_CHECKING -from google.protobuf.descriptor import FieldDescriptor +if TYPE_CHECKING: + from google.protobuf.message import Message -# defines a descriptor type to python native type. -TYPE_CALLABLE_MAP: dict[int, type] = { - FieldDescriptor.TYPE_DOUBLE: builtins.float, - FieldDescriptor.TYPE_FLOAT: builtins.float, - FieldDescriptor.TYPE_INT32: builtins.int, - FieldDescriptor.TYPE_INT64: builtins.int, - FieldDescriptor.TYPE_UINT32: builtins.int, - FieldDescriptor.TYPE_UINT64: builtins.int, - FieldDescriptor.TYPE_SINT32: builtins.int, - FieldDescriptor.TYPE_SINT64: builtins.int, - FieldDescriptor.TYPE_FIXED32: builtins.int, - FieldDescriptor.TYPE_FIXED64: builtins.int, - FieldDescriptor.TYPE_SFIXED32: builtins.int, - FieldDescriptor.TYPE_SFIXED64: builtins.int, - FieldDescriptor.TYPE_BOOL: builtins.bool, - FieldDescriptor.TYPE_STRING: builtins.str, - FieldDescriptor.TYPE_BYTES: builtins.bytes, - FieldDescriptor.TYPE_ENUM: builtins.int, -} + +def proto_to_dict(msg: Message, **kwargs: t.Any) -> dict[str, t.Any]: + from google.protobuf.json_format import MessageToDict + + if "preserving_proto_field_name" not in kwargs: + kwargs.setdefault("preserving_proto_field_name", True) + + return MessageToDict(msg, **kwargs) diff --git a/bentoml/_internal/server/cli/grpc/__init__.py b/bentoml/grpc/v1/__init__.py similarity index 100% rename from bentoml/_internal/server/cli/grpc/__init__.py rename to bentoml/grpc/v1/__init__.py diff --git a/bentoml/grpc/v1/service.proto 
b/bentoml/grpc/v1/service.proto index fd34a2bfe05..86ffaa6ba6f 100644 --- a/bentoml/grpc/v1/service.proto +++ b/bentoml/grpc/v1/service.proto @@ -3,6 +3,9 @@ syntax = "proto3"; package bentoml.grpc.v1; import "bentoml/grpc/v1/struct.proto"; +import "google/protobuf/any.proto"; +import "google/rpc/error_details.proto"; +import "google/rpc/status.proto"; // cc_enable_arenas pre-allocate memory for given message to improve speed. (C++ only) option cc_enable_arenas = true; @@ -22,11 +25,8 @@ service BentoService { // Check server readiness rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} - // Call handles unary API. - rpc Call(CallRequest) returns (CallResponse) {} - - // CallStream handles streaming API. - rpc CallStream(stream CallStreamRequest) returns (stream CallStreamResponse) {} + // Inference handles unary API. + rpc Inference(InferenceRequest) returns (InferenceResponse) {} } // request for ServerLive that takes no arguments. @@ -45,31 +45,175 @@ message ServerReadyResponse { bool ready = 1; } -// Request for Call. -message CallRequest { +// Request for Inference. +message InferenceRequest { // a given API route the rpc request is sent to. - string api_name = 1; + string api_name = 14; + string api_version = 15; // representation of the input value. - bentoml.grpc.v1.StructuredValue contents = 2; -} - -// Response from Call. -message CallResponse { - // representation of the output value. - bentoml.grpc.v1.StructuredValue contents = 1; + oneof contents { + // Serialized bytes contents. + bytes bytes_contents = 1; + + // DT_HALF, DT_BFLOAT16 + int32 half_contents = 10; + + // DT_FLOAT. + float float_contents = 2; + + // DT_DOUBLE. + double double_contents = 3; + + // DT_INT32, DT_INT16, DT_UINT16, DT_INT8, DT_UINT8. + int32 int_contents = 4; + + // DT_STRING + bytes string_contents = 5; + + // DT_COMPLEX64. 
+    float scomplex_contents = 6;
+
+    // DT_INT64
+    int64 int64_contents = 7;
+
+    // DT_BOOL
+    bool bool_contents = 8;
+
+    // DT_COMPLEX128
+    double dcomplex_contents = 9;
+
+    // DT_UINT32
+    uint32 uint32_contents = 11;
+
+    // DT_UINT64
+    uint64 uint64_contents = 12;
+
+    // Represents arbitrary Python data structures.
+    NoneValue none_contents = 50;
+    ListValue list_contents = 51;
+    TupleValue tuple_contents = 52;
+    DictValue dict_contents = 53;
+    NamedTupleValue namedtuple_contents = 54;
+
+    google.protobuf.Any any_contents = 55;
+  }
+
+  // The data contained in an input can be represented in
+  // "raw" bytes form or in the repeated type that matches the
+  // data type.
+  // Using the "raw" bytes form will typically allow higher performance due to the way protobuf
+  // allocation and reuse interacts with GRPC.
+  // For example, see https://github.com/grpc/grpc/issues/23231.
+  //
+  // To use the raw representation 'raw_bytes_contents' must be
+  // initialized with data for each tensor in the same order as
+  // 'inputs'. For each tensor, the size of this content must
+  // match what is expected by the tensor's shape and data
+  // type. The raw data must be the flattened, one-dimensional,
+  // row-major order of the tensor elements without any stride
+  // or padding between the elements.
+  //
+  // Note that the FP16 and BF16 data
+  // types must be represented as raw content as there is no
+  // specific data type for a 16-bit float type.
+  //
+  // If this field is specified then contents must not be specified for any input tensor.
+  repeated bytes raw_bytes_contents = 13;
+
+  // dataframe_columns and dataframe_indices are used
+  // in conjunction with contents to represent a dataframe.
+  // Recommendation: for better performance, use raw_bytes_contents in
+  // conjunction with the below fields.
+  repeated string dataframe_columns = 101;
+  repeated string dataframe_indices = 102;
 }
 
-// Request message for CallStream.
-message CallStreamRequest { - CallRequest stream_inputs = 1; -} - -// Response message for CallStream. -message CallStreamResponse { - // message describing the error. Empty message signals the call was successful without error. - string error_message = 1; - +// Response from Inference. +message InferenceResponse { // representation of the output value. - CallResponse stream_outputs = 2; + // representation of the input value. + oneof contents { + // Serialized bytes contents. + bytes bytes_contents = 1; + + // DT_HALF, DT_BFLOAT16 + int32 half_contents = 10; + + // DT_FLOAT. + float float_contents = 2; + + // DT_DOUBLE. + double double_contents = 3; + + // DT_INT32, DT_INT16, DT_UINT16, DT_INT8, DT_UINT8. + int32 int_contents = 4; + + // DT_STRING + bytes string_contents = 5; + + // DT_COMPLEX64. + float scomplex_contents = 6; + + // DT_INT64 + int64 int64_contents = 7; + + // DT_BOOL + bool bool_contents = 8; + + // DT_COMPLEX128 + double dcomplex_contents = 9; + + // DT_UINT32 + uint32 uint32_contents = 11; + + // DT_UINT64 + uint64 uint64_contents = 12; + + // Represents arbitrary Python data structures. + NoneValue none_contents = 50; + ListValue list_contents = 51; + TupleValue tuple_contents = 52; + DictValue dict_contents = 53; + NamedTupleValue namedtuple_contents = 54; + + google.protobuf.Any any_contents = 55; + } + + // The data contained in an output can be represented in + // "raw" bytes form or in the repeated type that matches the + // data type. + // Using the "raw" bytes form will typically allow higher performance due to the way protobuf + // allocation and reuse interacts with GRPC. + // For example, see https://github.com/grpc/grpc/issues/23231. + // + // To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. For each tensor, the size of this content must + // match what is expected by the tensor's shape and data + // type. 
The raw data must be the flattened, one-dimensional, + // row-major order of the tensor elements without any stride + // or padding between the elements. + // + // Note that the FP16 and BF16 data + // types must be represented as raw content as there is no + // specific data type for a 16-bit float type. + // + // If this field is specified then contents must not be specified for any input tensor. + repeated bytes raw_bytes_contents = 13; + + // Sends a rpc status back to the client. + google.rpc.Status status = 14; + + // the response should also include an error message type + oneof errors { + google.rpc.RetryInfo retry_info = 100; + google.rpc.DebugInfo debug_info = 101; + google.rpc.QuotaFailure quota_failure = 102; + google.rpc.ErrorInfo error_info = 103; + google.rpc.PreconditionFailure precondition_failure = 104; + google.rpc.BadRequest bad_request = 105; + google.rpc.RequestInfo request_info = 106; + google.rpc.LocalizedMessage localized_message = 107; + } } diff --git a/bentoml/grpc/v1/service_pb2.pyi b/bentoml/grpc/v1/service_pb2.pyi deleted file mode 100644 index 5f550d2962d..00000000000 --- a/bentoml/grpc/v1/service_pb2.pyi +++ /dev/null @@ -1,201 +0,0 @@ -""" -@generated by mypy-protobuf. Do not edit manually! -isort:skip_file -""" -import abc -import bentoml.grpc.v1.struct_pb2 -import builtins -import concurrent.futures -import google.protobuf.descriptor -import google.protobuf.message -import google.protobuf.service -import typing -import typing_extensions - -DESCRIPTOR: google.protobuf.descriptor.FileDescriptor - -class ServerLiveRequest(google.protobuf.message.Message): - """request for ServerLive that takes no arguments.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - def __init__(self, - ) -> None: ... 
-global___ServerLiveRequest = ServerLiveRequest - -class ServerLiveResponse(google.protobuf.message.Message): - """response for ServerLive returns a boolean determine server's liveliness.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - LIVE_FIELD_NUMBER: builtins.int - live: builtins.bool - def __init__(self, - *, - live: builtins.bool = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["live",b"live"]) -> None: ... -global___ServerLiveResponse = ServerLiveResponse - -class ServerReadyRequest(google.protobuf.message.Message): - """request for ServerReady that takes no arguments.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - def __init__(self, - ) -> None: ... -global___ServerReadyRequest = ServerReadyRequest - -class ServerReadyResponse(google.protobuf.message.Message): - """response for ServerReady returns a boolean determine server's readiness.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - READY_FIELD_NUMBER: builtins.int - ready: builtins.bool - def __init__(self, - *, - ready: builtins.bool = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["ready",b"ready"]) -> None: ... -global___ServerReadyResponse = ServerReadyResponse - -class CallRequest(google.protobuf.message.Message): - """Request for Call.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - API_NAME_FIELD_NUMBER: builtins.int - CONTENTS_FIELD_NUMBER: builtins.int - api_name: typing.Text - """a given API route the rpc request is sent to.""" - - @property - def contents(self) -> bentoml.grpc.v1.struct_pb2.StructuredValue: - """representation of the input value.""" - pass - def __init__(self, - *, - api_name: typing.Text = ..., - contents: typing.Optional[bentoml.grpc.v1.struct_pb2.StructuredValue] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["contents",b"contents"]) -> builtins.bool: ... 
- def ClearField(self, field_name: typing_extensions.Literal["api_name",b"api_name","contents",b"contents"]) -> None: ... -global___CallRequest = CallRequest - -class CallResponse(google.protobuf.message.Message): - """Response from Call.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - CONTENTS_FIELD_NUMBER: builtins.int - @property - def contents(self) -> bentoml.grpc.v1.struct_pb2.StructuredValue: - """representation of the output value.""" - pass - def __init__(self, - *, - contents: typing.Optional[bentoml.grpc.v1.struct_pb2.StructuredValue] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["contents",b"contents"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["contents",b"contents"]) -> None: ... -global___CallResponse = CallResponse - -class CallStreamRequest(google.protobuf.message.Message): - """Request message for CallStream.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - STREAM_INPUTS_FIELD_NUMBER: builtins.int - @property - def stream_inputs(self) -> global___CallRequest: ... - def __init__(self, - *, - stream_inputs: typing.Optional[global___CallRequest] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["stream_inputs",b"stream_inputs"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["stream_inputs",b"stream_inputs"]) -> None: ... -global___CallStreamRequest = CallStreamRequest - -class CallStreamResponse(google.protobuf.message.Message): - """Response message for CallStream.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - ERROR_MESSAGE_FIELD_NUMBER: builtins.int - STREAM_OUTPUTS_FIELD_NUMBER: builtins.int - error_message: typing.Text - """message describing the error. 
Empty message signals the call was successful without error.""" - - @property - def stream_outputs(self) -> global___CallResponse: - """representation of the output value.""" - pass - def __init__(self, - *, - error_message: typing.Text = ..., - stream_outputs: typing.Optional[global___CallResponse] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["stream_outputs",b"stream_outputs"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["error_message",b"error_message","stream_outputs",b"stream_outputs"]) -> None: ... -global___CallStreamResponse = CallStreamResponse - -class BentoService(google.protobuf.service.Service, metaclass=abc.ABCMeta): - """a gRPC BentoServer.""" - DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor - @abc.abstractmethod - def ServerLive( - inst: BentoService, - rpc_controller: google.protobuf.service.RpcController, - request: global___ServerLiveRequest, - callback: typing.Optional[typing.Callable[[global___ServerLiveResponse], None]], - ) -> concurrent.futures.Future[global___ServerLiveResponse]: - """Check server liveliness.""" - pass - @abc.abstractmethod - def ServerReady( - inst: BentoService, - rpc_controller: google.protobuf.service.RpcController, - request: global___ServerReadyRequest, - callback: typing.Optional[typing.Callable[[global___ServerReadyResponse], None]], - ) -> concurrent.futures.Future[global___ServerReadyResponse]: - """Check server readiness""" - pass - @abc.abstractmethod - def Call( - inst: BentoService, - rpc_controller: google.protobuf.service.RpcController, - request: global___CallRequest, - callback: typing.Optional[typing.Callable[[global___CallResponse], None]], - ) -> concurrent.futures.Future[global___CallResponse]: - """Call handles unary API.""" - pass - @abc.abstractmethod - def CallStream( - inst: BentoService, - rpc_controller: google.protobuf.service.RpcController, - request: global___CallStreamRequest, - callback: 
typing.Optional[typing.Callable[[global___CallStreamResponse], None]], - ) -> concurrent.futures.Future[global___CallStreamResponse]: - """CallStream handles streaming API.""" - pass -class BentoService_Stub(BentoService): - """a gRPC BentoServer.""" - def __init__(self, rpc_channel: google.protobuf.service.RpcChannel) -> None: ... - DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor - def ServerLive( - inst: BentoService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___ServerLiveRequest, - callback: typing.Optional[typing.Callable[[global___ServerLiveResponse], None]] = None, - ) -> concurrent.futures.Future[global___ServerLiveResponse]: - """Check server liveliness.""" - pass - def ServerReady( - inst: BentoService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___ServerReadyRequest, - callback: typing.Optional[typing.Callable[[global___ServerReadyResponse], None]] = None, - ) -> concurrent.futures.Future[global___ServerReadyResponse]: - """Check server readiness""" - pass - def Call( - inst: BentoService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___CallRequest, - callback: typing.Optional[typing.Callable[[global___CallResponse], None]] = None, - ) -> concurrent.futures.Future[global___CallResponse]: - """Call handles unary API.""" - pass - def CallStream( - inst: BentoService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___CallStreamRequest, - callback: typing.Optional[typing.Callable[[global___CallStreamResponse], None]] = None, - ) -> concurrent.futures.Future[global___CallStreamResponse]: - """CallStream handles streaming API.""" - pass \ No newline at end of file diff --git a/bentoml/grpc/v1/service_pb2_grpc.pyi b/bentoml/grpc/v1/service_pb2_grpc.pyi deleted file mode 100644 index 659c3d587a2..00000000000 --- a/bentoml/grpc/v1/service_pb2_grpc.pyi +++ /dev/null @@ -1,69 +0,0 @@ -""" -@generated by mypy-protobuf. 
Do not edit manually! -isort:skip_file -""" -import abc -import bentoml.grpc.v1.service_pb2 -import grpc -import typing - -class BentoServiceStub: - """a gRPC BentoServer.""" - def __init__(self, channel: grpc.Channel) -> None: ... - ServerLive: grpc.UnaryUnaryMultiCallable[ - bentoml.grpc.v1.service_pb2.ServerLiveRequest, - bentoml.grpc.v1.service_pb2.ServerLiveResponse] - """Check server liveliness.""" - - ServerReady: grpc.UnaryUnaryMultiCallable[ - bentoml.grpc.v1.service_pb2.ServerReadyRequest, - bentoml.grpc.v1.service_pb2.ServerReadyResponse] - """Check server readiness""" - - Call: grpc.UnaryUnaryMultiCallable[ - bentoml.grpc.v1.service_pb2.CallRequest, - bentoml.grpc.v1.service_pb2.CallResponse] - """Call handles unary API.""" - - CallStream: grpc.StreamStreamMultiCallable[ - bentoml.grpc.v1.service_pb2.CallStreamRequest, - bentoml.grpc.v1.service_pb2.CallStreamResponse] - """CallStream handles streaming API.""" - - -class BentoServiceServicer(metaclass=abc.ABCMeta): - """a gRPC BentoServer.""" - @abc.abstractmethod - def ServerLive(self, - request: bentoml.grpc.v1.service_pb2.ServerLiveRequest, - context: grpc.ServicerContext, - ) -> bentoml.grpc.v1.service_pb2.ServerLiveResponse: - """Check server liveliness.""" - pass - - @abc.abstractmethod - def ServerReady(self, - request: bentoml.grpc.v1.service_pb2.ServerReadyRequest, - context: grpc.ServicerContext, - ) -> bentoml.grpc.v1.service_pb2.ServerReadyResponse: - """Check server readiness""" - pass - - @abc.abstractmethod - def Call(self, - request: bentoml.grpc.v1.service_pb2.CallRequest, - context: grpc.ServicerContext, - ) -> bentoml.grpc.v1.service_pb2.CallResponse: - """Call handles unary API.""" - pass - - @abc.abstractmethod - def CallStream(self, - request_iterator: typing.Iterator[bentoml.grpc.v1.service_pb2.CallStreamRequest], - context: grpc.ServicerContext, - ) -> typing.Iterator[bentoml.grpc.v1.service_pb2.CallStreamResponse]: - """CallStream handles streaming API.""" - pass - - -def 
add_BentoServiceServicer_to_server(servicer: BentoServiceServicer, server: grpc.Server) -> None: ... diff --git a/bentoml/grpc/v1/service_test.proto b/bentoml/grpc/v1/service_test.proto index 12c11e2ee6d..2674e2035da 100644 --- a/bentoml/grpc/v1/service_test.proto +++ b/bentoml/grpc/v1/service_test.proto @@ -9,23 +9,23 @@ option optimize_for = SPEED; option py_generic_services = true; // Represents a request for TestService. -message TestRequest { +message ExecuteRequest { string input = 1; } // Represents a response from TestService. -message TestResponse { +message ExecuteResponse { string output = 1; } // Use for testing interceptors per RPC call. service TestService { // Unary API - rpc Execute(TestRequest) returns (TestResponse); + rpc Execute(ExecuteRequest) returns (ExecuteResponse); // Client-streaming API - rpc ClientStreamExecute(stream TestRequest) returns (TestResponse); + rpc ClientStreamExecute(stream ExecuteRequest) returns (ExecuteResponse); // Server-streaming API - rpc ServerStreamExecute(TestRequest) returns (stream TestResponse); + rpc ServerStreamExecute(ExecuteRequest) returns (stream ExecuteResponse); // Bidirectional streaming API - rpc BidiStreamExecute(stream TestRequest) returns (stream TestResponse); + rpc BidiStreamExecute(stream ExecuteRequest) returns (stream ExecuteResponse); } diff --git a/bentoml/grpc/v1/service_test_pb2.pyi b/bentoml/grpc/v1/service_test_pb2.pyi deleted file mode 100644 index 54190501b7c..00000000000 --- a/bentoml/grpc/v1/service_test_pb2.pyi +++ /dev/null @@ -1,114 +0,0 @@ -""" -@generated by mypy-protobuf. Do not edit manually! 
-isort:skip_file -""" -import abc -import builtins -import concurrent.futures -import google.protobuf.descriptor -import google.protobuf.message -import google.protobuf.service -import typing -import typing_extensions - -DESCRIPTOR: google.protobuf.descriptor.FileDescriptor - -class TestRequest(google.protobuf.message.Message): - """Represents a request for TestService.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - INPUT_FIELD_NUMBER: builtins.int - input: typing.Text - def __init__(self, - *, - input: typing.Text = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["input",b"input"]) -> None: ... -global___TestRequest = TestRequest - -class TestResponse(google.protobuf.message.Message): - """Represents a response from TestService.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - OUTPUT_FIELD_NUMBER: builtins.int - output: typing.Text - def __init__(self, - *, - output: typing.Text = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["output",b"output"]) -> None: ... 
-global___TestResponse = TestResponse - -class TestService(google.protobuf.service.Service, metaclass=abc.ABCMeta): - """Use for testing interceptors per RPC call.""" - DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor - @abc.abstractmethod - def Execute( - inst: TestService, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]], - ) -> concurrent.futures.Future[global___TestResponse]: - """Unary API""" - pass - @abc.abstractmethod - def ClientStreamExecute( - inst: TestService, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]], - ) -> concurrent.futures.Future[global___TestResponse]: - """Client-streaming API""" - pass - @abc.abstractmethod - def ServerStreamExecute( - inst: TestService, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]], - ) -> concurrent.futures.Future[global___TestResponse]: - """Server-streaming API""" - pass - @abc.abstractmethod - def BidiStreamExecute( - inst: TestService, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]], - ) -> concurrent.futures.Future[global___TestResponse]: - """Bidirectional streaming API""" - pass -class TestService_Stub(TestService): - """Use for testing interceptors per RPC call.""" - def __init__(self, rpc_channel: google.protobuf.service.RpcChannel) -> None: ... 
- DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor - def Execute( - inst: TestService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]] = None, - ) -> concurrent.futures.Future[global___TestResponse]: - """Unary API""" - pass - def ClientStreamExecute( - inst: TestService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]] = None, - ) -> concurrent.futures.Future[global___TestResponse]: - """Client-streaming API""" - pass - def ServerStreamExecute( - inst: TestService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]] = None, - ) -> concurrent.futures.Future[global___TestResponse]: - """Server-streaming API""" - pass - def BidiStreamExecute( - inst: TestService_Stub, - rpc_controller: google.protobuf.service.RpcController, - request: global___TestRequest, - callback: typing.Optional[typing.Callable[[global___TestResponse], None]] = None, - ) -> concurrent.futures.Future[global___TestResponse]: - """Bidirectional streaming API""" - pass \ No newline at end of file diff --git a/bentoml/grpc/v1/service_test_pb2_grpc.pyi b/bentoml/grpc/v1/service_test_pb2_grpc.pyi deleted file mode 100644 index d2249871b34..00000000000 --- a/bentoml/grpc/v1/service_test_pb2_grpc.pyi +++ /dev/null @@ -1,69 +0,0 @@ -""" -@generated by mypy-protobuf. Do not edit manually! -isort:skip_file -""" -import abc -import bentoml.grpc.v1.service_test_pb2 -import grpc -import typing - -class TestServiceStub: - """Use for testing interceptors per RPC call.""" - def __init__(self, channel: grpc.Channel) -> None: ... 
- Execute: grpc.UnaryUnaryMultiCallable[ - bentoml.grpc.v1.service_test_pb2.TestRequest, - bentoml.grpc.v1.service_test_pb2.TestResponse] - """Unary API""" - - ClientStreamExecute: grpc.StreamUnaryMultiCallable[ - bentoml.grpc.v1.service_test_pb2.TestRequest, - bentoml.grpc.v1.service_test_pb2.TestResponse] - """Client-streaming API""" - - ServerStreamExecute: grpc.UnaryStreamMultiCallable[ - bentoml.grpc.v1.service_test_pb2.TestRequest, - bentoml.grpc.v1.service_test_pb2.TestResponse] - """Server-streaming API""" - - BidiStreamExecute: grpc.StreamStreamMultiCallable[ - bentoml.grpc.v1.service_test_pb2.TestRequest, - bentoml.grpc.v1.service_test_pb2.TestResponse] - """Bidirectional streaming API""" - - -class TestServiceServicer(metaclass=abc.ABCMeta): - """Use for testing interceptors per RPC call.""" - @abc.abstractmethod - def Execute(self, - request: bentoml.grpc.v1.service_test_pb2.TestRequest, - context: grpc.ServicerContext, - ) -> bentoml.grpc.v1.service_test_pb2.TestResponse: - """Unary API""" - pass - - @abc.abstractmethod - def ClientStreamExecute(self, - request_iterator: typing.Iterator[bentoml.grpc.v1.service_test_pb2.TestRequest], - context: grpc.ServicerContext, - ) -> bentoml.grpc.v1.service_test_pb2.TestResponse: - """Client-streaming API""" - pass - - @abc.abstractmethod - def ServerStreamExecute(self, - request: bentoml.grpc.v1.service_test_pb2.TestRequest, - context: grpc.ServicerContext, - ) -> typing.Iterator[bentoml.grpc.v1.service_test_pb2.TestResponse]: - """Server-streaming API""" - pass - - @abc.abstractmethod - def BidiStreamExecute(self, - request_iterator: typing.Iterator[bentoml.grpc.v1.service_test_pb2.TestRequest], - context: grpc.ServicerContext, - ) -> typing.Iterator[bentoml.grpc.v1.service_test_pb2.TestResponse]: - """Bidirectional streaming API""" - pass - - -def add_TestServiceServicer_to_server(servicer: TestServiceServicer, server: grpc.Server) -> None: ... 
diff --git a/bentoml/grpc/v1/struct.proto b/bentoml/grpc/v1/struct.proto index 225cc2aab5a..95e7330d41f 100644 --- a/bentoml/grpc/v1/struct.proto +++ b/bentoml/grpc/v1/struct.proto @@ -2,7 +2,7 @@ syntax = "proto3"; package bentoml.grpc.v1; -import "bentoml/grpc/v1/types.proto"; +import "google/protobuf/any.proto"; // cc_enable_arenas pre-allocate memory for given message to improve speed. (C++ only) option cc_enable_arenas = true; @@ -13,101 +13,78 @@ option java_outer_classname = "StructProto"; option java_package = "com.bentoml.grpc.v1"; option objc_class_prefix = "STCT"; -// Protocol buffer representing contents of a given rpc call. -// Partial verbatim sources of tensorflow/core/framework/tensor.proto. -// -// One key different from the original is that since we are using proto3, -// repeated values are packed by default. -message Value { - // dtype represents the data type of given contents. This would help us - // to use the correct serialization protocol on the server side if needed. - bentoml.grpc.v1.DataType dtype = 1; - - // Only one of the representations below is set, one of "contents" and - // the "xxx_value" attributes. We are not using oneof because as oneofs cannot - // contain repeated fields it would require another extra set of messages. - - // Serialized raw contents. This representation - // can be used for all tensor types. The purpose of this representation is to +// Representing contents of a given rpc call. +message ContentsProto { + // Serialized bytes contents. + // The purpose of this representation is to // reduce serialization overhead during RPC call by avoiding serialization of // many repeated small items. - bytes contents = 2; + repeated bytes bytes_contents = 1; - // Type specific representations that make it easy to create tensor protos in - // all languages. Only the representation corresponding to "dtype" can - // be set. The values hold the flattened representation of the inputs in - // row major order. 
+ // Type specific representations that make it easy to create protos in + // all languages. The values hold the flattened representation of the inputs in row major order. + // + // Note that for quantized types, one should use ``bytes_contents`` and provide dtype at IO descriptor level. // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll // have some pointless zero padding for each value here. - repeated int32 half_value = 11; + repeated int32 half_contents = 10 [packed = true]; // DT_FLOAT. - repeated float float_value = 3; + repeated float float_contents = 2 [packed = true]; // DT_DOUBLE. - repeated double double_value = 4; + repeated double double_contents = 3 [packed = true]; // DT_INT32, DT_INT16, DT_UINT16, DT_INT8, DT_UINT8. - repeated int32 int_value = 5; + repeated int32 int_contents = 4 [packed = true]; // DT_STRING - repeated bytes string_value = 6; + repeated bytes string_contents = 5; - // DT_COMPLEX64. scomplex_value(2*i) and scomplex_value(2*i+1) are real + // DT_COMPLEX64. scomplex_contents(2*i) and scomplex_contents(2*i+1) are real // and imaginary parts of i-th single precision complex. - repeated float scomplex_value = 7; + repeated float scomplex_contents = 6 [packed = true]; // DT_INT64 - repeated int64 int64_value = 8; + repeated int64 int64_contents = 7 [packed = true]; // DT_BOOL - repeated bool bool_value = 9; + repeated bool bool_contents = 8 [packed = true]; - // DT_COMPLEX128. dcomplex_value(2*i) and dcomplex_value(2*i+1) are real + // DT_COMPLEX128. dcomplex_contents(2*i) and dcomplex_contents(2*i+1) are real // and imaginary parts of i-th double precision complex. 
- repeated double dcomplex_value = 10; + repeated double dcomplex_contents = 9 [packed = true]; // DT_UINT32 - repeated uint32 uint32_value = 12; + repeated uint32 uint32_contents = 11 [packed = true]; // DT_UINT64 - repeated uint64 uint64_value = 13; -} + repeated uint64 uint64_contents = 12 [packed = true]; -// StructuredValue represents a dynamic typed value representing various -// data structures that are inspired by Python's data structures. -// Partial verbatim sources of tensorflow/core/protobuf/struct.proto. -// -// [16, 100] will be reserved for future use. Users are free to use from [3, 15] -message StructuredValue { - oneof kind { - // Represents a None value. - NoneValue none_value = 1; + // This section represents a dynamic typed value representing various + // data structures that are inspired by Python's data structures. - // Represents any value that can be represented as a payload contents. - Value any_value = 2; + // Represents a None value. + repeated NoneValue none_contents = 50; - // Represents a list of `Value`. - ListValue list_value = 51; + // Represents a list of `ContentsProto`. + repeated ListValue list_contents = 51; - // Represents a tuple of `Value`. - TupleValue tuple_value = 52; + // Represents a tuple of `ContentsProto`. + repeated TupleValue tuple_contents = 52; - // Represents a dict `Value`. - DictValue dict_value = 53; + // Represents a dict `ContentsProto`. + repeated DictValue dict_contents = 53; - // Represents Python's namedtuple. - NamedTupleValue namedtuple_value = 54; - } + // Represents Python's namedtuple. + repeated NamedTupleValue namedtuple_contents = 54; - // dataframes_columns and dataframe_indices are used - // in conjunction with contents to represent a dataframe. - repeated string dataframe_columns = 101; - repeated string dataframe_indices = 102; + // To represent any type contents. + repeated google.protobuf.Any any_contents = 55; // We want to reserve these for future uses. 
- reserved 16 to 50, 55 to 100; + reserved 56 to 100; } // represents None type. @@ -115,28 +92,31 @@ message NoneValue {} // Represents a Python list. message ListValue { - repeated StructuredValue values = 1; + repeated ContentsProto values = 1; } // Represents a Python tuple. message TupleValue { - repeated StructuredValue values = 1; + repeated ContentsProto values = 1; } -// Represents a Python dict with key and value. -// This key is analogous with Value.string_value +// Represents a Python dict with key and value. +// This key is analogous with ContentsProto.string_contents. +// Note that map<> does not preserve order from given message. message DictValue { - map fields = 1; + map fields = 1; } // Represents a (key, value) pair. message PairValue { string key = 1; - StructuredValue value = 2; + ContentsProto value = 2; } // Represents a namedtuple. +// One key different from DictValue is that NamedTupleValue reserved +// the order of key value map, whereas map doesn't. message NamedTupleValue { - string type_name = 1; + string name = 1; repeated PairValue values = 2; } diff --git a/bentoml/grpc/v1/struct_pb2.pyi b/bentoml/grpc/v1/struct_pb2.pyi deleted file mode 100644 index ce030f269be..00000000000 --- a/bentoml/grpc/v1/struct_pb2.pyi +++ /dev/null @@ -1,284 +0,0 @@ -""" -@generated by mypy-protobuf. Do not edit manually! -isort:skip_file -""" -import bentoml.grpc.v1.types_pb2 -import builtins -import google.protobuf.descriptor -import google.protobuf.internal.containers -import google.protobuf.message -import typing -import typing_extensions - -DESCRIPTOR: google.protobuf.descriptor.FileDescriptor - -class Value(google.protobuf.message.Message): - """Protocol buffer representing contents of a given rpc call. - Partial verbatim sources of tensorflow/core/framework/tensor.proto. - - One key different from the original is that since we are using proto3, - repeated values are packed by default. 
- """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - DTYPE_FIELD_NUMBER: builtins.int - CONTENTS_FIELD_NUMBER: builtins.int - HALF_VALUE_FIELD_NUMBER: builtins.int - FLOAT_VALUE_FIELD_NUMBER: builtins.int - DOUBLE_VALUE_FIELD_NUMBER: builtins.int - INT_VALUE_FIELD_NUMBER: builtins.int - STRING_VALUE_FIELD_NUMBER: builtins.int - SCOMPLEX_VALUE_FIELD_NUMBER: builtins.int - INT64_VALUE_FIELD_NUMBER: builtins.int - BOOL_VALUE_FIELD_NUMBER: builtins.int - DCOMPLEX_VALUE_FIELD_NUMBER: builtins.int - UINT32_VALUE_FIELD_NUMBER: builtins.int - UINT64_VALUE_FIELD_NUMBER: builtins.int - dtype: bentoml.grpc.v1.types_pb2.DataType.ValueType - """dtype represents the data type of given contents. This would help us - to use the correct serialization protocol on the server side if needed. - """ - - contents: builtins.bytes - """Only one of the representations below is set, one of "contents" and - the "xxx_value" attributes. We are not using oneof because as oneofs cannot - contain repeated fields it would require another extra set of messages. - - Serialized raw contents. This representation - can be used for all tensor types. The purpose of this representation is to - reduce serialization overhead during RPC call by avoiding serialization of - many repeated small items. - """ - - @property - def half_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: - """Type specific representations that make it easy to create tensor protos in - all languages. Only the representation corresponding to "dtype" can - be set. The values hold the flattened representation of the inputs in - row major order. - - DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll - have some pointless zero padding for each value here. 
- """ - pass - @property - def float_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: - """DT_FLOAT.""" - pass - @property - def double_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: - """DT_DOUBLE.""" - pass - @property - def int_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: - """DT_INT32, DT_INT16, DT_UINT16, DT_INT8, DT_UINT8.""" - pass - @property - def string_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bytes]: - """DT_STRING""" - pass - @property - def scomplex_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: - """DT_COMPLEX64. scomplex_value(2*i) and scomplex_value(2*i+1) are real - and imaginary parts of i-th single precision complex. - """ - pass - @property - def int64_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: - """DT_INT64""" - pass - @property - def bool_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bool]: - """DT_BOOL""" - pass - @property - def dcomplex_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: - """DT_COMPLEX128. dcomplex_value(2*i) and dcomplex_value(2*i+1) are real - and imaginary parts of i-th double precision complex. 
- """ - pass - @property - def uint32_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: - """DT_UINT32""" - pass - @property - def uint64_value(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: - """DT_UINT64""" - pass - def __init__(self, - *, - dtype: bentoml.grpc.v1.types_pb2.DataType.ValueType = ..., - contents: builtins.bytes = ..., - half_value: typing.Optional[typing.Iterable[builtins.int]] = ..., - float_value: typing.Optional[typing.Iterable[builtins.float]] = ..., - double_value: typing.Optional[typing.Iterable[builtins.float]] = ..., - int_value: typing.Optional[typing.Iterable[builtins.int]] = ..., - string_value: typing.Optional[typing.Iterable[builtins.bytes]] = ..., - scomplex_value: typing.Optional[typing.Iterable[builtins.float]] = ..., - int64_value: typing.Optional[typing.Iterable[builtins.int]] = ..., - bool_value: typing.Optional[typing.Iterable[builtins.bool]] = ..., - dcomplex_value: typing.Optional[typing.Iterable[builtins.float]] = ..., - uint32_value: typing.Optional[typing.Iterable[builtins.int]] = ..., - uint64_value: typing.Optional[typing.Iterable[builtins.int]] = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["bool_value",b"bool_value","contents",b"contents","dcomplex_value",b"dcomplex_value","double_value",b"double_value","dtype",b"dtype","float_value",b"float_value","half_value",b"half_value","int64_value",b"int64_value","int_value",b"int_value","scomplex_value",b"scomplex_value","string_value",b"string_value","uint32_value",b"uint32_value","uint64_value",b"uint64_value"]) -> None: ... -global___Value = Value - -class StructuredValue(google.protobuf.message.Message): - """StructuredValue represents a dynamic typed value representing various - data structures that are inspired by Python's data structures. - Partial verbatim sources of tensorflow/core/protobuf/struct.proto. 
- - [16, 100] will be reserved for future use. Users are free to use from [3, 15] - """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - NONE_VALUE_FIELD_NUMBER: builtins.int - ANY_VALUE_FIELD_NUMBER: builtins.int - LIST_VALUE_FIELD_NUMBER: builtins.int - TUPLE_VALUE_FIELD_NUMBER: builtins.int - DICT_VALUE_FIELD_NUMBER: builtins.int - NAMEDTUPLE_VALUE_FIELD_NUMBER: builtins.int - DATAFRAME_COLUMNS_FIELD_NUMBER: builtins.int - DATAFRAME_INDICES_FIELD_NUMBER: builtins.int - @property - def none_value(self) -> global___NoneValue: - """Represents a None value.""" - pass - @property - def any_value(self) -> global___Value: - """Represents any value that can be represented as a payload contents.""" - pass - @property - def list_value(self) -> global___ListValue: - """Represents a list of `Value`.""" - pass - @property - def tuple_value(self) -> global___TupleValue: - """Represents a tuple of `Value`.""" - pass - @property - def dict_value(self) -> global___DictValue: - """Represents a dict `Value`.""" - pass - @property - def namedtuple_value(self) -> global___NamedTupleValue: - """Represents Python's namedtuple.""" - pass - @property - def dataframe_columns(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: - """dataframes_columns and dataframe_indices are used - in conjunction with contents to represent a dataframe. - """ - pass - @property - def dataframe_indices(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: ... 
- def __init__(self, - *, - none_value: typing.Optional[global___NoneValue] = ..., - any_value: typing.Optional[global___Value] = ..., - list_value: typing.Optional[global___ListValue] = ..., - tuple_value: typing.Optional[global___TupleValue] = ..., - dict_value: typing.Optional[global___DictValue] = ..., - namedtuple_value: typing.Optional[global___NamedTupleValue] = ..., - dataframe_columns: typing.Optional[typing.Iterable[typing.Text]] = ..., - dataframe_indices: typing.Optional[typing.Iterable[typing.Text]] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["any_value",b"any_value","dict_value",b"dict_value","kind",b"kind","list_value",b"list_value","namedtuple_value",b"namedtuple_value","none_value",b"none_value","tuple_value",b"tuple_value"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["any_value",b"any_value","dataframe_columns",b"dataframe_columns","dataframe_indices",b"dataframe_indices","dict_value",b"dict_value","kind",b"kind","list_value",b"list_value","namedtuple_value",b"namedtuple_value","none_value",b"none_value","tuple_value",b"tuple_value"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["kind",b"kind"]) -> typing.Optional[typing_extensions.Literal["none_value","any_value","list_value","tuple_value","dict_value","namedtuple_value"]]: ... -global___StructuredValue = StructuredValue - -class NoneValue(google.protobuf.message.Message): - """represents None type.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - def __init__(self, - ) -> None: ... -global___NoneValue = NoneValue - -class ListValue(google.protobuf.message.Message): - """Represents a Python list.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - VALUES_FIELD_NUMBER: builtins.int - @property - def values(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___StructuredValue]: ... 
- def __init__(self, - *, - values: typing.Optional[typing.Iterable[global___StructuredValue]] = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["values",b"values"]) -> None: ... -global___ListValue = ListValue - -class TupleValue(google.protobuf.message.Message): - """Represents a Python tuple.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - VALUES_FIELD_NUMBER: builtins.int - @property - def values(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___StructuredValue]: ... - def __init__(self, - *, - values: typing.Optional[typing.Iterable[global___StructuredValue]] = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["values",b"values"]) -> None: ... -global___TupleValue = TupleValue - -class DictValue(google.protobuf.message.Message): - """Represents a Python dict with key and value. - This key is analogous with Value.string_value - """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - class FieldsEntry(google.protobuf.message.Message): - DESCRIPTOR: google.protobuf.descriptor.Descriptor - KEY_FIELD_NUMBER: builtins.int - VALUE_FIELD_NUMBER: builtins.int - key: typing.Text - @property - def value(self) -> global___StructuredValue: ... - def __init__(self, - *, - key: typing.Text = ..., - value: typing.Optional[global___StructuredValue] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["value",b"value"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["key",b"key","value",b"value"]) -> None: ... - - FIELDS_FIELD_NUMBER: builtins.int - @property - def fields(self) -> google.protobuf.internal.containers.MessageMap[typing.Text, global___StructuredValue]: ... - def __init__(self, - *, - fields: typing.Optional[typing.Mapping[typing.Text, global___StructuredValue]] = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["fields",b"fields"]) -> None: ... 
-global___DictValue = DictValue - -class PairValue(google.protobuf.message.Message): - """Represents a (key, value) pair.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - KEY_FIELD_NUMBER: builtins.int - VALUE_FIELD_NUMBER: builtins.int - key: typing.Text - @property - def value(self) -> global___StructuredValue: ... - def __init__(self, - *, - key: typing.Text = ..., - value: typing.Optional[global___StructuredValue] = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["value",b"value"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["key",b"key","value",b"value"]) -> None: ... -global___PairValue = PairValue - -class NamedTupleValue(google.protobuf.message.Message): - """Represents a namedtuple.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - TYPE_NAME_FIELD_NUMBER: builtins.int - VALUES_FIELD_NUMBER: builtins.int - type_name: typing.Text - @property - def values(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___PairValue]: ... - def __init__(self, - *, - type_name: typing.Text = ..., - values: typing.Optional[typing.Iterable[global___PairValue]] = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["type_name",b"type_name","values",b"values"]) -> None: ... -global___NamedTupleValue = NamedTupleValue diff --git a/bentoml/grpc/v1/types.proto b/bentoml/grpc/v1/types.proto deleted file mode 100644 index d88ae4b50b0..00000000000 --- a/bentoml/grpc/v1/types.proto +++ /dev/null @@ -1,61 +0,0 @@ -syntax = "proto3"; - -package bentoml.grpc.v1; - -// cc_enable_arenas pre-allocate memory for given message to improve speed. 
(C++ only) -option cc_enable_arenas = true; -option cc_generic_services = true; -option go_package = "github.com/bentoml/grpc/v1"; -option java_multiple_files = true; -option java_outer_classname = "TypesProto"; -option java_package = "com.bentoml.grpc.v1"; -option objc_class_prefix = "TYP"; - -// partial verbatim from tensorflow/core/framework/types.proto -enum DataType { - // Not a legal value for DataType. Used to indicate a DataType field - // has not been set. - DT_UNSPECIFIED = 0; - - // Data types that all computation devices are expected to be - // capable to support. - DT_FLOAT = 1; - DT_DOUBLE = 2; - DT_INT32 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_INT8 = 6; - DT_STRING = 7; - - // Single-precision complex - DT_COMPLEX64 = 8; - DT_INT64 = 9; - DT_BOOL = 10; - - // Quantized int8 - DT_QINT8 = 11; - - // Quantized uint8 - DT_QUINT8 = 12; - - // Quantized int32 - DT_QINT32 = 13; - // Float32 truncated to 16 bits. Only for cast ops. - DT_BFLOAT16 = 14; - - // Quantized int16 - DT_QINT16 = 15; - // Quantized uint16 - DT_QUINT16 = 16; - - DT_UINT16 = 17; - - // Double-precision complex - DT_COMPLEX128 = 18; - - // Represents half data type (32 -> 16 bits). - DT_HALF = 19; - - DT_UINT32 = 20; - DT_UINT64 = 21; -} diff --git a/bentoml/grpc/v1/types_pb2.pyi b/bentoml/grpc/v1/types_pb2.pyi deleted file mode 100644 index c553881ae36..00000000000 --- a/bentoml/grpc/v1/types_pb2.pyi +++ /dev/null @@ -1,119 +0,0 @@ -""" -@generated by mypy-protobuf. Do not edit manually! 
-isort:skip_file -""" -import builtins -import google.protobuf.descriptor -import google.protobuf.internal.enum_type_wrapper -import typing -import typing_extensions - -DESCRIPTOR: google.protobuf.descriptor.FileDescriptor - -class _DataType: - ValueType = typing.NewType('ValueType', builtins.int) - V: typing_extensions.TypeAlias = ValueType -class _DataTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_DataType.ValueType], builtins.type): - DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor - DT_UNSPECIFIED: _DataType.ValueType # 0 - """Not a legal value for DataType. Used to indicate a DataType field - has not been set. - """ - - DT_FLOAT: _DataType.ValueType # 1 - """Data types that all computation devices are expected to be - capable to support. - """ - - DT_DOUBLE: _DataType.ValueType # 2 - DT_INT32: _DataType.ValueType # 3 - DT_UINT8: _DataType.ValueType # 4 - DT_INT16: _DataType.ValueType # 5 - DT_INT8: _DataType.ValueType # 6 - DT_STRING: _DataType.ValueType # 7 - DT_COMPLEX64: _DataType.ValueType # 8 - """Single-precision complex""" - - DT_INT64: _DataType.ValueType # 9 - DT_BOOL: _DataType.ValueType # 10 - DT_QINT8: _DataType.ValueType # 11 - """Quantized int8""" - - DT_QUINT8: _DataType.ValueType # 12 - """Quantized uint8""" - - DT_QINT32: _DataType.ValueType # 13 - """Quantized int32""" - - DT_BFLOAT16: _DataType.ValueType # 14 - """Float32 truncated to 16 bits. 
Only for cast ops.""" - - DT_QINT16: _DataType.ValueType # 15 - """Quantized int16""" - - DT_QUINT16: _DataType.ValueType # 16 - """Quantized uint16""" - - DT_UINT16: _DataType.ValueType # 17 - DT_COMPLEX128: _DataType.ValueType # 18 - """Double-precision complex""" - - DT_HALF: _DataType.ValueType # 19 - """Represents half data type (32 -> 16 bits).""" - - DT_UINT32: _DataType.ValueType # 20 - DT_UINT64: _DataType.ValueType # 21 -class DataType(_DataType, metaclass=_DataTypeEnumTypeWrapper): - """partial verbatim from tensorflow/core/framework/types.proto""" - pass - -DT_UNSPECIFIED: DataType.ValueType # 0 -"""Not a legal value for DataType. Used to indicate a DataType field -has not been set. -""" - -DT_FLOAT: DataType.ValueType # 1 -"""Data types that all computation devices are expected to be -capable to support. -""" - -DT_DOUBLE: DataType.ValueType # 2 -DT_INT32: DataType.ValueType # 3 -DT_UINT8: DataType.ValueType # 4 -DT_INT16: DataType.ValueType # 5 -DT_INT8: DataType.ValueType # 6 -DT_STRING: DataType.ValueType # 7 -DT_COMPLEX64: DataType.ValueType # 8 -"""Single-precision complex""" - -DT_INT64: DataType.ValueType # 9 -DT_BOOL: DataType.ValueType # 10 -DT_QINT8: DataType.ValueType # 11 -"""Quantized int8""" - -DT_QUINT8: DataType.ValueType # 12 -"""Quantized uint8""" - -DT_QINT32: DataType.ValueType # 13 -"""Quantized int32""" - -DT_BFLOAT16: DataType.ValueType # 14 -"""Float32 truncated to 16 bits. 
Only for cast ops.""" - -DT_QINT16: DataType.ValueType # 15 -"""Quantized int16""" - -DT_QUINT16: DataType.ValueType # 16 -"""Quantized uint16""" - -DT_UINT16: DataType.ValueType # 17 -DT_COMPLEX128: DataType.ValueType # 18 -"""Double-precision complex""" - -DT_HALF: DataType.ValueType # 19 -"""Represents half data type (32 -> 16 bits).""" - -DT_UINT32: DataType.ValueType # 20 -DT_UINT64: DataType.ValueType # 21 -global___DataType = DataType - diff --git a/bentoml/testing/server.py b/bentoml/testing/server.py index 92ead466355..09a1a704221 100644 --- a/bentoml/testing/server.py +++ b/bentoml/testing/server.py @@ -331,7 +331,7 @@ def run_bento_server_distributed( cmd = [ sys.executable, "-m", - "bentoml._internal.server.cli.http.api_server", + "bentoml._internal.server.cli.rest_api_server", str(bento_tag), "--bind", bind, diff --git a/pyproject.toml b/pyproject.toml index 0946d0417a8..4db2410e9fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,7 @@ requires = [ # sync with setup.cfg until we discard non-pep-517/518 "grpcio-tools==1.47.0", + "googleapis-common-protos", "mypy-protobuf==3.2.0", "protobuf==3.19.4", "setuptools>=59.0", diff --git a/setup.cfg b/setup.cfg index c240d561fd1..62f706390ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -73,6 +73,7 @@ python_requires = >=3.7 include_package_data = True setup_requires = grpcio-tools==1.47.0 + googleapis-common-protos mypy-protobuf==3.2.0 # generating stubs for type checking protobuf==3.19.4 setuptools>=59.0 @@ -99,12 +100,14 @@ grpc = grpcio grpcio-health-checking grpcio-reflection + grpcio-status tracing = opentelemetry-exporter-jaeger opentelemetry-exporter-zipkin [options.package_data] * = *.pyi +bentoml/grpc/* = *.proto bentoml = py.typed [global] diff --git a/setup.py b/setup.py index ce6f6604420..48b824a67a7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,9 @@ import os import sys +import typing as t import subprocess +import importlib.util from pathlib import Path from setuptools import setup @@ 
-12,13 +14,28 @@ _VERSION_MAP = { "v1": { ("service.proto", "service_test.proto"): {"grpc_out": True}, - ("types.proto", "struct.proto"): {}, + ("struct.proto",): {}, }, } +def source_locations(pkg: str) -> str: + spec = importlib.util.find_spec(pkg) + if not spec: + raise RuntimeError( + f"{pkg} not found. Make sure to add to both pyproject.toml and setup.cfg." + ) + (location,) = spec.submodule_search_locations # type: ignore (unfinished type) + return t.cast(str, location) + + def get_args(parent_path: str, *paths: str, grpc_out: bool = False) -> list[str]: - args = ["-I.", "--python_out=.", "--mypy_out=."] + args = [ + "-I.", + f"-I{os.path.dirname(source_locations('google'))}", # include common googleapis stubs. + "--python_out=.", + "--mypy_out=.", + ] if grpc_out: args.extend(["--grpc_python_out=.", "--mypy_grpc_out=."]) args.extend([os.path.join(parent_path, path) for path in paths])