From ccf60823f3ad10de7efcbf752e7443be47b2518d Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Fri, 25 Nov 2022 02:37:38 -0800 Subject: [PATCH 01/19] feat(grpc): gRPC client Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Ubuntu <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- grpc-client/python/client.py | 71 +- pyproject.toml | 3 +- src/bentoml/_internal/server/grpc_app.py | 8 +- src/bentoml/client.py | 181 ----- src/bentoml/client/__init__.py | 291 ++++++++ src/bentoml/client/_grpc.py | 666 ++++++++++++++++++ src/bentoml/client/_http.py | 102 +++ src/bentoml/grpc/types.py | 13 + src/bentoml/testing/server.py | 6 +- .../bento_server_grpc/tests/test_metrics.py | 37 +- 10 files changed, 1141 insertions(+), 237 deletions(-) delete mode 100644 src/bentoml/client.py create mode 100644 src/bentoml/client/__init__.py create mode 100644 src/bentoml/client/_grpc.py create mode 100644 src/bentoml/client/_http.py diff --git a/grpc-client/python/client.py b/grpc-client/python/client.py index 7275b7f9253..1ea795403c5 100644 --- a/grpc-client/python/client.py +++ b/grpc-client/python/client.py @@ -1,32 +1,59 @@ +from __future__ import annotations + import asyncio +import logging + +import numpy as np + +from bentoml.client import Client + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from bentoml.client import GrpcClient + -import grpc +logger = logging.getLogger(__name__) -from bentoml.grpc.utils import import_generated_stubs -pb, services = import_generated_stubs() +async def arun(client: GrpcClient): + res = await client.async_classify(np.array([[5.9, 3, 5.1, 1.8]])) + logger.info("Result from 'client.async_classify':\n%s", res) + res = await client.async_call("classify", np.array([[5.9, 3, 5.1, 1.8]])) + logger.info("Result from 'client.async_call':\n%s", res) -async def run(): - async with 
grpc.aio.insecure_channel("localhost:3000") as channel: - stub = services.BentoServiceStub(channel) - req = await stub.Call( - request=pb.Request( - api_name="classify", - ndarray=pb.NDArray( - dtype=pb.NDArray.DTYPE_FLOAT, - shape=(1, 4), - float_values=[5.9, 3, 5.1, 1.8], - ), - ) + +def run(client: GrpcClient): + res = client.classify(np.array([[5.9, 3, 5.1, 1.8]])) + logger.info("Result from 'client.classify':\n%s", res) + res = client.call("classify", np.array([[5.9, 3, 5.1, 1.8]])) + logger.info("Result from 'client.call(bentoml_api_name='classify')':\n%s", res) + + with client.service(): + res = client.Call( + api_name="classify", + ndarray={"float_values": [5.9, 3, 5.1, 1.8], "shape": [1, 4], "dtype": 1}, ) - print(req) + logger.info("Result from 'client.Call' in a context manager:\n%s", res) if __name__ == "__main__": - loop = asyncio.new_event_loop() - try: - loop.run_until_complete(run()) - finally: - loop.close() - assert loop.is_closed() + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-rwa", "--run-with-async", action="store_true", default=False) + parser.add_argument("--grpc", action="store_true", default=False) + args = parser.parse_args() + + c: GrpcClient = Client.from_url("localhost:3000", grpc=args.grpc) + + if args.run_with_async: + loop = asyncio.new_event_loop() + try: + loop.run_until_complete(arun(c)) + finally: + loop.close() + assert loop.is_closed() + else: + run(c) diff --git a/pyproject.toml b/pyproject.toml index fe09cb5247a..1ea9a5eb0b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,7 @@ include = [ # include bentoml packages "bentoml", "bentoml.grpc*", + "bentoml.client*", "bentoml.testing*", "bentoml._internal*", # include bentoml_cli packages @@ -183,7 +184,7 @@ omit = [ show_missing = true precision = 2 omit = [ - 'src/bentoml/__main__.py', + "src/bentoml/__main__.py", "src/bentoml/io.py", "src/bentoml/serve.py", "src/bentoml/start.py", diff --git 
a/src/bentoml/_internal/server/grpc_app.py b/src/bentoml/_internal/server/grpc_app.py index 3da154c5ee4..6d66c521bff 100644 --- a/src/bentoml/_internal/server/grpc_app.py +++ b/src/bentoml/_internal/server/grpc_app.py @@ -59,7 +59,7 @@ ) -def _load_from_file(p: str) -> bytes: +def load_from_file(p: str) -> bytes: rp = resolve_user_filepath(p, ctx=None) with open(rp, "rb") as f: return f.read() @@ -244,12 +244,12 @@ def configure_port(self, addr: str): ), "'ssl_keyfile' is required when 'ssl_certfile' is provided." if self.ssl_ca_certs is not None: client_auth = True - ca_cert = _load_from_file(self.ssl_ca_certs) + ca_cert = load_from_file(self.ssl_ca_certs) server_credentials = grpc.ssl_server_credentials( ( ( - _load_from_file(self.ssl_keyfile), - _load_from_file(self.ssl_certfile), + load_from_file(self.ssl_keyfile), + load_from_file(self.ssl_certfile), ), ), root_certificates=ca_cert, diff --git a/src/bentoml/client.py b/src/bentoml/client.py deleted file mode 100644 index a3687dcec31..00000000000 --- a/src/bentoml/client.py +++ /dev/null @@ -1,181 +0,0 @@ -from __future__ import annotations - -import json -import typing as t -import asyncio -import functools -from abc import ABC -from abc import abstractmethod -from http.client import HTTPConnection -from urllib.parse import urlparse - -import aiohttp -import starlette.requests -import starlette.datastructures - -import bentoml -from bentoml import Service - -from .exceptions import RemoteException -from .exceptions import BentoMLException -from ._internal.service.inference_api import InferenceAPI - - -class Client(ABC): - server_url: str - - def __init__(self, svc: Service, server_url: str): - self._svc = svc - self.server_url = server_url - if len(self._svc.apis) == 0: - raise BentoMLException("No APIs were found when constructing client") - - for name, api in self._svc.apis.items(): - if not hasattr(self, name): - setattr( - self, name, functools.partial(self._sync_call, _bentoml_api=api) - ) - - for name, 
api in self._svc.apis.items(): - if not hasattr(self, f"async_{name}"): - setattr( - self, - f"async_{name}", - functools.partial(self._call, _bentoml_api=api), - ) - - def call(self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any) -> t.Any: - return asyncio.run(self.async_call(bentoml_api_name, inp, **kwargs)) - - async def async_call( - self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any - ) -> t.Any: - return await self._call( - inp, _bentoml_api=self._svc.apis[bentoml_api_name], **kwargs - ) - - def _sync_call( - self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any - ): - return asyncio.run(self._call(inp, _bentoml_api=_bentoml_api, **kwargs)) - - @abstractmethod - async def _call( - self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any - ) -> t.Any: - raise NotImplementedError - - @staticmethod - def wait_until_server_is_ready(host: str, port: int, timeout: int) -> None: - import time - - time_end = time.time() + timeout - status = None - while status != 200: - try: - conn = HTTPConnection(host, port) - conn.request("GET", "/readyz") - status = conn.getresponse().status - except ConnectionRefusedError: - print("Connection refused. 
Trying again...") - if time.time() > time_end: - raise TimeoutError("The server took too long to get ready") - time.sleep(1) - - @staticmethod - def from_url(server_url: str) -> Client: - server_url = server_url if "://" in server_url else "http://" + server_url - url_parts = urlparse(server_url) - - # TODO: SSL and grpc support - conn = HTTPConnection(url_parts.netloc) - conn.request("GET", url_parts.path + "/docs.json") - resp = conn.getresponse() - if resp.status != 200: - raise RemoteException( - f"Failed to get OpenAPI schema from the server: {resp.status} {resp.reason}:\n{resp.read()}" - ) - openapi_spec = json.load(resp) - conn.close() - - dummy_service = Service(openapi_spec["info"]["title"]) - - for route, spec in openapi_spec["paths"].items(): - for meth_spec in spec.values(): - if "Service APIs" in meth_spec["tags"]: - if "x-bentoml-io-descriptor" not in meth_spec["requestBody"]: - # TODO: better message stating min version for from_url to work - raise BentoMLException( - f"Malformed BentoML spec received from BentoML server {server_url}" - ) - if "x-bentoml-io-descriptor" not in meth_spec["responses"]["200"]: - raise BentoMLException( - f"Malformed BentoML spec received from BentoML server {server_url}" - ) - if "x-bentoml-name" not in meth_spec: - raise BentoMLException( - f"Malformed BentoML spec received from BentoML server {server_url}" - ) - try: - api = InferenceAPI( - None, - bentoml.io.from_spec( - meth_spec["requestBody"]["x-bentoml-io-descriptor"] - ), - bentoml.io.from_spec( - meth_spec["responses"]["200"]["x-bentoml-io-descriptor"] - ), - name=meth_spec["x-bentoml-name"], - doc=meth_spec["description"], - route=route.lstrip("/"), - ) - dummy_service.apis[meth_spec["x-bentoml-name"]] = api - except BentoMLException as e: - logger.error( - "Failed to instantiate client for API %s: ", - meth_spec["x-bentoml-name"], - e, - ) - - res = HTTPClient(dummy_service, server_url) - res.server_url = server_url - return res - - -class HTTPClient(Client): 
- _svc: Service - - async def _call( - self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any - ) -> t.Any: - api = _bentoml_api - - if api.multi_input: - if inp is not None: - raise BentoMLException( - f"'{api.name}' takes multiple inputs; all inputs must be passed as keyword arguments." - ) - fake_resp = await api.input.to_http_response(kwargs, None) - else: - fake_resp = await api.input.to_http_response(inp, None) - req_body = fake_resp.body - - async with aiohttp.ClientSession(self.server_url) as sess: - async with sess.post( - "/" + api.route, - data=req_body, - headers={"content-type": fake_resp.headers["content-type"]}, - ) as resp: - if resp.status != 200: - raise BentoMLException( - f"Error making request: {resp.status}: {str(await resp.read())}" - ) - - fake_req = starlette.requests.Request(scope={"type": "http"}) - headers = starlette.datastructures.Headers(headers=resp.headers) - fake_req._body = await resp.read() - # Request.headers sets a _headers variable. We will need to set this - # value to our fake request object. - fake_req._headers = headers # type: ignore (request._headers is property) - - return await api.output.from_http_request(fake_req) diff --git a/src/bentoml/client/__init__.py b/src/bentoml/client/__init__.py new file mode 100644 index 00000000000..e486c16f996 --- /dev/null +++ b/src/bentoml/client/__init__.py @@ -0,0 +1,291 @@ +from __future__ import annotations + +import typing as t +import asyncio +import logging +import functools +from abc import ABC +from abc import abstractmethod +from typing import TYPE_CHECKING +from urllib.parse import urlparse + +import attr + +from .. 
import Service +from ..exceptions import BentoMLException +from ..grpc.utils import import_grpc +from ..grpc.utils import LATEST_PROTOCOL_VERSION +from .._internal.utils import bentoml_cattr +from .._internal.utils import cached_property +from .._internal.service.inference_api import InferenceAPI + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from urllib.parse import ParseResult + + import grpc + from grpc import aio + + from ._grpc import GrpcClient + from ._http import HTTPClient + from .._internal.types import PathType + + class ClientCredentials(t.TypedDict): + root_certificates: t.NotRequired[PathType | bytes] + private_key: t.NotRequired[PathType | bytes] + certificate_chain: t.NotRequired[PathType | bytes] + +else: + ClientCredentials = dict + + grpc, aio = import_grpc() + + +@attr.define +class ClientConfig: + http: HTTP = attr.field( + default=attr.Factory(lambda self: self.HTTP(), takes_self=True) + ) + grpc: GRPC = attr.field( + default=attr.Factory(lambda self: self.GRPC(), takes_self=True) + ) + + def with_grpc_options(self, **kwargs: t.Any) -> ClientConfig: + _self_grpc_config = kwargs.pop("_self_grpc_config", None) + if not isinstance(_self_grpc_config, self.GRPC): + _self_grpc_config = ClientConfig.GRPC.from_options(**kwargs) + return attr.evolve(self, **{"grpc": _self_grpc_config}) + + def with_http_options(self, **kwargs: t.Any) -> ClientConfig: + _self_http_config = kwargs.pop("_self_http_config", None) + if not isinstance(_self_http_config, self.HTTP): + _self_http_config = ClientConfig.HTTP.from_options(**kwargs) + return attr.evolve(self, **{"http": _self_http_config}) + + @classmethod + def from_options(cls, **kwargs: t.Any) -> ClientConfig: + return bentoml_cattr.structure(kwargs, cls) + + @staticmethod + def from_grpc_options(**kwargs: t.Any) -> GRPC: + return ClientConfig.GRPC.from_options(**kwargs) + + @staticmethod + def from_http_options(**kwargs: t.Any) -> HTTP: + return ClientConfig.HTTP.from_options(**kwargs) + + def 
unstructure( + self, target: t.Literal["http", "grpc", "default"] = "default" + ) -> dict[str, t.Any]: + if target == "default": + targ = self + elif target == "http": + targ = self.http + elif target == "grpc": + targ = self.grpc + else: + raise ValueError( + f"Invalid target: {target}. Accepted value are 'http', 'grpc', 'default'." + ) + return bentoml_cattr.unstructure(targ) + + @attr.define + class HTTP: + """HTTP ClientConfig. + + .. TODO:: Add HTTP specific options here. + + """ + + # forbid additional keys to prevent typos. + __forbid_extra_keys__ = True + # Don't omit empty field. + __omit_if_default__ = False + + @classmethod + def from_options(cls, **kwargs: t.Any) -> ClientConfig.HTTP: + return bentoml_cattr.structure(kwargs, cls) + + def unstructure(self) -> dict[str, t.Any]: + return ( + ClientConfig() + .with_http_options( + _self_http_config=self, + ) + .unstructure(target="http") + ) + + @attr.define + class GRPC: + """gRPC ClientConfig. + + .. code-block:: python + + from bentoml.client import ClientConfig + from bentoml.client import Client + + config = ClientConfig.from_grpc_options( + ssl=True, + ssl_client_credentials={ + "root_certificates": "path/to/cert.pem", + "private_key": "/path/to/key", + }, + protocol_version="v1alpha1", + ) + client = Client.from_url("localhost:50051", config) + + """ + + # forbid additional keys to prevent typos. + __forbid_extra_keys__ = True + # Don't omit empty field. 
+ __omit_if_default__ = False + + ssl: bool = attr.field(default=False) + channel_options: t.Optional[aio.ChannelArgumentType] = attr.field(default=None) + compression: t.Optional[grpc.Compression] = attr.field(default=None) + ssl_client_credentials: t.Optional[ClientCredentials] = attr.field( + factory=lambda: ClientCredentials() + ) + protocol_version: str = attr.field(default=LATEST_PROTOCOL_VERSION) + interceptors: t.Optional[t.Sequence[aio.ClientInterceptor]] = attr.field( + default=None + ) + + @classmethod + def from_options(cls, **kwargs: t.Any) -> ClientConfig.GRPC: + return bentoml_cattr.structure(kwargs, cls) + + def unstructure(self) -> dict[str, t.Any]: + return ( + ClientConfig() + .with_grpc_options( + _self_grpc_config=self, + ) + .unstructure(target="grpc") + ) + + +if TYPE_CHECKING: + ClientConfigT = ClientConfig | ClientConfig.HTTP | ClientConfig.GRPC + + +_sentinel_svc = Service("sentinel_svc") + + +class Client(ABC): + server_url: str + _svc: Service + + def __init__(self, svc: Service | None, server_url: str): + self._svc = svc or _sentinel_svc + self.server_url = server_url + + if svc is not None and len(svc.apis) == 0: + raise BentoMLException("No APIs were found when constructing client.") + + # Register service method if given service is not _sentinel_svc + # We only set _sentinel_svc if given protocol is older than v1 (for gRPC client.) 
@staticmethod
def from_url(
    server_url: str, config: ClientConfigT | None = None, *, grpc: bool = False
) -> Client:
    """Build a client for the server reachable at ``server_url``.

    Args:
        server_url: Address of the server. ``http://`` is prepended when the
            value carries no ``scheme://`` prefix.
        config: Either a full ``ClientConfig`` or one of its protocol-specific
            sections. A default ``ClientConfig`` is used when omitted.
        grpc: Select the gRPC client implementation instead of the HTTP one.

    Returns:
        The object returned by the selected client class' ``_create_client``.

    Raises:
        BentoMLException: If ``_create_client`` raises any exception; the
            original exception is chained as the cause.
    """
    if "://" not in server_url:
        server_url = "http://" + server_url

    # Implementations are imported lazily, only for the selected protocol.
    if grpc:
        from ._grpc import GrpcClient as klass

        client_type = "grpc"
    else:
        from ._http import HTTPClient as klass

        client_type = "http"

    effective_config = ClientConfig() if config is None else config

    # A protocol-specific section unstructures straight into keyword arguments.
    kwargs = effective_config.unstructure()
    if isinstance(effective_config, ClientConfig):
        # A full ClientConfig holds both sections; keep only the one matching
        # the selected protocol (HTTP unless ``grpc`` was requested).
        kwargs = effective_config.unstructure(target=client_type)

    parsed_url = urlparse(server_url)
    try:
        return klass._create_client(parsed_url, **kwargs)
    except Exception as e:  # pylint: disable=broad-except
        raise BentoMLException(
            f"Failed to create a BentoML client from given URL '{server_url}': {e} ({e.__class__.__name__})"
        ) from e
import Service +from ..exceptions import BentoMLException +from ..grpc.utils import import_grpc +from ..grpc.utils import parse_method_name +from ..grpc.utils import import_generated_stubs +from ..grpc.utils import LATEST_PROTOCOL_VERSION +from .._internal.utils import LazyLoader +from .._internal.utils import cached_property +from .._internal.server.grpc_app import load_from_file +from .._internal.service.inference_api import InferenceAPI + +logger = logging.getLogger(__name__) + +PROTOBUF_EXC_MESSAGE = "'protobuf' is required to use gRPC Client. Install with 'pip install bentoml[grpc]'." +REFLECTION_EXC_MESSAGE = "'grpcio-reflection' is required to use gRPC Client. Install with 'pip install bentoml[grpc-reflection]'." + +if TYPE_CHECKING: + from types import TracebackType + from urllib.parse import ParseResult + + import grpc + from grpc import aio + from google.protobuf import message as _message + from google.protobuf import json_format as _json_format + from google.protobuf import descriptor_pb2 as pb_descriptor + from google.protobuf import descriptor_pool as _descriptor_pool + from google.protobuf import symbol_database as _symbol_database + from grpc_reflection.v1alpha import reflection_pb2 as pb_reflection + from grpc_reflection.v1alpha import reflection_pb2_grpc as services_reflection + + # type hint specific imports. 
+ from google.protobuf.descriptor import MethodDescriptor + from google.protobuf.descriptor import ServiceDescriptor + from google.protobuf.descriptor_pb2 import FileDescriptorProto + from google.protobuf.descriptor_pb2 import MethodDescriptorProto + from google.protobuf.descriptor_pool import DescriptorPool + from google.protobuf.symbol_database import SymbolDatabase + from grpc_reflection.v1alpha.reflection_pb2 import ServiceResponse + from grpc_reflection.v1alpha.reflection_pb2_grpc import ServerReflectionStub + + from ..grpc.types import MultiCallable + + from ..grpc.v1.service_pb2 import ServiceMetadataResponse +else: + pb_descriptor = LazyLoader( + "pb_descriptor", + globals(), + "google.protobuf.descriptor_pb2", + exc_msg=PROTOBUF_EXC_MESSAGE, + ) + _descriptor_pool = LazyLoader( + "_descriptor_pool", + globals(), + "google.protobuf.descriptor_pool", + exc_msg=PROTOBUF_EXC_MESSAGE, + ) + _symbol_database = LazyLoader( + "_symbol_database", + globals(), + "google.protobuf.symbol_database", + exc_msg=PROTOBUF_EXC_MESSAGE, + ) + _json_format = LazyLoader( + "_json_format", + globals(), + "google.protobuf.json_format", + exc_msg=PROTOBUF_EXC_MESSAGE, + ) + services_reflection = LazyLoader( + "services_reflection", + globals(), + "grpc_reflection.v1alpha.reflection_pb2_grpc", + exc_msg=REFLECTION_EXC_MESSAGE, + ) + pb_reflection = LazyLoader( + "pb_reflection", + globals(), + "grpc_reflection.v1alpha.reflection_pb2", + exc_msg=REFLECTION_EXC_MESSAGE, + ) + grpc, aio = import_grpc() + +_object_setattr = object.__setattr__ + +if TYPE_CHECKING: + + class RpcMethod(t.TypedDict): + request_streaming: t.Literal[True, False] + response_streaming: bool + input_type: type[t.Any] + output_type: t.NotRequired[type[t.Any]] + handler: MultiCallable + +else: + RpcMethod = dict + +# TODO: xDS support +class GrpcClient(Client): + def __init__( + self, + server_url: str, + svc: Service | None = None, + # gRPC specific options + ssl: bool = False, + channel_options: 
aio.ChannelArgumentType | None = None, + interceptors: t.Sequence[aio.ClientInterceptor] | None = None, + compression: grpc.Compression | None = None, + ssl_client_credentials: ClientCredentials | None = None, + *, + protocol_version: str = LATEST_PROTOCOL_VERSION, + ): + super().__init__(svc, server_url) + + # Call requires an api_name, therefore we need a reserved keyset of self._svc.apis + self._rev_apis = {v: k for k, v in self._svc.apis.items()} + + self._protocol_version = protocol_version + self._compression = compression + self._options = channel_options + self._interceptors = interceptors + self._channel = None + self._credentials = None + if ssl: + assert ( + ssl_client_credentials is not None + ), "'ssl=True' requires 'credentials'" + self._credentials = grpc.ssl_channel_credentials( + **{ + k: load_from_file(v) if isinstance(v, str) else v + for k, v in ssl_client_credentials.items() + } + ) + + self._descriptor_pool: DescriptorPool = _descriptor_pool.Default() + self._symbol_database: SymbolDatabase = _symbol_database.Default() + + self._available_services: tuple[str, ...] = tuple() + # cached of all available rpc for a given service. 
@cached_property
def channel(self):
    """Return the ``grpc.aio`` channel for ``self.server_url``, creating it on first use.

    A secure channel is created when ``self._credentials`` is set; otherwise
    an insecure channel is created. The created channel is stored on
    ``self._channel`` and reused by later calls.

    Returns:
        The channel stored on ``self._channel``.
    """
    if not self._channel:
        if self._credentials is not None:
            self._channel = aio.secure_channel(
                self.server_url,
                credentials=self._credentials,
                options=self._options,
                compression=self._compression,
                interceptors=self._interceptors,
            )
        else:
            # Fall back to plaintext only when no credentials are configured.
            # Creating the insecure channel unconditionally would overwrite
            # the secure one and silently disable TLS.
            self._channel = aio.insecure_channel(
                self.server_url,
                options=self._options,
                compression=self._compression,
                interceptors=self._interceptors,
            )
    return self._channel
+ stack = contextlib.AsyncExitStack() + + async def close(): + await stack.aclose() + + async def enter(): + res = await stack.enter_async_context( + self.aservice(service_name, _wrap_in_sync=True) + ) + return res + + try: + yield self._loop.run_until_complete(enter()) + finally: + self._loop.run_until_complete(close()) + + async def __aenter__(self): + return await self.aservice().__aenter__() + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + try: + if exc_type is not None: + await self.aservice().__aexit__(exc_type, exc, traceback) + await self._exit() + except Exception as err: # pylint: disable=broad-except + logger.error(f"Exception occurred: %s (%s)", err, exc_type, exc_info=err) + return False + + @contextlib.asynccontextmanager + async def aservice( + self, service_name: str = "default", *, _wrap_in_sync: bool = False + ) -> t.AsyncGenerator[t.Self, None]: + # This is the entrypoint for user to instantiate a client for a given service. + + # default is a special case for BentoService proto. + if service_name in self._reserved_kw_mapping: + service_name = self._reserved_kw_mapping[service_name] + + if not self._available_services: + resp = await self._do_one_request( + pb_reflection.ServerReflectionRequest(list_services="") + ) + if resp is not None: + services: list[ServiceResponse] = resp.list_services_response.service + self._available_services = tuple( + [t.cast(str, s.name) for s in services] + ) + + if ( + service_name in self._available_services + and service_name not in self._service_cache + ): + await self._register_service(service_name) + + if self.channel.get_state() != grpc.ChannelConnectivity.READY: + # create a blocking call to wait til channel is ready. + await self.channel.channel_ready() + + try: + method_meta = self._service_cache[service_name] + except KeyError: + raise ValueError( + f"Failed to find service '{service_name}'. 
Available: {list(self._service_cache.keys())}" + ) from None + + def _register(method: str): + finaliser = f = functools.partial( + self._invoke, + self.make_rpc_method(service_name, method), + _serialize_input=True, + ) + if _wrap_in_sync: + # We will have to run the async function in a sync wrapper + @functools.wraps(f) + def wrapper(*args: t.Any, **kwargs: t.Any): + coro = f(*args, **kwargs) + task = asyncio.ensure_future(coro, loop=self._loop) + try: + res = self._loop.run_until_complete(task) + if inspect.isasyncgen(res): + # If this is an async generator, then we need to yield again + async def call(): + return await res.__anext__() + + return self._loop.run_until_complete(call()) + return res + except BaseException: + # Consume all exceptions. + if task.done() and not task.cancelled(): + task.exception() + raise + + finaliser = wrapper + _object_setattr(self, method, finaliser) + + # Register all RPC method. + for method in reversed(method_meta): + _register(method) + + yield self + + async def _register_service(self, service_name: str) -> None: + svc_descriptor: ServiceDescriptor | None = None + try: + svc_descriptor = self._descriptor_pool.FindServiceByName(service_name) + except KeyError: + file_descriptor = await self._find_descriptor_by_symbol(service_name) + await self._add_file_descriptor(file_descriptor) + # try to register from FileDescriptorProto again. + svc_descriptor = self._descriptor_pool.FindServiceByName(service_name) + except Exception as e: # pylint: disable=broad-except + logger.warning( + "Failed to register %s. 
async def _add_file_descriptor(self, file_descriptor: FileDescriptorProto):
    """Add ``file_descriptor`` to the descriptor pool, dependencies first.

    Each dependency name that is not yet in ``self._registered_file_name`` is
    resolved with ``_find_descriptor_by_filename``, added recursively, and
    then recorded in that set. The given descriptor itself is added to
    ``self._descriptor_pool`` last.

    Args:
        file_descriptor: The file descriptor proto to add.
    """
    for dep_name in file_descriptor.dependency:
        if dep_name in self._registered_file_name:
            # Already handled while adding an earlier descriptor.
            continue
        dep_proto = await self._find_descriptor_by_filename(dep_name)
        await self._add_file_descriptor(dep_proto)
        self._registered_file_name.add(dep_name)
    self._descriptor_pool.Add(file_descriptor)
async def _invoke(
    self,
    method_name: str,
    _serialize_input: bool = False,
    **attrs: t.Any,
):
    """Invoke a registered RPC method through its cached call handler.

    Args:
        method_name: RPC path in the form ``/package.ServiceName/MethodName``.
        _serialize_input: When true, the request is built with the RPC type's
            ``request_serializer``; otherwise the registered input type is
            instantiated directly from ``attrs``.
        **attrs: Request fields. The keys ``_channel_timeout``,
            ``_channel_metadata``, ``_channel_credentials``,
            ``_channel_wait_for_ready`` and ``_channel_compression`` are
            removed from ``attrs`` and forwarded (without the prefix) to the
            call handler; missing ones are forwarded as ``None``.

    Returns:
        The awaited handler result for unary responses, or the result of the
        RPC type's ``response_deserializer`` for streaming responses.

    Raises:
        ValueError: If ``method_name`` is reported invalid by
            ``parse_method_name``, or its service is not listed in
            ``self._available_services``.
        BentoMLException: If the method is missing from the service cache.
    """
    # Parse once and check validity before reading any parsed field, so that a
    # malformed name is reported as malformed rather than as an unknown service.
    mn, is_valid = parse_method_name(method_name)
    if not is_valid:
        raise ValueError(
            f"{method_name} is not a valid method name. Make sure to follow the format '/package.ServiceName/MethodName'"
        )
    if mn.fully_qualified_service not in self._available_services:
        raise ValueError(
            f"{mn.service} is not available in server. Registered services: {self._available_services}"
        )

    # channel kwargs include timeout, metadata, credentials, wait_for_ready and compression
    # to pass it in kwargs add prefix _channel_
    channel_kwargs = {
        k: attrs.pop(f"_channel_{k}", None)
        for k in (
            "timeout",
            "metadata",
            "credentials",
            "wait_for_ready",
            "compression",
        )
    }

    try:
        rpc_method = self._service_cache[mn.fully_qualified_service][mn.method]
    except KeyError:
        raise BentoMLException(
            f"Method '{method_name}' is not registered in current service client."
        ) from None

    handler_type = _RpcType.from_streaming_type(
        rpc_method["request_streaming"], rpc_method["response_streaming"]
    )

    if _serialize_input:
        parsed = handler_type.request_serializer(rpc_method["input_type"], **attrs)
    else:
        parsed = rpc_method["input_type"](**attrs)

    if handler_type.is_unary_response():
        return await t.cast(
            t.Awaitable[t.Any],
            rpc_method["handler"](parsed, **channel_kwargs),
        )
    # streaming response
    return handler_type.response_deserializer(
        rpc_method["handler"](parsed, **channel_kwargs)
    )
multiple inputs; all inputs must be passed as keyword arguments." + ) + serialized_req = await _bentoml_api.input.to_proto(attrs) + else: + serialized_req = await _bentoml_api.input.to_proto(inp) + + # A call includes api_name and given proto_fields + return await fn( + self._call_rpc_method, + **{ + "api_name": self._rev_apis[_bentoml_api], + _bentoml_api.input._proto_fields[0]: serialized_req, + }, + ) + + @staticmethod + def _create_client(parsed: ParseResult, **kwargs: t.Any) -> GrpcClient: + server_url = parsed.netloc + protocol_version = kwargs.get("protocol_version", LATEST_PROTOCOL_VERSION) + + # Since v1, we introduce a ServiceMetadata rpc to retrieve bentoml.Service metadata. + # This means if user are using client for protocol version v1alpha1, + # then `client.predict` or `client.classify` won't be available. + # client.Call will still persist for both protocol version. + dummy_service: Service | None = None + if parse(protocol_version) < parse("v1"): + logger.warning( + "Using protocol version %s older than v1. This means the client won't have service API functions as attributes. To invoke the RPC endpoint, use 'client.Call()'.", + protocol_version, + ) + else: + pb, _ = import_generated_stubs(protocol_version) + + # create an insecure channel to invoke ServiceMetadata rpc + with grpc.insecure_channel(server_url) as channel: + # gRPC sync stub is WIP. 
+ ServiceMetadata = channel.unary_unary( + f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", + request_serializer=pb.ServiceMetadataRequest.SerializeToString, + response_deserializer=pb.ServiceMetadataResponse.FromString, + ) + metadata = t.cast( + "ServiceMetadataResponse", + ServiceMetadata(pb.ServiceMetadataRequest()), + ) + dummy_service = Service(metadata.name) + + for api in metadata.apis: + dummy_service.apis[api.name] = InferenceAPI( + None, + io.from_spec( + { + "id": api.input.descriptor_id, + "args": _json_format.MessageToDict(api.input.attributes)[ + "args" + ], + } + ), + io.from_spec( + { + "id": api.output.descriptor_id, + "args": _json_format.MessageToDict(api.output.attributes)[ + "args" + ], + } + ), + name=api.name, + doc=api.docs, + ) + + return GrpcClient(server_url, dummy_service, **kwargs) + + def __del__(self): + if self._channel: + try: + del self._channel + except Exception: # pylint: disable=broad-except + pass + + +class _RpcType(Enum): + UNARY_UNARY = 1 + UNARY_STREAM = 2 + STREAM_UNARY = 3 + STREAM_STREAM = 4 + + def is_unary_request(self) -> bool: + return self.name.lower().startswith("unary_") + + def is_unary_response(self) -> bool: + return self.name.lower().endswith("_unary") + + @classmethod + def from_method_descriptor(cls, method_descriptor: MethodDescriptorProto) -> str: + rpcs = cls.from_streaming_type( + method_descriptor.client_streaming, method_descriptor.server_streaming + ) + return rpcs.name.lower() + + @classmethod + def from_streaming_type( + cls, client_streaming: bool, server_streaming: bool + ) -> t.Self: + if not client_streaming and not server_streaming: + return cls.UNARY_UNARY + elif client_streaming and not server_streaming: + return cls.STREAM_UNARY + elif not client_streaming and server_streaming: + return cls.UNARY_STREAM + else: + return cls.STREAM_STREAM + + @property + def request_serializer(self) -> t.Callable[..., t.Any]: + def _(input_type: type[t.Any], **request_data: t.Any): + data = 
request_data or {} + return _json_format.ParseDict(data, input_type()) + + def _it(input_type: type[t.Any], request_data: t.Iterable[t.Any]): + for data in request_data: + yield _(input_type, **data) + + return _ if self.is_unary_request() else _it + + @property + def response_deserializer(self) -> t.Callable[..., t.Any]: + async def _(response: _message.Message): + return _json_format.MessageToDict( + response, preserving_proto_field_name=True + ) + + async def _it(response: t.AsyncIterator[_message.Message]): + async for r in response: + yield await _(r) + + return _ if self.is_unary_response() else _it diff --git a/src/bentoml/client/_http.py b/src/bentoml/client/_http.py new file mode 100644 index 00000000000..41cbffed9a9 --- /dev/null +++ b/src/bentoml/client/_http.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import json +import typing as t +import logging +from typing import TYPE_CHECKING +from http.client import HTTPConnection + +import aiohttp +import starlette.requests +import starlette.datastructures + +from . import Client +from .. import io +from .. 
import Service +from ..exceptions import BentoMLException +from .._internal.configuration import get_debug_mode +from .._internal.service.inference_api import InferenceAPI + +if TYPE_CHECKING: + from urllib.parse import ParseResult + + +class HTTPClient(Client): + @staticmethod + def _create_client(parsed: ParseResult, **kwargs: t.Any) -> HTTPClient: + # TODO: HTTP SSL support + server_url = parsed.netloc + conn = HTTPConnection(server_url) + conn.set_debuglevel(logging.DEBUG if get_debug_mode() else 0) + conn.request("GET", "/docs.json") + resp = conn.getresponse() + openapi_spec = json.load(resp) + conn.close() + + dummy_service = Service(openapi_spec["info"]["title"]) + + for route, spec in openapi_spec["paths"].items(): + for meth_spec in spec.values(): + if "Service APIs" in meth_spec["tags"]: + if "x-bentoml-io-descriptor" not in meth_spec["requestBody"]: + # TODO: better message stating min version for from_url to work + raise BentoMLException( + f"Malformed BentoML spec received from BentoML server {server_url}" + ) + if "x-bentoml-io-descriptor" not in meth_spec["responses"]["200"]: + raise BentoMLException( + f"Malformed BentoML spec received from BentoML server {server_url}" + ) + if "x-bentoml-name" not in meth_spec: + raise BentoMLException( + f"Malformed BentoML spec received from BentoML server {server_url}" + ) + dummy_service.apis[meth_spec["x-bentoml-name"]] = InferenceAPI( + None, + io.from_spec( + meth_spec["requestBody"]["x-bentoml-io-descriptor"] + ), + io.from_spec( + meth_spec["responses"]["200"]["x-bentoml-io-descriptor"] + ), + name=meth_spec["x-bentoml-name"], + doc=meth_spec["description"], + route=route.lstrip("/"), + ) + + return HTTPClient(dummy_service, parsed.geturl()) + + async def _call( + self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any + ) -> t.Any: + api = _bentoml_api + + if api.multi_input: + if inp is not None: + raise BentoMLException( + f"'{api.name}' takes multiple inputs; all inputs must be passed 
as keyword arguments." + ) + fake_resp = await api.input.to_http_response(kwargs, None) + else: + fake_resp = await api.input.to_http_response(inp, None) + req_body = fake_resp.body + + async with aiohttp.ClientSession(self.server_url) as sess: + async with sess.post( + "/" + api.route, + data=req_body, + headers={"content-type": fake_resp.headers["content-type"]}, + ) as resp: + if resp.status != 200: + raise BentoMLException( + f"Error making request: {resp.status}: {str(await resp.read())}" + ) + + fake_req = starlette.requests.Request(scope={"type": "http"}) + headers = starlette.datastructures.Headers(headers=resp.headers) + fake_req._body = await resp.read() + # Request.headers sets a _headers variable. We will need to set this + # value to our fake request object. + fake_req._headers = headers # type: ignore (request._headers is property) + + return await api.output.from_http_request(fake_req) diff --git a/src/bentoml/grpc/types.py b/src/bentoml/grpc/types.py index 942aa44db0f..f4567e81871 100644 --- a/src/bentoml/grpc/types.py +++ b/src/bentoml/grpc/types.py @@ -12,6 +12,12 @@ import grpc from grpc import aio + from grpc.aio._typing import SerializingFunction + from grpc.aio._typing import DeserializingFunction + from grpc.aio._base_channel import UnaryUnaryMultiCallable + from grpc.aio._base_channel import StreamUnaryMultiCallable + from grpc.aio._base_channel import UnaryStreamMultiCallable + from grpc.aio._base_channel import StreamStreamMultiCallable from bentoml.grpc.v1.service_pb2 import Request from bentoml.grpc.v1.service_pb2 import Response @@ -94,6 +100,13 @@ class HandlerCallDetails( t.Callable[[], aio.ServerInterceptor] | partial[aio.ServerInterceptor] ] + MultiCallable = ( + UnaryUnaryMultiCallable + | UnaryStreamMultiCallable + | StreamUnaryMultiCallable + | StreamStreamMultiCallable + ) + __all__ = [ "Request", "Response", diff --git a/src/bentoml/testing/server.py b/src/bentoml/testing/server.py index 6d2b1d03321..dc6cf3be4e8 100644 --- 
a/src/bentoml/testing/server.py +++ b/src/bentoml/testing/server.py @@ -219,6 +219,8 @@ def run_bento_server_container( cmd.append(image_tag) serve_cmd = "serve-grpc" if use_grpc else "serve-http" cmd.extend([serve_cmd, "--production"]) + if use_grpc: + cmd.extend(["--enable-reflection"]) print(f"Running API server in container: '{' '.join(cmd)}'") with subprocess.Popen( cmd, @@ -273,7 +275,7 @@ def run_bento_server_standalone( f"{server_port}", ] if use_grpc: - cmd += ["--host", f"{host}"] + cmd += ["--host", f"{host}", "--enable-reflection"] cmd += [bento] print(f"Running command: '{' '.join(cmd)}'") p = subprocess.Popen( @@ -393,6 +395,8 @@ def run_bento_server_distributed( path, *itertools.chain.from_iterable(runner_args), ] + if use_grpc: + cmd.extend(["--enable-reflection"]) with reserve_free_port(host=host, enable_so_reuseport=use_grpc) as server_port: cmd.extend(["--port", f"{server_port}"]) print(f"Running command: '{' '.join(cmd)}'") diff --git a/tests/e2e/bento_server_grpc/tests/test_metrics.py b/tests/e2e/bento_server_grpc/tests/test_metrics.py index f3ea0adfd76..be6496ccef6 100644 --- a/tests/e2e/bento_server_grpc/tests/test_metrics.py +++ b/tests/e2e/bento_server_grpc/tests/test_metrics.py @@ -2,42 +2,23 @@ from typing import TYPE_CHECKING +import numpy as np import pytest +from bentoml.client import Client from bentoml.grpc.utils import import_generated_stubs -from bentoml.testing.grpc import create_channel -from bentoml.testing.grpc import async_client_call -from bentoml._internal.utils import LazyLoader if TYPE_CHECKING: - from google.protobuf import wrappers_pb2 - from bentoml.grpc.v1 import service_pb2 as pb else: - wrappers_pb2 = LazyLoader("wrappers_pb2", globals(), "google.protobuf.wrappers_pb2") pb, _ = import_generated_stubs() @pytest.mark.asyncio -async def test_metrics_available(host: str, img_file: str): - with open(str(img_file), "rb") as f: - fb = f.read() - - async with create_channel(host) as channel: - await async_client_call( - 
"predict_multi_images", - channel=channel, - data={ - "multipart": { - "fields": { - "original": pb.Part(file=pb.File(kind="image/bmp", content=fb)), - "compared": pb.Part(file=pb.File(kind="image/bmp", content=fb)), - } - } - }, - ) - await async_client_call( - "ensure_metrics_are_registered", - channel=channel, - data={"text": wrappers_pb2.StringValue(value="input_string")}, - ) +async def test_metrics_available(host: str): + client = Client.from_url(host, grpc=True) + resp = await client.async_predict_multi_images( + original=np.random.randint(255, size=(10, 10, 3)).astype("uint8"), + compared=np.random.randint(255, size=(10, 10, 3)).astype("uint8"), + ) + assert isinstance(resp, pb.Response) From eb0a900df1af1a60e577349489480ce5732d47ec Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 00:24:34 -0800 Subject: [PATCH 02/19] chore: simplify gRPC implementation Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- grpc-client/python/client.py | 45 +- pyproject.toml | 1 - src/bentoml/_internal/client/__init__.py | 121 ++++ src/bentoml/_internal/client/grpc.py | 309 ++++++++ .../_http.py => _internal/client/http.py} | 21 +- src/bentoml/client.py | 19 + src/bentoml/client/__init__.py | 291 -------- src/bentoml/client/_grpc.py | 666 ------------------ src/bentoml/grpc/types.py | 13 - src/bentoml/grpc/v1/service.proto | 4 +- 10 files changed, 475 insertions(+), 1015 deletions(-) create mode 100644 src/bentoml/_internal/client/__init__.py create mode 100644 src/bentoml/_internal/client/grpc.py rename src/bentoml/{client/_http.py => _internal/client/http.py} (88%) create mode 100644 src/bentoml/client.py delete mode 100644 src/bentoml/client/__init__.py delete mode 100644 src/bentoml/client/_grpc.py diff --git a/grpc-client/python/client.py b/grpc-client/python/client.py index 1ea795403c5..a9935878840 100644 --- a/grpc-client/python/client.py +++ b/grpc-client/python/client.py @@ -5,18 +5,10 @@ 
import numpy as np -from bentoml.client import Client +import bentoml -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from bentoml.client import GrpcClient - - -logger = logging.getLogger(__name__) - - -async def arun(client: GrpcClient): +async def async_run(client: bentoml.client.Client): res = await client.async_classify(np.array([[5.9, 3, 5.1, 1.8]])) logger.info("Result from 'client.async_classify':\n%s", res) @@ -24,36 +16,31 @@ async def arun(client: GrpcClient): logger.info("Result from 'client.async_call':\n%s", res) -def run(client: GrpcClient): +def run(client: bentoml.client.Client): res = client.classify(np.array([[5.9, 3, 5.1, 1.8]])) logger.info("Result from 'client.classify':\n%s", res) res = client.call("classify", np.array([[5.9, 3, 5.1, 1.8]])) logger.info("Result from 'client.call(bentoml_api_name='classify')':\n%s", res) - with client.service(): - res = client.Call( - api_name="classify", - ndarray={"float_values": [5.9, 3, 5.1, 1.8], "shape": [1, 4], "dtype": 1}, - ) - logger.info("Result from 'client.Call' in a context manager:\n%s", res) - if __name__ == "__main__": import argparse + logger = logging.getLogger(__name__) + + ch = logging.StreamHandler() + formatter = logging.Formatter("%(message)s") + ch.setFormatter(formatter) + logger.addHandler(ch) + logger.setLevel(logging.DEBUG) + parser = argparse.ArgumentParser() - parser.add_argument("-rwa", "--run-with-async", action="store_true", default=False) - parser.add_argument("--grpc", action="store_true", default=False) + parser.add_argument("-s", "--sync", action="store_true", default=False) args = parser.parse_args() - c: GrpcClient = Client.from_url("localhost:3000", grpc=args.grpc) + c = bentoml.client.Client.from_url("localhost:3000") - if args.run_with_async: - loop = asyncio.new_event_loop() - try: - loop.run_until_complete(arun(c)) - finally: - loop.close() - assert loop.is_closed() - else: + if args.sync: run(c) + else: + asyncio.run(async_run(c)) diff --git a/pyproject.toml 
b/pyproject.toml index 1ea9a5eb0b5..0fbc835f3ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,7 +105,6 @@ include = [ # include bentoml packages "bentoml", "bentoml.grpc*", - "bentoml.client*", "bentoml.testing*", "bentoml._internal*", # include bentoml_cli packages diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py new file mode 100644 index 00000000000..0be0473485d --- /dev/null +++ b/src/bentoml/_internal/client/__init__.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import typing as t +import asyncio +import logging +import functools +from abc import ABC +from abc import abstractmethod +from typing import TYPE_CHECKING +from http.client import BadStatusLine +from urllib.parse import urlparse + +from ...exceptions import BentoMLException +from ..service.inference_api import InferenceAPI + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from .grpc import GrpcClient + from .http import HTTPClient + from ..service import Service + + +class Client(ABC): + server_url: str + _svc: Service + + def __init__(self, svc: Service, server_url: str): + self._svc = svc + self.server_url = server_url + + if svc is not None and len(svc.apis) == 0: + raise BentoMLException("No APIs were found when constructing client.") + + for name, api in self._svc.apis.items(): + if not hasattr(self, name): + setattr( + self, name, functools.partial(self._sync_call, _bentoml_api=api) + ) + + if not hasattr(self, f"async_{name}"): + setattr( + self, + f"async_{name}", + functools.partial(self._call, _bentoml_api=api), + ) + + def call(self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any) -> t.Any: + return asyncio.run(self.async_call(bentoml_api_name, inp, **kwargs)) + + async def async_call( + self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any + ) -> t.Any: + return await self._call( + inp, _bentoml_api=self._svc.apis[bentoml_api_name], **kwargs + ) + + @t.overload + @classmethod + def 
from_url( + cls, server_url: str, *, kind: None | t.Literal["auto"] = ... + ) -> GrpcClient | HTTPClient: + ... + + @t.overload + @classmethod + def from_url(cls, server_url: str, *, kind: t.Literal["http"] = ...) -> HTTPClient: + ... + + @t.overload + @classmethod + def from_url(cls, server_url: str, *, kind: t.Literal["grpc"] = ...) -> GrpcClient: + ... + + @classmethod + def from_url( + cls, server_url: str, *, kind: str | None = None, **kwargs: t.Any + ) -> Client: + url_parsed = urlparse(server_url) + if url_parsed.scheme == "http": + kind = "http" + elif url_parsed.scheme == "grpc": + kind = "grpc" + + if kind is None or kind == "auto": + try: + from .http import HTTPClient + + return HTTPClient.from_url(server_url, **kwargs) + except BadStatusLine: + from .grpc import GrpcClient + + return GrpcClient.from_url(server_url, **kwargs) + except Exception as e: # pylint: disable=broad-except + raise BentoMLException( + f"Failed to create a BentoML client from given URL '{server_url}': {e} ({e.__class__.__name__})" + ) from e + elif kind == "http": + from .http import HTTPClient + + server_url = server_url if "://" in server_url else "http://" + server_url + return HTTPClient.from_url(server_url, **kwargs) + elif kind == "grpc": + from .grpc import GrpcClient + + return GrpcClient.from_url(server_url, **kwargs) + else: + raise BentoMLException( + "Invalid client kind. 
Must be one of ['http', 'grpc', 'auto']" + ) + + def _sync_call( + self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any + ): + return asyncio.run(self._call(inp, _bentoml_api=_bentoml_api, **kwargs)) + + @abstractmethod + async def _call( + self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any + ) -> t.Any: + raise NotImplementedError diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py new file mode 100644 index 00000000000..1710a16c7db --- /dev/null +++ b/src/bentoml/_internal/client/grpc.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +import typing as t +import logging +import functools +from typing import TYPE_CHECKING + +from packaging.version import parse + +from . import Client +from .. import io_descriptors as io +from ..utils import LazyLoader +from ..utils import cached_property +from ..service import Service +from ...exceptions import BentoMLException +from ...grpc.utils import import_grpc +from ...grpc.utils import import_generated_stubs +from ...grpc.utils import LATEST_PROTOCOL_VERSION +from ..server.grpc_app import load_from_file +from ..service.inference_api import InferenceAPI + +logger = logging.getLogger(__name__) + +PROTOBUF_EXC_MESSAGE = "'protobuf' is required to use gRPC Client. Install with 'pip install bentoml[grpc]'." +REFLECTION_EXC_MESSAGE = "'grpcio-reflection' is required to use gRPC Client. Install with 'pip install bentoml[grpc-reflection]'." 
+ +if TYPE_CHECKING: + + import grpc + from grpc import aio + from google.protobuf import json_format as _json_format + + from ..types import PathType + from ...grpc.v1.service_pb2 import ServiceMetadataResponse + + class ClientCredentials(t.TypedDict): + root_certificates: t.NotRequired[PathType | bytes] + private_key: t.NotRequired[PathType | bytes] + certificate_chain: t.NotRequired[PathType | bytes] + +else: + ClientCredentials = dict + _json_format = LazyLoader( + "_json_format", + globals(), + "google.protobuf.json_format", + exc_msg=PROTOBUF_EXC_MESSAGE, + ) + grpc, aio = import_grpc() + +_INDENTATION = " " * 4 + +# TODO: xDS support +class GrpcClient(Client): + def __init__( + self, + server_url: str, + svc: Service, + # gRPC specific options + ssl: bool = False, + channel_options: aio.ChannelArgumentType | None = None, + interceptors: t.Sequence[aio.ClientInterceptor] | None = None, + compression: grpc.Compression | None = None, + ssl_client_credentials: ClientCredentials | None = None, + *, + protocol_version: str = LATEST_PROTOCOL_VERSION, + ): + super().__init__(svc, server_url) + + self._pb, self._services = import_generated_stubs(protocol_version) + + self._protocol_version = protocol_version + self._compression = compression + self._options = channel_options + self._interceptors = interceptors + self._channel = None + self._credentials = None + if ssl: + assert ( + ssl_client_credentials is not None + ), "'ssl=True' requires 'credentials'" + self._credentials = grpc.ssl_channel_credentials( + **{ + k: load_from_file(v) if isinstance(v, str) else v + for k, v in ssl_client_credentials.items() + } + ) + + @cached_property + def channel(self): + if not self._channel: + if self._credentials is not None: + self._channel = aio.secure_channel( + self.server_url, + credentials=self._credentials, + options=self._options, + compression=self._compression, + interceptors=self._interceptors, + ) + self._channel = aio.insecure_channel( + self.server_url, + 
options=self._options, + compression=self._compression, + interceptors=self._interceptors, + ) + return self._channel + + @cached_property + def _rpc_handler_mapping(self): + # Currently all RPCs in BentoService are unary-unary + return { + method: { + "handler": self.channel.unary_unary( + method=method, + request_serializer=input_type.SerializeToString, + response_deserializer=output_type.FromString, + ), + "input_type": input_type, + "output_type": output_type, + } + for method, input_type, output_type in ( + ( + f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", + self._pb.Request, + self._pb.Response, + ), + ( + f"/bentoml.grpc.{self._protocol_version}.BentoService/ServiceMetadata", + self._pb.ServiceMetadataRequest, + self._pb.ServiceMetadataResponse, + ), + ) + } + + async def _invoke(self, method_name: str, **attrs: t.Any): + # channel kwargs include timeout, metadata, credentials, wait_for_ready and compression + # to pass it in kwargs add prefix _channel_ + channel_kwargs = { + k: attrs.pop(f"_channel_{k}", None) + for k in { + "timeout", + "metadata", + "credentials", + "wait_for_ready", + "compression", + } + } + if method_name not in self._rpc_handler_mapping: + raise ValueError( + f"'{method_name}' is a yet supported rpc. Current supported are: {list(self._rpc_handler_mapping.keys())}" + ) + rpc_handler = self._rpc_handler_mapping[method_name] + + return await t.cast( + t.Awaitable[t.Any], + rpc_handler["handler"]( + rpc_handler["input_type"](**attrs), **channel_kwargs + ), + ) + + async def _call( + self, + inp: t.Any = None, + *, + _bentoml_api: InferenceAPI, + **attrs: t.Any, + ) -> t.Any: + if self.channel.get_state() != grpc.ChannelConnectivity.READY: + # create a blocking call to wait til channel is ready. 
+ await self.channel.channel_ready() + + fn = functools.partial( + self._invoke, + **{ + f"_channel_{k}": attrs.pop(f"_channel_{k}", None) + for k in { + "timeout", + "metadata", + "credentials", + "wait_for_ready", + "compression", + } + }, + ) + + if _bentoml_api.multi_input: + if inp is not None: + raise BentoMLException( + f"'{_bentoml_api.name}' takes multiple inputs; all inputs must be passed as keyword arguments." + ) + serialized_req = await _bentoml_api.input.to_proto(attrs) + else: + serialized_req = await _bentoml_api.input.to_proto(inp) + + # A call includes api_name and given proto_fields + _rev_apis = {v: k for k, v in self._svc.apis.items()} + return await fn( + f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", + **{ + "api_name": _rev_apis[_bentoml_api], + _bentoml_api.input._proto_fields[0]: serialized_req, + }, + ) + + @classmethod + def from_url(cls, server_url: str, **kwargs: t.Any) -> GrpcClient: + protocol_version = kwargs.get("protocol_version", LATEST_PROTOCOL_VERSION) + ssl = kwargs.get("ssl", False) + ssl_client_credentials = kwargs.get("ssl_client_credentials", None) + + # Since v1, we introduce a ServiceMetadata rpc to retrieve bentoml.Service metadata. + # then `client.predict` or `client.classify` won't be available. + # client.Call will still persist for both protocol version. + if parse(protocol_version) < parse("v1"): + exception_message = [ + f"Using protocol version {protocol_version} older than v1. 'bentoml.client.Client' will only support protocol version v1 onwards. 
To create client with protocol version '{protocol_version}', do the following:\n" + """\ + +from bentoml.grpc.utils import import_generated_stubs, import_grpc + +pb, services = import_generated_stubs("v1alpha1") + +grpc, _ = import_grpc() + +def run(): + with grpc.insecure_channel("localhost:3000") as channel: + stubs = services.BentoServiceStub(channel) + req = stubs.Call( + request=pb.Request( + api_name="predict", + ndarray=pb.NDArray( + dtype=pb.NDArray.DTYPE_FLOAT, + shape=(1, 4), + float_values=[5.9, 3, 5.1, 1.8], + ), + ) + ) + print(req) + +if __name__ == '__main__': + run() +""" + ] + raise BentoMLException("\n".join(exception_message)) + pb, _ = import_generated_stubs(protocol_version) + + if ssl: + assert ( + ssl_client_credentials is not None + ), "'ssl=True' requires 'credentials'" + channel = grpc.secure_channel( + server_url, + credentials=grpc.ssl_channel_credentials( + **{ + k: load_from_file(v) if isinstance(v, str) else v + for k, v in ssl_client_credentials.items() + } + ), + options=kwargs.get("channel_options", None), + compression=kwargs.get("compression", None), + ) + else: + channel = grpc.insecure_channel( + server_url, + options=kwargs.get("channel_options", None), + compression=kwargs.get("compression", None), + ) + + # create an insecure channel to invoke ServiceMetadata rpc + with channel: + # gRPC sync stub is WIP. 
+ ServiceMetadata = channel.unary_unary( + f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", + request_serializer=pb.ServiceMetadataRequest.SerializeToString, + response_deserializer=pb.ServiceMetadataResponse.FromString, + ) + metadata = t.cast( + "ServiceMetadataResponse", + ServiceMetadata(pb.ServiceMetadataRequest()), + ) + dummy_service = Service(metadata.name) + + for api in metadata.apis: + try: + dummy_service.apis[api.name] = InferenceAPI( + None, + io.from_spec( + { + "id": api.input.descriptor_id, + "args": _json_format.MessageToDict( + api.input.attributes + ).get("args", None), + } + ), + io.from_spec( + { + "id": api.output.descriptor_id, + "args": _json_format.MessageToDict( + api.output.attributes + ).get("args", None), + } + ), + name=api.name, + doc=api.docs, + ) + except BentoMLException as e: + logger.error("Failed to instantiate client for API %s: ", api.name, e) + + return cls(server_url, dummy_service, **kwargs) diff --git a/src/bentoml/client/_http.py b/src/bentoml/_internal/client/http.py similarity index 88% rename from src/bentoml/client/_http.py rename to src/bentoml/_internal/client/http.py index 41cbffed9a9..0e0f160f1fc 100644 --- a/src/bentoml/client/_http.py +++ b/src/bentoml/_internal/client/http.py @@ -3,7 +3,6 @@ import json import typing as t import logging -from typing import TYPE_CHECKING from http.client import HTTPConnection import aiohttp @@ -11,21 +10,17 @@ import starlette.datastructures from . import Client -from .. import io -from .. import Service -from ..exceptions import BentoMLException -from .._internal.configuration import get_debug_mode -from .._internal.service.inference_api import InferenceAPI - -if TYPE_CHECKING: - from urllib.parse import ParseResult +from .. 
import io_descriptors as io +from ..service import Service +from ...exceptions import BentoMLException +from ..configuration import get_debug_mode +from ..service.inference_api import InferenceAPI class HTTPClient(Client): - @staticmethod - def _create_client(parsed: ParseResult, **kwargs: t.Any) -> HTTPClient: + @classmethod + def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: # TODO: HTTP SSL support - server_url = parsed.netloc conn = HTTPConnection(server_url) conn.set_debuglevel(logging.DEBUG if get_debug_mode() else 0) conn.request("GET", "/docs.json") @@ -64,7 +59,7 @@ def _create_client(parsed: ParseResult, **kwargs: t.Any) -> HTTPClient: route=route.lstrip("/"), ) - return HTTPClient(dummy_service, parsed.geturl()) + return cls(dummy_service, server_url) async def _call( self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any diff --git a/src/bentoml/client.py b/src/bentoml/client.py new file mode 100644 index 00000000000..80db6a7f1dd --- /dev/null +++ b/src/bentoml/client.py @@ -0,0 +1,19 @@ +""" +Bento Client. +============= + +See https://docs.bentoml.org/en/latest/guides/client.html for more information. + +.. code-block:: python + + import bentoml + + client = bentoml.client.Client.from_url("localhost:3000") + + client.predict(np.array([[5.9, 3, 5.1, 1.8]])) +""" +from __future__ import annotations + +from ._internal.client import Client + +__all__ = ["Client"] diff --git a/src/bentoml/client/__init__.py b/src/bentoml/client/__init__.py deleted file mode 100644 index e486c16f996..00000000000 --- a/src/bentoml/client/__init__.py +++ /dev/null @@ -1,291 +0,0 @@ -from __future__ import annotations - -import typing as t -import asyncio -import logging -import functools -from abc import ABC -from abc import abstractmethod -from typing import TYPE_CHECKING -from urllib.parse import urlparse - -import attr - -from .. 
import Service -from ..exceptions import BentoMLException -from ..grpc.utils import import_grpc -from ..grpc.utils import LATEST_PROTOCOL_VERSION -from .._internal.utils import bentoml_cattr -from .._internal.utils import cached_property -from .._internal.service.inference_api import InferenceAPI - -logger = logging.getLogger(__name__) - -if TYPE_CHECKING: - from urllib.parse import ParseResult - - import grpc - from grpc import aio - - from ._grpc import GrpcClient - from ._http import HTTPClient - from .._internal.types import PathType - - class ClientCredentials(t.TypedDict): - root_certificates: t.NotRequired[PathType | bytes] - private_key: t.NotRequired[PathType | bytes] - certificate_chain: t.NotRequired[PathType | bytes] - -else: - ClientCredentials = dict - - grpc, aio = import_grpc() - - -@attr.define -class ClientConfig: - http: HTTP = attr.field( - default=attr.Factory(lambda self: self.HTTP(), takes_self=True) - ) - grpc: GRPC = attr.field( - default=attr.Factory(lambda self: self.GRPC(), takes_self=True) - ) - - def with_grpc_options(self, **kwargs: t.Any) -> ClientConfig: - _self_grpc_config = kwargs.pop("_self_grpc_config", None) - if not isinstance(_self_grpc_config, self.GRPC): - _self_grpc_config = ClientConfig.GRPC.from_options(**kwargs) - return attr.evolve(self, **{"grpc": _self_grpc_config}) - - def with_http_options(self, **kwargs: t.Any) -> ClientConfig: - _self_http_config = kwargs.pop("_self_http_config", None) - if not isinstance(_self_http_config, self.HTTP): - _self_http_config = ClientConfig.HTTP.from_options(**kwargs) - return attr.evolve(self, **{"http": _self_http_config}) - - @classmethod - def from_options(cls, **kwargs: t.Any) -> ClientConfig: - return bentoml_cattr.structure(kwargs, cls) - - @staticmethod - def from_grpc_options(**kwargs: t.Any) -> GRPC: - return ClientConfig.GRPC.from_options(**kwargs) - - @staticmethod - def from_http_options(**kwargs: t.Any) -> HTTP: - return ClientConfig.HTTP.from_options(**kwargs) - - def 
unstructure( - self, target: t.Literal["http", "grpc", "default"] = "default" - ) -> dict[str, t.Any]: - if target == "default": - targ = self - elif target == "http": - targ = self.http - elif target == "grpc": - targ = self.grpc - else: - raise ValueError( - f"Invalid target: {target}. Accepted value are 'http', 'grpc', 'default'." - ) - return bentoml_cattr.unstructure(targ) - - @attr.define - class HTTP: - """HTTP ClientConfig. - - .. TODO:: Add HTTP specific options here. - - """ - - # forbid additional keys to prevent typos. - __forbid_extra_keys__ = True - # Don't omit empty field. - __omit_if_default__ = False - - @classmethod - def from_options(cls, **kwargs: t.Any) -> ClientConfig.HTTP: - return bentoml_cattr.structure(kwargs, cls) - - def unstructure(self) -> dict[str, t.Any]: - return ( - ClientConfig() - .with_http_options( - _self_http_config=self, - ) - .unstructure(target="http") - ) - - @attr.define - class GRPC: - """gRPC ClientConfig. - - .. code-block:: python - - from bentoml.client import ClientConfig - from bentoml.client import Client - - config = ClientConfig.from_grpc_options( - ssl=True, - ssl_client_credentials={ - "root_certificates": "path/to/cert.pem", - "private_key": "/path/to/key", - }, - protocol_version="v1alpha1", - ) - client = Client.from_url("localhost:50051", config) - - """ - - # forbid additional keys to prevent typos. - __forbid_extra_keys__ = True - # Don't omit empty field. 
- __omit_if_default__ = False - - ssl: bool = attr.field(default=False) - channel_options: t.Optional[aio.ChannelArgumentType] = attr.field(default=None) - compression: t.Optional[grpc.Compression] = attr.field(default=None) - ssl_client_credentials: t.Optional[ClientCredentials] = attr.field( - factory=lambda: ClientCredentials() - ) - protocol_version: str = attr.field(default=LATEST_PROTOCOL_VERSION) - interceptors: t.Optional[t.Sequence[aio.ClientInterceptor]] = attr.field( - default=None - ) - - @classmethod - def from_options(cls, **kwargs: t.Any) -> ClientConfig.GRPC: - return bentoml_cattr.structure(kwargs, cls) - - def unstructure(self) -> dict[str, t.Any]: - return ( - ClientConfig() - .with_grpc_options( - _self_grpc_config=self, - ) - .unstructure(target="grpc") - ) - - -if TYPE_CHECKING: - ClientConfigT = ClientConfig | ClientConfig.HTTP | ClientConfig.GRPC - - -_sentinel_svc = Service("sentinel_svc") - - -class Client(ABC): - server_url: str - _svc: Service - - def __init__(self, svc: Service | None, server_url: str): - self._svc = svc or _sentinel_svc - self.server_url = server_url - - if svc is not None and len(svc.apis) == 0: - raise BentoMLException("No APIs were found when constructing client.") - - # Register service method if given service is not _sentinel_svc - # We only set _sentinel_svc if given protocol is older than v1 (for gRPC client.) 
- if self._svc is not _sentinel_svc: - for name, api in self._svc.apis.items(): - if not hasattr(self, name): - setattr( - self, name, functools.partial(self._sync_call, _bentoml_api=api) - ) - - if not hasattr(self, f"async_{name}"): - setattr( - self, - f"async_{name}", - functools.partial(self._call, _bentoml_api=api), - ) - - def call(self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any) -> t.Any: - return self._loop.run_until_complete( - self.async_call(bentoml_api_name, inp, **kwargs) - ) - - async def async_call( - self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any - ) -> t.Any: - return await self._call( - inp, _bentoml_api=self._svc.apis[bentoml_api_name], **kwargs - ) - - @t.overload - @staticmethod - def from_url( - server_url: str, - config: ClientConfigT | None = ..., - *, - grpc: t.Literal[False] = ..., - ) -> HTTPClient: - ... - - @t.overload - @staticmethod - def from_url( - server_url: str, - config: ClientConfigT | None = ..., - *, - grpc: t.Literal[True] = ..., - ) -> GrpcClient: - ... - - @staticmethod - def from_url( - server_url: str, config: ClientConfigT | None = None, *, grpc: bool = False - ) -> Client: - server_url = server_url if "://" in server_url else "http://" + server_url - if grpc: - from ._grpc import GrpcClient - - client_type = "grpc" - klass = GrpcClient - else: - from ._http import HTTPClient - - client_type = "http" - klass = HTTPClient - - if config is None: - config = ClientConfig() - - # First, if config is a ClientConfig that contains both HTTP and gRPC fields, then we use - # grpc_client boolean to determine which configset to use. 
- # If config is either ClientConfig.HTTP or ClientConfig.GRPC, then we use unstructure for kwargs - kwargs = config.unstructure() - - if isinstance(config, ClientConfig): - # by default we will set the config to HTTP (backward compatibility) - kwargs = config.unstructure(target=client_type) - - try: - return getattr(klass, "_create_client")(urlparse(server_url), **kwargs) - except Exception as e: # pylint: disable=broad-except - raise BentoMLException( - f"Failed to create a BentoML client from given URL '{server_url}': {e} ({e.__class__.__name__})" - ) from e - - @cached_property - def _loop(self) -> asyncio.AbstractEventLoop: - return asyncio.get_event_loop() - - def _sync_call( - self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any - ): - return self._loop.run_until_complete( - self._call(inp, _bentoml_api=_bentoml_api, **kwargs) - ) - - @abstractmethod - async def _call( - self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any - ) -> t.Any: - raise NotImplementedError - - @staticmethod - @abstractmethod - def _create_client(parsed: ParseResult, **kwargs: t.Any) -> Client: - raise NotImplementedError diff --git a/src/bentoml/client/_grpc.py b/src/bentoml/client/_grpc.py deleted file mode 100644 index 6b989ad226d..00000000000 --- a/src/bentoml/client/_grpc.py +++ /dev/null @@ -1,666 +0,0 @@ -from __future__ import annotations - -import typing as t -import asyncio -import inspect -import logging -import functools -import contextlib -from enum import Enum -from typing import TYPE_CHECKING - -from packaging.version import parse - -from . import Client -from . import ClientCredentials -from .. import io -from .. 
import Service -from ..exceptions import BentoMLException -from ..grpc.utils import import_grpc -from ..grpc.utils import parse_method_name -from ..grpc.utils import import_generated_stubs -from ..grpc.utils import LATEST_PROTOCOL_VERSION -from .._internal.utils import LazyLoader -from .._internal.utils import cached_property -from .._internal.server.grpc_app import load_from_file -from .._internal.service.inference_api import InferenceAPI - -logger = logging.getLogger(__name__) - -PROTOBUF_EXC_MESSAGE = "'protobuf' is required to use gRPC Client. Install with 'pip install bentoml[grpc]'." -REFLECTION_EXC_MESSAGE = "'grpcio-reflection' is required to use gRPC Client. Install with 'pip install bentoml[grpc-reflection]'." - -if TYPE_CHECKING: - from types import TracebackType - from urllib.parse import ParseResult - - import grpc - from grpc import aio - from google.protobuf import message as _message - from google.protobuf import json_format as _json_format - from google.protobuf import descriptor_pb2 as pb_descriptor - from google.protobuf import descriptor_pool as _descriptor_pool - from google.protobuf import symbol_database as _symbol_database - from grpc_reflection.v1alpha import reflection_pb2 as pb_reflection - from grpc_reflection.v1alpha import reflection_pb2_grpc as services_reflection - - # type hint specific imports. 
- from google.protobuf.descriptor import MethodDescriptor - from google.protobuf.descriptor import ServiceDescriptor - from google.protobuf.descriptor_pb2 import FileDescriptorProto - from google.protobuf.descriptor_pb2 import MethodDescriptorProto - from google.protobuf.descriptor_pool import DescriptorPool - from google.protobuf.symbol_database import SymbolDatabase - from grpc_reflection.v1alpha.reflection_pb2 import ServiceResponse - from grpc_reflection.v1alpha.reflection_pb2_grpc import ServerReflectionStub - - from ..grpc.types import MultiCallable - - from ..grpc.v1.service_pb2 import ServiceMetadataResponse -else: - pb_descriptor = LazyLoader( - "pb_descriptor", - globals(), - "google.protobuf.descriptor_pb2", - exc_msg=PROTOBUF_EXC_MESSAGE, - ) - _descriptor_pool = LazyLoader( - "_descriptor_pool", - globals(), - "google.protobuf.descriptor_pool", - exc_msg=PROTOBUF_EXC_MESSAGE, - ) - _symbol_database = LazyLoader( - "_symbol_database", - globals(), - "google.protobuf.symbol_database", - exc_msg=PROTOBUF_EXC_MESSAGE, - ) - _json_format = LazyLoader( - "_json_format", - globals(), - "google.protobuf.json_format", - exc_msg=PROTOBUF_EXC_MESSAGE, - ) - services_reflection = LazyLoader( - "services_reflection", - globals(), - "grpc_reflection.v1alpha.reflection_pb2_grpc", - exc_msg=REFLECTION_EXC_MESSAGE, - ) - pb_reflection = LazyLoader( - "pb_reflection", - globals(), - "grpc_reflection.v1alpha.reflection_pb2", - exc_msg=REFLECTION_EXC_MESSAGE, - ) - grpc, aio = import_grpc() - -_object_setattr = object.__setattr__ - -if TYPE_CHECKING: - - class RpcMethod(t.TypedDict): - request_streaming: t.Literal[True, False] - response_streaming: bool - input_type: type[t.Any] - output_type: t.NotRequired[type[t.Any]] - handler: MultiCallable - -else: - RpcMethod = dict - -# TODO: xDS support -class GrpcClient(Client): - def __init__( - self, - server_url: str, - svc: Service | None = None, - # gRPC specific options - ssl: bool = False, - channel_options: 
aio.ChannelArgumentType | None = None, - interceptors: t.Sequence[aio.ClientInterceptor] | None = None, - compression: grpc.Compression | None = None, - ssl_client_credentials: ClientCredentials | None = None, - *, - protocol_version: str = LATEST_PROTOCOL_VERSION, - ): - super().__init__(svc, server_url) - - # Call requires an api_name, therefore we need a reserved keyset of self._svc.apis - self._rev_apis = {v: k for k, v in self._svc.apis.items()} - - self._protocol_version = protocol_version - self._compression = compression - self._options = channel_options - self._interceptors = interceptors - self._channel = None - self._credentials = None - if ssl: - assert ( - ssl_client_credentials is not None - ), "'ssl=True' requires 'credentials'" - self._credentials = grpc.ssl_channel_credentials( - **{ - k: load_from_file(v) if isinstance(v, str) else v - for k, v in ssl_client_credentials.items() - } - ) - - self._descriptor_pool: DescriptorPool = _descriptor_pool.Default() - self._symbol_database: SymbolDatabase = _symbol_database.Default() - - self._available_services: tuple[str, ...] = tuple() - # cached of all available rpc for a given service. 
- self._service_cache: dict[str, dict[str, RpcMethod]] = {} - # Sets of FileDescriptorProto name to be registered - self._registered_file_name: set[str] = set() - self._reflection_stub: ServerReflectionStub | None = None - - @cached_property - def channel(self): - if not self._channel: - if self._credentials is not None: - self._channel = aio.secure_channel( - self.server_url, - credentials=self._credentials, - options=self._options, - compression=self._compression, - interceptors=self._interceptors, - ) - self._channel = aio.insecure_channel( - self.server_url, - options=self._options, - compression=self._compression, - interceptors=self._interceptors, - ) - return self._channel - - @staticmethod - def make_rpc_method(service_name: str, method: str): - return f"/{service_name}/{method}" - - @property - def _call_rpc_method(self): - return self.make_rpc_method( - f"bentoml.grpc.{self._protocol_version}.BentoService", "Call" - ) - - @cached_property - def _reserved_kw_mapping(self): - return { - "default": f"bentoml.grpc.{self._protocol_version}.BentoService", - "health": "grpc.health.v1.Health", - "reflection": "grpc.reflection.v1alpha.ServerReflection", - } - - async def _exit(self): - try: - if self._channel: - if self._channel.get_state() == grpc.ChannelConnectivity.IDLE: - await self._channel.close() - except AttributeError as e: - logger.error(f"Error closing channel: %s", e, exc_info=e) - raise - - def __enter__(self): - return self.service().__enter__() - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - traceback: TracebackType | None, - ) -> bool | None: - try: - if exc_type is not None: - self.service().__exit__(exc_type, exc, traceback) - self._loop.run_until_complete(self._exit()) - except Exception as err: # pylint: disable=broad-except - logger.error(f"Exception occurred: %s (%s)", err, exc_type, exc_info=err) - return False - - @contextlib.contextmanager - def service(self, service_name: str = "default"): 
- stack = contextlib.AsyncExitStack() - - async def close(): - await stack.aclose() - - async def enter(): - res = await stack.enter_async_context( - self.aservice(service_name, _wrap_in_sync=True) - ) - return res - - try: - yield self._loop.run_until_complete(enter()) - finally: - self._loop.run_until_complete(close()) - - async def __aenter__(self): - return await self.aservice().__aenter__() - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - traceback: TracebackType | None, - ) -> bool | None: - try: - if exc_type is not None: - await self.aservice().__aexit__(exc_type, exc, traceback) - await self._exit() - except Exception as err: # pylint: disable=broad-except - logger.error(f"Exception occurred: %s (%s)", err, exc_type, exc_info=err) - return False - - @contextlib.asynccontextmanager - async def aservice( - self, service_name: str = "default", *, _wrap_in_sync: bool = False - ) -> t.AsyncGenerator[t.Self, None]: - # This is the entrypoint for user to instantiate a client for a given service. - - # default is a special case for BentoService proto. - if service_name in self._reserved_kw_mapping: - service_name = self._reserved_kw_mapping[service_name] - - if not self._available_services: - resp = await self._do_one_request( - pb_reflection.ServerReflectionRequest(list_services="") - ) - if resp is not None: - services: list[ServiceResponse] = resp.list_services_response.service - self._available_services = tuple( - [t.cast(str, s.name) for s in services] - ) - - if ( - service_name in self._available_services - and service_name not in self._service_cache - ): - await self._register_service(service_name) - - if self.channel.get_state() != grpc.ChannelConnectivity.READY: - # create a blocking call to wait til channel is ready. - await self.channel.channel_ready() - - try: - method_meta = self._service_cache[service_name] - except KeyError: - raise ValueError( - f"Failed to find service '{service_name}'. 
Available: {list(self._service_cache.keys())}" - ) from None - - def _register(method: str): - finaliser = f = functools.partial( - self._invoke, - self.make_rpc_method(service_name, method), - _serialize_input=True, - ) - if _wrap_in_sync: - # We will have to run the async function in a sync wrapper - @functools.wraps(f) - def wrapper(*args: t.Any, **kwargs: t.Any): - coro = f(*args, **kwargs) - task = asyncio.ensure_future(coro, loop=self._loop) - try: - res = self._loop.run_until_complete(task) - if inspect.isasyncgen(res): - # If this is an async generator, then we need to yield again - async def call(): - return await res.__anext__() - - return self._loop.run_until_complete(call()) - return res - except BaseException: - # Consume all exceptions. - if task.done() and not task.cancelled(): - task.exception() - raise - - finaliser = wrapper - _object_setattr(self, method, finaliser) - - # Register all RPC method. - for method in reversed(method_meta): - _register(method) - - yield self - - async def _register_service(self, service_name: str) -> None: - svc_descriptor: ServiceDescriptor | None = None - try: - svc_descriptor = self._descriptor_pool.FindServiceByName(service_name) - except KeyError: - file_descriptor = await self._find_descriptor_by_symbol(service_name) - await self._add_file_descriptor(file_descriptor) - # try to register from FileDescriptorProto again. - svc_descriptor = self._descriptor_pool.FindServiceByName(service_name) - except Exception as e: # pylint: disable=broad-except - logger.warning( - "Failed to register %s. 
This might have already been registered.", - service_name, - exc_info=e, - ) - raise - finally: - if svc_descriptor is not None: - self._service_cache[service_name] = self._register_methods( - svc_descriptor - ) - - def _register_methods( - self, service_descriptor: ServiceDescriptor - ) -> dict[str, RpcMethod]: - service_descriptor_proto = pb_descriptor.ServiceDescriptorProto() - service_descriptor.CopyToProto(service_descriptor_proto) - full_name = service_descriptor.full_name - metadata: dict[str, RpcMethod] = {} - for method_proto in service_descriptor_proto.method: - method_name = method_proto.name - method_descriptor: MethodDescriptor = service_descriptor.FindMethodByName( - method_name - ) - input_type = self._symbol_database.GetPrototype( - method_descriptor.input_type - ) - output_type = self._symbol_database.GetPrototype( - method_descriptor.output_type - ) - metadata[method_name] = RpcMethod( - request_streaming=method_proto.client_streaming, - response_streaming=method_proto.server_streaming, - input_type=input_type, - output_type=output_type, - handler=getattr( - self.channel, - _RpcType.from_method_descriptor(method_proto), - )( - method=f"/{full_name}/{method_name}", - request_serializer=input_type.SerializeToString, - response_deserializer=output_type.FromString, - ), - ) - return metadata - - async def _add_file_descriptor(self, file_descriptor: FileDescriptorProto): - dependencies = file_descriptor.dependency - for deps in dependencies: - if deps not in self._registered_file_name: - d_descriptor = await self._find_descriptor_by_filename(deps) - await self._add_file_descriptor(d_descriptor) - self._registered_file_name.add(deps) - self._descriptor_pool.Add(file_descriptor) - - async def _find_descriptor_by_symbol(self, symbol: str): - req = pb_reflection.ServerReflectionRequest(file_containing_symbol=symbol) - res = await self._do_one_request(req) - assert res is not None - fdp: list[bytes] = res.file_descriptor_response.file_descriptor_proto - 
return pb_descriptor.FileDescriptorProto.FromString(fdp[0]) - - async def _find_descriptor_by_filename(self, name: str): - req = pb_reflection.ServerReflectionRequest(file_by_filename=name) - res = await self._do_one_request(req) - assert res is not None - fdp: list[bytes] = res.file_descriptor_response.file_descriptor_proto - return pb_descriptor.FileDescriptorProto.FromString(fdp[0]) - - def _reflection_request( - self, *reqs: pb_reflection.ServerReflectionRequest - ) -> t.AsyncIterator[pb_reflection.ServerReflectionResponse]: - if self._reflection_stub is None: - # ServerReflectionInfo is a stream RPC, hence the generator. - self._reflection_stub = services_reflection.ServerReflectionStub( - self.channel - ) - return t.cast( - t.AsyncIterator[pb_reflection.ServerReflectionResponse], - self._reflection_stub.ServerReflectionInfo((r for r in reqs)), - ) - - async def _do_one_request( - self, req: pb_reflection.ServerReflectionRequest - ) -> pb_reflection.ServerReflectionResponse | None: - try: - async for r in self._reflection_request(req): - return r - except aio.AioRpcError as err: - code = err.code() - if code == grpc.StatusCode.UNIMPLEMENTED: - raise BentoMLException( - f"[{code}] Couldn't locate servicer method. The running server might not have reflection enabled. Make sure to pass '--enable-reflection'" - ) - raise BentoMLException( - f"Caught AioRpcError while handling reflection request: {err}" - ) from None - - async def _invoke( - self, - method_name: str, - _serialize_input: bool = False, - **attrs: t.Any, - ): - mn, _ = parse_method_name(method_name) - if mn.fully_qualified_service not in self._available_services: - raise ValueError( - f"{mn.service} is not available in server. 
Registered services: {self._available_services}" - ) - # channel kwargs include timeout, metadata, credentials, wait_for_ready and compression - # to pass it in kwargs add prefix _channel_ - channel_kwargs = { - k: attrs.pop(f"_channel_{k}", None) - for k in { - "timeout", - "metadata", - "credentials", - "wait_for_ready", - "compression", - } - } - - mn, is_valid = parse_method_name(method_name) - if not is_valid: - raise ValueError( - f"{method_name} is not a valid method name. Make sure to follow the format '/package.ServiceName/MethodName'" - ) - try: - rpc_method = self._service_cache[mn.fully_qualified_service][mn.method] - except KeyError: - raise BentoMLException( - f"Method '{method_name}' is not registered in current service client." - ) from None - - handler_type = _RpcType.from_streaming_type( - rpc_method["request_streaming"], rpc_method["response_streaming"] - ) - - if _serialize_input: - parsed = handler_type.request_serializer(rpc_method["input_type"], **attrs) - else: - parsed = rpc_method["input_type"](**attrs) - if handler_type.is_unary_response(): - result = await t.cast( - t.Awaitable[t.Any], - rpc_method["handler"](parsed, **channel_kwargs), - ) - return result - # streaming response - return handler_type.response_deserializer( - rpc_method["handler"](parsed, **channel_kwargs) - ) - - def _sync_call( - self, - inp: t.Any = None, - *, - _bentoml_api: InferenceAPI, - **kwargs: t.Any, - ): - with self: - return self._loop.run_until_complete( - self._call(inp, _bentoml_api=_bentoml_api, **kwargs) - ) - - async def _call( - self, - inp: t.Any = None, - *, - _bentoml_api: InferenceAPI, - **attrs: t.Any, - ) -> t.Any: - async with self: - fn = functools.partial( - self._invoke, - **{ - f"_channel_{k}": attrs.pop(f"_channel_{k}", None) - for k in { - "timeout", - "metadata", - "credentials", - "wait_for_ready", - "compression", - } - }, - ) - - if _bentoml_api.multi_input: - if inp is not None: - raise BentoMLException( - f"'{_bentoml_api.name}' takes 
multiple inputs; all inputs must be passed as keyword arguments." - ) - serialized_req = await _bentoml_api.input.to_proto(attrs) - else: - serialized_req = await _bentoml_api.input.to_proto(inp) - - # A call includes api_name and given proto_fields - return await fn( - self._call_rpc_method, - **{ - "api_name": self._rev_apis[_bentoml_api], - _bentoml_api.input._proto_fields[0]: serialized_req, - }, - ) - - @staticmethod - def _create_client(parsed: ParseResult, **kwargs: t.Any) -> GrpcClient: - server_url = parsed.netloc - protocol_version = kwargs.get("protocol_version", LATEST_PROTOCOL_VERSION) - - # Since v1, we introduce a ServiceMetadata rpc to retrieve bentoml.Service metadata. - # This means if user are using client for protocol version v1alpha1, - # then `client.predict` or `client.classify` won't be available. - # client.Call will still persist for both protocol version. - dummy_service: Service | None = None - if parse(protocol_version) < parse("v1"): - logger.warning( - "Using protocol version %s older than v1. This means the client won't have service API functions as attributes. To invoke the RPC endpoint, use 'client.Call()'.", - protocol_version, - ) - else: - pb, _ = import_generated_stubs(protocol_version) - - # create an insecure channel to invoke ServiceMetadata rpc - with grpc.insecure_channel(server_url) as channel: - # gRPC sync stub is WIP. 
- ServiceMetadata = channel.unary_unary( - f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - response_deserializer=pb.ServiceMetadataResponse.FromString, - ) - metadata = t.cast( - "ServiceMetadataResponse", - ServiceMetadata(pb.ServiceMetadataRequest()), - ) - dummy_service = Service(metadata.name) - - for api in metadata.apis: - dummy_service.apis[api.name] = InferenceAPI( - None, - io.from_spec( - { - "id": api.input.descriptor_id, - "args": _json_format.MessageToDict(api.input.attributes)[ - "args" - ], - } - ), - io.from_spec( - { - "id": api.output.descriptor_id, - "args": _json_format.MessageToDict(api.output.attributes)[ - "args" - ], - } - ), - name=api.name, - doc=api.docs, - ) - - return GrpcClient(server_url, dummy_service, **kwargs) - - def __del__(self): - if self._channel: - try: - del self._channel - except Exception: # pylint: disable=broad-except - pass - - -class _RpcType(Enum): - UNARY_UNARY = 1 - UNARY_STREAM = 2 - STREAM_UNARY = 3 - STREAM_STREAM = 4 - - def is_unary_request(self) -> bool: - return self.name.lower().startswith("unary_") - - def is_unary_response(self) -> bool: - return self.name.lower().endswith("_unary") - - @classmethod - def from_method_descriptor(cls, method_descriptor: MethodDescriptorProto) -> str: - rpcs = cls.from_streaming_type( - method_descriptor.client_streaming, method_descriptor.server_streaming - ) - return rpcs.name.lower() - - @classmethod - def from_streaming_type( - cls, client_streaming: bool, server_streaming: bool - ) -> t.Self: - if not client_streaming and not server_streaming: - return cls.UNARY_UNARY - elif client_streaming and not server_streaming: - return cls.STREAM_UNARY - elif not client_streaming and server_streaming: - return cls.UNARY_STREAM - else: - return cls.STREAM_STREAM - - @property - def request_serializer(self) -> t.Callable[..., t.Any]: - def _(input_type: type[t.Any], **request_data: t.Any): - data = 
request_data or {} - return _json_format.ParseDict(data, input_type()) - - def _it(input_type: type[t.Any], request_data: t.Iterable[t.Any]): - for data in request_data: - yield _(input_type, **data) - - return _ if self.is_unary_request() else _it - - @property - def response_deserializer(self) -> t.Callable[..., t.Any]: - async def _(response: _message.Message): - return _json_format.MessageToDict( - response, preserving_proto_field_name=True - ) - - async def _it(response: t.AsyncIterator[_message.Message]): - async for r in response: - yield await _(r) - - return _ if self.is_unary_response() else _it diff --git a/src/bentoml/grpc/types.py b/src/bentoml/grpc/types.py index f4567e81871..942aa44db0f 100644 --- a/src/bentoml/grpc/types.py +++ b/src/bentoml/grpc/types.py @@ -12,12 +12,6 @@ import grpc from grpc import aio - from grpc.aio._typing import SerializingFunction - from grpc.aio._typing import DeserializingFunction - from grpc.aio._base_channel import UnaryUnaryMultiCallable - from grpc.aio._base_channel import StreamUnaryMultiCallable - from grpc.aio._base_channel import UnaryStreamMultiCallable - from grpc.aio._base_channel import StreamStreamMultiCallable from bentoml.grpc.v1.service_pb2 import Request from bentoml.grpc.v1.service_pb2 import Response @@ -100,13 +94,6 @@ class HandlerCallDetails( t.Callable[[], aio.ServerInterceptor] | partial[aio.ServerInterceptor] ] - MultiCallable = ( - UnaryUnaryMultiCallable - | UnaryStreamMultiCallable - | StreamUnaryMultiCallable - | StreamStreamMultiCallable - ) - __all__ = [ "Request", "Response", diff --git a/src/bentoml/grpc/v1/service.proto b/src/bentoml/grpc/v1/service.proto index f4919adf540..c0d937b3365 100644 --- a/src/bentoml/grpc/v1/service.proto +++ b/src/bentoml/grpc/v1/service.proto @@ -43,9 +43,9 @@ message ServiceMetadataResponse { // name is the name of the API. string name = 1; // input is the input descriptor of the API. 
- DescriptorMetadata input = 2; + optional DescriptorMetadata input = 2; // output is the output descriptor of the API. - DescriptorMetadata output = 3; + optional DescriptorMetadata output = 3; // docs is the optional documentation of the API. optional string docs = 4; } From f58ea871c2c6f7737ab3e1a8329bc930f584abe3 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 00:31:25 -0800 Subject: [PATCH 03/19] chore: address upstream change Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 5 ++- src/bentoml/_internal/client/grpc.py | 15 ++++----- src/bentoml/_internal/client/http.py | 42 ++++++++++++++++-------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index 0be0473485d..aa77424bfe4 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -57,6 +57,7 @@ async def async_call( @t.overload @classmethod + @abstractmethod def from_url( cls, server_url: str, *, kind: None | t.Literal["auto"] = ... ) -> GrpcClient | HTTPClient: @@ -64,15 +65,18 @@ def from_url( @t.overload @classmethod + @abstractmethod def from_url(cls, server_url: str, *, kind: t.Literal["http"] = ...) -> HTTPClient: ... @t.overload @classmethod + @abstractmethod def from_url(cls, server_url: str, *, kind: t.Literal["grpc"] = ...) -> GrpcClient: ... 
@classmethod + @abstractmethod def from_url( cls, server_url: str, *, kind: str | None = None, **kwargs: t.Any ) -> Client: @@ -98,7 +102,6 @@ def from_url( elif kind == "http": from .http import HTTPClient - server_url = server_url if "://" in server_url else "http://" + server_url return HTTPClient.from_url(server_url, **kwargs) elif kind == "grpc": from .grpc import GrpcClient diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 1710a16c7db..e1ab4ba76ef 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -65,8 +65,6 @@ def __init__( *, protocol_version: str = LATEST_PROTOCOL_VERSION, ): - super().__init__(svc, server_url) - self._pb, self._services = import_generated_stubs(protocol_version) self._protocol_version = protocol_version @@ -85,6 +83,7 @@ def __init__( for k, v in ssl_client_credentials.items() } ) + super().__init__(svc, server_url) @cached_property def channel(self): @@ -268,15 +267,13 @@ def run(): # create an insecure channel to invoke ServiceMetadata rpc with channel: - # gRPC sync stub is WIP. 
- ServiceMetadata = channel.unary_unary( - f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - response_deserializer=pb.ServiceMetadataResponse.FromString, - ) metadata = t.cast( "ServiceMetadataResponse", - ServiceMetadata(pb.ServiceMetadataRequest()), + channel.unary_unary( + f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", + request_serializer=pb.ServiceMetadataRequest.SerializeToString, + response_deserializer=pb.ServiceMetadataResponse.FromString, + )(pb.ServiceMetadataRequest()), ) dummy_service = Service(metadata.name) diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index 0e0f160f1fc..49f27188f64 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -4,6 +4,7 @@ import typing as t import logging from http.client import HTTPConnection +from urllib.parse import urlparse import aiohttp import starlette.requests @@ -16,12 +17,17 @@ from ..configuration import get_debug_mode from ..service.inference_api import InferenceAPI +logger = logging.getLogger(__name__) + class HTTPClient(Client): @classmethod def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: - # TODO: HTTP SSL support - conn = HTTPConnection(server_url) + server_url = server_url if "://" in server_url else "http://" + server_url + url_parts = urlparse(server_url) + + # TODO: SSL and grpc support + conn = HTTPConnection(url_parts.netloc) conn.set_debuglevel(logging.DEBUG if get_debug_mode() else 0) conn.request("GET", "/docs.json") resp = conn.getresponse() @@ -46,18 +52,26 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: raise BentoMLException( f"Malformed BentoML spec received from BentoML server {server_url}" ) - dummy_service.apis[meth_spec["x-bentoml-name"]] = InferenceAPI( - None, - io.from_spec( - meth_spec["requestBody"]["x-bentoml-io-descriptor"] - ), - io.from_spec( - 
meth_spec["responses"]["200"]["x-bentoml-io-descriptor"] - ), - name=meth_spec["x-bentoml-name"], - doc=meth_spec["description"], - route=route.lstrip("/"), - ) + try: + api = InferenceAPI( + None, + io.from_spec( + meth_spec["requestBody"]["x-bentoml-io-descriptor"] + ), + io.from_spec( + meth_spec["responses"]["200"]["x-bentoml-io-descriptor"] + ), + name=meth_spec["x-bentoml-name"], + doc=meth_spec["description"], + route=route.lstrip("/"), + ) + dummy_service.apis[meth_spec["x-bentoml-name"]] = api + except BentoMLException as e: + logger.error( + "Failed to instantiate client for API %s: ", + meth_spec["x-bentoml-name"], + e, + ) return cls(dummy_service, server_url) From 1c524d584ed88d249abeeb7065eebe663c7c6e7f Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 00:36:16 -0800 Subject: [PATCH 04/19] chore: unrestrict grpcio deps Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/testing/server.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bentoml/testing/server.py b/src/bentoml/testing/server.py index dc6cf3be4e8..6d2b1d03321 100644 --- a/src/bentoml/testing/server.py +++ b/src/bentoml/testing/server.py @@ -219,8 +219,6 @@ def run_bento_server_container( cmd.append(image_tag) serve_cmd = "serve-grpc" if use_grpc else "serve-http" cmd.extend([serve_cmd, "--production"]) - if use_grpc: - cmd.extend(["--enable-reflection"]) print(f"Running API server in container: '{' '.join(cmd)}'") with subprocess.Popen( cmd, @@ -275,7 +273,7 @@ def run_bento_server_standalone( f"{server_port}", ] if use_grpc: - cmd += ["--host", f"{host}", "--enable-reflection"] + cmd += ["--host", f"{host}"] cmd += [bento] print(f"Running command: '{' '.join(cmd)}'") p = subprocess.Popen( @@ -395,8 +393,6 @@ def run_bento_server_distributed( path, *itertools.chain.from_iterable(runner_args), ] - if use_grpc: - cmd.extend(["--enable-reflection"]) with 
reserve_free_port(host=host, enable_so_reuseport=use_grpc) as server_port: cmd.extend(["--port", f"{server_port}"]) print(f"Running command: '{' '.join(cmd)}'") From 5e3bdc373a082bd9c21155fcf2d643c2d5a0a0b2 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 00:37:29 -0800 Subject: [PATCH 05/19] fix: tests with new api Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- tests/e2e/bento_server_grpc/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/bento_server_grpc/tests/test_metrics.py b/tests/e2e/bento_server_grpc/tests/test_metrics.py index be6496ccef6..393be9ad0f1 100644 --- a/tests/e2e/bento_server_grpc/tests/test_metrics.py +++ b/tests/e2e/bento_server_grpc/tests/test_metrics.py @@ -16,7 +16,7 @@ @pytest.mark.asyncio async def test_metrics_available(host: str): - client = Client.from_url(host, grpc=True) + client = Client.from_url(host) resp = await client.async_predict_multi_images( original=np.random.randint(255, size=(10, 10, 3)).astype("uint8"), compared=np.random.randint(255, size=(10, 10, 3)).astype("uint8"), From 58c8c9b6bfd40b5221d1d149967959668db74420 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 01:37:53 -0800 Subject: [PATCH 06/19] fix: make sure to call _sync_call in sync api Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 4 +- src/bentoml/_internal/client/grpc.py | 93 +++++++++---------- src/bentoml/_internal/client/http.py | 2 + .../bento_server_grpc/tests/test_metrics.py | 2 + 4 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index aa77424bfe4..e41d518810b 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -46,7 +46,9 @@ def __init__(self, svc: 
Service, server_url: str): ) def call(self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any) -> t.Any: - return asyncio.run(self.async_call(bentoml_api_name, inp, **kwargs)) + return self._sync_call( + inp, _bentoml_api=self._svc.apis[bentoml_api_name], **kwargs + ) async def async_call( self, bentoml_api_name: str, inp: t.Any = None, **kwargs: t.Any diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index e1ab4ba76ef..ba9b99ac9f0 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -31,6 +31,7 @@ from google.protobuf import json_format as _json_format from ..types import PathType + from ...grpc.v1.service_pb2 import Response from ...grpc.v1.service_pb2 import ServiceMetadataResponse class ClientCredentials(t.TypedDict): @@ -39,16 +40,13 @@ class ClientCredentials(t.TypedDict): certificate_chain: t.NotRequired[PathType | bytes] else: - ClientCredentials = dict + grpc, aio = import_grpc() _json_format = LazyLoader( "_json_format", globals(), "google.protobuf.json_format", exc_msg=PROTOBUF_EXC_MESSAGE, ) - grpc, aio = import_grpc() - -_INDENTATION = " " * 4 # TODO: xDS support class GrpcClient(Client): @@ -65,13 +63,12 @@ def __init__( *, protocol_version: str = LATEST_PROTOCOL_VERSION, ): - self._pb, self._services = import_generated_stubs(protocol_version) + self._pb, _ = import_generated_stubs(protocol_version) self._protocol_version = protocol_version self._compression = compression self._options = channel_options self._interceptors = interceptors - self._channel = None self._credentials = None if ssl: assert ( @@ -85,38 +82,29 @@ def __init__( ) super().__init__(svc, server_url) - @cached_property + @property def channel(self): - if not self._channel: - if self._credentials is not None: - self._channel = aio.secure_channel( - self.server_url, - credentials=self._credentials, - options=self._options, - compression=self._compression, - interceptors=self._interceptors, - ) - 
self._channel = aio.insecure_channel( + if self._credentials is not None: + return aio.secure_channel( + self.server_url, + credentials=self._credentials, + options=self._options, + compression=self._compression, + interceptors=self._interceptors, + ) + else: + return aio.insecure_channel( self.server_url, options=self._options, compression=self._compression, interceptors=self._interceptors, ) - return self._channel @cached_property - def _rpc_handler_mapping(self): + def _rpc_metadata(self): # Currently all RPCs in BentoService are unary-unary return { - method: { - "handler": self.channel.unary_unary( - method=method, - request_serializer=input_type.SerializeToString, - response_deserializer=output_type.FromString, - ), - "input_type": input_type, - "output_type": output_type, - } + method: {"input_type": input_type, "output_type": output_type} for method, input_type, output_type in ( ( f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", @@ -133,9 +121,9 @@ def _rpc_handler_mapping(self): async def _invoke(self, method_name: str, **attrs: t.Any): # channel kwargs include timeout, metadata, credentials, wait_for_ready and compression - # to pass it in kwargs add prefix _channel_ + # to pass it in kwargs add prefix _grpc_channel_ channel_kwargs = { - k: attrs.pop(f"_channel_{k}", None) + k: attrs.pop(f"_grpc_channel_{k}", None) for k in { "timeout", "metadata", @@ -144,17 +132,20 @@ async def _invoke(self, method_name: str, **attrs: t.Any): "compression", } } - if method_name not in self._rpc_handler_mapping: + if method_name not in self._rpc_metadata: raise ValueError( - f"'{method_name}' is a yet supported rpc. Current supported are: {list(self._rpc_handler_mapping.keys())}" + f"'{method_name}' is a yet supported rpc. 
Current supported are: {self._rpc_metadata}" ) - rpc_handler = self._rpc_handler_mapping[method_name] + metadata = self._rpc_metadata[method_name] + rpc = self.channel.unary_unary( + method_name, + request_serializer=metadata["input_type"].SerializeToString, + response_deserializer=metadata["output_type"].FromString, + ) return await t.cast( - t.Awaitable[t.Any], - rpc_handler["handler"]( - rpc_handler["input_type"](**attrs), **channel_kwargs - ), + "t.Awaitable[Response]", + rpc(metadata["input_type"](**attrs), **channel_kwargs), ) async def _call( @@ -164,14 +155,16 @@ async def _call( _bentoml_api: InferenceAPI, **attrs: t.Any, ) -> t.Any: - if self.channel.get_state() != grpc.ChannelConnectivity.READY: + state = self.channel.get_state(try_to_connect=True) + if state != grpc.ChannelConnectivity.READY: # create a blocking call to wait til channel is ready. await self.channel.channel_ready() fn = functools.partial( self._invoke, + method_name=f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", **{ - f"_channel_{k}": attrs.pop(f"_channel_{k}", None) + f"_grpc_channel_{k}": attrs.pop(f"_grpc_channel_{k}", None) for k in { "timeout", "metadata", @@ -192,11 +185,10 @@ async def _call( serialized_req = await _bentoml_api.input.to_proto(inp) # A call includes api_name and given proto_fields - _rev_apis = {v: k for k, v in self._svc.apis.items()} + api_fn = {v: k for k, v in self._svc.apis.items()} return await fn( - f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", **{ - "api_name": _rev_apis[_bentoml_api], + "api_name": api_fn[_bentoml_api], _bentoml_api.input._proto_fields[0]: serialized_req, }, ) @@ -266,15 +258,14 @@ def run(): ) # create an insecure channel to invoke ServiceMetadata rpc - with channel: - metadata = t.cast( - "ServiceMetadataResponse", - channel.unary_unary( - f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - 
response_deserializer=pb.ServiceMetadataResponse.FromString, - )(pb.ServiceMetadataRequest()), - ) + metadata = t.cast( + "ServiceMetadataResponse", + channel.unary_unary( + f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", + request_serializer=pb.ServiceMetadataRequest.SerializeToString, + response_deserializer=pb.ServiceMetadataResponse.FromString, + )(pb.ServiceMetadataRequest()), + ) dummy_service = Service(metadata.name) for api in metadata.apis: diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index 49f27188f64..db97bda4dc4 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -78,6 +78,8 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: async def _call( self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any ) -> t.Any: + # All gRPC kwargs should be poped out. + kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_grpc_")} api = _bentoml_api if api.multi_input: diff --git a/tests/e2e/bento_server_grpc/tests/test_metrics.py b/tests/e2e/bento_server_grpc/tests/test_metrics.py index 393be9ad0f1..caa8c40d4d2 100644 --- a/tests/e2e/bento_server_grpc/tests/test_metrics.py +++ b/tests/e2e/bento_server_grpc/tests/test_metrics.py @@ -22,3 +22,5 @@ async def test_metrics_available(host: str): compared=np.random.randint(255, size=(10, 10, 3)).astype("uint8"), ) assert isinstance(resp, pb.Response) + resp = await client.async_ensure_metrics_are_registered("input_data") + assert isinstance(resp, pb.Response) From 57b7d6ee184c9b7e752220a25365a7ee500e4db3 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 8 Dec 2022 21:29:50 -0800 Subject: [PATCH 07/19] chore: fix wait_until_server_ready for client implementation Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 10 +++++ src/bentoml/_internal/client/grpc.py | 48 
+++++++++++++++++++++++- src/bentoml/_internal/client/http.py | 33 ++++++++++++++++ src/bentoml/_internal/server/server.py | 11 +++--- 4 files changed, 94 insertions(+), 8 deletions(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index e41d518810b..38a9253052a 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -57,6 +57,16 @@ async def async_call( inp, _bentoml_api=self._svc.apis[bentoml_api_name], **kwargs ) + @abstractmethod + def wait_until_server_ready( + self, + *, + server_url: str | None = None, + timeout: int = 30, + **kwargs: t.Any, + ) -> None: + raise NotImplementedError + @t.overload @classmethod @abstractmethod diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index ba9b99ac9f0..94081baf20e 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -1,6 +1,8 @@ from __future__ import annotations +import time import typing as t +import asyncio import logging import functools from typing import TYPE_CHECKING @@ -28,7 +30,9 @@ import grpc from grpc import aio + from grpc_health.v1 import health_pb2 as pb_health from google.protobuf import json_format as _json_format + from google.protobuf.internal import python_message from ..types import PathType from ...grpc.v1.service_pb2 import Response @@ -41,6 +45,7 @@ class ClientCredentials(t.TypedDict): else: grpc, aio = import_grpc() + pb_health = LazyLoader("pb_health", globals(), "grpc_health.v1.health_pb2") _json_format = LazyLoader( "_json_format", globals(), @@ -80,6 +85,7 @@ def __init__( for k, v in ssl_client_credentials.items() } ) + self._call_rpc = f"/bentoml.grpc.{protocol_version}.BentoService/Call" super().__init__(svc, server_url) @property @@ -100,14 +106,40 @@ def channel(self): interceptors=self._interceptors, ) + def wait_until_server_ready( + self, + *, + server_url: str | None = None, + timeout: int = 30, + 
check_interval: float = 1, + **kwargs: t.Any, + ) -> None: + start_time = time.time() + while time.time() - start_time < timeout: + try: + res = asyncio.run( + self._health(service_name=self._call_rpc, timeout=timeout) + ) + if res.status == pb_health.HealthCheckResponse.SERVING: + break + else: + asyncio.run(asyncio.sleep(check_interval)) + except aio.AioRpcError as err: + logger.debug("[%s] Retrying to connect to the host %s", err, server_url) + asyncio.run(asyncio.sleep(check_interval)) + raise TimeoutError( + f"Timed out waiting {timeout} seconds for server at '{server_url}' to be ready." + ) + @cached_property - def _rpc_metadata(self): + def _rpc_metadata(self) -> dict[str, dict[str, t.Any]]: # Currently all RPCs in BentoService are unary-unary + # NOTE: we will set the types of the stubs to be Any. return { method: {"input_type": input_type, "output_type": output_type} for method, input_type, output_type in ( ( - f"/bentoml.grpc.{self._protocol_version}.BentoService/Call", + self._call_rpc, self._pb.Request, self._pb.Response, ), @@ -116,9 +148,21 @@ def _rpc_metadata(self): self._pb.ServiceMetadataRequest, self._pb.ServiceMetadataResponse, ), + ( + "/grpc.health.v1.Health/Check", + pb_health.HealthCheckRequest, + pb_health.HealthCheckResponse, + ), ) } + async def _health(self, service_name: str, *, timeout: int = 30) -> t.Any: + return await self._invoke( + method_name="/grpc.health.v1.Health/Check", + service=service_name, + _grpc_channel_timeout=timeout, + ) + async def _invoke(self, method_name: str, **attrs: t.Any): # channel kwargs include timeout, metadata, credentials, wait_for_ready and compression # to pass it in kwargs add prefix _grpc_channel_ diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index db97bda4dc4..e50682beb61 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -1,8 +1,12 @@ from __future__ import annotations import json +import time +import socket 
import typing as t import logging +import urllib.error +import urllib.request from http.client import HTTPConnection from urllib.parse import urlparse @@ -21,6 +25,35 @@ class HTTPClient(Client): + def wait_until_server_ready( + self, + *, + server_url: str | None = None, + timeout: int = 30, + check_interval: int = 1, + # set kwargs here to omit gRPC kwargs + **kwargs: t.Any, + ) -> None: + start_time = time.time() + if server_url is None: + server_url = self.server_url + + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) + logger.debug("Waiting for host %s to be ready.", server_url) + while time.time() - start_time < timeout: + try: + if opener.open(f"http://{server_url}/readyz", timeout=1).status == 200: + break + else: + time.sleep(check_interval) + except (ConnectionError, urllib.error.URLError, socket.timeout) as err: + logger.debug("[%s] Retrying to connect to the host %s", err, server_url) + time.sleep(check_interval) + raise TimeoutError( + f"Timed out waiting {timeout} seconds for server at '{server_url}' to be ready." 
+ ) + @classmethod def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: server_url = server_url if "://" in server_url else "http://" + server_url diff --git a/src/bentoml/_internal/server/server.py b/src/bentoml/_internal/server/server.py index e70b50b3548..6b6fa3a4904 100644 --- a/src/bentoml/_internal/server/server.py +++ b/src/bentoml/_internal/server/server.py @@ -24,12 +24,11 @@ def client(self): return self.get_client() def get_client(self): - from bentoml.client import Client + from ..client import Client - Client.wait_until_server_is_ready( - host=self.host, port=self.port, timeout=self.timeout - ) - return Client.from_url(f"http://localhost:{self.port}") + client = Client.from_url(f"http://{self.host}:{self.port}") + client.wait_until_server_ready(timeout=10) + return client def stop(self) -> None: self.process.terminate() @@ -43,7 +42,7 @@ def __enter__(self): def __exit__( self, - exc_type: type[BaseException], + exc_type: type[BaseException] | None, exc_value: BaseException, traceback_type: TracebackType, ): From 346a390385227e93253557555b83e522dd20a215 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Fri, 9 Dec 2022 17:29:37 -0800 Subject: [PATCH 08/19] chore: update exception message. --- src/bentoml/_internal/client/grpc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 94081baf20e..cfa9e138687 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -249,11 +249,11 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> GrpcClient: if parse(protocol_version) < parse("v1"): exception_message = [ f"Using protocol version {protocol_version} older than v1. 'bentoml.client.Client' will only support protocol version v1 onwards. 
To create client with protocol version '{protocol_version}', do the following:\n" - """\ + f"""\ from bentoml.grpc.utils import import_generated_stubs, import_grpc -pb, services = import_generated_stubs("v1alpha1") +pb, services = import_generated_stubs("{protocol_version}") grpc, _ = import_grpc() From a75da26731372752b808948e051e6866bc296ace Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Mon, 26 Dec 2022 22:30:56 -0800 Subject: [PATCH 09/19] chore: revert changes from upstream/main reverted changes: pyproject.toml Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 2 +- src/bentoml/_internal/client/http.py | 7 ++++++- src/bentoml/_internal/server/grpc/servicer/v1/__init__.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0fbc835f3ce..fe09cb5247a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,7 +183,7 @@ omit = [ show_missing = true precision = 2 omit = [ - "src/bentoml/__main__.py", + 'src/bentoml/__main__.py', "src/bentoml/io.py", "src/bentoml/serve.py", "src/bentoml/start.py", diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index e50682beb61..f093c7bb8b7 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -17,6 +17,7 @@ from . import Client from .. 
import io_descriptors as io from ..service import Service +from ...exceptions import RemoteException from ...exceptions import BentoMLException from ..configuration import get_debug_mode from ..service.inference_api import InferenceAPI @@ -62,8 +63,12 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: # TODO: SSL and grpc support conn = HTTPConnection(url_parts.netloc) conn.set_debuglevel(logging.DEBUG if get_debug_mode() else 0) - conn.request("GET", "/docs.json") + conn.request("GET", url_parts.path + "/docs.json") resp = conn.getresponse() + if resp.status != 200: + raise RemoteException( + f"Failed to get OpenAPI schema from the server: {resp.status} {resp.reason}:\n{resp.read()}" + ) openapi_spec = json.load(resp) conn.close() diff --git a/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py b/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py index 7fd0366cf37..49ad48b0bcf 100644 --- a/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py +++ b/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py @@ -49,7 +49,7 @@ class BentoServiceImpl(services.BentoServiceServicer): """An asyncio implementation of BentoService servicer.""" async def Call( # type: ignore (no async types) # pylint: disable=invalid-overridden-method - self, + self: services.BentoServiceServicer, request: pb.Request, context: BentoServicerContext, ) -> pb.Response | None: From 06345ad2a741917818335dbc2d34681223ba95bc Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:50:40 -0500 Subject: [PATCH 10/19] fix: address #2900 Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index f093c7bb8b7..148fa6aacad 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ 
-76,7 +76,7 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: for route, spec in openapi_spec["paths"].items(): for meth_spec in spec.values(): - if "Service APIs" in meth_spec["tags"]: + if "tags" in meth_spec and "Service APIs" in meth_spec["tags"]: if "x-bentoml-io-descriptor" not in meth_spec["requestBody"]: # TODO: better message stating min version for from_url to work raise BentoMLException( From 9d7e65cacc8fb172a0b045bec13648d419be3195 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Fri, 6 Jan 2023 16:04:56 -0500 Subject: [PATCH 11/19] fix: missing exception from base Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/grpc.py | 2 +- src/bentoml/_internal/client/http.py | 2 +- src/bentoml/_internal/io_descriptors/base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index cfa9e138687..0f857d3bc78 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -32,7 +32,6 @@ from grpc import aio from grpc_health.v1 import health_pb2 as pb_health from google.protobuf import json_format as _json_format - from google.protobuf.internal import python_message from ..types import PathType from ...grpc.v1.service_pb2 import Response @@ -67,6 +66,7 @@ def __init__( ssl_client_credentials: ClientCredentials | None = None, *, protocol_version: str = LATEST_PROTOCOL_VERSION, + **kwargs: t.Any, ): self._pb, _ = import_generated_stubs(protocol_version) diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index 148fa6aacad..d1dca374349 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -60,7 +60,7 @@ def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: server_url = server_url if "://" in server_url else "http://" + server_url url_parts = 
urlparse(server_url) - # TODO: SSL and grpc support + # TODO: SSL support conn = HTTPConnection(url_parts.netloc) conn.set_debuglevel(logging.DEBUG if get_debug_mode() else 0) conn.request("GET", url_parts.path + "/docs.json") diff --git a/src/bentoml/_internal/io_descriptors/base.py b/src/bentoml/_internal/io_descriptors/base.py index 091f3e78fd5..c4a791c9d15 100644 --- a/src/bentoml/_internal/io_descriptors/base.py +++ b/src/bentoml/_internal/io_descriptors/base.py @@ -6,6 +6,7 @@ from typing import TYPE_CHECKING from ...exceptions import InvalidArgument +from ...exceptions import BentoMLException if TYPE_CHECKING: from types import UnionType @@ -19,7 +20,6 @@ from ..types import LazyType from ..context import InferenceApiContext as Context from ..service.openapi.specification import Schema - from ..service.openapi.specification import MediaType from ..service.openapi.specification import Reference InputType = ( From 9711b1fd80a17726abc47754868ab9f0e2ecb57c Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Wed, 11 Jan 2023 12:44:43 -0800 Subject: [PATCH 12/19] fix: typing check Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/grpc.py | 2 +- src/bentoml/_internal/server/server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 0f857d3bc78..f2f46b26cd4 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -111,7 +111,7 @@ def wait_until_server_ready( *, server_url: str | None = None, timeout: int = 30, - check_interval: float = 1, + check_interval: int = 1, **kwargs: t.Any, ) -> None: start_time = time.time() diff --git a/src/bentoml/_internal/server/server.py b/src/bentoml/_internal/server/server.py index 6b6fa3a4904..b6130f2c585 100644 --- a/src/bentoml/_internal/server/server.py +++ b/src/bentoml/_internal/server/server.py @@ 
-42,7 +42,7 @@ def __enter__(self): def __exit__( self, - exc_type: type[BaseException] | None, + exc_type: type[BaseException], exc_value: BaseException, traceback_type: TracebackType, ): From 0dd26fba6bbc9cb17efe0e65636002e3922d4429 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 17 Jan 2023 22:17:12 -0800 Subject: [PATCH 13/19] chore: address comments Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 74 +++++++++---- src/bentoml/_internal/client/grpc.py | 133 ++++++++++++++++------- src/bentoml/_internal/client/http.py | 50 ++++++--- src/bentoml/_internal/server/server.py | 5 +- 4 files changed, 182 insertions(+), 80 deletions(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index 38a9253052a..55fd3cd2fdb 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -6,7 +6,6 @@ import functools from abc import ABC from abc import abstractmethod -from typing import TYPE_CHECKING from http.client import BadStatusLine from urllib.parse import urlparse @@ -15,7 +14,9 @@ logger = logging.getLogger(__name__) -if TYPE_CHECKING: +if t.TYPE_CHECKING: + from types import TracebackType + from .grpc import GrpcClient from .http import HTTPClient from ..service import Service @@ -24,6 +25,7 @@ class Client(ABC): server_url: str _svc: Service + endpoints: list[str] def __init__(self, svc: Service, server_url: str): self._svc = svc @@ -32,7 +34,10 @@ def __init__(self, svc: Service, server_url: str): if svc is not None and len(svc.apis) == 0: raise BentoMLException("No APIs were found when constructing client.") + self.endpoints = [] for name, api in self._svc.apis.items(): + self.endpoints.append(name) + if not hasattr(self, name): setattr( self, name, functools.partial(self._sync_call, _bentoml_api=api) @@ -57,40 +62,45 @@ async def async_call( inp, 
_bentoml_api=self._svc.apis[bentoml_api_name], **kwargs ) - @abstractmethod + @staticmethod def wait_until_server_ready( - self, - *, - server_url: str | None = None, - timeout: int = 30, - **kwargs: t.Any, + host: str, port: int, timeout: int = 30, **kwargs: t.Any ) -> None: - raise NotImplementedError + try: + from .http import HTTPClient + + HTTPClient.wait_until_server_ready(host, port, timeout, **kwargs) + except BadStatusLine: + # when address is a RPC + from .grpc import GrpcClient + + GrpcClient.wait_until_server_ready(host, port, timeout, **kwargs) + except Exception as err: + # caught all other exceptions + logger.error("Failed to connect to server %s:%s", host, port) + logger.error(err) + raise @t.overload - @classmethod - @abstractmethod + @staticmethod def from_url( - cls, server_url: str, *, kind: None | t.Literal["auto"] = ... + server_url: str, *, kind: None | t.Literal["auto"] = ... ) -> GrpcClient | HTTPClient: ... @t.overload - @classmethod - @abstractmethod - def from_url(cls, server_url: str, *, kind: t.Literal["http"] = ...) -> HTTPClient: + @staticmethod + def from_url(server_url: str, *, kind: t.Literal["http"] = ...) -> HTTPClient: ... @t.overload - @classmethod - @abstractmethod - def from_url(cls, server_url: str, *, kind: t.Literal["grpc"] = ...) -> GrpcClient: + @staticmethod + def from_url(server_url: str, *, kind: t.Literal["grpc"] = ...) -> GrpcClient: ... 
- @classmethod - @abstractmethod + @staticmethod def from_url( - cls, server_url: str, *, kind: str | None = None, **kwargs: t.Any + server_url: str, *, kind: str | None = None, **kwargs: t.Any ) -> Client: url_parsed = urlparse(server_url) if url_parsed.scheme == "http": @@ -134,3 +144,25 @@ async def _call( self, inp: t.Any = None, *, _bentoml_api: InferenceAPI, **kwargs: t.Any ) -> t.Any: raise NotImplementedError + + def __enter__(self): + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + pass + + async def __aenter__(self): + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + pass diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index f2f46b26cd4..6e7eceba4c3 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -2,7 +2,6 @@ import time import typing as t -import asyncio import logging import functools from typing import TYPE_CHECKING @@ -10,7 +9,7 @@ from packaging.version import parse from . import Client -from .. import io_descriptors as io +from .. 
import io_descriptors from ..utils import LazyLoader from ..utils import cached_property from ..service import Service @@ -30,6 +29,7 @@ import grpc from grpc import aio + from grpc._channel import Channel as GrpcSyncChannel from grpc_health.v1 import health_pb2 as pb_health from google.protobuf import json_format as _json_format @@ -78,7 +78,7 @@ def __init__( if ssl: assert ( ssl_client_credentials is not None - ), "'ssl=True' requires 'credentials'" + ), "'ssl=True' requires 'ssl_client_credentials'" self._credentials = grpc.ssl_channel_credentials( **{ k: load_from_file(v) if isinstance(v, str) else v @@ -106,30 +106,95 @@ def channel(self): interceptors=self._interceptors, ) + @staticmethod + def _create_sync_channel( + server_url: str, + ssl: bool = False, + ssl_client_credentials: ClientCredentials | None = None, + channel_options: t.Any | None = None, + compression: grpc.Compression | None = None, + ) -> GrpcSyncChannel: + if ssl: + assert ( + ssl_client_credentials is not None + ), "'ssl=True' requires 'ssl_client_credentials'" + return grpc.secure_channel( + server_url, + credentials=grpc.ssl_channel_credentials( + **{ + k: load_from_file(v) if isinstance(v, str) else v + for k, v in ssl_client_credentials.items() + } + ), + options=channel_options, + compression=compression, + ) + return grpc.insecure_channel( + server_url, options=channel_options, compression=compression + ) + + @staticmethod def wait_until_server_ready( - self, - *, - server_url: str | None = None, + host: str, + port: int, timeout: int = 30, check_interval: int = 1, + # set kwargs here to omit gRPC kwargs **kwargs: t.Any, ) -> None: + protocol_version = kwargs.get("protocol_version", LATEST_PROTOCOL_VERSION) + + channel = GrpcClient._create_sync_channel( + f"{host}:{port}", + ssl=kwargs.get("ssl", False), + ssl_client_credentials=kwargs.get("ssl_client_credentials", None), + channel_options=kwargs.get("channel_options", None), + compression=kwargs.get("compression", None), + ) + rpc = 
channel.unary_unary( + "/grpc.health.v1.Health/Check", + request_serializer=pb_health.HealthCheckRequest.SerializeToString, + response_deserializer=pb_health.HealthCheckResponse.FromString, + ) + start_time = time.time() while time.time() - start_time < timeout: try: - res = asyncio.run( - self._health(service_name=self._call_rpc, timeout=timeout) + response = t.cast( + pb_health.HealthCheckResponse, + rpc( + pb_health.HealthCheckRequest( + service=f"bentoml.grpc.{protocol_version}.BentoService" + ) + ), ) - if res.status == pb_health.HealthCheckResponse.SERVING: + if response.status == pb_health.HealthCheckResponse.SERVING: break else: - asyncio.run(asyncio.sleep(check_interval)) - except aio.AioRpcError as err: - logger.debug("[%s] Retrying to connect to the host %s", err, server_url) - asyncio.run(asyncio.sleep(check_interval)) - raise TimeoutError( - f"Timed out waiting {timeout} seconds for server at '{server_url}' to be ready." - ) + time.sleep(check_interval) + except grpc.RpcError: + logger.debug("Server is not ready. Retrying...") + time.sleep(check_interval) + + try: + response = t.cast( + pb_health.HealthCheckResponse, + rpc( + pb_health.HealthCheckRequest( + service=f"bentoml.grpc.{protocol_version}.BentoService" + ) + ), + ) + if response.status != pb_health.HealthCheckResponse.SERVING: + raise TimeoutError( + f"Timed out waiting {timeout} seconds for server at '{host}:{port}' to be ready." 
+ ) + except (grpc.RpcError, TimeoutError) as err: + logger.error("Caught exception while connecting to %s:%s:", host, port) + logger.error(err) + raise + else: + channel.close() @cached_property def _rpc_metadata(self) -> dict[str, dict[str, t.Any]]: @@ -156,7 +221,7 @@ def _rpc_metadata(self) -> dict[str, dict[str, t.Any]]: ) } - async def _health(self, service_name: str, *, timeout: int = 30) -> t.Any: + async def health(self, service_name: str, *, timeout: int = 30) -> t.Any: return await self._invoke( method_name="/grpc.health.v1.Health/Check", service=service_name, @@ -240,8 +305,6 @@ async def _call( @classmethod def from_url(cls, server_url: str, **kwargs: t.Any) -> GrpcClient: protocol_version = kwargs.get("protocol_version", LATEST_PROTOCOL_VERSION) - ssl = kwargs.get("ssl", False) - ssl_client_credentials = kwargs.get("ssl_client_credentials", None) # Since v1, we introduce a ServiceMetadata rpc to retrieve bentoml.Service metadata. # then `client.predict` or `client.classify` won't be available. 
@@ -279,27 +342,13 @@ def run(): raise BentoMLException("\n".join(exception_message)) pb, _ = import_generated_stubs(protocol_version) - if ssl: - assert ( - ssl_client_credentials is not None - ), "'ssl=True' requires 'credentials'" - channel = grpc.secure_channel( - server_url, - credentials=grpc.ssl_channel_credentials( - **{ - k: load_from_file(v) if isinstance(v, str) else v - for k, v in ssl_client_credentials.items() - } - ), - options=kwargs.get("channel_options", None), - compression=kwargs.get("compression", None), - ) - else: - channel = grpc.insecure_channel( - server_url, - options=kwargs.get("channel_options", None), - compression=kwargs.get("compression", None), - ) + channel = GrpcClient._create_sync_channel( + server_url, + ssl=kwargs.get("ssl", False), + ssl_client_credentials=kwargs.get("ssl_client_credentials", None), + channel_options=kwargs.get("channel_options", None), + compression=kwargs.get("compression", None), + ) # create an insecure channel to invoke ServiceMetadata rpc metadata = t.cast( @@ -316,7 +365,7 @@ def run(): try: dummy_service.apis[api.name] = InferenceAPI( None, - io.from_spec( + io_descriptors.from_spec( { "id": api.input.descriptor_id, "args": _json_format.MessageToDict( @@ -324,7 +373,7 @@ def run(): ).get("args", None), } ), - io.from_spec( + io_descriptors.from_spec( { "id": api.output.descriptor_id, "args": _json_format.MessageToDict( diff --git a/src/bentoml/_internal/client/http.py b/src/bentoml/_internal/client/http.py index d1dca374349..c5abd7a22f9 100644 --- a/src/bentoml/_internal/client/http.py +++ b/src/bentoml/_internal/client/http.py @@ -26,34 +26,56 @@ class HTTPClient(Client): + @staticmethod def wait_until_server_ready( - self, - *, - server_url: str | None = None, + host: str, + port: int, timeout: int = 30, check_interval: int = 1, # set kwargs here to omit gRPC kwargs **kwargs: t.Any, ) -> None: start_time = time.time() - if server_url is None: - server_url = self.server_url + status = None - 
proxy_handler = urllib.request.ProxyHandler({}) - opener = urllib.request.build_opener(proxy_handler) - logger.debug("Waiting for host %s to be ready.", server_url) + logger.debug("Waiting for host %s to be ready.", f"{host}:{port}") while time.time() - start_time < timeout: try: - if opener.open(f"http://{server_url}/readyz", timeout=1).status == 200: + conn = HTTPConnection(host, port) + conn.request("GET", "/readyz") + status = conn.getresponse().status + if status == 200: break else: time.sleep(check_interval) - except (ConnectionError, urllib.error.URLError, socket.timeout) as err: - logger.debug("[%s] Retrying to connect to the host %s", err, server_url) + except ( + ConnectionError, + urllib.error.URLError, + socket.timeout, + ConnectionRefusedError, + ): + logger.debug("Server is not ready. Retrying...") time.sleep(check_interval) - raise TimeoutError( - f"Timed out waiting {timeout} seconds for server at '{server_url}' to be ready." - ) + + # try to connect one more time and raise exception. + try: + conn = HTTPConnection(host, port) + conn.request("GET", "/readyz") + status = conn.getresponse().status + if status != 200: + raise TimeoutError( + f"Timed out waiting {timeout} seconds for server at '{host}:{port}' to be ready." 
+ ) + except ( + ConnectionError, + urllib.error.URLError, + socket.timeout, + ConnectionRefusedError, + TimeoutError, + ) as err: + logger.error("Caught exception while connecting to %s:%s:", host, port) + logger.error(err) + raise @classmethod def from_url(cls, server_url: str, **kwargs: t.Any) -> HTTPClient: diff --git a/src/bentoml/_internal/server/server.py b/src/bentoml/_internal/server/server.py index b6130f2c585..8578109d704 100644 --- a/src/bentoml/_internal/server/server.py +++ b/src/bentoml/_internal/server/server.py @@ -26,9 +26,8 @@ def client(self): def get_client(self): from ..client import Client - client = Client.from_url(f"http://{self.host}:{self.port}") - client.wait_until_server_ready(timeout=10) - return client + Client.wait_until_server_ready(self.host, self.port, self.timeout) + return Client.from_url(f"http://{self.host}:{self.port}") def stop(self) -> None: self.process.terminate() From 793144919a841fbc6265f4c9cb8520edb7466944 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 17 Jan 2023 22:33:38 -0800 Subject: [PATCH 14/19] fix: gRPC client from serve Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 7 ------- src/bentoml/_internal/client/grpc.py | 5 ++--- src/bentoml/_internal/server/server.py | 3 ++- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index 55fd3cd2fdb..2237931562f 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -7,7 +7,6 @@ from abc import ABC from abc import abstractmethod from http.client import BadStatusLine -from urllib.parse import urlparse from ...exceptions import BentoMLException from ..service.inference_api import InferenceAPI @@ -102,12 +101,6 @@ def from_url(server_url: str, *, kind: t.Literal["grpc"] = ...) 
-> GrpcClient: def from_url( server_url: str, *, kind: str | None = None, **kwargs: t.Any ) -> Client: - url_parsed = urlparse(server_url) - if url_parsed.scheme == "http": - kind = "http" - elif url_parsed.scheme == "grpc": - kind = "grpc" - if kind is None or kind == "auto": try: from .http import HTTPClient diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 6e7eceba4c3..769ebb7af7e 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -5,6 +5,7 @@ import logging import functools from typing import TYPE_CHECKING +from urllib.parse import urlparse from packaging.version import parse @@ -193,8 +194,6 @@ def wait_until_server_ready( logger.error("Caught exception while connecting to %s:%s:", host, port) logger.error(err) raise - else: - channel.close() @cached_property def _rpc_metadata(self) -> dict[str, dict[str, t.Any]]: @@ -343,7 +342,7 @@ def run(): pb, _ = import_generated_stubs(protocol_version) channel = GrpcClient._create_sync_channel( - server_url, + urlparse(server_url).netloc, ssl=kwargs.get("ssl", False), ssl_client_credentials=kwargs.get("ssl_client_credentials", None), channel_options=kwargs.get("channel_options", None), diff --git a/src/bentoml/_internal/server/server.py b/src/bentoml/_internal/server/server.py index 8578109d704..4db4abb2729 100644 --- a/src/bentoml/_internal/server/server.py +++ b/src/bentoml/_internal/server/server.py @@ -20,6 +20,7 @@ class ServerHandle: port: int timeout: int = attr.field(default=10) + @property def client(self): return self.get_client() @@ -27,7 +28,7 @@ def get_client(self): from ..client import Client Client.wait_until_server_ready(self.host, self.port, self.timeout) - return Client.from_url(f"http://{self.host}:{self.port}") + return Client.from_url(f"http://{self.host}:{self.port}", kind="auto") def stop(self) -> None: self.process.terminate() From 2f45112c17e450114f112fec18825c43633cd7e3 Mon Sep 17 00:00:00 2001 From: 
Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Tue, 17 Jan 2023 23:57:20 -0800 Subject: [PATCH 15/19] chore: handle localhost from server_url Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/grpc.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 769ebb7af7e..02ffbb5bb6a 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -341,23 +341,22 @@ def run(): raise BentoMLException("\n".join(exception_message)) pb, _ = import_generated_stubs(protocol_version) - channel = GrpcClient._create_sync_channel( - urlparse(server_url).netloc, + with GrpcClient._create_sync_channel( + server_url.replace(r"localhost", "0.0.0.0"), ssl=kwargs.get("ssl", False), ssl_client_credentials=kwargs.get("ssl_client_credentials", None), channel_options=kwargs.get("channel_options", None), compression=kwargs.get("compression", None), - ) - - # create an insecure channel to invoke ServiceMetadata rpc - metadata = t.cast( - "ServiceMetadataResponse", - channel.unary_unary( - f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - response_deserializer=pb.ServiceMetadataResponse.FromString, - )(pb.ServiceMetadataRequest()), - ) + ) as channel: + # create an insecure channel to invoke ServiceMetadata rpc + metadata = t.cast( + "ServiceMetadataResponse", + channel.unary_unary( + f"/bentoml.grpc.{protocol_version}.BentoService/ServiceMetadata", + request_serializer=pb.ServiceMetadataRequest.SerializeToString, + response_deserializer=pb.ServiceMetadataResponse.FromString, + )(pb.ServiceMetadataRequest()), + ) dummy_service = Service(metadata.name) for api in metadata.apis: From 6351e674e9d5c2ea8cc4eea5cd6c0bd0ea7c3375 Mon Sep 17 00:00:00 2001 From: Aaron Pham 
<29749331+aarnphm@users.noreply.github.com> Date: Wed, 18 Jan 2023 19:05:41 -0800 Subject: [PATCH 16/19] chore: revert changes from upstream/main reverted changes: src/bentoml/_internal/server/grpc/servicer/v1/__init__.py Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/server/grpc/servicer/v1/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py b/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py index 49ad48b0bcf..7fd0366cf37 100644 --- a/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py +++ b/src/bentoml/_internal/server/grpc/servicer/v1/__init__.py @@ -49,7 +49,7 @@ class BentoServiceImpl(services.BentoServiceServicer): """An asyncio implementation of BentoService servicer.""" async def Call( # type: ignore (no async types) # pylint: disable=invalid-overridden-method - self: services.BentoServiceServicer, + self, request: pb.Request, context: BentoServicerContext, ) -> pb.Response | None: From 39c7747399f5f6bb3e98ca7f1e92b6b2e6a366d8 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Wed, 18 Jan 2023 19:12:33 -0800 Subject: [PATCH 17/19] chore: update stubs [generated] Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- .../grpc/v1/_generated_pb3/service_pb2.py | 50 +++++++++---------- .../grpc/v1/_generated_pb3/service_pb2.pyi | 9 +++- .../grpc/v1/_generated_pb4/service_pb2.py | 50 +++++++++---------- .../grpc/v1/_generated_pb4/service_pb2.pyi | 9 +++- 4 files changed, 64 insertions(+), 54 deletions(-) diff --git a/src/bentoml/grpc/v1/_generated_pb3/service_pb2.py b/src/bentoml/grpc/v1/_generated_pb3/service_pb2.py index e333bfa1610..94ca0c90e09 100644 --- a/src/bentoml/grpc/v1/_generated_pb3/service_pb2.py +++ b/src/bentoml/grpc/v1/_generated_pb3/service_pb2.py @@ -18,7 +18,7 @@ from google.protobuf import wrappers_pb2 as 
google_dot_protobuf_dot_wrappers__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x62\x65ntoml/grpc/v1/service.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xbf\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xd1\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\x05input\x18\x02 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadata\x12K\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadata\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 
\x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 
\x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n \x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x62\x65ntoml/grpc/v1/service.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xde\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xf0\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 \x01(\t\x12O\n\x05input\x18\x02 
\x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x00\x88\x01\x01\x12P\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x01\x88\x01\x01\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_inputB\t\n\x07_outputB\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 
\x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n 
\x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') @@ -164,33 +164,33 @@ _SERVICEMETADATAREQUEST._serialized_start=112 _SERVICEMETADATAREQUEST._serialized_end=136 _SERVICEMETADATARESPONSE._serialized_start=139 - _SERVICEMETADATARESPONSE._serialized_end=586 + _SERVICEMETADATARESPONSE._serialized_end=617 _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_start=263 _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_end=374 _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_start=377 - _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_end=586 - _REQUEST._serialized_start=589 - _REQUEST._serialized_end=978 - _RESPONSE._serialized_start=981 - _RESPONSE._serialized_end=1353 - _PART._serialized_start=1356 - _PART._serialized_end=1682 - _MULTIPART._serialized_start=1685 - _MULTIPART._serialized_end=1822 - _MULTIPART_FIELDSENTRY._serialized_start=1754 - _MULTIPART_FIELDSENTRY._serialized_end=1822 - _FILE._serialized_start=1824 - _FILE._serialized_end=1875 - _DATAFRAME._serialized_start=1877 - _DATAFRAME._serialized_end=1952 - _SERIES._serialized_start=1955 - _SERIES._serialized_end=2116 - _NDARRAY._serialized_start=2119 - _NDARRAY._serialized_end=2569 - _NDARRAY_DTYPE._serialized_start=2400 - _NDARRAY_DTYPE._serialized_end=2569 - 
_BENTOSERVICE._serialized_start=2572 - _BENTOSERVICE._serialized_end=2753 + _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_end=617 + _REQUEST._serialized_start=620 + _REQUEST._serialized_end=1009 + _RESPONSE._serialized_start=1012 + _RESPONSE._serialized_end=1384 + _PART._serialized_start=1387 + _PART._serialized_end=1713 + _MULTIPART._serialized_start=1716 + _MULTIPART._serialized_end=1853 + _MULTIPART_FIELDSENTRY._serialized_start=1785 + _MULTIPART_FIELDSENTRY._serialized_end=1853 + _FILE._serialized_start=1855 + _FILE._serialized_end=1906 + _DATAFRAME._serialized_start=1908 + _DATAFRAME._serialized_end=1983 + _SERIES._serialized_start=1986 + _SERIES._serialized_end=2147 + _NDARRAY._serialized_start=2150 + _NDARRAY._serialized_end=2600 + _NDARRAY_DTYPE._serialized_start=2431 + _NDARRAY_DTYPE._serialized_end=2600 + _BENTOSERVICE._serialized_start=2603 + _BENTOSERVICE._serialized_end=2784 BentoService = service_reflection.GeneratedServiceType('BentoService', (_service.Service,), dict( DESCRIPTOR = _BENTOSERVICE, __module__ = 'bentoml.grpc.v1.service_pb2' diff --git a/src/bentoml/grpc/v1/_generated_pb3/service_pb2.pyi b/src/bentoml/grpc/v1/_generated_pb3/service_pb2.pyi index c1c6e2a4065..17cbab4fa05 100644 --- a/src/bentoml/grpc/v1/_generated_pb3/service_pb2.pyi +++ b/src/bentoml/grpc/v1/_generated_pb3/service_pb2.pyi @@ -93,9 +93,14 @@ class ServiceMetadataResponse(google.protobuf.message.Message): output: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., docs: builtins.str | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... 
+ def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... + @typing.overload def WhichOneof(self, oneof_group: typing_extensions.Literal["_docs", b"_docs"]) -> typing_extensions.Literal["docs"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_input", b"_input"]) -> typing_extensions.Literal["input"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_output", b"_output"]) -> typing_extensions.Literal["output"] | None: ... NAME_FIELD_NUMBER: builtins.int APIS_FIELD_NUMBER: builtins.int diff --git a/src/bentoml/grpc/v1/_generated_pb4/service_pb2.py b/src/bentoml/grpc/v1/_generated_pb4/service_pb2.py index 7c72dcfbba1..f6718ed06ab 100644 --- a/src/bentoml/grpc/v1/_generated_pb4/service_pb2.py +++ b/src/bentoml/grpc/v1/_generated_pb4/service_pb2.py @@ -15,7 +15,7 @@ from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x62\x65ntoml/grpc/v1/service.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xbf\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xd1\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12J\n\x05input\x18\x02 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadata\x12K\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadata\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 
\x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n 
\x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x62\x65ntoml/grpc/v1/service.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xde\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xf0\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 \x01(\t\x12O\n\x05input\x18\x02 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x00\x88\x01\x01\x12P\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x01\x88\x01\x01\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_inputB\t\n\x07_outputB\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 
\x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 
\x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n 
\x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'bentoml.grpc.v1.service_pb2', globals()) @@ -52,32 +52,32 @@ _SERVICEMETADATAREQUEST._serialized_start=112 _SERVICEMETADATAREQUEST._serialized_end=136 _SERVICEMETADATARESPONSE._serialized_start=139 - _SERVICEMETADATARESPONSE._serialized_end=586 + _SERVICEMETADATARESPONSE._serialized_end=617 _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_start=263 _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_end=374 _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_start=377 - _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_end=586 - _REQUEST._serialized_start=589 - _REQUEST._serialized_end=978 - _RESPONSE._serialized_start=981 - _RESPONSE._serialized_end=1353 - _PART._serialized_start=1356 - _PART._serialized_end=1682 - _MULTIPART._serialized_start=1685 - _MULTIPART._serialized_end=1822 - _MULTIPART_FIELDSENTRY._serialized_start=1754 - _MULTIPART_FIELDSENTRY._serialized_end=1822 - _FILE._serialized_start=1824 - _FILE._serialized_end=1875 - _DATAFRAME._serialized_start=1877 - _DATAFRAME._serialized_end=1952 - _SERIES._serialized_start=1955 - _SERIES._serialized_end=2116 - 
_NDARRAY._serialized_start=2119 - _NDARRAY._serialized_end=2569 - _NDARRAY_DTYPE._serialized_start=2400 - _NDARRAY_DTYPE._serialized_end=2569 - _BENTOSERVICE._serialized_start=2572 - _BENTOSERVICE._serialized_end=2753 + _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_end=617 + _REQUEST._serialized_start=620 + _REQUEST._serialized_end=1009 + _RESPONSE._serialized_start=1012 + _RESPONSE._serialized_end=1384 + _PART._serialized_start=1387 + _PART._serialized_end=1713 + _MULTIPART._serialized_start=1716 + _MULTIPART._serialized_end=1853 + _MULTIPART_FIELDSENTRY._serialized_start=1785 + _MULTIPART_FIELDSENTRY._serialized_end=1853 + _FILE._serialized_start=1855 + _FILE._serialized_end=1906 + _DATAFRAME._serialized_start=1908 + _DATAFRAME._serialized_end=1983 + _SERIES._serialized_start=1986 + _SERIES._serialized_end=2147 + _NDARRAY._serialized_start=2150 + _NDARRAY._serialized_end=2600 + _NDARRAY_DTYPE._serialized_start=2431 + _NDARRAY_DTYPE._serialized_end=2600 + _BENTOSERVICE._serialized_start=2603 + _BENTOSERVICE._serialized_end=2784 _builder.BuildServices(DESCRIPTOR, 'bentoml.grpc.v1.service_pb2', globals()) # @@protoc_insertion_point(module_scope) diff --git a/src/bentoml/grpc/v1/_generated_pb4/service_pb2.pyi b/src/bentoml/grpc/v1/_generated_pb4/service_pb2.pyi index 0664fd5b53d..62216947738 100644 --- a/src/bentoml/grpc/v1/_generated_pb4/service_pb2.pyi +++ b/src/bentoml/grpc/v1/_generated_pb4/service_pb2.pyi @@ -97,9 +97,14 @@ class ServiceMetadataResponse(google.protobuf.message.Message): output: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., docs: builtins.str | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... 
+ def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... + @typing.overload def WhichOneof(self, oneof_group: typing_extensions.Literal["_docs", b"_docs"]) -> typing_extensions.Literal["docs"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_input", b"_input"]) -> typing_extensions.Literal["input"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_output", b"_output"]) -> typing_extensions.Literal["output"] | None: ... NAME_FIELD_NUMBER: builtins.int APIS_FIELD_NUMBER: builtins.int From c9fe7227d8d9b29c5129a5b6e11683a7e2aae3af Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Wed, 18 Jan 2023 19:13:47 -0800 Subject: [PATCH 18/19] chore: cleanup channel after server is ready Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- src/bentoml/_internal/client/grpc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bentoml/_internal/client/grpc.py b/src/bentoml/_internal/client/grpc.py index 02ffbb5bb6a..3441429ada9 100644 --- a/src/bentoml/_internal/client/grpc.py +++ b/src/bentoml/_internal/client/grpc.py @@ -5,7 +5,6 @@ import logging import functools from typing import TYPE_CHECKING -from urllib.parse import urlparse from packaging.version import parse @@ -194,6 +193,8 @@ def wait_until_server_ready( logger.error("Caught exception while connecting to %s:%s:", host, port) logger.error(err) raise + finally: + channel.close() @cached_property def _rpc_metadata(self) -> dict[str, dict[str, t.Any]]: From b3184eb67e44b1ccf667318bcb2d8b1158f9eaea 
Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Sat, 18 Feb 2023 18:45:54 -0800 Subject: [PATCH 19/19] chore: update exception message Co-authored-by: Sauyon Lee <2347889+sauyon@users.noreply.github.com> --- src/bentoml/_internal/client/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index 2237931562f..a826f4c73d5 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -124,7 +124,7 @@ def from_url( return GrpcClient.from_url(server_url, **kwargs) else: raise BentoMLException( - "Invalid client kind. Must be one of ['http', 'grpc', 'auto']" + f"Invalid client kind '{kind}'. Must be one of 'http', 'grpc', or 'auto'." ) def _sync_call(