diff --git a/src/bentoml/_internal/client/__init__.py b/src/bentoml/_internal/client/__init__.py index 81d3a9b850b..4786dfb792f 100644 --- a/src/bentoml/_internal/client/__init__.py +++ b/src/bentoml/_internal/client/__init__.py @@ -30,7 +30,7 @@ def __init__(self, svc: Service, server_url: str): self._svc = svc self.server_url = server_url - if svc is not None and len(svc.apis) == 0: + if len(svc.apis) == 0: raise BentoMLException("No APIs were found when constructing client.") self.endpoints = [] diff --git a/src/bentoml/exceptions.py b/src/bentoml/exceptions.py index c06c251d489..283e7da3fa6 100644 --- a/src/bentoml/exceptions.py +++ b/src/bentoml/exceptions.py @@ -119,3 +119,15 @@ class ImportServiceError(BentoMLException): """Raised when BentoML failed to import the user's service file.""" pass + + +class UnservableException(StateException): + """Raised when a service is not servable.""" + + pass + + +class ServerStateException(StateException): + """Raised when a server API requiring the BentoML server to be running is executed when the server is not running.""" + + pass diff --git a/src/bentoml/server.py b/src/bentoml/server.py index c05d9f6d7e9..b2ecf302346 100644 --- a/src/bentoml/server.py +++ b/src/bentoml/server.py @@ -10,24 +10,26 @@ from abc import ABC from abc import abstractmethod from typing import TYPE_CHECKING +from warnings import warn from simple_di import Provide from simple_di import inject from ._internal.bento import Bento +from ._internal.client import Client +from ._internal.client import GrpcClient +from ._internal.client import HTTPClient from ._internal.configuration.containers import BentoMLContainer from ._internal.service import Service from ._internal.tag import Tag from ._internal.utils.analytics.usage_stats import BENTOML_SERVE_FROM_SERVER_API -from .exceptions import BentoMLException +from .exceptions import InvalidArgument +from .exceptions import ServerStateException +from .exceptions import UnservableException if TYPE_CHECKING: from types import TracebackType - from ._internal.client import Client - from ._internal.client import GrpcClient - from ._internal.client import HTTPClient - _FILE: t.TypeAlias = None | int | t.IO[t.Any] @@ -37,7 +39,10 @@ __all__ = ["Server", "GrpcServer", "HTTPServer"] -class Server(ABC): +ClientType = t.TypeVar("ClientType", bound=Client) + + +class Server(ABC, t.Generic[ClientType]): servable: str | Bento | Tag | Service host: str port: int @@ -63,15 +68,16 @@ def __init__( timeout: float = 10, ): if bento is not None: - if not servable: - logger.warning( - "'bento' is deprecated, either remove it as a kwargs or pass '%s' as the first positional argument", - bento, + if servable is None: # type: ignore # dealing with backwards compatibility, where a user has set bento argument manually. + warn( + f"serving using the 'bento' argument is deprecated, either remove it as a kwarg or pass '{bento}' as the first positional argument", + DeprecationWarning, + stacklevel=2, ) servable = bento else: - raise BentoMLException( - "Cannot use both 'bento' and 'servable' as kwargs as 'bento' is deprecated." + raise InvalidArgument( + "Cannot use both 'bento' and 'servable' arguments; as 'bento' is deprecated, set 'servable' instead." ) self.servable = servable @@ -84,7 +90,7 @@ def __init__( bento_str = str(servable) elif isinstance(servable, Service): if not servable.is_service_importable(): - raise BentoMLException( + raise UnservableException( "Cannot use 'bentoml.Service' as a server if it is defined in interactive session or Jupyter Notebooks." ) bento_str, working_dir = servable.get_service_import_origin() @@ -130,7 +136,7 @@ def start( stdout: _FILE = None, stderr: _FILE = None, text: bool | None = None, - ): + ) -> t.ContextManager[ClientType]: """Start the server programmatically. To get the client, use the context manager. @@ -158,9 +164,9 @@ def __init__(__inner_self): logger.debug(f"Starting server with arguments: {self.args}") default_io_descriptor = None if blocking else subprocess.PIPE if text is None: - logger.warning( - "Setting text to True will be the default behaviour for bentoml 2.x. Please set it explicitly to avoid breaking changes.\n" - + ' Example: "server.start(text=False, ...)"', + warn( + "Setting text to True will be the default behavior for bentoml 2.x. Set it explicitly to avoid breaking changes.\n" + + "For Example: 'server.start(text=False, ...)'" ) self.process = subprocess.Popen( self.args, @@ -178,36 +184,34 @@ def __init__(__inner_self): except KeyboardInterrupt: pass - def __enter__(__inner_self): + def __enter__(__inner_self) -> ClientType: return self.get_client() def __exit__( __inner_self, - exc_type: type[BaseException] | None, - exc_value: BaseException | None, - traceback: TracebackType | None, + _exc_type: type[BaseException] | None, + _exc_value: BaseException | None, + _traceback: TracebackType | None, ): self.stop() return _Manager() - def get_client(self) -> Client | None: + def get_client(self) -> ClientType: if self.process is None: # NOTE: if the process is None, we reset this envvar del os.environ[BENTOML_SERVE_FROM_SERVER_API] - logger.warning( + raise ServerStateException( "Attempted to get a client for a BentoML server that was not running! Try running 'bentoml.*Server.start()' first." ) - return assert self.process is not None out_code = self.process.poll() if out_code == 0: # NOTE: if the process is None, we reset this envvar del os.environ[BENTOML_SERVE_FROM_SERVER_API] - logger.warning( + raise ServerStateException( "Attempted to get a client from a BentoML server that has already exited! You can run '.start()' again to restart it." ) - return elif out_code is not None: # NOTE: if the process is None, we reset this envvar del os.environ[BENTOML_SERVE_FROM_SERVER_API] @@ -215,20 +219,19 @@ def get_client(self) -> Client | None: if self.process.stdout is not None and not self.process.stdout.closed: s = self.process.stdout.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 + s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string ) if self.process.stderr is not None and not self.process.stderr.closed: logs += "\nServer Error:\n" s = self.process.stderr.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 + s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string ) - logger.warning(logs) - return + raise ServerStateException(logs) return self._get_client() @abstractmethod - def _get_client(self) -> Client | None: + def _get_client(self) -> ClientType: pass def stop(self) -> None: @@ -244,20 +247,22 @@ def stop(self) -> None: logger.warning( "Attempted to stop a BentoML server that has already exited!" ) + return elif out_code is not None: logs = "Attempted to stop a BentoML server that has already exited with an error!\nServer Output:\n" if self.process.stdout is not None and not self.process.stdout.closed: s = self.process.stdout.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 + s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string ) if self.process.stderr is not None and not self.process.stderr.closed: logs += "\nServer Error:\n" s = self.process.stderr.read() logs += textwrap.indent( - s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 + s.decode("utf-8") if isinstance(s, bytes) else s, " " * 4 # type: ignore # may be string ) logger.warning(logs) + return if sys.platform == "win32": os.kill(self.process.pid, signal.CTRL_C_EVENT) @@ -273,8 +278,10 @@ def stop(self) -> None: self.process.wait() def __enter__(self): - logger.warning( - "Using bentoml.Server as a context manager is deprecated, use bentoml.Server.start instead." + warn( + "Using bentoml.Server as a context manager is deprecated, use bentoml.Server.start instead.", + DeprecationWarning, + stacklevel=2, ) return self @@ -291,7 +298,7 @@ def __exit__( logger.error(f"Error stopping server: {e}", exc_info=e) -class HTTPServer(Server): +class HTTPServer(Server[HTTPClient]): _client: HTTPClient | None = None @inject @@ -347,13 +354,18 @@ def __init__( self.args.extend(construct_ssl_args(**ssl_args)) + def get_client(self) -> HTTPClient: + return super().get_client() + def client(self) -> HTTPClient | None: - logger.warning( - "'Server.client()' is deprecated, use 'Server.get_client()' instead." + warn( + "'Server.client()' is deprecated, use 'Server.get_client()' instead.", + DeprecationWarning, + stacklevel=2, ) - return t.cast("HTTPClient | None", self.get_client()) + return self._get_client() - def _get_client(self) -> HTTPClient | None: + def _get_client(self) -> HTTPClient: if self._client is None: from .client import HTTPClient @@ -364,7 +376,7 @@ def _get_client(self) -> HTTPClient | None: return self._client -class GrpcServer(Server): +class GrpcServer(Server[GrpcClient]): _client: GrpcClient | None = None @inject @@ -423,7 +435,7 @@ def __init__( if grpc_protocol_version is not None: self.args.extend(["--protocol-version", str(grpc_protocol_version)]) - def _get_client(self) -> GrpcClient | None: + def _get_client(self) -> GrpcClient: if self._client is None: from .client import GrpcClient diff --git a/tests/e2e/bento_server_http/tests/test_serve.py b/tests/e2e/bento_server_http/tests/test_serve.py index cbcd6a4dc9a..eb9a2378b72 100644 --- a/tests/e2e/bento_server_http/tests/test_serve.py +++ b/tests/e2e/bento_server_http/tests/test_serve.py @@ -11,7 +11,8 @@ from bentoml.testing.utils import async_request -def test_http_server(bentoml_home: str): +@pytest.mark.usefixtures("bentoml_home") +def test_http_server(): server = bentoml.HTTPServer("service.py:svc", port=12345) server.start() @@ -21,54 +22,64 @@ def test_http_server(bentoml_home: str): assert resp.status == 200 - res = client.echo_json_sync({"test": "json"}) + res = client.call("echo_json", {"test": "json"}) assert res == {"test": "json"} server.stop() + assert server.process is not None # process should not be removed + timeout = 10 start_time = time.time() while time.time() - start_time < timeout: retcode = server.process.poll() if retcode is not None and retcode <= 0: break + + retcode = server.process.poll() + assert retcode is not None + if sys.platform == "win32": # on Windows, because of the way that terminate is run, it seems the exit code is set. - assert isinstance(server.process.poll(), int) + pass else: - # on POSIX negative return codes mean the process was terminated; since we will be terminating + # negative return codes mean the process was terminated; since we will be terminating # the process, it should be negative. - # on all other platforms, this should be 0. - assert server.process.poll() <= 0 + assert retcode <= 0 -def test_http_server_ctx(bentoml_home: str): +@pytest.mark.usefixtures("bentoml_home") +def test_http_server_ctx(): server = bentoml.HTTPServer("service.py:svc", port=12346) with server.start() as client: resp = client.health() - assert resp.status == 200 - res = client.echo_json_sync({"more_test": "and more json"}) + res = client.call("echo_json", {"more_test": "and more json"}) assert res == {"more_test": "and more json"} + assert server.process is not None # process should not be removed + timeout = 10 start_time = time.time() while time.time() - start_time < timeout: retcode = server.process.poll() if retcode is not None and retcode <= 0: break + + retcode = server.process.poll() + assert retcode is not None + if sys.platform == "win32": # on Windows, because of the way that terminate is run, it seems the exit code is set. - assert isinstance(server.process.poll(), int) + pass else: - # on POSIX negative return codes mean the process was terminated; since we will be terminating + # negative return codes mean the process was terminated; since we will be terminating # the process, it should be negative. - # on all other platforms, this should be 0. - assert server.process.poll() <= 0 + assert retcode <= 0 def test_serve_from_svc(): @@ -81,23 +92,29 @@ def test_serve_from_svc(): assert resp.status == 200 server.stop() - timeout = 60 + assert server.process is not None # process should not be removed + + timeout = 10 start_time = time.time() while time.time() - start_time < timeout: retcode = server.process.poll() if retcode is not None and retcode <= 0: break + + retcode = server.process.poll() + assert retcode is not None + if sys.platform == "win32": # on Windows, because of the way that terminate is run, it seems the exit code is set. - assert isinstance(server.process.poll(), int) + pass else: - # on POSIX negative return codes mean the process was terminated; since we will be terminating + # negative return codes mean the process was terminated; since we will be terminating # the process, it should be negative. - # on all other platforms, this should be 0. - assert server.process.poll() <= 0 + assert retcode <= 0 -def test_serve_with_timeout(bentoml_home: str): +@pytest.mark.usefixtures("bentoml_home") +def test_serve_with_timeout(): server = bentoml.HTTPServer("service.py:svc", port=12349) config_file = os.path.abspath("configs/timeout.yml") env = os.environ.copy() @@ -108,23 +125,26 @@ def test_serve_with_timeout(bentoml_home: str): BentoMLException, match="504: b'Not able to process the request in 1 seconds'", ): - client.echo_delay({}) + client.call("echo_delay", {}) @pytest.mark.asyncio -async def test_serve_with_api_max_concurrency(bentoml_home: str): +@pytest.mark.usefixtures("bentoml_home") +async def test_serve_with_api_max_concurrency(): server = bentoml.HTTPServer("service.py:svc", port=12350, api_workers=1) config_file = os.path.abspath("configs/max_concurrency.yml") env = os.environ.copy() env.update(BENTOML_CONFIG=config_file) - with server.start(env=env) as client: + with server.start(stdin=None, stdout=None, env=env) as client: tasks = [ - asyncio.create_task(client.async_echo_delay({"delay": 0.5})), - asyncio.create_task(client.async_echo_delay({"delay": 0.5})), + asyncio.create_task(client.async_call("echo_delay", {"delay": 0.5})), + asyncio.create_task(client.async_call("echo_delay", {"delay": 0.5})), ] await asyncio.sleep(0.1) - tasks.append(asyncio.create_task(client.async_echo_delay({"delay": 0.5}))) + tasks.append( + asyncio.create_task(client.async_call("echo_delay", {"delay": 0.5})) + ) results = await asyncio.gather(*tasks, return_exceptions=True) for i in range(2): @@ -138,7 +158,8 @@ async def test_serve_with_api_max_concurrency(bentoml_home: str): reason="Windows runner doesn't have enough cores to run this test", ) @pytest.mark.asyncio -async def test_serve_with_lifecycle_hooks(bentoml_home: str, tmp_path: Path): +@pytest.mark.usefixtures("bentoml_home") +async def test_serve_with_lifecycle_hooks(tmp_path: Path): server = bentoml.HTTPServer("service.py:svc", port=12351, api_workers=4) env = os.environ.copy() env["BENTOML_TEST_DATA"] = str(tmp_path)