From cdcb509fc0367babe56e44d65fdfc9c385610075 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 29 Sep 2022 21:40:10 +0100 Subject: [PATCH] fix(config): respect `api_server.workers` (#3049) Co-authored-by: Sauyon Lee <2347889+sauyon@users.noreply.github.com> Co-authored-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- bentoml/_internal/configuration/containers.py | 5 +++-- .../_internal/configuration/default_configuration.yaml | 2 +- bentoml/serve.py | 8 ++++---- bentoml_cli/serve.py | 4 ++-- bentoml_cli/start.py | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/bentoml/_internal/configuration/containers.py b/bentoml/_internal/configuration/containers.py index ad8a29b37a..372b572b90 100644 --- a/bentoml/_internal/configuration/containers.py +++ b/bentoml/_internal/configuration/containers.py @@ -1,10 +1,10 @@ from __future__ import annotations import os +import math import uuid import typing as t import logging -import multiprocessing from copy import deepcopy from typing import TYPE_CHECKING from dataclasses import dataclass @@ -24,6 +24,7 @@ from ..utils import split_with_quotes from ..utils import validate_or_create_dir from ..context import component_context +from ..resource import CpuResource from ..resource import system_resources from ...exceptions import BentoMLConfigException from ..utils.unflatten import unflatten @@ -449,7 +450,7 @@ def access_control_options( return filtered_kwargs api_server_workers = providers.Factory[int]( - lambda workers: workers or (multiprocessing.cpu_count() // 2) + 1, + lambda workers: workers or math.ceil(CpuResource.from_system()), api_server_config.workers, ) diff --git a/bentoml/_internal/configuration/default_configuration.yaml b/bentoml/_internal/configuration/default_configuration.yaml index 3c66fecfff..dd95930436 100644 --- a/bentoml/_internal/configuration/default_configuration.yaml +++ b/bentoml/_internal/configuration/default_configuration.yaml @@ -1,5 +1,5 @@ api_server: - workers: 1 + workers: ~ # When this is set to null the number of available CPU cores is used. timeout: 60 backlog: 2048 metrics: diff --git a/bentoml/serve.py b/bentoml/serve.py index ad6b620bc7..7105ad1ae3 100644 --- a/bentoml/serve.py +++ b/bentoml/serve.py @@ -289,7 +289,7 @@ def serve_http_production( port: int = Provide[BentoMLContainer.http.port], host: str = Provide[BentoMLContainer.http.host], backlog: int = Provide[BentoMLContainer.api_server_config.backlog], - api_workers: int | None = None, + api_workers: int = Provide[BentoMLContainer.api_server_workers], ssl_certfile: str | None = Provide[BentoMLContainer.api_server_config.ssl.certfile], ssl_keyfile: str | None = Provide[BentoMLContainer.api_server_config.ssl.keyfile], ssl_keyfile_password: str @@ -442,7 +442,7 @@ def serve_http_production( ), ], working_dir=working_dir, - numprocesses=api_workers or math.ceil(CpuResource.from_system()), + numprocesses=api_workers, ) ) @@ -650,7 +650,7 @@ def serve_grpc_production( port: int = Provide[BentoMLContainer.grpc.port], host: str = Provide[BentoMLContainer.grpc.host], backlog: int = Provide[BentoMLContainer.api_server_config.backlog], - api_workers: int | None = None, + api_workers: int = Provide[BentoMLContainer.api_server_workers], reflection: bool = Provide[BentoMLContainer.grpc.reflection.enabled], max_concurrent_streams: int | None = Provide[BentoMLContainer.grpc.max_concurrent_streams], @@ -808,7 +808,7 @@ def serve_grpc_production( args=args, use_sockets=False, working_dir=working_dir, - numprocesses=api_workers or math.ceil(CpuResource.from_system()), + numprocesses=api_workers, ) ) diff --git a/bentoml_cli/serve.py b/bentoml_cli/serve.py index cde029fdbc..067b1571d3 100644 --- a/bentoml_cli/serve.py +++ b/bentoml_cli/serve.py @@ -45,7 +45,7 @@ def add_serve_command(cli: click.Group) -> None: @click.option( "--api-workers", type=click.INT, - default=None, + default=BentoMLContainer.api_server_workers.get(), help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode", envvar="BENTOML_API_WORKERS", show_default=True, @@ -249,7 +249,7 @@ def serve( # type: ignore (unused warning) @click.option( "--api-workers", type=click.INT, - default=None, + default=BentoMLContainer.api_server_workers.get(), help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode", envvar="BENTOML_API_WORKERS", show_default=True, diff --git a/bentoml_cli/start.py b/bentoml_cli/start.py index 6fd61bdd2f..025541e045 100644 --- a/bentoml_cli/start.py +++ b/bentoml_cli/start.py @@ -63,7 +63,7 @@ def add_start_command(cli: click.Group) -> None: @click.option( "--api-workers", type=click.INT, - default=None, + default=BentoMLContainer.api_server_workers.get(), help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode", envvar="BENTOML_API_WORKERS", ) @@ -295,7 +295,7 @@ def start_runner_server( # type: ignore (unused warning) @click.option( "--api-workers", type=click.INT, - default=None, + default=BentoMLContainer.api_server_workers.get(), help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode", envvar="BENTOML_API_WORKERS", )