Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix(config): respect api_server.workers #3049

Merged
merged 2 commits into from Sep 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions bentoml/_internal/configuration/containers.py
@@ -1,10 +1,10 @@
from __future__ import annotations

import os
import math
import uuid
import typing as t
import logging
import multiprocessing
from copy import deepcopy
from typing import TYPE_CHECKING
from dataclasses import dataclass
Expand All @@ -24,6 +24,7 @@
from ..utils import split_with_quotes
from ..utils import validate_or_create_dir
from ..context import component_context
from ..resource import CpuResource
from ..resource import system_resources
from ...exceptions import BentoMLConfigException
from ..utils.unflatten import unflatten
Expand Down Expand Up @@ -449,7 +450,7 @@ def access_control_options(
return filtered_kwargs

api_server_workers = providers.Factory[int](
lambda workers: workers or (multiprocessing.cpu_count() // 2) + 1,
lambda workers: workers or math.ceil(CpuResource.from_system()),
api_server_config.workers,
)

Expand Down
@@ -1,5 +1,5 @@
api_server:
workers: 1
workers: ~ # When this is set to null, the number of available CPU cores is used.
timeout: 60
backlog: 2048
metrics:
Expand Down
8 changes: 4 additions & 4 deletions bentoml/serve.py
Expand Up @@ -289,7 +289,7 @@ def serve_http_production(
port: int = Provide[BentoMLContainer.http.port],
host: str = Provide[BentoMLContainer.http.host],
backlog: int = Provide[BentoMLContainer.api_server_config.backlog],
api_workers: int | None = None,
api_workers: int = Provide[BentoMLContainer.api_server_workers],
ssl_certfile: str | None = Provide[BentoMLContainer.api_server_config.ssl.certfile],
ssl_keyfile: str | None = Provide[BentoMLContainer.api_server_config.ssl.keyfile],
ssl_keyfile_password: str
Expand Down Expand Up @@ -442,7 +442,7 @@ def serve_http_production(
),
],
working_dir=working_dir,
numprocesses=api_workers or math.ceil(CpuResource.from_system()),
numprocesses=api_workers,
)
)

Expand Down Expand Up @@ -650,7 +650,7 @@ def serve_grpc_production(
port: int = Provide[BentoMLContainer.grpc.port],
host: str = Provide[BentoMLContainer.grpc.host],
backlog: int = Provide[BentoMLContainer.api_server_config.backlog],
api_workers: int | None = None,
api_workers: int = Provide[BentoMLContainer.api_server_workers],
reflection: bool = Provide[BentoMLContainer.grpc.reflection.enabled],
max_concurrent_streams: int
| None = Provide[BentoMLContainer.grpc.max_concurrent_streams],
Expand Down Expand Up @@ -808,7 +808,7 @@ def serve_grpc_production(
args=args,
use_sockets=False,
working_dir=working_dir,
numprocesses=api_workers or math.ceil(CpuResource.from_system()),
numprocesses=api_workers,
)
)

Expand Down
4 changes: 2 additions & 2 deletions bentoml_cli/serve.py
Expand Up @@ -45,7 +45,7 @@ def add_serve_command(cli: click.Group) -> None:
@click.option(
"--api-workers",
type=click.INT,
default=None,
default=BentoMLContainer.api_server_workers.get(),
help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode",
envvar="BENTOML_API_WORKERS",
show_default=True,
Expand Down Expand Up @@ -249,7 +249,7 @@ def serve( # type: ignore (unused warning)
@click.option(
"--api-workers",
type=click.INT,
default=None,
default=BentoMLContainer.api_server_workers.get(),
help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode",
envvar="BENTOML_API_WORKERS",
show_default=True,
Expand Down
4 changes: 2 additions & 2 deletions bentoml_cli/start.py
Expand Up @@ -63,7 +63,7 @@ def add_start_command(cli: click.Group) -> None:
@click.option(
"--api-workers",
type=click.INT,
default=None,
default=BentoMLContainer.api_server_workers.get(),
help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode",
envvar="BENTOML_API_WORKERS",
)
Expand Down Expand Up @@ -295,7 +295,7 @@ def start_runner_server( # type: ignore (unused warning)
@click.option(
"--api-workers",
type=click.INT,
default=None,
default=BentoMLContainer.api_server_workers.get(),
help="Specify the number of API server workers to start. Default to number of available CPU cores in production mode",
envvar="BENTOML_API_WORKERS",
)
Expand Down