From d8a1ff490e1a9e8ef12a12f2ec08662fd7ab9a0a Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 01:16:05 -0700 Subject: [PATCH 1/5] docs: Update advanced guides format --- docs/source/concepts/runner.rst | 4 ++-- docs/source/frameworks/catboost.rst | 2 +- docs/source/frameworks/xgboost.rst | 2 +- docs/source/guides/client.rst | 4 ++-- docs/source/guides/configuration.rst | 6 +++--- docs/source/guides/grpc.rst | 2 +- docs/source/guides/index.rst | 9 ++++---- docs/source/guides/logging.rst | 6 +++--- docs/source/guides/monitoring.rst | 21 ------------------- docs/source/guides/security.rst | 11 ++++++++++ docs/source/guides/server.rst | 6 +++--- .../nltk_pretrained_model/service.py | 12 +++++------ .../_internal/server/metrics/prometheus.py | 2 +- 13 files changed, 38 insertions(+), 49 deletions(-) delete mode 100644 docs/source/guides/monitoring.rst diff --git a/docs/source/concepts/runner.rst b/docs/source/concepts/runner.rst index fc774f9a8d..f8cbb07a11 100644 --- a/docs/source/concepts/runner.rst +++ b/docs/source/concepts/runner.rst @@ -299,7 +299,7 @@ Runner Definition Runner Configuration -------------------- -Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration `. +Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration `. Runners can be both configured individually or in aggregate under the ``runners`` configuration key. To configure a specific runner, specify its name under the ``runners`` configuration key. Otherwise, the configuration will be applied to all runners. The examples below demonstrate both the configuration for all runners in aggregate and for an individual runner (``iris_clf``). @@ -307,7 +307,7 @@ the configuration for all runners in aggregate and for an individual runner (``i Adaptive Batching ^^^^^^^^^^^^^^^^^ -If a model or custom runner supports batching, the :ref:`adaptive batching ` mechanism is enabled by default. 
+If a model or custom runner supports batching, the :ref:`adaptive batching ` mechanism is enabled by default. To explicitly disable or control adaptive batching behaviors at runtime, configuration can be specified under the ``batching`` key. .. tab-set:: diff --git a/docs/source/frameworks/catboost.rst b/docs/source/frameworks/catboost.rst index b3ab379d84..b849b8fca9 100644 --- a/docs/source/frameworks/catboost.rst +++ b/docs/source/frameworks/catboost.rst @@ -138,7 +138,7 @@ Using GPU CatBoost Runners will automatically use ``task_type=GPU`` if a GPU is detected. -This behavior can be disabled using the :ref:`BentoML configuration file`: +This behavior can be disabled using the :ref:`BentoML configuration file`: access: diff --git a/docs/source/frameworks/xgboost.rst b/docs/source/frameworks/xgboost.rst index e9a9fc0490..703455a55f 100644 --- a/docs/source/frameworks/xgboost.rst +++ b/docs/source/frameworks/xgboost.rst @@ -145,7 +145,7 @@ GPU Inference If there is a GPU available, the XGBoost Runner will automatically use ``gpu_predictor`` by default. This can be disabled by using the -:ref:`BentoML configuration file ` to disable Runner GPU +:ref:`BentoML configuration file ` to disable Runner GPU access: .. code-block:: yaml diff --git a/docs/source/guides/client.rst b/docs/source/guides/client.rst index 213c6a6ff0..82edea1a75 100644 --- a/docs/source/guides/client.rst +++ b/docs/source/guides/client.rst @@ -1,6 +1,6 @@ -======================== +============ Bento Client -======================== +============ BentoML provides a client implementation that can be used to make requests to a BentoML server. 
diff --git a/docs/source/guides/configuration.rst b/docs/source/guides/configuration.rst index cc02440b9b..b250b3bfdc 100644 --- a/docs/source/guides/configuration.rst +++ b/docs/source/guides/configuration.rst @@ -1,6 +1,6 @@ -=================== -Configuring BentoML -=================== +============= +Configuration +============= BentoML starts with an out-of-the-box configuration that works for common use cases. For advanced users, many features can be customized through configuration. Both BentoML CLI and Python APIs can be customized diff --git a/docs/source/guides/grpc.rst b/docs/source/guides/grpc.rst index e3aa386930..2e357b1eb5 100644 --- a/docs/source/guides/grpc.rst +++ b/docs/source/guides/grpc.rst @@ -1410,7 +1410,7 @@ faster go-to-market strategy. Performance tuning ~~~~~~~~~~~~~~~~~~ -BentoML allows user to tune the performance of gRPC via :ref:`bentoml_configuration.yaml ` via ``api_server.grpc``. +BentoML allows user to tune the performance of gRPC via :ref:`bentoml_configuration.yaml ` via ``api_server.grpc``. A quick overview of the available configuration for gRPC: diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst index ddc7580426..05700300a2 100644 --- a/docs/source/guides/index.rst +++ b/docs/source/guides/index.rst @@ -13,16 +13,15 @@ into this part of the documentation. 
:titlesonly: batching + containerization client - grpc + server configuration - containerization + grpc + logging metrics gpu - logging - monitoring performance - server security tracing migration diff --git a/docs/source/guides/logging.rst b/docs/source/guides/logging.rst index c47a390c9a..22c91211e3 100644 --- a/docs/source/guides/logging.rst +++ b/docs/source/guides/logging.rst @@ -1,6 +1,6 @@ -================= -Customize Logging -================= +======= +Logging +======= Server Logging -------------- diff --git a/docs/source/guides/monitoring.rst b/docs/source/guides/monitoring.rst deleted file mode 100644 index ef051f14d0..0000000000 --- a/docs/source/guides/monitoring.rst +++ /dev/null @@ -1,21 +0,0 @@ -========== -Monitoring -========== - - -.. TODO:: - Document monitoring features in BentoML - - * Service Monitoring (prometheus, /metrics endpoint, system metrics) - * Monitoring component in Yatai - * Model Monitoring integrations - - -.. admonition:: Help us improve the project! - - Found an issue or a TODO item? You're always welcome to make contributions to the - project and its documentation. Check out the - `BentoML development guide `_ - and `documentation guide `_ - to get started. - diff --git a/docs/source/guides/security.rst b/docs/source/guides/security.rst index 70e36eb372..b17aeaec52 100644 --- a/docs/source/guides/security.rst +++ b/docs/source/guides/security.rst @@ -38,6 +38,17 @@ Here's an example with starlette-authlib: svc.add_asgi_middleware(SessionMiddleware, secret_key='you_secret') +Certificates +^^^^^^^^^^^^ + +BentoML supports HTTPS with self-signed certificates. To enable HTTPS, you need to provide SSL certificate and key files as arguments +to the :code:`bentoml serve` command. Use :code:`bentoml serve --help` to see the full list of options. + +.. 
code:: + + bentoml serve iris_classifier:latest --ssl-certfile /path/to/cert.pem --ssl-keyfile /path/to/key.pem + + Reverse Proxy ^^^^^^^^^^^^^ diff --git a/docs/source/guides/server.rst b/docs/source/guides/server.rst index 774f3841e9..47186a372c 100644 --- a/docs/source/guides/server.rst +++ b/docs/source/guides/server.rst @@ -1,6 +1,6 @@ -===================== -Customize BentoServer -===================== +============ +Bento Server +============ BentoML Server runs the Service API in an `ASGI `_ web serving layer and puts Runners in a separate worker process pool managed by BentoML. The ASGI web diff --git a/examples/custom_runner/nltk_pretrained_model/service.py b/examples/custom_runner/nltk_pretrained_model/service.py index 7137fb5ab4..161553aef0 100644 --- a/examples/custom_runner/nltk_pretrained_model/service.py +++ b/examples/custom_runner/nltk_pretrained_model/service.py @@ -24,13 +24,13 @@ class RunnerImpl(bentoml.Runner): name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], - buckets=exponential_buckets(0.001, 1.5, 10.0), + buckets=(0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")), ) -num_invocation = bentoml.metrics.Counter( - name="num_invocation", - documentation="Count total number of invocation for a given endpoint", - labelnames=["endpoint"], +polarity_counter = bentoml.metrics.Counter( + name="polarity_total", + documentation="Count total number of analysis by polarity scores", + labelnames=["polarity"], ) @@ -63,6 +63,6 @@ def is_positive(self, input_text: str) -> bool: @svc.api(input=Text(), output=JSON()) async def analysis(input_text: str) -> dict[str, bool]: - num_invocation.labels(endpoint="analysis").inc() is_positive = await nltk_runner.is_positive.async_run(input_text) + polarity_counter.labels(polarity=is_positive).inc() return {"is_positive": is_positive} diff --git a/src/bentoml/_internal/server/metrics/prometheus.py 
b/src/bentoml/_internal/server/metrics/prometheus.py index b1a7907f90..2a05c60930 100644 --- a/src/bentoml/_internal/server/metrics/prometheus.py +++ b/src/bentoml/_internal/server/metrics/prometheus.py @@ -215,7 +215,7 @@ def create_response(request): ... The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds. - See :ref:`configuration guides ` to see how to customize the buckets. + See :ref:`configuration guides ` to see how to customize the buckets. Args: name (str): The name of the metric. From bd190378bb4d72a9b9947ab963155ed36043d080 Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 01:31:21 -0700 Subject: [PATCH 2/5] Update index.rst --- docs/source/guides/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst index 05700300a2..4bb84e7481 100644 --- a/docs/source/guides/index.rst +++ b/docs/source/guides/index.rst @@ -17,11 +17,11 @@ into this part of the documentation. 
client server configuration - grpc logging metrics - gpu performance + grpc + gpu security tracing migration From f7d111efb6b6b7000c757a8950bb4f922409a74c Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 01:41:05 -0700 Subject: [PATCH 3/5] Update format --- .../guides/snippets/metrics/metric_defs.py | 18 +++++++++++++++++- .../nltk_pretrained_model/service.py | 18 +++++++++++++++++- .../_internal/bento/build_dev_bentoml_whl.py | 3 +-- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/docs/source/guides/snippets/metrics/metric_defs.py b/docs/source/guides/snippets/metrics/metric_defs.py index 30db39daf9..ca48dce9dd 100644 --- a/docs/source/guides/snippets/metrics/metric_defs.py +++ b/docs/source/guides/snippets/metrics/metric_defs.py @@ -6,7 +6,23 @@ name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], - buckets=(0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")), + buckets=( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + float("inf"), + ), ) polarity_counter = bentoml.metrics.Counter( diff --git a/examples/custom_runner/nltk_pretrained_model/service.py b/examples/custom_runner/nltk_pretrained_model/service.py index 161553aef0..d396caf9d9 100644 --- a/examples/custom_runner/nltk_pretrained_model/service.py +++ b/examples/custom_runner/nltk_pretrained_model/service.py @@ -24,7 +24,23 @@ class RunnerImpl(bentoml.Runner): name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], - buckets=(0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")), + buckets=( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + float("inf"), + ), ) polarity_counter = bentoml.metrics.Counter( diff --git 
a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py index f833690bf8..a2caf9ec2b 100644 --- a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py +++ b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py @@ -28,9 +28,8 @@ def build_bentoml_editable_wheel(target_path: str) -> None: return try: - from build.env import IsolatedEnvBuilder - from build import ProjectBuilder + from build.env import IsolatedEnvBuilder except ModuleNotFoundError as e: raise MissingDependencyException( f"Environment variable '{BENTOML_DEV_BUILD}=True', which requires the 'pypa/build' package ({e}). Install development dependencies with 'pip install -r requirements/dev-requirements.txt' and try again." From 3d6c7d5bfe53bc29a341746f3c23f3583561e5dd Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 01:56:38 -0700 Subject: [PATCH 4/5] Update build_dev_bentoml_whl.py --- src/bentoml/_internal/bento/build_dev_bentoml_whl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py index a2caf9ec2b..ee73e32937 100644 --- a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py +++ b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py @@ -28,8 +28,8 @@ def build_bentoml_editable_wheel(target_path: str) -> None: return try: - from build import ProjectBuilder from build.env import IsolatedEnvBuilder + from build import ProjectBuilder except ModuleNotFoundError as e: raise MissingDependencyException( f"Environment variable '{BENTOML_DEV_BUILD}=True', which requires the 'pypa/build' package ({e}). Install development dependencies with 'pip install -r requirements/dev-requirements.txt' and try again." 
From 45a593a06d4c97a62a20cb03fdc25a6a2dd36e37 Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 02:08:21 -0700 Subject: [PATCH 5/5] Update build_dev_bentoml_whl.py --- src/bentoml/_internal/bento/build_dev_bentoml_whl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py index ee73e32937..f833690bf8 100644 --- a/src/bentoml/_internal/bento/build_dev_bentoml_whl.py +++ b/src/bentoml/_internal/bento/build_dev_bentoml_whl.py @@ -29,6 +29,7 @@ def build_bentoml_editable_wheel(target_path: str) -> None: try: from build.env import IsolatedEnvBuilder + from build import ProjectBuilder except ModuleNotFoundError as e: raise MissingDependencyException(