From 78d3e0011d3599d2888f6ea1b8f4985fd00fa625 Mon Sep 17 00:00:00 2001 From: Sean Sheng Date: Fri, 28 Oct 2022 02:28:39 -0700 Subject: [PATCH] docs: Update advanced guides format (#3154) * docs: Update advanced guides format --- docs/source/concepts/runner.rst | 4 +-- docs/source/frameworks/catboost.rst | 2 +- docs/source/frameworks/xgboost.rst | 2 +- docs/source/guides/client.rst | 4 +-- docs/source/guides/configuration.rst | 6 ++-- docs/source/guides/grpc.rst | 2 +- docs/source/guides/index.rst | 11 ++++---- docs/source/guides/logging.rst | 6 ++-- docs/source/guides/monitoring.rst | 21 -------------- docs/source/guides/security.rst | 11 ++++++++ docs/source/guides/server.rst | 6 ++-- .../guides/snippets/metrics/metric_defs.py | 18 +++++++++++- .../nltk_pretrained_model/service.py | 28 +++++++++++++++---- .../_internal/server/metrics/prometheus.py | 2 +- 14 files changed, 72 insertions(+), 51 deletions(-) delete mode 100644 docs/source/guides/monitoring.rst diff --git a/docs/source/concepts/runner.rst b/docs/source/concepts/runner.rst index fc774f9a8d..f8cbb07a11 100644 --- a/docs/source/concepts/runner.rst +++ b/docs/source/concepts/runner.rst @@ -299,7 +299,7 @@ Runner Definition Runner Configuration -------------------- -Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration `. +Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration `. Runners can be both configured individually or in aggregate under the ``runners`` configuration key. To configure a specific runner, specify its name under the ``runners`` configuration key. Otherwise, the configuration will be applied to all runners. The examples below demonstrate both the configuration for all runners in aggregate and for an individual runner (``iris_clf``). 
@@ -307,7 +307,7 @@ the configuration for all runners in aggregate and for an individual runner (``i Adaptive Batching ^^^^^^^^^^^^^^^^^ -If a model or custom runner supports batching, the :ref:`adaptive batching ` mechanism is enabled by default. +If a model or custom runner supports batching, the :ref:`adaptive batching ` mechanism is enabled by default. To explicitly disable or control adaptive batching behaviors at runtime, configuration can be specified under the ``batching`` key. .. tab-set:: diff --git a/docs/source/frameworks/catboost.rst b/docs/source/frameworks/catboost.rst index b3ab379d84..b849b8fca9 100644 --- a/docs/source/frameworks/catboost.rst +++ b/docs/source/frameworks/catboost.rst @@ -138,7 +138,7 @@ Using GPU CatBoost Runners will automatically use ``task_type=GPU`` if a GPU is detected. -This behavior can be disabled using the :ref:`BentoML configuration file`: +This behavior can be disabled using the :ref:`BentoML configuration file`: access: diff --git a/docs/source/frameworks/xgboost.rst b/docs/source/frameworks/xgboost.rst index e9a9fc0490..703455a55f 100644 --- a/docs/source/frameworks/xgboost.rst +++ b/docs/source/frameworks/xgboost.rst @@ -145,7 +145,7 @@ GPU Inference If there is a GPU available, the XGBoost Runner will automatically use ``gpu_predictor`` by default. This can be disabled by using the -:ref:`BentoML configuration file ` to disable Runner GPU +:ref:`BentoML configuration file ` to disable Runner GPU access: .. code-block:: yaml diff --git a/docs/source/guides/client.rst b/docs/source/guides/client.rst index 213c6a6ff0..82edea1a75 100644 --- a/docs/source/guides/client.rst +++ b/docs/source/guides/client.rst @@ -1,6 +1,6 @@ -======================== +============ Bento Client -======================== +============ BentoML provides a client implementation that can be used to make requests to a BentoML server. 
diff --git a/docs/source/guides/configuration.rst b/docs/source/guides/configuration.rst index cc02440b9b..b250b3bfdc 100644 --- a/docs/source/guides/configuration.rst +++ b/docs/source/guides/configuration.rst @@ -1,6 +1,6 @@ -=================== -Configuring BentoML -=================== +============= +Configuration +============= BentoML starts with an out-of-the-box configuration that works for common use cases. For advanced users, many features can be customized through configuration. Both BentoML CLI and Python APIs can be customized diff --git a/docs/source/guides/grpc.rst b/docs/source/guides/grpc.rst index e3aa386930..2e357b1eb5 100644 --- a/docs/source/guides/grpc.rst +++ b/docs/source/guides/grpc.rst @@ -1410,7 +1410,7 @@ faster go-to-market strategy. Performance tuning ~~~~~~~~~~~~~~~~~~ -BentoML allows user to tune the performance of gRPC via :ref:`bentoml_configuration.yaml ` via ``api_server.grpc``. +BentoML allows user to tune the performance of gRPC via :ref:`bentoml_configuration.yaml ` via ``api_server.grpc``. A quick overview of the available configuration for gRPC: diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst index ddc7580426..4bb84e7481 100644 --- a/docs/source/guides/index.rst +++ b/docs/source/guides/index.rst @@ -13,16 +13,15 @@ into this part of the documentation. 
:titlesonly: batching + containerization client - grpc + server configuration - containerization - metrics - gpu logging - monitoring + metrics performance - server + grpc + gpu security tracing migration diff --git a/docs/source/guides/logging.rst b/docs/source/guides/logging.rst index c47a390c9a..22c91211e3 100644 --- a/docs/source/guides/logging.rst +++ b/docs/source/guides/logging.rst @@ -1,6 +1,6 @@ -================= -Customize Logging -================= +======= +Logging +======= Server Logging -------------- diff --git a/docs/source/guides/monitoring.rst b/docs/source/guides/monitoring.rst deleted file mode 100644 index ef051f14d0..0000000000 --- a/docs/source/guides/monitoring.rst +++ /dev/null @@ -1,21 +0,0 @@ -========== -Monitoring -========== - - -.. TODO:: - Document monitoring features in BentoML - - * Service Monitoring (prometheus, /metrics endpoint, system metrics) - * Monitoring component in Yatai - * Model Monitoring integrations - - -.. admonition:: Help us improve the project! - - Found an issue or a TODO item? You're always welcome to make contributions to the - project and its documentation. Check out the - `BentoML development guide `_ - and `documentation guide `_ - to get started. - diff --git a/docs/source/guides/security.rst b/docs/source/guides/security.rst index 70e36eb372..b17aeaec52 100644 --- a/docs/source/guides/security.rst +++ b/docs/source/guides/security.rst @@ -38,6 +38,17 @@ Here's an example with starlette-authlib: svc.add_asgi_middleware(SessionMiddleware, secret_key='you_secret') +Certificates +^^^^^^^^^^^^ + +BentoML supports HTTPS with self-signed certificates. To enable HTTPS, you need to provide SSL certificate and key files as arguments +to the :code:`bentoml serve` command. Use :code:`bentoml serve --help` to see the full list of options. + +.. 
code:: + + bentoml serve iris_classifier:latest --ssl-certfile /path/to/cert.pem --ssl-keyfile /path/to/key.pem + + Reverse Proxy ^^^^^^^^^^^^^ diff --git a/docs/source/guides/server.rst b/docs/source/guides/server.rst index 774f3841e9..47186a372c 100644 --- a/docs/source/guides/server.rst +++ b/docs/source/guides/server.rst @@ -1,6 +1,6 @@ -===================== -Customize BentoServer -===================== +============ +Bento Server +============ BentoML Server runs the Service API in an `ASGI `_ web serving layer and puts Runners in a separate worker process pool managed by BentoML. The ASGI web diff --git a/docs/source/guides/snippets/metrics/metric_defs.py b/docs/source/guides/snippets/metrics/metric_defs.py index 30db39daf9..ca48dce9dd 100644 --- a/docs/source/guides/snippets/metrics/metric_defs.py +++ b/docs/source/guides/snippets/metrics/metric_defs.py @@ -6,7 +6,23 @@ name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], - buckets=(0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")), + buckets=( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + float("inf"), + ), ) polarity_counter = bentoml.metrics.Counter( diff --git a/examples/custom_runner/nltk_pretrained_model/service.py b/examples/custom_runner/nltk_pretrained_model/service.py index 7137fb5ab4..d396caf9d9 100644 --- a/examples/custom_runner/nltk_pretrained_model/service.py +++ b/examples/custom_runner/nltk_pretrained_model/service.py @@ -24,13 +24,29 @@ class RunnerImpl(bentoml.Runner): name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], - buckets=exponential_buckets(0.001, 1.5, 10.0), + buckets=( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + float("inf"), + ), ) -num_invocation = bentoml.metrics.Counter( - 
name="num_invocation", - documentation="Count total number of invocation for a given endpoint", - labelnames=["endpoint"], +polarity_counter = bentoml.metrics.Counter( + name="polarity_total", + documentation="Count total number of analysis by polarity scores", + labelnames=["polarity"], ) @@ -63,6 +79,6 @@ def is_positive(self, input_text: str) -> bool: @svc.api(input=Text(), output=JSON()) async def analysis(input_text: str) -> dict[str, bool]: - num_invocation.labels(endpoint="analysis").inc() is_positive = await nltk_runner.is_positive.async_run(input_text) + polarity_counter.labels(polarity=is_positive).inc() return {"is_positive": is_positive} diff --git a/src/bentoml/_internal/server/metrics/prometheus.py b/src/bentoml/_internal/server/metrics/prometheus.py index b1a7907f90..2a05c60930 100644 --- a/src/bentoml/_internal/server/metrics/prometheus.py +++ b/src/bentoml/_internal/server/metrics/prometheus.py @@ -215,7 +215,7 @@ def create_response(request): ... The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds. - See :ref:`configuration guides ` to see how to customize the buckets. + See :ref:`configuration guides ` to see how to customize the buckets. Args: name (str): The name of the metric.