From 78d3e0011d3599d2888f6ea1b8f4985fd00fa625 Mon Sep 17 00:00:00 2001
From: Sean Sheng <s3sheng@gmail.com>
Date: Fri, 28 Oct 2022 02:28:39 -0700
Subject: [PATCH] docs: Update advanced guides format (#3154)

* docs: Update advanced guides format
---
 docs/source/concepts/runner.rst               |  4 +--
 docs/source/frameworks/catboost.rst           |  2 +-
 docs/source/frameworks/xgboost.rst            |  2 +-
 docs/source/guides/client.rst                 |  4 +--
 docs/source/guides/configuration.rst          |  6 ++--
 docs/source/guides/grpc.rst                   |  2 +-
 docs/source/guides/index.rst                  | 11 ++++----
 docs/source/guides/logging.rst                |  6 ++--
 docs/source/guides/monitoring.rst             | 21 --------------
 docs/source/guides/security.rst               | 11 ++++++++
 docs/source/guides/server.rst                 |  6 ++--
 .../guides/snippets/metrics/metric_defs.py    | 18 +++++++++++-
 .../nltk_pretrained_model/service.py          | 28 +++++++++++++++----
 .../_internal/server/metrics/prometheus.py    |  2 +-
 14 files changed, 72 insertions(+), 51 deletions(-)
 delete mode 100644 docs/source/guides/monitoring.rst

diff --git a/docs/source/concepts/runner.rst b/docs/source/concepts/runner.rst
index fc774f9a8d..f8cbb07a11 100644
--- a/docs/source/concepts/runner.rst
+++ b/docs/source/concepts/runner.rst
@@ -299,7 +299,7 @@ Runner Definition
 Runner Configuration
 --------------------
 
-Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration <guides/configuration:Configuring BentoML>`.
+Runner behaviors and resource allocation can be specified via BentoML :ref:`configuration <guides/configuration:Configuration>`.
 Runners can be both configured individually or in aggregate under the ``runners`` configuration key. To configure a specific runner, specify its name
 under the ``runners`` configuration key. Otherwise, the configuration will be applied to all runners. The examples below demonstrate both
 the configuration for all runners in aggregate and for an individual runner (``iris_clf``).
@@ -307,7 +307,7 @@ the configuration for all runners in aggregate and for an individual runner (``i
 Adaptive Batching
 ^^^^^^^^^^^^^^^^^
 
-If a model or custom runner supports batching, the :ref:`adaptive batching <guides/configuration:Configuring BentoML>` mechanism is enabled by default.
+If a model or custom runner supports batching, the :ref:`adaptive batching <guides/configuration:Configuration>` mechanism is enabled by default.
 To explicitly disable or control adaptive batching behaviors at runtime, configuration can be specified under the ``batching`` key.
 
 .. tab-set::
diff --git a/docs/source/frameworks/catboost.rst b/docs/source/frameworks/catboost.rst
index b3ab379d84..b849b8fca9 100644
--- a/docs/source/frameworks/catboost.rst
+++ b/docs/source/frameworks/catboost.rst
@@ -138,7 +138,7 @@ Using GPU
 
 CatBoost Runners will automatically use ``task_type=GPU`` if a GPU is detected.
 
-This behavior can be disabled using the :ref:`BentoML configuration file<guides/configuration:Configuring BentoML>`:
+This behavior can be disabled using the :ref:`BentoML configuration file<guides/configuration:Configuration>`:
 
 access:
 
diff --git a/docs/source/frameworks/xgboost.rst b/docs/source/frameworks/xgboost.rst
index e9a9fc0490..703455a55f 100644
--- a/docs/source/frameworks/xgboost.rst
+++ b/docs/source/frameworks/xgboost.rst
@@ -145,7 +145,7 @@ GPU Inference
 
 If there is a GPU available, the XGBoost Runner will automatically use ``gpu_predictor`` by default.
 This can be disabled by using the
-:ref:`BentoML configuration file <guides/configuration:Configuring BentoML>` to disable Runner GPU
+:ref:`BentoML configuration file <guides/configuration:Configuration>` to disable Runner GPU
 access:
 
 .. code-block:: yaml
diff --git a/docs/source/guides/client.rst b/docs/source/guides/client.rst
index 213c6a6ff0..82edea1a75 100644
--- a/docs/source/guides/client.rst
+++ b/docs/source/guides/client.rst
@@ -1,6 +1,6 @@
-========================
+============
 Bento Client
-========================
+============
 
 BentoML provides a client implementation that can be used to make requests to a BentoML server.
 
diff --git a/docs/source/guides/configuration.rst b/docs/source/guides/configuration.rst
index cc02440b9b..b250b3bfdc 100644
--- a/docs/source/guides/configuration.rst
+++ b/docs/source/guides/configuration.rst
@@ -1,6 +1,6 @@
-===================
-Configuring BentoML
-===================
+=============
+Configuration
+=============
 
 BentoML starts with an out-of-the-box configuration that works for common use cases. For advanced users, many
 features can be customized through configuration. Both BentoML CLI and Python APIs can be customized 
diff --git a/docs/source/guides/grpc.rst b/docs/source/guides/grpc.rst
index e3aa386930..2e357b1eb5 100644
--- a/docs/source/guides/grpc.rst
+++ b/docs/source/guides/grpc.rst
@@ -1410,7 +1410,7 @@ faster go-to-market strategy.
 Performance tuning
 ~~~~~~~~~~~~~~~~~~
 
-BentoML allows user to tune the performance of gRPC via :ref:`bentoml_configuration.yaml <guides/configuration:Configuring BentoML>` via ``api_server.grpc``.
+BentoML allows users to tune the performance of gRPC in :ref:`bentoml_configuration.yaml <guides/configuration:Configuration>` via ``api_server.grpc``.
 
 A quick overview of the available configuration for gRPC:
 
diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst
index ddc7580426..4bb84e7481 100644
--- a/docs/source/guides/index.rst
+++ b/docs/source/guides/index.rst
@@ -13,16 +13,15 @@ into this part of the documentation.
     :titlesonly:
 
     batching
+    containerization
     client
-    grpc
+    server
     configuration
-    containerization
-    metrics
-    gpu
     logging
-    monitoring
+    metrics
     performance
-    server
+    grpc
+    gpu
     security
     tracing
     migration
diff --git a/docs/source/guides/logging.rst b/docs/source/guides/logging.rst
index c47a390c9a..22c91211e3 100644
--- a/docs/source/guides/logging.rst
+++ b/docs/source/guides/logging.rst
@@ -1,6 +1,6 @@
-=================
-Customize Logging
-=================
+=======
+Logging
+=======
 
 Server Logging
 --------------
diff --git a/docs/source/guides/monitoring.rst b/docs/source/guides/monitoring.rst
deleted file mode 100644
index ef051f14d0..0000000000
--- a/docs/source/guides/monitoring.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-==========
-Monitoring
-==========
-
-
-.. TODO::
-    Document monitoring features in BentoML
-
-    * Service Monitoring (prometheus, /metrics endpoint, system metrics)
-    * Monitoring component in Yatai
-    * Model Monitoring integrations
-
-
-.. admonition:: Help us improve the project!
-
-    Found an issue or a TODO item? You're always welcome to make contributions to the
-    project and its documentation. Check out the
-    `BentoML development guide <https://github.com/bentoml/BentoML/blob/main/DEVELOPMENT.md>`_
-    and `documentation guide <https://github.com/bentoml/BentoML/blob/main/docs/README.md>`_
-    to get started.
-
diff --git a/docs/source/guides/security.rst b/docs/source/guides/security.rst
index 70e36eb372..b17aeaec52 100644
--- a/docs/source/guides/security.rst
+++ b/docs/source/guides/security.rst
@@ -38,6 +38,17 @@ Here's an example with starlette-authlib:
     svc.add_asgi_middleware(SessionMiddleware, secret_key='you_secret')
 
 
+Certificates
+^^^^^^^^^^^^
+
+BentoML supports HTTPS with self-signed certificates. To enable HTTPS, you can provide SSL certificate and key files as arguments
+to the :code:`bentoml serve` command. Use :code:`bentoml serve --help` to see the full list of options.
+
+.. code::
+    
+    bentoml serve iris_classifier:latest --ssl-certfile /path/to/cert.pem --ssl-keyfile /path/to/key.pem
+
+
 Reverse Proxy
 ^^^^^^^^^^^^^
 
diff --git a/docs/source/guides/server.rst b/docs/source/guides/server.rst
index 774f3841e9..47186a372c 100644
--- a/docs/source/guides/server.rst
+++ b/docs/source/guides/server.rst
@@ -1,6 +1,6 @@
-=====================
-Customize BentoServer
-=====================
+============
+Bento Server
+============
 
 BentoML Server runs the Service API in an `ASGI <https://asgi.readthedocs.io/en/latest/>`_
 web serving layer and puts Runners in a separate worker process pool managed by BentoML. The ASGI web
diff --git a/docs/source/guides/snippets/metrics/metric_defs.py b/docs/source/guides/snippets/metrics/metric_defs.py
index 30db39daf9..ca48dce9dd 100644
--- a/docs/source/guides/snippets/metrics/metric_defs.py
+++ b/docs/source/guides/snippets/metrics/metric_defs.py
@@ -6,7 +6,23 @@
     name="inference_duration",
     documentation="Duration of inference",
     labelnames=["nltk_version", "sentiment_cls"],
-    buckets=(0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")),
+    buckets=(
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.075,
+        0.1,
+        0.25,
+        0.5,
+        0.75,
+        1.0,
+        2.5,
+        5.0,
+        7.5,
+        10.0,
+        float("inf"),
+    ),
 )
 
 polarity_counter = bentoml.metrics.Counter(
diff --git a/examples/custom_runner/nltk_pretrained_model/service.py b/examples/custom_runner/nltk_pretrained_model/service.py
index 7137fb5ab4..d396caf9d9 100644
--- a/examples/custom_runner/nltk_pretrained_model/service.py
+++ b/examples/custom_runner/nltk_pretrained_model/service.py
@@ -24,13 +24,29 @@ class RunnerImpl(bentoml.Runner):
     name="inference_duration",
     documentation="Duration of inference",
     labelnames=["nltk_version", "sentiment_cls"],
-    buckets=exponential_buckets(0.001, 1.5, 10.0),
+    buckets=(
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.075,
+        0.1,
+        0.25,
+        0.5,
+        0.75,
+        1.0,
+        2.5,
+        5.0,
+        7.5,
+        10.0,
+        float("inf"),
+    ),
 )
 
-num_invocation = bentoml.metrics.Counter(
-    name="num_invocation",
-    documentation="Count total number of invocation for a given endpoint",
-    labelnames=["endpoint"],
+polarity_counter = bentoml.metrics.Counter(
+    name="polarity_total",
+    documentation="Count total number of analysis by polarity scores",
+    labelnames=["polarity"],
 )
 
 
@@ -63,6 +79,6 @@ def is_positive(self, input_text: str) -> bool:
 
 @svc.api(input=Text(), output=JSON())
 async def analysis(input_text: str) -> dict[str, bool]:
-    num_invocation.labels(endpoint="analysis").inc()
     is_positive = await nltk_runner.is_positive.async_run(input_text)
+    polarity_counter.labels(polarity=is_positive).inc()
     return {"is_positive": is_positive}
diff --git a/src/bentoml/_internal/server/metrics/prometheus.py b/src/bentoml/_internal/server/metrics/prometheus.py
index b1a7907f90..2a05c60930 100644
--- a/src/bentoml/_internal/server/metrics/prometheus.py
+++ b/src/bentoml/_internal/server/metrics/prometheus.py
@@ -215,7 +215,7 @@ def create_response(request):
                          ...
 
         The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds.
-        See :ref:`configuration guides <guides/configuration:Configuring BentoML>` to see how to customize the buckets.
+        See :ref:`configuration guides <guides/configuration:Configuration>` to see how to customize the buckets.
 
         Args:
             name (str): The name of the metric.