From da43bfb4da6e004cac9879382d45651eebcc11aa Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:20:51 -0700
Subject: [PATCH] fix: tests

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 pyproject.toml                                |  3 +-
 requirements/tests-requirements.txt           |  1 -
 .../_internal/server/metrics/prometheus.py    | 16 ++++
 src/bentoml/_internal/utils/__init__.py       | 20 +++++
 src/bentoml/metrics.py                        | 84 ++++++++++++++++---
 src/bentoml/metrics.pyi                       |  2 -
 src/bentoml/testing/server.py                 | 31 ++++---
 tests/e2e/bento_server_grpc/service.py        | 14 ++++
 tests/e2e/bento_server_grpc/tests/conftest.py |  3 +
 .../tests/test_descriptors.py                 |  9 ++
 tests/e2e/bento_server_http/service.py        | 11 +++
 tests/e2e/bento_server_http/tests/conftest.py |  3 +
 .../e2e/bento_server_http/tests/test_meta.py  | 12 +++
 tests/unit/test_metrics.py                    | 15 +---
 14 files changed, 181 insertions(+), 43 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 919c1778987..62433a3f552 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,7 +49,8 @@ dependencies = [
   "pathspec",
   "pip-tools>=6.6.2",
   "pip-requirements-parser>=31.2.0",
-  "prometheus-client>=0.10.0",
+  # Lowest version of prometheus_client that supports 3.10
+  "prometheus-client>=0.12.0",
   "psutil",
   "pynvml<12",
   "python-dateutil",
diff --git a/requirements/tests-requirements.txt b/requirements/tests-requirements.txt
index 8d71141cc5d..274f610e6ab 100644
--- a/requirements/tests-requirements.txt
+++ b/requirements/tests-requirements.txt
@@ -17,4 +17,3 @@ imageio==2.22.1
 pyarrow==9.0.0
 build[virtualenv]==0.8.0
 protobuf==3.19.6
-grpcio-tools>=1.41.0,<1.49.0,!=1.48.2
diff --git a/src/bentoml/_internal/server/metrics/prometheus.py b/src/bentoml/_internal/server/metrics/prometheus.py
index 2a05c60930c..33a93f59cea 100644
--- a/src/bentoml/_internal/server/metrics/prometheus.py
+++ b/src/bentoml/_internal/server/metrics/prometheus.py
@@ -109,6 +109,19 @@ def start_http_server(self, port: int, addr: str = "") -> None:
             registry=self.registry,
         )
 
+    start_wsgi_server = start_http_server
+
+    def write_to_textfile(self, path: str) -> None:
+        """
+        Write metrics to given path. This is intended to be used with
+        the Node expoerter textfile collector.
+
+        Args:
+            path: path to write the metrics to. This file must end
+                  with '.prom' for the textfile collector to process it.
+        """
+        self.prometheus_client.write_to_textfile(path, registry=self.registry)
+
     def make_wsgi_app(self) -> ext.WSGIApp:
         """
         Create a WSGI app which serves the metrics from a registry.
@@ -118,6 +131,9 @@ def make_wsgi_app(self) -> ext.WSGIApp:
         """
         return self.prometheus_client.make_wsgi_app(registry=self.registry)  # type: ignore (unfinished prometheus types)
 
+    def make_asgi_app(self) -> ext.ASGIApp:
+        return self.prometheus_client.make_asgi_app(registry=self.registry)  # type: ignore (unfinished prometheus types)
+
     def generate_latest(self):
         """
         Returns metrics from the registry in latest text format as a string.
diff --git a/src/bentoml/_internal/utils/__init__.py b/src/bentoml/_internal/utils/__init__.py
index 9201265633c..dd7345aa1b2 100644
--- a/src/bentoml/_internal/utils/__init__.py
+++ b/src/bentoml/_internal/utils/__init__.py
@@ -70,6 +70,15 @@
 
 
 def warn_experimental(api_name: str) -> None:
+    """
+    Warns the user that the given API is experimental.
+    Make sure that if the API is not experimental anymore, remove this function call.
+
+    If 'api_name' requires string formatting, use %-formatting for optimization.
+
+    Args:
+        api_name: The name of the API that is experimental.
+    """
     if api_name not in _EXPERIMENTAL_APIS:
         _EXPERIMENTAL_APIS.add(api_name)
         msg = "'%s' is an EXPERIMENTAL API and is currently not yet stable. Proceed with caution!"
@@ -80,6 +89,14 @@ def warn_experimental(api_name: str) -> None:
 def experimental(
     f: t.Callable[P, t.Any] | None = None, *, api_name: str | None = None
 ) -> t.Callable[..., t.Any]:
+    """
+    Decorator to mark an API as experimental.
+
+    If 'api_name' requires string formatting, use %-formatting for optimization.
+
+    Args:
+        api_name: The name of the API that is experimental.
+    """
     if api_name is None:
         api_name = f.__name__ if inspect.isfunction(f) else repr(f)
 
@@ -97,6 +114,9 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> t.Any:
 
 
 def add_experimental_docstring(f: t.Callable[P, t.Any]) -> t.Callable[P, t.Any]:
+    """
+    Decorator to add an experimental warning to the docstring of a function.
+    """
     f.__doc__ = "[EXPERIMENTAL] " + (f.__doc__ if f.__doc__ is not None else "")
     return f
 
diff --git a/src/bentoml/metrics.py b/src/bentoml/metrics.py
index a0c61f20b47..6ce74a33c8e 100644
--- a/src/bentoml/metrics.py
+++ b/src/bentoml/metrics.py
@@ -17,14 +17,57 @@
 
 from ._internal.configuration.containers import BentoMLContainer
 
+# This sets of functions are implemented in the PrometheusClient class
+_INTERNAL_IMPL = [
+    "start_http_server",
+    "start_wsgi_server",
+    "make_wsgi_app",
+    "make_asgi_app",
+    "generate_latest",
+    "text_string_to_metric_families",
+    "write_to_textfile",
+]
+_NOT_IMPLEMENTED = [
+    "delete_from_gateway",
+    "instance_ip_grouping_key",
+    "push_to_gateway",
+    "pushadd_to_gateway",
+]
+_NOT_SUPPORTED = [
+    "GC_COLLECTOR",
+    "GCCollector",
+    "PLATFORM_COLLECTOR",
+    "PlatformCollector",
+    "PROCESS_COLLECTOR",
+    "ProcessCollector",
+    "REGISTRY",
+] + _NOT_IMPLEMENTED
+_PROPERTY = ["CONTENT_TYPE_LATEST"]
+
 
 def __dir__() -> list[str]:
+    # This is for IPython and IDE autocompletion.
     metrics_client = BentoMLContainer.metrics_client.get()
-    return dir(metrics_client.prometheus_client)
+    return list(set(dir(metrics_client.prometheus_client)) - set(_NOT_SUPPORTED))
+
+
+def __getattr__(item: t.Any):
+    # This is the entrypoint for all bentoml.metrics.*
+    if item in _PROPERTY:
+        logger.warning(
+            "'%s' is a '@property', which means there is no lazy loading. See https://docs.bentoml.org/en/latest/guides/metrics.html.",
+            item,
+        )
+        return getattr(_LazyMetric(item), item)
+    return _LazyMetric(item)
 
 
 class _LazyMetric:
-    def __init__(self, attr: t.Any):
+    __slots__ = ("_attr", "_proxy", "_initialized", "_args", "_kwargs")
+
+    def __init__(self, attr: str):
+        if attr in _NOT_SUPPORTED:
+            raise NotImplementedError(f"{attr} is not yet supported.")
         self._attr = attr
         self._proxy = None
         self._initialized = False
@@ -40,28 +83,47 @@ def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
             *args: Arguments to pass to the metrics object.
             **kwargs: Keyword arguments to pass to the metrics object.
         """
-        # This is where we lazy load the proxy object.
+        if "registry" in kwargs:
+            raise ValueError(
+                f"'registry' should not be passed when using '{__name__}.{self._attr}'. See https://docs.bentoml.org/en/latest/reference/metrics.html."
+            )
         warn_experimental("%s.%s" % (__name__, self._attr))
         self._args = args
         self._kwargs = kwargs
+        if self._attr in _INTERNAL_IMPL:
+            # first-class function implementation from BentoML Prometheus client.
+            # In this case, the function will be called directly.
+            return self._load_proxy()
         return self
 
     def __getattr__(self, item: t.Any) -> t.Any:
-        if item in ("_attr", "_proxy", "_initialized", "_args", "_kwargs"):
+        if item in self.__slots__:
             raise AttributeError(f"Attribute {item} is private to {self}.")
         if self._proxy is None:
-            self._load_proxy()
-        assert self._initialized
+            self._proxy = self._load_proxy()
+        assert self._initialized and self._proxy is not None
+        if self._attr in _PROPERTY:
+            return self._proxy
         return getattr(self._proxy, item)
 
+    def __dir__(self) -> list[str]:
+        if self._proxy is None:
+            self._proxy = self._load_proxy()
+        assert self._initialized and self._proxy is not None
+        return dir(self._proxy)
+
     @inject
     def _load_proxy(
         self,
         metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client],
     ) -> None:
-        self._proxy = getattr(metrics_client, self._attr)(*self._args, **self._kwargs)
+        client_impl = (
+            metrics_client
+            if self._attr in dir(metrics_client)
+            else metrics_client.prometheus_client
+        )
+        proxy = getattr(client_impl, self._attr)
+        if callable(proxy):
+            proxy = proxy(*self._args, **self._kwargs)
         self._initialized = True
-
-
-def __getattr__(item: t.Any):
-    return _LazyMetric(item)
+        return proxy
diff --git a/src/bentoml/metrics.pyi b/src/bentoml/metrics.pyi
index be1556075ff..e87b5ffc9d7 100644
--- a/src/bentoml/metrics.pyi
+++ b/src/bentoml/metrics.pyi
@@ -6,8 +6,6 @@ import prometheus_client
 
 from bentoml._internal import external_typing as ext
 
-__dir__: list[str] = dir(prometheus_client)
-
 CONTENT_TYPE_LATEST = prometheus_client.CONTENT_TYPE_LATEST
 Counter = prometheus_client.Counter
 Histogram = prometheus_client.Histogram
diff --git a/src/bentoml/testing/server.py b/src/bentoml/testing/server.py
index ab2d8afd1e6..be0f8ed7df4 100644
--- a/src/bentoml/testing/server.py
+++ b/src/bentoml/testing/server.py
@@ -160,7 +160,7 @@ def bentoml_containerize(
             str(bento_tag),
             docker_image_tag=(image_tag,),
             progress="plain",
-            features=["grpc"] if use_grpc else None,
+            features=["grpc", "grpc-reflection"] if use_grpc else None,
         )
         yield image_tag
     finally:
@@ -183,9 +183,11 @@ def run_bento_server_docker(
     from bentoml._internal.configuration.containers import BentoMLContainer
 
     container_name = f"bentoml-test-{image_tag}-{hash(config_file)}"
-    with reserve_free_port(enable_so_reuseport=use_grpc) as port:
+    with reserve_free_port(enable_so_reuseport=use_grpc) as port, reserve_free_port(
+        enable_so_reuseport=use_grpc
+    ) as prom_port:
         pass
-    bind_port = "3000"
+
     cmd = [
         "docker",
         "run",
@@ -193,7 +195,7 @@ def run_bento_server_docker(
         "--name",
         container_name,
         "--publish",
-        f"{port}:{bind_port}",
+        f"{port}:3000",
     ]
     if config_file is not None:
         cmd.extend(["--env", "BENTOML_CONFIG=/home/bentoml/bentoml_config.yml"])
@@ -201,11 +203,12 @@ def run_bento_server_docker(
             ["-v", f"{os.path.abspath(config_file)}:/home/bentoml/bentoml_config.yml"]
         )
     if use_grpc:
-        bind_prom_port = BentoMLContainer.grpc.metrics.port.get()
-        cmd.extend(["--publish", f"{bind_prom_port}:{bind_prom_port}"])
+        cmd.extend(
+            ["--publish", f"{prom_port}:{BentoMLContainer.grpc.metrics.port.get()}"]
+        )
     cmd.append(image_tag)
-    if use_grpc:
-        cmd.extend(["serve-grpc", "--production"])
+    serve_cmd = "serve-grpc" if use_grpc else "serve-http"
+    cmd.extend([serve_cmd, "--production"])
     print(f"Running API server docker image: '{' '.join(cmd)}'")
     with subprocess.Popen(
         cmd,
@@ -437,15 +440,15 @@ def host_bento(
 
         BentoMLContainer.bentoml_home.set(bentoml_home)
     try:
-        print(
-            f"Starting bento server {bento_name} at '{project_path}' {'with config file '+config_file+' ' if config_file else ' '}in {deployment_mode} mode..."
-        )
         if bento_name is None or not bentoml.list(bento_name):
             bento = clean_context.enter_context(
                 bentoml_build(project_path, cleanup=clean_on_exit)
             )
         else:
             bento = bentoml.get(bento_name)
+        print(
+            f"Hosting BentoServer '{bento.tag}' in {deployment_mode} mode at '{project_path}'{' with config file '+config_file if config_file else ''}."
+        )
         if deployment_mode == "standalone":
             with run_bento_server_standalone(
                 bento.path,
@@ -457,11 +460,7 @@ def host_bento(
                 yield host_url
         elif deployment_mode == "docker":
             container_tag = clean_context.enter_context(
-                bentoml_containerize(
-                    bento.tag,
-                    use_grpc=use_grpc,
-                    cleanup=clean_on_exit,
-                )
+                bentoml_containerize(bento.tag, use_grpc=use_grpc)
             )
             with run_bento_server_docker(
                 container_tag,
diff --git a/tests/e2e/bento_server_grpc/service.py b/tests/e2e/bento_server_grpc/service.py
index 10f3f3a5ab8..a04a807a1a0 100644
--- a/tests/e2e/bento_server_grpc/service.py
+++ b/tests/e2e/bento_server_grpc/service.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import time
 import typing as t
 from typing import TYPE_CHECKING
 
@@ -17,6 +18,7 @@
 from bentoml.io import PandasDataFrame
 from bentoml.testing.grpc import TestServiceServicer
 from bentoml._internal.utils import LazyLoader
+from bentoml._internal.utils.metrics import exponential_buckets
 
 if TYPE_CHECKING:
     import numpy as np
@@ -180,6 +182,14 @@ async def echo_image(f: PIL.Image.Image) -> NDArray[t.Any]:
     return np.array(f)
 
 
+histogram = bentoml.metrics.Histogram(
+    name="inference_latency",
+    documentation="Inference latency in seconds",
+    labelnames=["model_name", "model_version"],
+    buckets=exponential_buckets(0.001, 1.5, 10.0),
+)
+
+
 @svc.api(
     input=Multipart(
         original=Image(mime_type="image/bmp"), compared=Image(mime_type="image/bmp")
@@ -187,8 +197,12 @@ async def echo_image(f: PIL.Image.Image) -> NDArray[t.Any]:
     output=Multipart(meta=Text(), result=Image(mime_type="image/bmp")),
 )
 async def predict_multi_images(original: Image, compared: Image):
+    start = time.perf_counter()
     output_array = await py_model.multiply_float_ndarray.async_run(
         np.array(original), np.array(compared)
     )
+    histogram.labels(model_name=py_model.name, model_version="v1").observe(
+        time.perf_counter() - start
+    )
     img = PIL.Image.fromarray(output_array)
     return {"meta": "success", "result": img}
diff --git a/tests/e2e/bento_server_grpc/tests/conftest.py b/tests/e2e/bento_server_grpc/tests/conftest.py
index 4ded9dbcfda..a7f0273b06e 100644
--- a/tests/e2e/bento_server_grpc/tests/conftest.py
+++ b/tests/e2e/bento_server_grpc/tests/conftest.py
@@ -24,6 +24,9 @@
 def pytest_collection_modifyitems(
     session: Session, config: Config, items: list[Item]
 ) -> None:
+    subprocess.check_call(
+        ["pip", "install", "-r", f"{os.path.join(PROJECT_DIR, 'requirements.txt')}"]
+    )
     subprocess.check_call([sys.executable, f"{os.path.join(PROJECT_DIR, 'train.py')}"])
 
 
diff --git a/tests/e2e/bento_server_grpc/tests/test_descriptors.py b/tests/e2e/bento_server_grpc/tests/test_descriptors.py
index f4b132d53d3..2d919537ca5 100644
--- a/tests/e2e/bento_server_grpc/tests/test_descriptors.py
+++ b/tests/e2e/bento_server_grpc/tests/test_descriptors.py
@@ -8,6 +8,7 @@
 
 import pytest
 
+from bentoml import metrics
 from bentoml.grpc.utils import import_grpc
 from bentoml.grpc.utils import import_generated_stubs
 from bentoml.testing.grpc import create_channel
@@ -406,3 +407,11 @@ async def test_multipart(host: str, img_file: str):
                 assert_multi_images, method="pred_multi_images", im_file=img_file
             ),
         )
+
+    # Test for metrics
+    histograms = [
+        m.name
+        for m in metrics.text_string_to_metric_families()
+        if m.type == "histogram"
+    ]
+    assert "inference_latency" in histograms
diff --git a/tests/e2e/bento_server_http/service.py b/tests/e2e/bento_server_http/service.py
index 8785a7ec1e4..207d1a216bf 100644
--- a/tests/e2e/bento_server_http/service.py
+++ b/tests/e2e/bento_server_http/service.py
@@ -35,6 +35,17 @@
 svc = bentoml.Service(name="general_http_service.case-1.e2e", runners=[py_model])
 
 
+metric_test = bentoml.metrics.Counter(
+    name="test_metrics", documentation="Counter test metric"
+)
+
+
+@svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
+def echo_data_metric(data: str) -> str:
+    metric_test.inc()
+    return data
+
+
 @svc.api(input=JSON(), output=JSON())
 async def echo_json(json_obj: JSONSerializable) -> JSONSerializable:
     batch_ret = await py_model.echo_json.async_run([json_obj])
diff --git a/tests/e2e/bento_server_http/tests/conftest.py b/tests/e2e/bento_server_http/tests/conftest.py
index 68052a720ec..21f46aa0e3e 100644
--- a/tests/e2e/bento_server_http/tests/conftest.py
+++ b/tests/e2e/bento_server_http/tests/conftest.py
@@ -27,6 +27,9 @@ class FixtureRequest(_PytestFixtureRequest):
 def pytest_collection_modifyitems(
     session: Session, config: Config, items: list[Item]
 ) -> None:
+    subprocess.check_call(
+        ["pip", "install", "-r", f"{os.path.join(PROJECT_DIR, 'requirements.txt')}"]
+    )
     subprocess.check_call([sys.executable, f"{os.path.join(PROJECT_DIR, 'train.py')}"])
 
 
diff --git a/tests/e2e/bento_server_http/tests/test_meta.py b/tests/e2e/bento_server_http/tests/test_meta.py
index 12d7d6bcc2c..e37b609f39b 100644
--- a/tests/e2e/bento_server_http/tests/test_meta.py
+++ b/tests/e2e/bento_server_http/tests/test_meta.py
@@ -4,6 +4,7 @@
 
 import time
 import asyncio
+from typing import TYPE_CHECKING
 from pathlib import Path
 
 import pytest
@@ -113,3 +114,14 @@ def test_dunder_string():
         str(svc)
         == 'bentoml.Service(name="dunder_string", runners=[py_model.case-1.http.e2e])'
     )
+
+
+@pytest.mark.asyncio
+async def test_metrics_type(host: str):
+    await async_request(
+        "POST",
+        f"http://{host}/echo_data_metric",
+        headers={"Content-Type": "application/json"},
+        data="[[1,2],[3,4]]",
+    )
+    assert "test_metrics_total" in bentoml.metrics.generate_latest().decode("utf-8")
diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py
index 03204f58aee..d243671e2bd 100644
--- a/tests/unit/test_metrics.py
+++ b/tests/unit/test_metrics.py
@@ -1,27 +1,18 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
 import bentoml
 
-if TYPE_CHECKING:
-    from bentoml._internal.server.metrics.prometheus import PrometheusClient
-
 
 def test_metrics_initialization():
     o = bentoml.metrics.Gauge(name="test_metrics", documentation="test")
     assert isinstance(o, bentoml.metrics._LazyMetric)
+    assert o._proxy is None
     o = bentoml.metrics.Histogram(name="test_metrics", documentation="test")
     assert isinstance(o, bentoml.metrics._LazyMetric)
+    assert o._proxy is None
     o = bentoml.metrics.Counter(name="test_metrics", documentation="test")
     assert isinstance(o, bentoml.metrics._LazyMetric)
+    assert o._proxy is None
     o = bentoml.metrics.Summary(name="test_metrics", documentation="test")
     assert isinstance(o, bentoml.metrics._LazyMetric)
-
-
-def test_metrics_type(prom_client: PrometheusClient):
-    o = bentoml.metrics.Counter(name="test_metrics", documentation="test")
-    assert o._attr == "Counter"
     assert o._proxy is None
-    o.inc()
-    assert isinstance(o._proxy, prom_client.prometheus_client.Counter)