From 484d32e4a7da6dc9a78a7e1afbfa8d210abffd3c Mon Sep 17 00:00:00 2001
From: Ubuntu <29749331+aarnphm@users.noreply.github.com>
Date: Thu, 27 Oct 2022 23:59:42 +0000
Subject: [PATCH] chore: tighten metrics validation and merge gRPC metrics test [skip ci]

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 requirements/tests-requirements.txt           |  1 -
 src/bentoml/metrics.py                        | 11 +++--
 src/bentoml/serve.py                          |  9 ++--
 src/bentoml/start.py                          |  9 ++--
 src/bentoml/testing/server.py                 | 13 ++---
 .../worker/grpc_prometheus_server.py          |  3 +-
 tests/e2e/bento_server_grpc/service.py        | 20 +-------
 tests/e2e/bento_server_grpc/tests/conftest.py |  3 ++
 .../tests/test_custom_metrics.py              | 48 -------------------
 .../tests/test_descriptors.py                 |  9 ++++
 tests/e2e/bento_server_http/tests/conftest.py |  3 ++
 .../e2e/bento_server_http/tests/test_meta.py  |  3 --
 12 files changed, 41 insertions(+), 91 deletions(-)
 delete mode 100644 tests/e2e/bento_server_grpc/tests/test_custom_metrics.py

diff --git a/requirements/tests-requirements.txt b/requirements/tests-requirements.txt
index 8d71141cc5d..274f610e6ab 100644
--- a/requirements/tests-requirements.txt
+++ b/requirements/tests-requirements.txt
@@ -17,4 +17,3 @@ imageio==2.22.1
 pyarrow==9.0.0
 build[virtualenv]==0.8.0
 protobuf==3.19.6
-grpcio-tools>=1.41.0,<1.49.0,!=1.48.2
diff --git a/src/bentoml/metrics.py b/src/bentoml/metrics.py
index cbed5b723d9..6ce74a33c8e 100644
--- a/src/bentoml/metrics.py
+++ b/src/bentoml/metrics.py
@@ -65,7 +65,9 @@ def __getattr__(item: t.Any):
 class _LazyMetric:
     __slots__ = ("_attr", "_proxy", "_initialized", "_args", "_kwargs")
 
-    def __init__(self, attr: t.Any):
+    def __init__(self, attr: str):
+        if attr in _NOT_SUPPORTED:
+            raise NotImplementedError(f"{attr} is not yet supported.")
         self._attr = attr
         self._proxy = None
         self._initialized = False
@@ -81,7 +83,10 @@ def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
             *args: Arguments to pass to the metrics object.
             **kwargs: Keyword arguments to pass to the metrics object.
         """
-        # This is where we lazy load the proxy object.
+        if "registry" in kwargs:
+            raise ValueError(
+                f"'registry' should not be passed when using '{__name__}.{self._attr}'. See https://docs.bentoml.org/en/latest/reference/metrics.html."
+            )
         warn_experimental("%s.%s" % (__name__, self._attr))
         self._args = args
         self._kwargs = kwargs
@@ -89,8 +94,6 @@ def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
             # first-class function implementation from BentoML Prometheus client.
             # In this case, the function will be called directly.
             return self._load_proxy()
-        if self._attr in _NOT_SUPPORTED:
-            raise NotImplementedError(f"{self._attr} is not supported, yet.")
         return self
 
     def __getattr__(self, item: t.Any) -> t.Any:
diff --git a/src/bentoml/serve.py b/src/bentoml/serve.py
index 750ed048d5a..50ba46f3eb1 100644
--- a/src/bentoml/serve.py
+++ b/src/bentoml/serve.py
@@ -190,10 +190,10 @@ def serve_http_development(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir)
 
+    prometheus_dir = ensure_prometheus_dir()
     watchers: list[Watcher] = []
     circus_sockets: list[CircusSocket] = [
         CircusSocket(name=API_SERVER_NAME, host=host, port=port, backlog=backlog)
@@ -310,13 +310,13 @@ def serve_http_production(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir, standalone_load=True)
     watchers: t.List[Watcher] = []
     circus_socket_map: t.Dict[str, CircusSocket] = {}
     runner_bind_map: t.Dict[str, str] = {}
     uds_path = None
+    prometheus_dir = ensure_prometheus_dir()
 
     if psutil.POSIX:
         # use AF_UNIX sockets for Circus
@@ -505,10 +505,10 @@ def serve_grpc_development(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir)
 
+    prometheus_dir = ensure_prometheus_dir()
     watchers: list[Watcher] = []
     circus_sockets: list[CircusSocket] = []
 
@@ -681,7 +681,6 @@ def serve_grpc_production(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir, standalone_load=True)
 
@@ -692,6 +691,8 @@ def serve_grpc_production(
     runner_bind_map: dict[str, str] = {}
     uds_path = None
 
+    prometheus_dir = ensure_prometheus_dir()
+
     # Check whether users are running --grpc on windows
     # also raising warning if users running on MacOS or FreeBSD
     if psutil.WINDOWS:
diff --git a/src/bentoml/start.py b/src/bentoml/start.py
index 63f72c855e2..ef04a052c1e 100644
--- a/src/bentoml/start.py
+++ b/src/bentoml/start.py
@@ -42,7 +42,6 @@ def start_runner_server(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir, standalone_load=True)
 
@@ -52,6 +51,8 @@ def start_runner_server(
     watchers: t.List[Watcher] = []
     circus_socket_map: t.Dict[str, CircusSocket] = {}
 
+    ensure_prometheus_dir()
+
     with contextlib.ExitStack() as port_stack:
         for runner in svc.runners:
             if runner.name == runner_name:
@@ -83,8 +84,6 @@ def start_runner_server(
                             "--no-access-log",
                             "--worker-id",
                             "$(circus.wid)",
-                            "--prometheus-dir",
-                            prometheus_dir,
                         ],
                         copy_env=True,
                         stop_children=True,
@@ -144,7 +143,6 @@ def start_http_server(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir, standalone_load=True)
     runner_requirements = {runner.name for runner in svc.runners}
@@ -154,6 +152,7 @@ def start_http_server(
         )
     watchers: t.List[Watcher] = []
     circus_socket_map: t.Dict[str, CircusSocket] = {}
+    prometheus_dir = ensure_prometheus_dir()
     logger.debug("Runner map: %s", runner_map)
     circus_socket_map[API_SERVER_NAME] = CircusSocket(
         name=API_SERVER_NAME,
@@ -252,7 +251,6 @@ def start_grpc_server(
     from ._internal.utils.circus import create_standalone_arbiter
     from ._internal.utils.analytics import track_serve
 
-    prometheus_dir = ensure_prometheus_dir()
     working_dir = os.path.realpath(os.path.expanduser(working_dir))
     svc = load(bento_identifier, working_dir=working_dir, standalone_load=True)
     runner_requirements = {runner.name for runner in svc.runners}
@@ -262,6 +260,7 @@ def start_grpc_server(
         )
     watchers: list[Watcher] = []
     circus_socket_map: dict[str, CircusSocket] = {}
+    prometheus_dir = ensure_prometheus_dir()
     logger.debug("Runner map: %s", runner_map)
     ssl_args = construct_ssl_args(
         ssl_certfile=ssl_certfile,
diff --git a/src/bentoml/testing/server.py b/src/bentoml/testing/server.py
index e6a1b3f71a3..be0f8ed7df4 100644
--- a/src/bentoml/testing/server.py
+++ b/src/bentoml/testing/server.py
@@ -187,7 +187,7 @@ def run_bento_server_docker(
         enable_so_reuseport=use_grpc
     ) as prom_port:
         pass
-    bind_port = "3000"
+
     cmd = [
         "docker",
         "run",
@@ -195,7 +195,7 @@ def run_bento_server_docker(
         "--name",
         container_name,
         "--publish",
-        f"{port}:{bind_port}",
+        f"{port}:3000",
     ]
     if config_file is not None:
         cmd.extend(["--env", "BENTOML_CONFIG=/home/bentoml/bentoml_config.yml"])
@@ -203,11 +203,12 @@ def run_bento_server_docker(
             ["-v", f"{os.path.abspath(config_file)}:/home/bentoml/bentoml_config.yml"]
         )
     if use_grpc:
-        bind_prom_port = BentoMLContainer.grpc.metrics.port.get()
-        cmd.extend(["--publish", f"{prom_port}:{bind_prom_port}"])
+        cmd.extend(
+            ["--publish", f"{prom_port}:{BentoMLContainer.grpc.metrics.port.get()}"]
+        )
     cmd.append(image_tag)
-    if use_grpc:
-        cmd.extend(["serve-grpc", "--production"])
+    serve_cmd = "serve-grpc" if use_grpc else "serve-http"
+    cmd.extend([serve_cmd, "--production"])
     print(f"Running API server docker image: '{' '.join(cmd)}'")
     with subprocess.Popen(
         cmd,
diff --git a/src/bentoml_cli/worker/grpc_prometheus_server.py b/src/bentoml_cli/worker/grpc_prometheus_server.py
index ab0286272e2..47a1d8a4e62 100644
--- a/src/bentoml_cli/worker/grpc_prometheus_server.py
+++ b/src/bentoml_cli/worker/grpc_prometheus_server.py
@@ -68,11 +68,10 @@ def main(fd: int, backlog: int, prometheus_dir: str | None):
     configure_server_logging()
 
     BentoMLContainer.development_mode.set(False)
+    metrics_client = BentoMLContainer.metrics_client.get()
     if prometheus_dir is not None:
         BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir)
 
-    metrics_client = BentoMLContainer.metrics_client.get()
-
     # create a ASGI app that wraps around the default HTTP prometheus server.
     prom_app = Starlette(
         debug=get_debug_mode(), middleware=[Middleware(GenerateLatestMiddleware)]
diff --git a/tests/e2e/bento_server_grpc/service.py b/tests/e2e/bento_server_grpc/service.py
index d842b41ea58..96c1f9e9da9 100644
--- a/tests/e2e/bento_server_grpc/service.py
+++ b/tests/e2e/bento_server_grpc/service.py
@@ -175,20 +175,6 @@ async def echo_image(f: PIL.Image.Image) -> NDArray[t.Any]:
     return np.array(f)
 
 
-@svc.api(
-    input=Multipart(
-        original=Image(mime_type="image/bmp"), compared=Image(mime_type="image/bmp")
-    ),
-    output=Multipart(meta=Text(), result=Image(mime_type="image/bmp")),
-)
-async def predict_multi_images(original: Image, compared: Image):
-    output_array = await py_model.multiply_float_ndarray.async_run(
-        np.array(original), np.array(compared)
-    )
-    img = PIL.Image.fromarray(output_array)
-    return {"meta": "success", "result": img}
-
-
 histogram = bentoml.metrics.Histogram(
     name="inference_latency",
     documentation="Inference latency in seconds",
@@ -201,11 +187,9 @@ async def predict_multi_images(original: Image, compared: Image):
     input=Multipart(
         original=Image(mime_type="image/bmp"), compared=Image(mime_type="image/bmp")
     ),
-    output=bentoml.io.Multipart(
-        meta=bentoml.io.Text(), result=bentoml.io.Image(mime_type="image/bmp")
-    ),
+    output=Multipart(meta=Text(), result=Image(mime_type="image/bmp")),
 )
-async def multi_image_with_metrics(original: Image, compared: Image):
+async def predict_multi_images(original: Image, compared: Image):
     start = time.perf_counter()
     output_array = await py_model.multiply_float_ndarray.async_run(
         np.array(original), np.array(compared)
diff --git a/tests/e2e/bento_server_grpc/tests/conftest.py b/tests/e2e/bento_server_grpc/tests/conftest.py
index 4ded9dbcfda..a7f0273b06e 100644
--- a/tests/e2e/bento_server_grpc/tests/conftest.py
+++ b/tests/e2e/bento_server_grpc/tests/conftest.py
@@ -24,6 +24,9 @@
 def pytest_collection_modifyitems(
     session: Session, config: Config, items: list[Item]
 ) -> None:
+    subprocess.check_call(
+        ["pip", "install", "-r", f"{os.path.join(PROJECT_DIR, 'requirements.txt')}"]
+    )
     subprocess.check_call([sys.executable, f"{os.path.join(PROJECT_DIR, 'train.py')}"])
 
 
diff --git a/tests/e2e/bento_server_grpc/tests/test_custom_metrics.py b/tests/e2e/bento_server_grpc/tests/test_custom_metrics.py
deleted file mode 100644
index 7a440eafd1b..00000000000
--- a/tests/e2e/bento_server_grpc/tests/test_custom_metrics.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-import pytest
-
-from bentoml import metrics
-from bentoml.grpc.utils import import_generated_stubs
-from bentoml.testing.grpc import create_channel
-from bentoml.testing.grpc import async_client_call
-
-if TYPE_CHECKING:
-    from bentoml.grpc.v1alpha1 import service_pb2 as pb
-else:
-    pb, _ = import_generated_stubs()
-
-
-@pytest.mark.asyncio
-async def test_metrics_collector(host: str, img_file: str):
-    with open(str(img_file), "rb") as f1, open(str(img_file), "rb") as f2:
-        fb1 = f1.read()
-        fb2 = f2.read()
-
-    async with create_channel(host) as channel:
-        await async_client_call(
-            "multi_image_with_metrics",
-            channel=channel,
-            data={
-                "multipart": {
-                    "fields": {
-                        "original": pb.Part(
-                            file=pb.File(kind=pb.File.FILE_TYPE_BMP, content=fb1)
-                        ),
-                        "compared": pb.Part(
-                            file=pb.File(kind=pb.File.FILE_TYPE_BMP, content=fb2)
-                        ),
-                    }
-                }
-            },
-        )
-
-    print(metrics.generate_latest())
-    histograms = [
-        m.name
-        for m in metrics.text_string_to_metric_families()
-        if m.type == "histogram"
-    ]
-    assert "inference_latency" in histograms
diff --git a/tests/e2e/bento_server_grpc/tests/test_descriptors.py b/tests/e2e/bento_server_grpc/tests/test_descriptors.py
index bff8da31238..532fff0cf6a 100644
--- a/tests/e2e/bento_server_grpc/tests/test_descriptors.py
+++ b/tests/e2e/bento_server_grpc/tests/test_descriptors.py
@@ -8,6 +8,7 @@
 
 import pytest
 
+from bentoml import metrics
 from bentoml.grpc.utils import import_grpc
 from bentoml.grpc.utils import import_generated_stubs
 from bentoml.testing.grpc import create_channel
@@ -394,3 +395,11 @@ async def test_multipart(host: str, img_file: str):
                 assert_multi_images, method="pred_multi_images", im_file=img_file
             ),
         )
+
+    # Check that the 'inference_latency' histogram appears among the parsed metric families.
+    histograms = [
+        m.name
+        for m in metrics.text_string_to_metric_families()
+        if m.type == "histogram"
+    ]
+    assert "inference_latency" in histograms
diff --git a/tests/e2e/bento_server_http/tests/conftest.py b/tests/e2e/bento_server_http/tests/conftest.py
index 68052a720ec..21f46aa0e3e 100644
--- a/tests/e2e/bento_server_http/tests/conftest.py
+++ b/tests/e2e/bento_server_http/tests/conftest.py
@@ -27,6 +27,9 @@ class FixtureRequest(_PytestFixtureRequest):
 def pytest_collection_modifyitems(
     session: Session, config: Config, items: list[Item]
 ) -> None:
+    subprocess.check_call(
+        ["pip", "install", "-r", f"{os.path.join(PROJECT_DIR, 'requirements.txt')}"]
+    )
     subprocess.check_call([sys.executable, f"{os.path.join(PROJECT_DIR, 'train.py')}"])
 
 
diff --git a/tests/e2e/bento_server_http/tests/test_meta.py b/tests/e2e/bento_server_http/tests/test_meta.py
index 3361cf07288..e37b609f39b 100644
--- a/tests/e2e/bento_server_http/tests/test_meta.py
+++ b/tests/e2e/bento_server_http/tests/test_meta.py
@@ -12,9 +12,6 @@
 import bentoml
 from bentoml.testing.utils import async_request
 
-if TYPE_CHECKING:
-    from bentoml._internal.server.metrics.prometheus import PrometheusClient
-
 
 @pytest.mark.asyncio
 async def test_api_server_meta(host: str) -> None: