From cb900bb0c31c1bdff56a912a179281182a0ecd56 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Mon, 19 Sep 2022 16:41:43 -0700
Subject: [PATCH 1/7] fix: metrics service_name incorrect setup

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 .../_internal/server/metrics/prometheus.py    |   9 +-
 bentoml/_internal/utils/__init__.py           |  86 +++++++++++--
 bentoml/_internal/utils/analytics/schemas.py  |  15 ++-
 .../_internal/utils/analytics/usage_stats.py  | 112 ++++++++++++-----
 bentoml/serve.py                              |   6 +-
 bentoml/start.py                              |   2 +-
 pyproject.toml                                |   2 +-
 tests/unit/_internal/utils/test_analytics.py  | 117 ++++++++++++++----
 8 files changed, 272 insertions(+), 77 deletions(-)

diff --git a/bentoml/_internal/server/metrics/prometheus.py b/bentoml/_internal/server/metrics/prometheus.py
index ce2230e066..88bd9eff64 100644
--- a/bentoml/_internal/server/metrics/prometheus.py
+++ b/bentoml/_internal/server/metrics/prometheus.py
@@ -34,10 +34,10 @@ def __init__(
             assert multiproc_dir is not None, "multiproc_dir must be provided"
 
         self.multiproc = multiproc
-        self.multiproc_dir: t.Optional[str] = multiproc_dir
+        self.multiproc_dir: str | None = multiproc_dir
         self._registry = None
         self._imported = False
-        self._pid: t.Optional[int] = None
+        self._pid: int | None = None
 
     @property
     def prometheus_client(self):
@@ -56,6 +56,7 @@ def prometheus_client(self):
 
         # step 2:
         import prometheus_client
+        import prometheus_client.parser
         import prometheus_client.multiprocess
 
         self._imported = True
@@ -106,9 +107,7 @@ def generate_latest(self):
             return self.prometheus_client.generate_latest()
 
     def text_string_to_metric_families(self) -> t.Generator[Metric, None, None]:
-        from prometheus_client.parser import text_string_to_metric_families
-
-        yield from text_string_to_metric_families(
+        yield from self.prometheus_client.parser.text_string_to_metric_families(
             self.generate_latest().decode("utf-8")
         )
 
diff --git a/bentoml/_internal/utils/__init__.py b/bentoml/_internal/utils/__init__.py
index 71ad30f259..f4068a9fc3 100644
--- a/bentoml/_internal/utils/__init__.py
+++ b/bentoml/_internal/utils/__init__.py
@@ -13,6 +13,7 @@
 from typing import overload
 from typing import TYPE_CHECKING
 from pathlib import Path
+from reprlib import recursive_repr as _recursive_repr
 from datetime import date
 from datetime import time
 from datetime import datetime
@@ -30,16 +31,18 @@
 
 from .cattr import bentoml_cattr
 from ..types import LazyType
-from ..types import PathType
-from ..types import MetadataDict
-from ..types import MetadataType
 from .lazy_loader import LazyLoader
 
 if TYPE_CHECKING:
     from fs.base import FS
+    from typing_extensions import Self
+
+    from ..types import PathType
+    from ..types import MetadataDict
+    from ..types import MetadataType
 
     P = t.ParamSpec("P")
-    GenericFunction = t.Callable[P, t.Any]
+    F = t.Callable[P, t.Any]
 
 
 C = t.TypeVar("C")
@@ -59,6 +62,7 @@
     "display_path_under_home",
     "rich_console",
     "experimental",
+    "compose",
 ]
 
 _EXPERIMENTAL_APIS: set[str] = set()
@@ -350,13 +354,16 @@ class cached_contextmanager:
     Just like contextlib.contextmanager, but will cache the yield value for the same
     arguments. When all instances of the same contextmanager exits, the cache value will
     be dropped.
-    Example Usage:
-    (To reuse the container based on the same image)
-    >>> @cached_contextmanager("{docker_image.id}")
-    >>> def start_docker_container_from_image(docker_image, timeout=60):
-    >>>     container = ...
-    >>>     yield container
-    >>>     container.stop()
+
+    Example Usage: (To reuse the container based on the same image)
+
+    .. code-block:: python
+
+        @cached_contextmanager("{docker_image.id}")
+        def start_docker_container_from_image(docker_image, timeout=60):
+            container = ...
+            yield container
+            container.stop()
     """
 
     def __init__(self, cache_key_template: t.Optional[str] = None):
@@ -387,3 +394,60 @@ def _func(*args: "P.args", **kwargs: "P.kwargs") -> t.Any:
                     self._cache.pop(cache_key)
 
         return _func
+
+
+class compose:
+    """
+    Function composition: compose(f, g)(...) is equivalent to f(g(...)).
+    Refers to https://github.com/mentalisttraceur/python-compose for original implementation.
+
+    Args:
+        *functions: Functions (or other callables) to compose.
+
+    Raises:
+        TypeError: If no arguments are given, or any argument is not callable.
+    """
+
+    def __init__(self: Self, *functions: F[t.Any]):
+        if not functions:
+            raise TypeError(f"{self!r} needs at least one argument.")
+        _functions: list[F[t.Any]] = []
+        for function in reversed(functions):
+            if not callable(function):
+                raise TypeError(f"{self!r} arguments must be callable.")
+            if isinstance(function, compose):
+                _functions.extend(function.functions)
+            else:
+                _functions.append(function)
+        self.__wrapped__ = _functions[0]
+        self._wrappers = tuple(_functions[1:])
+
+    def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
+        """Call the composed function."""
+        result = self.__wrapped__(*args, **kwargs)
+        for function in self._wrappers:
+            result = function(result)
+        return result
+
+    def __get__(self, obj: t.Any, typ_: type | None = None):
+        """Get the composed function as a bound method."""
+        wrapped = self.__wrapped__
+        try:
+            bind = type(wrapped).__get__
+        except AttributeError:
+            return self
+        bound_wrapped = bind(wrapped, obj, typ_)
+        if bound_wrapped is wrapped:
+            return self
+        bound_self = type(self)(bound_wrapped)
+        bound_self._wrappers = self._wrappers
+        return bound_self
+
+    @_recursive_repr("<...>")
+    def __repr__(self):
+        return f"{self!r}({','.join(map(repr, reversed(self.functions)))})"
+
+    @property
+    def functions(self):
+        """Read-only tuple of the composed callables, in order of execution."""
+        return (self.__wrapped__,) + tuple(self._wrappers)
diff --git a/bentoml/_internal/utils/analytics/schemas.py b/bentoml/_internal/utils/analytics/schemas.py
index a34e814975..dca1362eb5 100644
--- a/bentoml/_internal/utils/analytics/schemas.py
+++ b/bentoml/_internal/utils/analytics/schemas.py
@@ -183,6 +183,7 @@ class ModelSaveEvent(EventMeta):
 class ServeInitEvent(EventMeta):
     serve_id: str
     production: bool
+    grpc: bool
     serve_from_bento: bool
 
     bento_creation_timestamp: t.Optional[datetime]
@@ -199,9 +200,21 @@ class ServeInitEvent(EventMeta):
 class ServeUpdateEvent(EventMeta):
     serve_id: str
     production: bool
-    component: str
+    grpc: bool
     triggered_at: datetime
     duration_in_seconds: int
+    component: str = attr.field(
+        validator=attr.validators.and_(
+            attr.validators.instance_of(str),
+            attr.validators.in_(["standalone", "api_server", "runner"]),
+        )
+    )
+    metrics_type: str = attr.field(
+        validator=attr.validators.and_(
+            attr.validators.instance_of(str),
+            attr.validators.in_(["not_available", "legacy", "current"]),
+        ),
+    )
     metrics: t.List[t.Any] = attr.field(factory=list)
 
 
diff --git a/bentoml/_internal/utils/analytics/usage_stats.py b/bentoml/_internal/utils/analytics/usage_stats.py
index d9e28fab8a..ab11bcd108 100644
--- a/bentoml/_internal/utils/analytics/usage_stats.py
+++ b/bentoml/_internal/utils/analytics/usage_stats.py
@@ -17,11 +17,13 @@
 from simple_di import inject
 from simple_di import Provide
 
+from ...utils import compose
 from .schemas import EventMeta
 from .schemas import ServeInitEvent
 from .schemas import TrackingPayload
 from .schemas import CommonProperties
 from .schemas import ServeUpdateEvent
+from ...configuration import get_debug_mode
 from ...configuration.containers import BentoMLContainer
 
 if TYPE_CHECKING:
@@ -29,6 +31,8 @@
     T = t.TypeVar("T")
     AsyncFunc = t.Callable[P, t.Coroutine[t.Any, t.Any, t.Any]]
 
+    from prometheus_client.samples import Sample
+
     from bentoml import Service
 
     from ...server.metrics.prometheus import PrometheusClient
@@ -62,7 +66,12 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> t.Any:
             return func(*args, **kwargs)
         except Exception as err:  # pylint: disable=broad-except
             if _usage_event_debugging():
-                logger.info(f"Tracking Error: {err}")
+                if get_debug_mode():
+                    logger.error(
+                        f"Tracking Error: {err}", stack_info=True, stacklevel=3
+                    )
+                else:
+                    logger.info(f"Tracking Error: {err}")
             else:
                 logger.debug(f"Tracking Error: {err}")
 
@@ -116,6 +125,7 @@ def track(event_properties: EventMeta):
 def _track_serve_init(
     svc: Service,
     production: bool,
+    grpc: bool,
     serve_info: ServeInfo = Provide[BentoMLContainer.serve_info],
 ):
     if svc.bento is not None:
@@ -124,6 +134,7 @@ def _track_serve_init(
             serve_id=serve_info.serve_id,
             serve_from_bento=True,
             production=production,
+            grpc=grpc,
             bento_creation_timestamp=bento.info.creation_time,
             num_of_models=len(bento.info.models),
             num_of_runners=len(svc.runners),
@@ -138,6 +149,7 @@ def _track_serve_init(
             serve_id=serve_info.serve_id,
             serve_from_bento=False,
             production=production,
+            grpc=grpc,
             bento_creation_timestamp=None,
             num_of_models=len(
                 set(
@@ -160,42 +172,76 @@ def _track_serve_init(
 EXCLUDE_PATHS = {"/docs.json", "/livez", "/healthz", "/readyz"}
 
 
-def get_metrics_report(
-    metrics_client: PrometheusClient,
-) -> list[dict[str, str | float]]:
-    metrics_text = metrics_client.generate_latest().decode("utf-8")
-    if not metrics_text:
-        return []
+def filter_metrics(
+    samples: list[Sample], *filters: t.Callable[[list[Sample]], list[Sample]]
+):
+    return [
+        {**sample.labels, "value": sample.value}
+        for sample in compose(*filters)(samples)
+    ]
 
-    from prometheus_client.parser import (
-        text_string_to_metric_families,  # type: ignore (unfinished prometheus types)
-    )
 
-    for metric in text_string_to_metric_families(metrics_text):
-        # Searching for the metric BENTOML_{service_name}_request of type Counter
-        if (
-            metric.type == "counter"
-            and metric.name.startswith("BENTOML_")
-            and metric.name.endswith("_request")
-        ):
-            return [
-                {**sample.labels, "value": sample.value}
-                for sample in metric.samples
-                if "endpoint" in sample.labels
-                # exclude common infra paths
-                and sample.labels["endpoint"] not in EXCLUDE_PATHS
-                # exclude static_content prefix
-                and not sample.labels["endpoint"].startswith("/static_content/")
+@inject
+def get_metrics_report(
+    metrics_client: PrometheusClient,
+    grpc: bool = False,
+) -> tuple[list[dict[str, str | float]], bool | None]:
+    """
+    Get Prometheus metrics reports from the metrics client. This will be used to determine tracking events.
+    If the return metrics are legacy metrics, the metrics will have prefix BENTOML_, otherwise they will have prefix bentoml_
+
+    Args:
+        metrics_client: Instance of bentoml._internal.server.metrics.prometheus.PrometheusClient
+        grpc: Whether the metrics are for gRPC server.
+
+    Returns:
+        A tuple of a list of metrics and an optional boolean to determine whether the return metrics are legacy metrics.
+    """
+    for metric in metrics_client.text_string_to_metric_families():
+        metric_type = t.cast("str", metric.type)  # type: ignore (we need to cast due to no prometheus types)
+        metric_name = t.cast("str", metric.name)  # type: ignore (we need to cast due to no prometheus types)
+        metric_samples = t.cast("list[Sample]", metric.samples)  # type: ignore (we need to cast due to no prometheus types)
+        if metric_type != "counter":
+            continue
+        # We only care about the counter metrics.
+        assert metric_type == "counter"
+        if grpc:
+            _filters: list[t.Callable[[list[Sample]], list[Sample]]] = [
+                lambda samples: [s for s in samples if "api_name" in s.labels]
+            ]
+        else:
+            _filters = [
+                lambda samples: [
+                    s
+                    for s in samples
+                    if not s.labels["endpoint"].startswith("/static_content/")
+                ],
+                lambda samples: [
+                    s for s in samples if s.labels["endpoint"] not in EXCLUDE_PATHS
+                ],
+                lambda samples: [s for s in samples if "endpoint" in s.labels],
             ]
+        # If metrics prefix is BENTOML_, this is legacy metrics
+        if metric_name.endswith("_request"):
+            if metric_name.startswith("BENTOML_"):
+                # This is the case where we have legacy metrics with
+                # newer metrics. We will ignore the newer metrics.
+                return filter_metrics(metric_samples, *_filters), True
+            else:
+                # This is the case where we only have newer metrics
+                assert metric_name.startswith("bentoml_")
+                return filter_metrics(metric_samples, *_filters), False
 
-    return []
+    return [], None
 
 
 @inject
 @contextlib.contextmanager
 def track_serve(
     svc: Service,
-    production: bool,
+    *,
+    production: bool = False,
+    grpc: bool = False,
     component: str = "standalone",
     metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client],
     serve_info: ServeInfo = Provide[BentoMLContainer.serve_info],
@@ -204,12 +250,12 @@ def track_serve(
         yield
         return
 
-    _track_serve_init(svc, production)
+    _track_serve_init(svc=svc, production=production, grpc=grpc)
 
     if _usage_event_debugging():
         tracking_interval = 5
     else:
-        tracking_interval = SERVE_USAGE_TRACKING_INTERVAL_SECONDS
+        tracking_interval = SERVE_USAGE_TRACKING_INTERVAL_SECONDS  # pragma: no cover
 
     stop_event = threading.Event()
 
@@ -217,14 +263,20 @@ def track_serve(
     def loop() -> t.NoReturn:  # type: ignore
         last_tracked_timestamp: datetime = serve_info.serve_started_timestamp
         while not stop_event.wait(tracking_interval):  # pragma: no cover
+            metrics_type = "not_available"
+            metrics, use_legacy_metrics = get_metrics_report(metrics_client, grpc=grpc)
+            if use_legacy_metrics is not None:
+                metrics_type = "legacy" if use_legacy_metrics else "current"
             now = datetime.now(timezone.utc)
             event_properties = ServeUpdateEvent(
                 serve_id=serve_info.serve_id,
                 production=production,
+                grpc=grpc,
                 component=component,
                 triggered_at=now,
                 duration_in_seconds=int((now - last_tracked_timestamp).total_seconds()),
-                metrics=get_metrics_report(metrics_client),
+                metrics_type=metrics_type,
+                metrics=metrics,
             )
             last_tracked_timestamp = now
             track(event_properties)
diff --git a/bentoml/serve.py b/bentoml/serve.py
index 092e02facc..5840fe24b0 100644
--- a/bentoml/serve.py
+++ b/bentoml/serve.py
@@ -267,7 +267,7 @@ def serve_http_development(
         loglevel="WARNING",
     )
 
-    with track_serve(svc, production=False):
+    with track_serve(svc):
         arbiter.start(
             cb=lambda _: logger.info(  # type: ignore
                 'Starting development %s BentoServer from "%s" running on http://%s:%d (Press CTRL+C to quit)',
@@ -630,7 +630,7 @@ def serve_grpc_development(
         loglevel="ERROR",
     )
 
-    with track_serve(svc, production=False):
+    with track_serve(svc, grpc=True):
         arbiter.start(
             cb=lambda _: logger.info(  # type: ignore
                 'Starting development %s BentoServer from "%s" listening on %s:%d (Press CTRL+C to quit)',
@@ -854,7 +854,7 @@ def serve_grpc_production(
         watchers=watchers, sockets=list(circus_socket_map.values())
     )
 
-    with track_serve(svc, production=True):
+    with track_serve(svc, production=True, grpc=True):
         try:
             arbiter.start(
                 cb=lambda _: logger.info(  # type: ignore
diff --git a/bentoml/start.py b/bentoml/start.py
index 1541f61e89..905935f689 100644
--- a/bentoml/start.py
+++ b/bentoml/start.py
@@ -347,7 +347,7 @@ def start_grpc_server(
     arbiter = create_standalone_arbiter(
         watchers=watchers, sockets=list(circus_socket_map.values())
     )
-    with track_serve(svc, production=True, component=API_SERVER):
+    with track_serve(svc, production=True, component=API_SERVER, grpc=True):
         arbiter.start(
             cb=lambda _: logger.info(  # type: ignore
                 'Starting bare %s BentoServer from "%s" running on http://%s:%d (Press CTRL+C to quit)',
diff --git a/pyproject.toml b/pyproject.toml
index 8341ef8371..2363d45fcb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -191,7 +191,7 @@ exclude = '''
 extend-exclude = "(_pb2.py$|_pb2_grpc.py$)"
 
 [tool.pytest.ini_options]
-addopts = "-rfEX -p pytester -p no:warnings -x --capture=tee-sys"
+addopts = "-rfEX -p pytester -p no:warnings -x --capture=tee-sys --cov-report=term-missing --cov-append"
 python_files = ["test_*.py", "*_test.py"]
 testpaths = ["tests"]
 markers = ["gpus", "disable-tf-eager-execution"]
diff --git a/tests/unit/_internal/utils/test_analytics.py b/tests/unit/_internal/utils/test_analytics.py
index 5c170f44d6..3d65c6443b 100644
--- a/tests/unit/_internal/utils/test_analytics.py
+++ b/tests/unit/_internal/utils/test_analytics.py
@@ -1,5 +1,7 @@
+# pylint: disable=unused-argument
 from __future__ import annotations
 
+import typing as t
 import logging
 from typing import TYPE_CHECKING
 from unittest.mock import Mock
@@ -12,12 +14,14 @@
 
 import bentoml
 from bentoml._internal.utils import analytics
+from prometheus_client.parser import text_string_to_metric_families  # type: ignore (no prometheus types)
 
 if TYPE_CHECKING:
     from unittest.mock import MagicMock
 
     from _pytest.logging import LogCaptureFixture
     from _pytest.monkeypatch import MonkeyPatch
+    from prometheus_client.metrics_core import Metric
 
     from bentoml import Service
 
@@ -142,6 +146,7 @@ def test_track_serve_init(
         noop_service,
         production=production,
         serve_info=analytics.usage_stats.get_serve_info(),
+        grpc=False,
     )
 
     assert mock_do_not_track.called
@@ -153,6 +158,7 @@ def test_track_serve_init(
             noop_service,
             production=production,
             serve_info=analytics.usage_stats.get_serve_info(),
+            grpc=False,
         )
     assert "model_types" in caplog.text
 
@@ -176,24 +182,35 @@ def test_track_serve_init_no_bento(
             bentoml.Service("test"),
             production=False,
             serve_info=analytics.usage_stats.get_serve_info(),
+            grpc=False,
         )
     assert "model_types" not in caplog.text
 
 
 @patch("bentoml._internal.server.metrics.prometheus.PrometheusClient")
 @pytest.mark.parametrize(
-    "mock_output",
+    "mock_output,expected",
     [
-        b"",
-        b"""# HELP BENTOML_noop_request_total Multiprocess metric""",
+        (b"", tuple([[], None])),
+        (
+            b"""# HELP BENTOML_noop_request_total Multiprocess metric""",
+            tuple([[], None]),
+        ),
     ],
 )
-def test_get_metrics_report_filtered(
-    mock_prometheus_client: MagicMock, mock_output: bytes
+@pytest.mark.parametrize("grpc", [True, False])
+def test_filter_metrics_report(
+    mock_prometheus_client: MagicMock,
+    mock_output: bytes,
+    expected: tuple[list[t.Any], bool | None],
+    grpc: bool,
 ):
     mock_prometheus_client.multiproc.return_value = False
     mock_prometheus_client.generate_latest.return_value = mock_output
-    assert analytics.usage_stats.get_metrics_report(mock_prometheus_client) == []
+    assert (
+        analytics.usage_stats.get_metrics_report(mock_prometheus_client, grpc=grpc)
+        == expected
+    )
 
 
 @patch("bentoml._internal.utils.analytics.usage_stats.do_not_track")
@@ -212,33 +229,31 @@ def test_track_serve_do_not_track(mock_do_not_track: MagicMock, noop_service: Se
 
 @patch("bentoml._internal.utils.analytics.usage_stats.do_not_track")
 @patch("bentoml._internal.server.metrics.prometheus.PrometheusClient")
-def test_get_metrics_report(
+def test_legacy_get_metrics_report(
     mock_prometheus_client: MagicMock,
     mock_do_not_track: MagicMock,
     noop_service: Service,
 ):
     mock_do_not_track.return_value = True
     mock_prometheus_client.multiproc.return_value = False
-    mock_prometheus_client.generate_latest.return_value = b"""\
-# HELP BENTOML_noop_request_total Multiprocess metric
-# TYPE BENTOML_noop_request_total counter
-BENTOML_noop_request_total{endpoint="/docs.json",http_response_code="200",service_version=""} 2.0
-BENTOML_noop_request_total{endpoint="/classify",http_response_code="200",service_version=""} 9.0
-BENTOML_noop_request_total{endpoint="/",http_response_code="200",service_version=""} 1.0
-# HELP BENTOML_noop_request_in_progress Multiprocess metric
-# TYPE BENTOML_noop_request_in_progress gauge
-BENTOML_noop_request_in_progress{endpoint="/",service_version=""} 0.0
-BENTOML_noop_request_in_progress{endpoint="/docs.json",service_version=""} 0.0
-BENTOML_noop_request_in_progress{endpoint="/classify",service_version=""} 0.0
-# HELP BENTOML_noop_request_duration_seconds Multiprocess metric
-# TYPE BENTOML_noop_request_duration_seconds histogram
-            """
-    output = analytics.usage_stats.get_metrics_report(mock_prometheus_client)
+    mock_prometheus_client.text_string_to_metric_families.return_value = text_string_to_metric_families(
+        b"""\
+# HELP BENTOML_noop_service_request_in_progress Multiprocess metric
+# TYPE BENTOML_noop_service_request_in_progress gauge
+BENTOML_noop_service_request_in_progress{endpoint="/predict",service_version="not available"} 0.0
+# HELP BENTOML_noop_service_request_total Multiprocess metric
+# TYPE BENTOML_noop_service_request_total counter
+BENTOML_noop_service_request_total{endpoint="/predict",http_response_code="200",service_version="not available"} 8.0
+""".decode(
+            "utf-8"
+        )
+    )
+    output, _ = analytics.usage_stats.get_metrics_report(mock_prometheus_client)
     assert {
-        "endpoint": "/classify",
+        "endpoint": "/predict",
         "http_response_code": "200",
-        "service_version": "",
-        "value": 9.0,
+        "service_version": "not available",
+        "value": 8.0,
     } in output
 
     endpoints = [filtered["endpoint"] for filtered in output]
@@ -246,6 +261,58 @@ def test_get_metrics_report(
     assert not any(x in endpoints for x in analytics.usage_stats.EXCLUDE_PATHS)
 
 
+@patch("bentoml._internal.server.metrics.prometheus.PrometheusClient")
+@pytest.mark.parametrize(
+    "grpc,expected",
+    [
+        (
+            True,
+            {
+                "api_name": "pred_json",
+                "http_response_code": "200",
+                "service_name": "noop_service",
+                "service_version": "not available",
+                "value": 15.0,
+            },
+        ),
+        (False, None),
+    ],
+)
+@pytest.mark.parametrize(
+    "generated_metrics",
+    [
+        text_string_to_metric_families(
+            b"""\
+                # HELP bentoml_api_server_request_total Multiprocess metric
+                # TYPE bentoml_api_server_request_total counter
+                bentoml_api_server_request_total{api_name="pred_json",http_response_code="200",service_name="noop_service",service_version="not available"} 15.0
+                # HELP bentoml_api_server_request_in_progress Multiprocess metric
+                # TYPE bentoml_api_server_request_in_progress gauge
+                bentoml_api_server_request_in_progress{api_name="pred_json",service_name="noop_service",service_version="not available"} 0.0
+                """.decode(
+                "utf-8"
+            )
+        )
+    ],
+)
+def test_get_metrics_report(
+    mock_prometheus_client: MagicMock,
+    noop_service: Service,
+    grpc: bool,
+    expected: dict[str, str | float] | None,
+    generated_metrics: t.Generator[Metric, None, None],
+):
+    mock_prometheus_client.multiproc.return_value = False
+    mock_prometheus_client.text_string_to_metric_families.return_value = (
+        generated_metrics
+    )
+    output, _ = analytics.usage_stats.get_metrics_report(
+        mock_prometheus_client, grpc=grpc
+    )
+    if expected:
+        assert expected in output
+
+
 @patch("bentoml._internal.utils.analytics.usage_stats.do_not_track")
 @patch("bentoml._internal.utils.analytics.usage_stats.requests.post")
 @patch("bentoml._internal.utils.analytics.usage_stats._track_serve_init")

From d7b48b871b931ef46557b22e3865cadfd7c4d331 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Thu, 22 Sep 2022 05:05:31 -0700
Subject: [PATCH 2/7] fix: linlt and format

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 tests/unit/_internal/utils/test_analytics.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/unit/_internal/utils/test_analytics.py b/tests/unit/_internal/utils/test_analytics.py
index 3d65c6443b..8702ed4b9d 100644
--- a/tests/unit/_internal/utils/test_analytics.py
+++ b/tests/unit/_internal/utils/test_analytics.py
@@ -11,10 +11,12 @@
 from schema import Or
 from schema import And
 from schema import Schema
+from prometheus_client.parser import (
+    text_string_to_metric_families,  # type: ignore (no prometheus types)
+)
 
 import bentoml
 from bentoml._internal.utils import analytics
-from prometheus_client.parser import text_string_to_metric_families  # type: ignore (no prometheus types)
 
 if TYPE_CHECKING:
     from unittest.mock import MagicMock

From 40c71156995f1a77770180df962e97daae75bcab Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Sun, 25 Sep 2022 23:58:23 -0700
Subject: [PATCH 3/7] chore: remove unused inject

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 bentoml/_internal/utils/analytics/usage_stats.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bentoml/_internal/utils/analytics/usage_stats.py b/bentoml/_internal/utils/analytics/usage_stats.py
index ab11bcd108..9640f930b1 100644
--- a/bentoml/_internal/utils/analytics/usage_stats.py
+++ b/bentoml/_internal/utils/analytics/usage_stats.py
@@ -181,7 +181,6 @@ def filter_metrics(
     ]
 
 
-@inject
 def get_metrics_report(
     metrics_client: PrometheusClient,
     grpc: bool = False,

From 98575858c8d8d104038a897fbabcb2a1634354f2 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Mon, 26 Sep 2022 01:15:10 -0700
Subject: [PATCH 4/7] chore: revert tracking metrics_type

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 bentoml/_internal/utils/analytics/schemas.py  |  6 -----
 .../_internal/utils/analytics/usage_stats.py  | 24 ++++++-------------
 tests/unit/_internal/utils/test_analytics.py  | 10 ++++----
 3 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/bentoml/_internal/utils/analytics/schemas.py b/bentoml/_internal/utils/analytics/schemas.py
index dca1362eb5..fb643abca5 100644
--- a/bentoml/_internal/utils/analytics/schemas.py
+++ b/bentoml/_internal/utils/analytics/schemas.py
@@ -209,12 +209,6 @@ class ServeUpdateEvent(EventMeta):
             attr.validators.in_(["standalone", "api_server", "runner"]),
         )
     )
-    metrics_type: str = attr.field(
-        validator=attr.validators.and_(
-            attr.validators.instance_of(str),
-            attr.validators.in_(["not_available", "legacy", "current"]),
-        ),
-    )
     metrics: t.List[t.Any] = attr.field(factory=list)
 
 
diff --git a/bentoml/_internal/utils/analytics/usage_stats.py b/bentoml/_internal/utils/analytics/usage_stats.py
index 9640f930b1..a1f862fb04 100644
--- a/bentoml/_internal/utils/analytics/usage_stats.py
+++ b/bentoml/_internal/utils/analytics/usage_stats.py
@@ -184,7 +184,7 @@ def filter_metrics(
 def get_metrics_report(
     metrics_client: PrometheusClient,
     grpc: bool = False,
-) -> tuple[list[dict[str, str | float]], bool | None]:
+) -> list[dict[str, str | float]]:
     """
     Get Prometheus metrics reports from the metrics client. This will be used to determine tracking events.
     If the return metrics are legacy metrics, the metrics will have prefix BENTOML_, otherwise they will have prefix bentoml_
@@ -221,17 +221,12 @@ def get_metrics_report(
                 lambda samples: [s for s in samples if "endpoint" in s.labels],
             ]
         # If metrics prefix is BENTOML_, this is legacy metrics
-        if metric_name.endswith("_request"):
-            if metric_name.startswith("BENTOML_"):
-                # This is the case where we have legacy metrics with
-                # newer metrics. We will ignore the newer metrics.
-                return filter_metrics(metric_samples, *_filters), True
-            else:
-                # This is the case where we only have newer metrics
-                assert metric_name.startswith("bentoml_")
-                return filter_metrics(metric_samples, *_filters), False
+        if metric_name.endswith("_request") and (
+            metric_name.startswith("bentoml_") or metric_name.startswith("BENTOML_")
+        ):
+            return filter_metrics(metric_samples, *_filters)
 
-    return [], None
+    return []
 
 
 @inject
@@ -262,10 +257,6 @@ def track_serve(
     def loop() -> t.NoReturn:  # type: ignore
         last_tracked_timestamp: datetime = serve_info.serve_started_timestamp
         while not stop_event.wait(tracking_interval):  # pragma: no cover
-            metrics_type = "not_available"
-            metrics, use_legacy_metrics = get_metrics_report(metrics_client, grpc=grpc)
-            if use_legacy_metrics is not None:
-                metrics_type = "legacy" if use_legacy_metrics else "current"
             now = datetime.now(timezone.utc)
             event_properties = ServeUpdateEvent(
                 serve_id=serve_info.serve_id,
@@ -274,8 +265,7 @@ def loop() -> t.NoReturn:  # type: ignore
                 component=component,
                 triggered_at=now,
                 duration_in_seconds=int((now - last_tracked_timestamp).total_seconds()),
-                metrics_type=metrics_type,
-                metrics=metrics,
+                metrics=get_metrics_report(metrics_client, grpc=grpc),
             )
             last_tracked_timestamp = now
             track(event_properties)
diff --git a/tests/unit/_internal/utils/test_analytics.py b/tests/unit/_internal/utils/test_analytics.py
index 8702ed4b9d..975973d650 100644
--- a/tests/unit/_internal/utils/test_analytics.py
+++ b/tests/unit/_internal/utils/test_analytics.py
@@ -193,10 +193,10 @@ def test_track_serve_init_no_bento(
 @pytest.mark.parametrize(
     "mock_output,expected",
     [
-        (b"", tuple([[], None])),
+        (b"", []),
         (
             b"""# HELP BENTOML_noop_request_total Multiprocess metric""",
-            tuple([[], None]),
+            [],
         ),
     ],
 )
@@ -250,7 +250,7 @@ def test_legacy_get_metrics_report(
             "utf-8"
         )
     )
-    output, _ = analytics.usage_stats.get_metrics_report(mock_prometheus_client)
+    output = analytics.usage_stats.get_metrics_report(mock_prometheus_client)
     assert {
         "endpoint": "/predict",
         "http_response_code": "200",
@@ -308,9 +308,7 @@ def test_get_metrics_report(
     mock_prometheus_client.text_string_to_metric_families.return_value = (
         generated_metrics
     )
-    output, _ = analytics.usage_stats.get_metrics_report(
-        mock_prometheus_client, grpc=grpc
-    )
+    output = analytics.usage_stats.get_metrics_report(mock_prometheus_client, grpc=grpc)
     if expected:
         assert expected in output
 

From 152f06b358cbae2bd5a118f38261cb561a86cf1b Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 27 Sep 2022 04:04:41 -0700
Subject: [PATCH 5/7] logic: update tracking fields to job to use string

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 bentoml/_internal/utils/analytics/schemas.py     | 2 +-
 bentoml/_internal/utils/analytics/usage_stats.py | 4 +++-
 pyproject.toml                                   | 6 ++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/bentoml/_internal/utils/analytics/schemas.py b/bentoml/_internal/utils/analytics/schemas.py
index fb643abca5..289a980e94 100644
--- a/bentoml/_internal/utils/analytics/schemas.py
+++ b/bentoml/_internal/utils/analytics/schemas.py
@@ -200,7 +200,7 @@ class ServeInitEvent(EventMeta):
 class ServeUpdateEvent(EventMeta):
     serve_id: str
     production: bool
-    grpc: bool
+    job: str
     triggered_at: datetime
     duration_in_seconds: int
     component: str = attr.field(
diff --git a/bentoml/_internal/utils/analytics/usage_stats.py b/bentoml/_internal/utils/analytics/usage_stats.py
index a1f862fb04..824b8ba741 100644
--- a/bentoml/_internal/utils/analytics/usage_stats.py
+++ b/bentoml/_internal/utils/analytics/usage_stats.py
@@ -261,7 +261,9 @@ def loop() -> t.NoReturn:  # type: ignore
             event_properties = ServeUpdateEvent(
                 serve_id=serve_info.serve_id,
                 production=production,
-                grpc=grpc,
+                # Note that we are currently only have two tracking jobs: http and grpc
+                job="grpc" if grpc else "http",
+                # Current accept components are "standalone", "api_server" and "runner"
                 component=component,
                 triggered_at=now,
                 duration_in_seconds=int((now - last_tracked_timestamp).total_seconds()),
diff --git a/pyproject.toml b/pyproject.toml
index 2363d45fcb..c685a86a5f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -184,6 +184,7 @@ exclude = '''
     | dist
     | typings
     | bentoml/grpc/v1alpha1
+    | grpc-client/thirdparty
   )/
   | bentoml/_version.py
 )
@@ -340,6 +341,9 @@ skip_glob = [
   "**/*_pb2_grpc.py",
   "venv/*",
   "lib/*",
+  'bentoml/grpc/v1alpha1/',
+  "grpc-client/thirdparty",
+  "bazel-*",
 ]
 
 [tool.pyright]
@@ -352,6 +356,8 @@ exclude = [
   'bentoml/grpc/v1alpha1/',
   '**/*_pb2.py',
   "**/*_pb2_grpc.py",
+  "grpc-client/thirdparty",
+  "bazel-*",
 ]
 useLibraryCodeForTypes = true
 strictListInference = true

From 8afe527eafb558b29d13670def6868df5390cb7e Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 27 Sep 2022 13:21:12 -0700
Subject: [PATCH 6/7] chore: update to serve_kind

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 bentoml/_internal/utils/analytics/schemas.py  | 13 +++++++----
 .../_internal/utils/analytics/usage_stats.py  | 22 ++++++++++---------
 bentoml/serve.py                              |  4 ++--
 bentoml/start.py                              |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/bentoml/_internal/utils/analytics/schemas.py b/bentoml/_internal/utils/analytics/schemas.py
index 289a980e94..bf46ead3b5 100644
--- a/bentoml/_internal/utils/analytics/schemas.py
+++ b/bentoml/_internal/utils/analytics/schemas.py
@@ -179,14 +179,20 @@ class ModelSaveEvent(EventMeta):
     model_size_in_kb: float
 
 
+# serve_kind determines different type of serving scenarios
+SERVE_KIND = ["grpc", "http"]
+# components are different components to be tracked.
+COMPONENT_KIND = ["standalone", "api_server", "runner"]
+
+
 @attr.define
 class ServeInitEvent(EventMeta):
     serve_id: str
     production: bool
-    grpc: bool
     serve_from_bento: bool
 
     bento_creation_timestamp: t.Optional[datetime]
+    serve_kind: str = attr.field(validator=attr.validators.in_(SERVE_KIND))
     num_of_models: int = attr.field(default=0)
     num_of_runners: int = attr.field(default=0)
     num_of_apis: int = attr.field(default=0)
@@ -200,13 +206,12 @@ class ServeInitEvent(EventMeta):
 class ServeUpdateEvent(EventMeta):
     serve_id: str
     production: bool
-    job: str
     triggered_at: datetime
     duration_in_seconds: int
+    serve_kind: str = attr.field(validator=attr.validators.in_(SERVE_KIND))
     component: str = attr.field(
         validator=attr.validators.and_(
-            attr.validators.instance_of(str),
-            attr.validators.in_(["standalone", "api_server", "runner"]),
+            attr.validators.instance_of(str), attr.validators.in_(COMPONENT_KIND)
         )
     )
     metrics: t.List[t.Any] = attr.field(factory=list)
diff --git a/bentoml/_internal/utils/analytics/usage_stats.py b/bentoml/_internal/utils/analytics/usage_stats.py
index 824b8ba741..c49460dc3a 100644
--- a/bentoml/_internal/utils/analytics/usage_stats.py
+++ b/bentoml/_internal/utils/analytics/usage_stats.py
@@ -125,7 +125,7 @@ def track(event_properties: EventMeta):
 def _track_serve_init(
     svc: Service,
     production: bool,
-    grpc: bool,
+    serve_kind: str,
     serve_info: ServeInfo = Provide[BentoMLContainer.serve_info],
 ):
     if svc.bento is not None:
@@ -134,7 +134,7 @@ def _track_serve_init(
             serve_id=serve_info.serve_id,
             serve_from_bento=True,
             production=production,
-            grpc=grpc,
+            serve_kind=serve_kind,
             bento_creation_timestamp=bento.info.creation_time,
             num_of_models=len(bento.info.models),
             num_of_runners=len(svc.runners),
@@ -149,7 +149,7 @@ def _track_serve_init(
             serve_id=serve_info.serve_id,
             serve_from_bento=False,
             production=production,
-            grpc=grpc,
+            serve_kind=serve_kind,
             bento_creation_timestamp=None,
             num_of_models=len(
                 set(
@@ -183,7 +183,7 @@ def filter_metrics(
 
 def get_metrics_report(
     metrics_client: PrometheusClient,
-    grpc: bool = False,
+    serve_kind: str,
 ) -> list[dict[str, str | float]]:
     """
     Get Prometheus metrics reports from the metrics client. This will be used to determine tracking events.
@@ -204,11 +204,11 @@ def get_metrics_report(
             continue
         # We only care about the counter metrics.
         assert metric_type == "counter"
-        if grpc:
+        if serve_kind == "grpc":
             _filters: list[t.Callable[[list[Sample]], list[Sample]]] = [
                 lambda samples: [s for s in samples if "api_name" in s.labels]
             ]
-        else:
+        elif serve_kind == "http":
             _filters = [
                 lambda samples: [
                     s
@@ -220,6 +220,8 @@ def get_metrics_report(
                 ],
                 lambda samples: [s for s in samples if "endpoint" in s.labels],
             ]
+        else:
+            raise NotImplementedError("Unknown serve kind %s" % serve_kind)
         # If metrics prefix is BENTOML_, this is legacy metrics
         if metric_name.endswith("_request") and (
             metric_name.startswith("bentoml_") or metric_name.startswith("BENTOML_")
@@ -235,7 +237,7 @@ def track_serve(
     svc: Service,
     *,
     production: bool = False,
-    grpc: bool = False,
+    serve_kind: str = "http",
     component: str = "standalone",
     metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client],
     serve_info: ServeInfo = Provide[BentoMLContainer.serve_info],
@@ -244,7 +246,7 @@ def track_serve(
         yield
         return
 
-    _track_serve_init(svc=svc, production=production, grpc=grpc)
+    _track_serve_init(svc=svc, production=production, serve_kind=serve_kind)
 
     if _usage_event_debugging():
         tracking_interval = 5
@@ -262,12 +264,12 @@ def loop() -> t.NoReturn:  # type: ignore
                 serve_id=serve_info.serve_id,
                 production=production,
                 # Note that we are currently only have two tracking jobs: http and grpc
-                job="grpc" if grpc else "http",
+                serve_kind=serve_kind,
                 # Current accept components are "standalone", "api_server" and "runner"
                 component=component,
                 triggered_at=now,
                 duration_in_seconds=int((now - last_tracked_timestamp).total_seconds()),
-                metrics=get_metrics_report(metrics_client, grpc=grpc),
+                metrics=get_metrics_report(metrics_client, serve_kind=serve_kind),
             )
             last_tracked_timestamp = now
             track(event_properties)
diff --git a/bentoml/serve.py b/bentoml/serve.py
index 5840fe24b0..ad6b620bc7 100644
--- a/bentoml/serve.py
+++ b/bentoml/serve.py
@@ -630,7 +630,7 @@ def serve_grpc_development(
         loglevel="ERROR",
     )
 
-    with track_serve(svc, grpc=True):
+    with track_serve(svc, serve_kind="grpc"):
         arbiter.start(
             cb=lambda _: logger.info(  # type: ignore
                 'Starting development %s BentoServer from "%s" listening on %s:%d (Press CTRL+C to quit)',
@@ -854,7 +854,7 @@ def serve_grpc_production(
         watchers=watchers, sockets=list(circus_socket_map.values())
     )
 
-    with track_serve(svc, production=True, grpc=True):
+    with track_serve(svc, production=True, serve_kind="grpc"):
         try:
             arbiter.start(
                 cb=lambda _: logger.info(  # type: ignore
diff --git a/bentoml/start.py b/bentoml/start.py
index 905935f689..03d083b7c8 100644
--- a/bentoml/start.py
+++ b/bentoml/start.py
@@ -347,7 +347,7 @@ def start_grpc_server(
     arbiter = create_standalone_arbiter(
         watchers=watchers, sockets=list(circus_socket_map.values())
     )
-    with track_serve(svc, production=True, component=API_SERVER, grpc=True):
+    with track_serve(svc, production=True, component=API_SERVER, serve_kind="grpc"):
         arbiter.start(
             cb=lambda _: logger.info(  # type: ignore
                 'Starting bare %s BentoServer from "%s" running on http://%s:%d (Press CTRL+C to quit)',

From 60d0b9e529dad15c721f804a2a123990214139a9 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 27 Sep 2022 13:31:58 -0700
Subject: [PATCH 7/7] test: fix new changes

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 tests/unit/_internal/utils/test_analytics.py | 30 ++++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/tests/unit/_internal/utils/test_analytics.py b/tests/unit/_internal/utils/test_analytics.py
index 975973d650..6e3de4e122 100644
--- a/tests/unit/_internal/utils/test_analytics.py
+++ b/tests/unit/_internal/utils/test_analytics.py
@@ -148,7 +148,7 @@ def test_track_serve_init(
         noop_service,
         production=production,
         serve_info=analytics.usage_stats.get_serve_info(),
-        grpc=False,
+        serve_kind="http",
     )
 
     assert mock_do_not_track.called
@@ -160,7 +160,7 @@ def test_track_serve_init(
             noop_service,
             production=production,
             serve_info=analytics.usage_stats.get_serve_info(),
-            grpc=False,
+            serve_kind="http",
         )
     assert "model_types" in caplog.text
 
@@ -184,7 +184,7 @@ def test_track_serve_init_no_bento(
             bentoml.Service("test"),
             production=False,
             serve_info=analytics.usage_stats.get_serve_info(),
-            grpc=False,
+            serve_kind="http",
         )
     assert "model_types" not in caplog.text
 
@@ -200,17 +200,19 @@ def test_track_serve_init_no_bento(
         ),
     ],
 )
-@pytest.mark.parametrize("grpc", [True, False])
+@pytest.mark.parametrize("serve_kind", ["grpc", "http"])
 def test_filter_metrics_report(
     mock_prometheus_client: MagicMock,
     mock_output: bytes,
     expected: tuple[list[t.Any], bool | None],
-    grpc: bool,
+    serve_kind: str,
 ):
     mock_prometheus_client.multiproc.return_value = False
     mock_prometheus_client.generate_latest.return_value = mock_output
     assert (
-        analytics.usage_stats.get_metrics_report(mock_prometheus_client, grpc=grpc)
+        analytics.usage_stats.get_metrics_report(
+            mock_prometheus_client, serve_kind=serve_kind
+        )
         == expected
     )
 
@@ -250,7 +252,9 @@ def test_legacy_get_metrics_report(
             "utf-8"
         )
     )
-    output = analytics.usage_stats.get_metrics_report(mock_prometheus_client)
+    output = analytics.usage_stats.get_metrics_report(
+        mock_prometheus_client, serve_kind="http"
+    )
     assert {
         "endpoint": "/predict",
         "http_response_code": "200",
@@ -265,10 +269,10 @@ def test_legacy_get_metrics_report(
 
 @patch("bentoml._internal.server.metrics.prometheus.PrometheusClient")
 @pytest.mark.parametrize(
-    "grpc,expected",
+    "serve_kind,expected",
     [
         (
-            True,
+            "grpc",
             {
                 "api_name": "pred_json",
                 "http_response_code": "200",
@@ -277,7 +281,7 @@ def test_legacy_get_metrics_report(
                 "value": 15.0,
             },
         ),
-        (False, None),
+        ("http", None),
     ],
 )
 @pytest.mark.parametrize(
@@ -300,7 +304,7 @@ def test_legacy_get_metrics_report(
 def test_get_metrics_report(
     mock_prometheus_client: MagicMock,
     noop_service: Service,
-    grpc: bool,
+    serve_kind: str,
     expected: dict[str, str | float] | None,
     generated_metrics: t.Generator[Metric, None, None],
 ):
@@ -308,7 +312,9 @@ def test_get_metrics_report(
     mock_prometheus_client.text_string_to_metric_families.return_value = (
         generated_metrics
     )
-    output = analytics.usage_stats.get_metrics_report(mock_prometheus_client, grpc=grpc)
+    output = analytics.usage_stats.get_metrics_report(
+        mock_prometheus_client, serve_kind=serve_kind
+    )
     if expected:
         assert expected in output