diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcdd04c7fe5..531f2c30f94 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,12 +77,6 @@ jobs: run: | npm install -g npm@^7 pyright - - name: Setup protobuf tools - uses: bufbuild/buf-setup-action@v1.6.0 - if: success() - with: - github_token: ${{ github.token }} - - name: Cache pip dependencies uses: actions/cache@v3 id: cache-pip @@ -99,8 +93,6 @@ run: make ci-format - name: Lint check run: make ci-lint - - name: Proto check - run: make ci-proto - name: Type check run: make ci-pyright diff --git a/.readthedocs.yml b/.readthedocs.yaml similarity index 60% rename from .readthedocs.yml rename to .readthedocs.yaml index 94e742e486d..112e083ea9f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yaml @@ -1,25 +1,18 @@ -# .readthedocs.yml -# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -# Required version: 2 build: - os: "ubuntu-20.04" + os: "ubuntu-22.04" tools: python: "3.9" jobs: - post_checkout: - - git fetch --unshallow pre_install: - git update-index --assume-unchanged docs/source/conf.py -# Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py -# Optionally set the version of Python and requirements required to build your docs python: install: - requirements: requirements/docs-requirements.txt diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 78e4b95a487..5b6887a12ce 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -18,22 +18,26 @@ If you are interested in proposing a new feature, make sure to create a new feat 2. Fork the BentoML project on [GitHub](https://github.com/bentoml/BentoML). 3. Clone the source code from your fork of BentoML's GitHub repository: + ```bash git clone git@github.com:username/BentoML.git && cd BentoML ``` 4. Add the BentoML upstream remote to your local BentoML clone: + ```bash git remote add upstream git@github.com:bentoml/BentoML.git ``` 5. Configure git to pull from the upstream remote: + ```bash git switch main # ensure you're on the main branch git branch --set-upstream-to=upstream/main ``` 6. Install BentoML with pip in editable mode: + ```bash pip install -e . ``` @@ -41,11 +45,13 @@ If you are interested in proposing a new feature, make sure to create a new feat This installs BentoML in an editable state. The changes you make will automatically be reflected without reinstalling BentoML. 7. Install the BentoML development requirements: + ```bash pip install -r ./requirements/dev-requirements.txt ``` 8. Test the BentoML installation either with `bash`: + ```bash bentoml --version ``` @@ -62,65 +68,72 @@ If you are interested in proposing a new feature, make sure to create a new feat

### With VS Code

1. Confirm that you have the following installed: - - [Python3.7+](https://www.python.org/downloads/) - - VS Code with the [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Pylance](https://marketplace.visualstudio.com/items?itemName=ms-python.vscode-pylance) extensions + + - [Python3.7+](https://www.python.org/downloads/) + - VS Code with the [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Pylance](https://marketplace.visualstudio.com/items?itemName=ms-python.vscode-pylance) extensions 2. Fork the BentoML project on [GitHub](https://github.com/bentoml/BentoML). 3. Clone the GitHub repository: - 1. Open the command palette with Ctrl+Shift+P and type in 'clone'. - 2. Select 'Git: Clone(Recursive)'. - 3. Clone BentoML. + + 1. Open the command palette with Ctrl+Shift+P and type in 'clone'. + 2. Select 'Git: Clone(Recursive)'. + 3. Clone BentoML. 4. Add a BentoML upstream remote: - 1. Open the command palette and enter 'add remote'. - 2. Select 'Git: Add Remote'. - 3. Press enter to select 'Add remote' from GitHub. - 4. Enter https://github.com/bentoml/BentoML.git to select the BentoML repository. - 5. Name your remote 'upstream'. + + 1. Open the command palette and enter 'add remote'. + 2. Select 'Git: Add Remote'. + 3. Press enter to select 'Add remote' from GitHub. + 4. Enter https://github.com/bentoml/BentoML.git to select the BentoML repository. + 5. Name your remote 'upstream'. 5. Pull from the BentoML upstream remote to your main branch: - 1. Open the command palette and enter 'checkout'. - 2. Select 'Git: Checkout to...' - 3. Choose 'main' to switch to the main branch. - 4. Open the command palette again and enter 'pull from'. - 5. Click on 'Git: Pull from...' - 6. Select 'upstream'. + + 1. Open the command palette and enter 'checkout'. + 2. Select 'Git: Checkout to...' + 3. Choose 'main' to switch to the main branch. + 4. Open the command palette again and enter 'pull from'. + 5. Click on 'Git: Pull from...' + 6. Select 'upstream'. 6. Open a new terminal by clicking the Terminal dropdown at the top of the window, followed by the 'New Terminal' option. Next, add a virtual environment with this command: ```bash python -m venv .venv ``` 7. Click yes if a popup suggests switching to the virtual environment. Otherwise, go through these steps: - 1. Open any python file in the directory. - 2. Select the interpreter selector on the blue status bar at the bottom of the editor. - ![vscode-status-bar](https://user-images.githubusercontent.com/489344/166984038-75f1f4bd-c896-43ee-a7ee-1b57fda359a3.png) - - 3. Switch to the path that includes .venv from the dropdown at the top. - ![vscode-select-venv](https://user-images.githubusercontent.com/489344/166984060-170d25f5-a91f-41d3-96f4-4db3c21df7c8.png) + 1. Open any python file in the directory. + 2. Select the interpreter selector on the blue status bar at the bottom of the editor. + ![vscode-status-bar](https://user-images.githubusercontent.com/489344/166984038-75f1f4bd-c896-43ee-a7ee-1b57fda359a3.png) + + 3. Switch to the path that includes .venv from the dropdown at the top. + ![vscode-select-venv](https://user-images.githubusercontent.com/489344/166984060-170d25f5-a91f-41d3-96f4-4db3c21df7c8.png) 8. Update your PowerShell execution policies. Win+x followed by the 'a' key opens the admin Windows PowerShell. Enter the following command to allow the virtual environment activation script to run: ``` Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser ``` -
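Regardless of which setup path you followed, it is worth confirming that the editable install resolves to your working copy rather than a released package. Below is a minimal sanity check, assuming a POSIX-style shell and the `.venv` created above (on Windows PowerShell the equivalent activation script is `.venv\Scripts\Activate.ps1`):

```bash
# Activate the virtual environment created in the steps above.
source .venv/bin/activate

# The interpreter should resolve to a path inside .venv.
which python

# With an editable install ('pip install -e .'), bentoml should import
# from your clone, not from site-packages.
python -c "import bentoml; print(bentoml.__file__)"
bentoml --version
```

If `bentoml.__file__` points into your clone, edits to the source tree take effect without reinstalling.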
+ ## Making Changes

### Using the Command Line

1. Make sure you're on the main branch. + ```bash git switch main ``` 2. Use the git pull command to retrieve content from the BentoML GitHub repository. + ```bash git pull ``` 3. Create a new branch and switch to it. + ```bash git switch -c my-new-branch-name ``` @@ -128,11 +141,13 @@ If you are interested in proposing a new feature, make sure to create a new feat 4. Make your changes! 5. Use the git add command to save the state of files you have changed. + ```bash git add <file> ``` 6. Commit your changes. + ```bash git commit ``` @@ -141,46 +156,52 @@ If you are interested in proposing a new feature, make sure to create a new feat ```bash git push ``` -
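Putting the steps above together, a typical end-to-end flow looks like the sketch below. The branch name, file path, and commit message are illustrative only; note that the first push of a new branch needs an explicit upstream, and the commit prefix follows the Conventional Commits style described in the pull request guidelines later in this document:

```bash
git switch main
git pull                                # fast-forwards main from upstream/main
git switch -c fix/my-bugfix             # hypothetical branch name

# ... edit files ...

git add path/to/changed_file.py         # hypothetical path
git commit -m "fix: correct my bug"     # illustrative commit message
git push --set-upstream origin fix/my-bugfix
```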
+

### Using VS Code

1. Switch to the main branch: - 1. Open the command palette with Ctrl+Shift+P. - 2. Search for 'Git: Checkout to...' - 3. Select 'main'. + + 1. Open the command palette with Ctrl+Shift+P. + 2. Search for 'Git: Checkout to...' + 3. Select 'main'. 2. Pull from the upstream remote: - 1. Open the command palette. - 2. Enter and select 'Git: Pull...' - 3. Select 'upstream'. + + 1. Open the command palette. + 2. Enter and select 'Git: Pull...' + 3. Select 'upstream'. 3. Create and change to a new branch: - 1. Type in 'Git: Create Branch...' in the command palette. - 2. Enter a branch name. + + 1. Type in 'Git: Create Branch...' in the command palette. + 2. Enter a branch name. 4. Make your changes! 5. Stage all your changes: - 1. Enter and select 'Git: Stage All Changes...' in the command palette. + + 1. Enter and select 'Git: Stage All Changes...' in the command palette. 6. Commit your changes: - 1. Open the command palette and enter 'Git: Commit'. + + 1. Open the command palette and enter 'Git: Commit'. 7. Push your changes: - 1. Enter and select 'Git: Push...' in the command palette. + 1. Enter and select 'Git: Push...' in the command palette.
- ## Run BentoML with verbose/debug logging To view internal debug logs for development, set the `BENTOML_DEBUG` environment variable to `TRUE`: + ```bash export BENTOML_DEBUG=TRUE ``` And/or use the `--verbose` option when running a `bentoml` CLI command, e.g.: + ```bash bentoml get IrisClassifier --verbose ``` @@ -199,6 +220,7 @@ Currently, we run `buf` with Docker, so we kindly ask our developers to have Docker available. Docker installation can be found [here](https://docs.docker.com/get-docker/). Run linter/format script: + ```bash make format @@ -206,6 +228,7 @@ make lint ``` Run type checker: + ```bash make type ``` @@ -221,14 +244,13 @@ If you edit the proto files, make sure to run `pip install -e -U .` again to reg To test out your changes in an actual BentoML model deployment, you can create a new Bento with your custom BentoML source repo: 1. Install custom BentoML in editable mode. e.g.: - * git clone your bentoml fork - * `pip install -e PATH_TO_THE_FORK` + - git clone your bentoml fork + - `pip install -e PATH_TO_THE_FORK` 2. Set env var `export BENTOML_BUNDLE_LOCAL_BUILD=True` and `export SETUPTOOLS_USE_DISTUTILS=stdlib` - * make sure you have the latest setuptools installed: `pip install -U setuptools` + - make sure you have the latest setuptools installed: `pip install -U setuptools` 3. Build a new Bento with `bentoml build` in your project directory -4. The new Bento will include a wheel file built from the BentoML source, and -`bentoml containrize` will install it to override the default BentoML installation in base image - +4. The new Bento will include a wheel file built from the BentoML source, and + `bentoml containerize` will install it to override the default BentoML installation in the base image ### Distribute a custom BentoML release for your team @@ -247,18 +269,18 @@ description: "file: ./README.md" include: - "*.py" python: - packages: - - pandas - - git+https://github.com/{YOUR_GITHUB_USERNAME}/bentoml@{YOUR_REVISION} + packages: + - pandas + - git+https://github.com/{YOUR_GITHUB_USERNAME}/bentoml@{YOUR_REVISION} docker: - system_packages: - - git + system_packages: + - git ``` - ## Testing Make sure to install all test dependencies: + ```bash pip install -r requirements/tests-requirements.txt ``` @@ -268,12 +290,14 @@ pip install -r requirements/tests-requirements.txt You can run unit tests in two ways: Run all unit tests directly with pytest: + ```bash # GIT_ROOT=$(git rev-parse --show-toplevel) -pytest tests/unit --cov=bentoml --cov-config="$GIT_ROOT"/setup.cfg +pytest tests/unit --cov=bentoml --cov-config="$GIT_ROOT"/pyproject.toml ``` Run all unit tests via `./scripts/ci/run_tests.sh`: + ```bash ./scripts/ci/run_tests.sh unit @@ -284,6 +308,7 @@ make tests-unit ### Integration tests Run the given tests after defining a target under `scripts/ci/config.yml` with `run_tests.sh`: + ```bash # example: run Keras TF1 integration tests ./scripts/ci/run_tests.sh keras_tf1 @@ -297,13 +322,13 @@ Run given tests after defining a target under `scripts/ci/config.yml` with `run_ ``` ### Adding a new test suite - + If you are adding new ML framework support, it is recommended that you also add a separate test suite in our CI. Currently we are using GitHub Actions to manage our CI/CD workflow. We recommend using [`nektos/act`](https://github.com/nektos/act) to run and test Actions locally. - The following test script [run_tests.sh](./scripts/ci/run_tests.sh) can be used to run tests locally.
+ ```bash ./scripts/ci/run_tests.sh -h Running unit/integration tests with pytest and generate coverage reports. Make sure that given testcases is defined under ./scripts/ci/config.yml. @@ -323,6 +348,7 @@ Example: ``` All tests are then defined under [config.yml](./scripts/ci/config.yml) where each field follows the following format: + ```yaml <target>: &tmpl root_test_dir: "tests/integration/frameworks" @@ -335,20 +361,21 @@ All tests are then defined under [config.yml](./scripts/ci/config.yml) where eac By default, each of our framework test files follows the format `test_<framework>_impl.py`. If `is_dir` is set to `true`, we will try to match the given `<target>` under `root_test_dir` to run tests from. -| Keys | Type | Defintions | |------|------|------------| -|`root_test_dir`| ``| root directory to run a given tests | -|`is_dir`| ``| whether `target` is a directory instead of a file | -|`override_name_or_path`| ``| optional way to override a tests file name if doesn't match our convention | -|`dependencies`| ``| define additional dependencies required to run the tests, accepts `requirements.txt` format | -|`external_scripts`| ``| optional shell scripts that can be run on top of `./scripts/ci/run_tests.sh` for given testsuite | -|`type_tests`| ``| define type of tests for given `target` | +| Keys | Type | Definitions | +| ----------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `root_test_dir` | `<str>` | root directory to run the given tests from | +| `is_dir` | `<bool>` | whether `target` is a directory instead of a file | +| `override_name_or_path` | `<str>` | optional way to override a test file name if it doesn't match our convention | +| `dependencies` | `<list[str]>` | additional dependencies required to run the tests, in `requirements.txt` format | +| `external_scripts` | `<str>` | optional shell scripts that can be run on top of `./scripts/ci/run_tests.sh` for a given test suite | +| `type_tests` | `<str>` | the type of tests for the given `target` | When `type_tests` is set to `e2e`, `./scripts/ci/run_tests.sh` will change the current directory to the given `root_test_dir` and run the test suite from there. We encourage developers to use these CI scripts because, under the hood, pytest creates a custom report for the given tests; this report is then used as carryforward flags on Codecov for consistent reporting. Example: + ```yaml # e2e tests general_features: @@ -370,20 +397,18 @@ pytorch_lightning: Refer to [config.yml](./scripts/ci/config.yml) for more examples. - ## Python tools ecosystem -Currently, BentoML is [PEP518](https://www.python.org/dev/peps/pep-0518/) compatible via `setup.cfg` and `pyproject.toml`. - We also define most of our config for Python tools where: - - `pyproject.toml` contains config for `setuptools`, `black`, `pytest`, `pylint`, `isort`, `pyright` - - `setup.cfg` contains metadata for `bentoml` library and `coverage` +Currently, BentoML is [PEP518](https://www.python.org/dev/peps/pep-0518/) compatible. We define package configuration via [`pyproject.toml`](https://github.com/bentoml/bentoml/blob/main/pyproject.toml). ## Benchmark + BentoML has moved its benchmark to [`bentoml/benchmark`](https://github.com/bentoml/benchmark). ## Optional: git hooks BentoML also provides git hooks that developers can install with: + ```bash make hooks ``` @@ -396,6 +421,7 @@ on how to create a pull request on github. Name your pull request with one of the following prefixes, e.g.
"feat: add support for PyTorch". This is based on the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/#summary) + - feat: (new feature for the user, not a new feature for build script) - fix: (bug fix for the user, not a fix to a build script) - docs: (changes to the documentation) @@ -417,4 +443,3 @@ your pull request. Refers to [BentoML Documentation Guide](./docs/README.md) for how to build and write docs. - diff --git a/Makefile b/Makefile index 2c6ac13a1d1..c4fba12efe3 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,8 @@ ci-format: ci-black ci-isort ## Running format check in CI: black, isort .PHONY: ci-lint ci-lint: ci-pylint ## Running lint check in CI: pylint +.PHONY: tests-suite +tests-suite: tests-unit tests-general_features ## Running BentoML tests suite (unit, e2e, integration) tests-%: $(eval type :=$(subst tests-, , $@)) diff --git a/bentoml/_internal/bento/build_dev_bentoml_whl.py b/bentoml/_internal/bento/build_dev_bentoml_whl.py index 4247270cdaf..1ecd63de0fc 100644 --- a/bentoml/_internal/bento/build_dev_bentoml_whl.py +++ b/bentoml/_internal/bento/build_dev_bentoml_whl.py @@ -29,8 +29,9 @@ def build_bentoml_editable_wheel(target_path: str) -> None: return try: - from build import ProjectBuilder from build.env import IsolatedEnvBuilder + + from build import ProjectBuilder except ModuleNotFoundError: raise MissingDependencyException(_exc_message) diff --git a/bentoml/_internal/bento/docker/templates/base_debian.j2 b/bentoml/_internal/bento/docker/templates/base_debian.j2 index fb6bff3cb48..96ec2692834 100644 --- a/bentoml/_internal/bento/docker/templates/base_debian.j2 +++ b/bentoml/_internal/bento/docker/templates/base_debian.j2 @@ -7,17 +7,8 @@ USER root ENV DEBIAN_FRONTEND=noninteractive RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache -RUN {{ common.mount_cache(__lib_apt__) }} \ +RUN {{ common.mount_cache(__lib_apt__) }} \ {{ common.mount_cache(__cache_apt__) }} \ - apt-get update -y \ - && apt-get install -q -y --no-install-recommends --allow-remove-essential \ - ca-certificates gnupg2 bash build-essential - -{% if __options__system_packages is not none %} -# Install user-defined system package -RUN {{ common.mount_cache(__lib_apt__) }} \ - {{ common.mount_cache(__cache_apt__) }} \ - apt-get install -q -y --no-install-recommends --allow-remove-essential \ - {{ __options__system_packages | join(' ') }} -{% endif -%} + apt-get update -y \ + && apt-get install -q -y --no-install-recommends --allow-remove-essential ca-certificates gnupg2 bash build-essential {% if __options__system_packages is not none %}{{ __options__system_packages | join(' ') }}{% endif -%} {% endblock %} diff --git a/bentoml/_internal/configuration/__init__.py b/bentoml/_internal/configuration/__init__.py index 8061ec6dcc7..8aba27c818c 100644 --- a/bentoml/_internal/configuration/__init__.py +++ b/bentoml/_internal/configuration/__init__.py @@ -11,8 +11,6 @@ except ModuleNotFoundError: import importlib_metadata -import yaml - try: import bentoml._version as version_mod except ImportError: @@ -149,6 +147,8 @@ def load_global_config(bentoml_config_file: t.Optional[str] = None): def save_global_config(config_file_handle: t.IO[t.Any]): + import yaml + from ..configuration.containers import BentoMLContainer content = yaml.safe_dump(BentoMLContainer.config) diff --git a/bentoml/_internal/io_descriptors/base.py b/bentoml/_internal/io_descriptors/base.py index f9f067b44c9..09ca7bf3e44 
100644 --- a/bentoml/_internal/io_descriptors/base.py +++ b/bentoml/_internal/io_descriptors/base.py @@ -3,22 +3,24 @@ import typing as t from abc import ABCMeta from abc import abstractmethod +from typing import overload from typing import TYPE_CHECKING -from starlette.requests import Request -from starlette.responses import Response - if TYPE_CHECKING: from types import UnionType from typing_extensions import Self + from starlette.requests import Request + from starlette.responses import Response + from bentoml.grpc.types import ProtoField + from bentoml.grpc.types import BentoServicerContext + from bentoml.grpc.v1.service_pb2 import Part as GRPCPart from bentoml.grpc.v1.service_pb2 import Request as GRPCRequest from bentoml.grpc.v1.service_pb2 import Response as GRPCResponse from ..types import LazyType from ..context import InferenceApiContext as Context - from ..server.grpc.types import BentoServicerContext from ..service.openapi.specification import Schema from ..service.openapi.specification import Response as OpenAPIResponse from ..service.openapi.specification import Reference @@ -36,21 +38,27 @@ class DescriptorMeta(ABCMeta): + _proto_field: str + def __new__( cls: type[Self], name: str, bases: tuple[type, ...], namespace: dict[str, t.Any], *, - proto_fields: list[str] | None = None, + proto_field: str | None = None, + **kwargs: t.Any, ) -> Self: - if not proto_fields: - proto_fields = [] - namespace["_proto_fields"] = proto_fields - return super().__new__(cls, name, bases, namespace) + if not proto_field: + proto_field = "" + + klass = super().__new__(cls, name, bases, namespace, **kwargs) + klass._proto_field = proto_field + return klass -class IODescriptor(t.Generic[IOType], metaclass=DescriptorMeta, proto_fields=None): + +class IODescriptor(t.Generic[IOType], metaclass=DescriptorMeta, proto_field=None): """ IODescriptor describes the input/output data format of an InferenceAPI defined in a :code:`bentoml.Service`. This is an abstract base class for extending new HTTP @@ -59,30 +67,49 @@ class IODescriptor(t.Generic[IOType], metaclass=DescriptorMeta, proto_fields=Non HTTP_METHODS = ["POST"] - _init_str: str = "" - _proto_fields: list[str] - + _proto_field: str _mime_type: str + _rpc_content_type: str - def __new__(cls: t.Type[Self], *args: t.Any, **kwargs: t.Any) -> Self: + def __new__( # pylint: disable=unused-argument + cls: t.Type[Self], + *args: t.Any, + **kwargs: t.Any, + ) -> Self: self = super().__new__(cls) + # default mime type is application/json self._mime_type = "application/json" - self._init_str = cls.__qualname__ + # default grpc content type is application/grpc + self._rpc_content_type = "application/grpc" return self def __repr__(self) -> str: - return self._init_str + return self.__class__.__qualname__ @property - def accepted_proto_fields(self) -> list[str]: + def proto_field(self) -> ProtoField: """ Returns the proto field that the IODescriptor accepts. - Make sure to keep in sync with bentoml.grpc.v1.Value message. + Make sure to keep in sync with bentoml.grpc.v1.Request message.
""" - return self._proto_fields + return t.cast("ProtoField", self._proto_field) + + @property + def grpc_content_type(self) -> str: + generic_content_type = ["application/octet-stream", "text/plain"] + if self._mime_type in generic_content_type or self._mime_type.startswith( + "multipart" + ): + return f"{self._rpc_content_type}+proto" + + return f"{self._rpc_content_type}+{self._mime_type.split('/')[-1]}" + + @grpc_content_type.setter + def grpc_content_type(self, value: str) -> None: + self._rpc_content_type = value @abstractmethod def input_type(self) -> InputType: @@ -114,13 +141,23 @@ async def to_http_response( ) -> Response: ... + @overload @abstractmethod - def generate_protobuf(self): + async def from_grpc_request( + self, request: GRPCRequest, context: BentoServicerContext + ) -> IOType: ... + @overload @abstractmethod async def from_grpc_request( - self, request: GRPCRequest, context: BentoServicerContext + self, request: GRPCPart, context: BentoServicerContext + ) -> IOType: + ... + + @abstractmethod + async def from_grpc_request( + self, request: GRPCRequest | GRPCPart, context: BentoServicerContext ) -> IOType: ... @@ -129,3 +166,6 @@ async def to_grpc_response( self, obj: IOType, context: BentoServicerContext ) -> GRPCResponse: ... + + # feat: add generate_protobuf(self) + # to generate protobuf from python object diff --git a/bentoml/_internal/io_descriptors/file.py b/bentoml/_internal/io_descriptors/file.py index 68b4e37a3e3..90c8d149dda 100644 --- a/bentoml/_internal/io_descriptors/file.py +++ b/bentoml/_internal/io_descriptors/file.py @@ -12,8 +12,10 @@ from .base import IODescriptor from ..types import FileLike +from ..utils import LazyLoader from ..utils.http import set_cookies from ...exceptions import BentoMLException +from ...exceptions import UnprocessableEntity from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import Response as OpenAPIResponse @@ -23,13 +25,19 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext + from ..context import InferenceApiContext as Context FileKind: t.TypeAlias = t.Literal["binaryio", "textio"] +else: + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") + FileType: t.TypeAlias = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]] -class File(IODescriptor[FileType], proto_fields=["raw_value"]): +class File(IODescriptor[FileType], proto_field="file"): """ :obj:`File` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from file-like objects as @@ -100,10 +108,12 @@ async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]: """ + _kind: str + def __new__( # pylint: disable=arguments-differ # returning subclass from new cls, kind: FileKind = "binaryio", mime_type: str | None = None ) -> File: - mime_type = mime_type if mime_type is not None else "application/octet-stream" + mime_type = mime_type if mime_type else "application/octet-stream" if kind == "binaryio": res = object.__new__(BytesIOFile) @@ -111,6 +121,8 @@ def __new__( # pylint: disable=arguments-differ # returning subclass from new raise ValueError(f"invalid File kind '{kind}'") res._mime_type = mime_type + res._kind = kind + return res def input_type(self) -> t.Type[t.Any]: @@ -134,11 +146,7 @@ def openapi_responses(self) -> OpenAPIResponse: content={self._mime_type: 
MediaType(schema=self.openapi_schema())}, ) - async def to_http_response( - self, - obj: FileType, - ctx: Context | None = None, - ): + async def to_http_response(self, obj: FileType, ctx: Context | None = None): if isinstance(obj, bytes): body = obj else: @@ -155,14 +163,45 @@ async def to_http_response( res = Response(body) return res - def generate_protobuf(self): - pass + async def to_grpc_response( + self, obj: FileType, context: BentoServicerContext + ) -> pb.Response: + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc.mapping import mimetype_to_filetype_pb_map + + if self._mime_type.startswith("multipart"): + raise_grpc_exception( + "'multipart' Content-Type is not yet supported for parsing files in gRPC. Use Multipart() instead.", + context=context, + exception_cls=UnprocessableEntity, + ) - async def from_grpc_request(self, request, context) -> t.Any: - pass + if isinstance(obj, bytes): + body = obj + else: + body = obj.read() - async def to_grpc_response(self, obj, context) -> t.Any: - pass + context.set_trailing_metadata((("content-type", self.grpc_content_type),)) + + mapping = mimetype_to_filetype_pb_map() + + try: + kind = mapping[self._mime_type] + except KeyError: + raise_grpc_exception( + f"{self._mime_type} doesn't have a corresponding File 'kind'", + context=context, + ) + + return pb.Response(file=pb.File(kind=kind, content=body)) + + async def from_http_request(self, request: Request) -> FileType: + raise NotImplementedError("File.from_http_request is not implemented.") + + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> FileType: + raise NotImplementedError("File.from_grpc_request is not implemented.") class BytesIOFile(File): @@ -192,3 +231,41 @@ async def from_http_request(self, request: Request) -> t.IO[bytes]: raise BentoMLException( f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got {content_type} instead" ) + + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> FileLike[bytes]: + from ..utils.grpc import get_field + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + from ..utils.grpc.mapping import filetype_pb_to_mimetype_map + + if self._mime_type.startswith("multipart"): + raise_grpc_exception( + "'multipart' Content-Type is not yet supported for parsing files in gRPC. Use Multipart() instead.", + context=context, + exception_cls=UnprocessableEntity, + ) + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + # check if the request message has the correct field + field = get_field(request, self) + mapping = filetype_pb_to_mimetype_map() + + if field.kind: + try: + mime_type = mapping[field.kind] + if mime_type != self._mime_type: + raise_grpc_exception( + f"Inferred mime_type from 'kind' is '{mime_type}', while '{repr(self)}' is expecting '{self._mime_type}'", + context=context, + ) + except KeyError: + raise_grpc_exception( + f"{field.kind} is not a valid File kind. 
Accepted file kind: {set(mapping)}", + context=context, + ) + + return FileLike[bytes](io.BytesIO(field.content), "") diff --git a/bentoml/_internal/io_descriptors/image.py b/bentoml/_internal/io_descriptors/image.py index a563756de9c..24c5a0ae664 100644 --- a/bentoml/_internal/io_descriptors/image.py +++ b/bentoml/_internal/io_descriptors/image.py @@ -16,6 +16,7 @@ from ...exceptions import BadInput from ...exceptions import InvalidArgument from ...exceptions import InternalServerError +from ...exceptions import UnprocessableEntity from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema from ..service.openapi.specification import Response as OpenAPIResponse @@ -27,6 +28,9 @@ import PIL.Image + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext + from .. import external_typing as ext from ..context import InferenceApiContext as Context @@ -37,19 +41,21 @@ # NOTE: pillow-simd only benefits users who want to do preprocessing # TODO: add options for users to choose between simd and native mode - _exc = f"'Pillow' is required to use {__name__}. Install with: 'pip install -U Pillow'." + _exc = f"'Pillow' is required to use '{__name__}.Image'. Install with: 'pip install -U Pillow'." PIL = LazyLoader("PIL", globals(), "PIL", exc_msg=_exc) PIL.Image = LazyLoader("PIL.Image", globals(), "PIL.Image", exc_msg=_exc) + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") + # NOTE: we will keep types in quotation to preserve backward compatibility # with numpy < 1.20, since we will use the latest stubs from the main branch of numpy # that enable a new way to type hint an ndarray. -ImageType: t.TypeAlias = t.Union["PIL.Image.Image", "ext.NpNDArray"] +ImageType = t.Union["PIL.Image.Image", "ext.NpNDArray"] DEFAULT_PIL_MODE = "RGB" -class Image(IODescriptor[ImageType], proto_fields=["raw_value"]): +class Image(IODescriptor[ImageType], proto_field="file"): """ :obj:`Image` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from images as specified @@ -142,12 +148,6 @@ def __init__( pilmode: _Mode | None = DEFAULT_PIL_MODE, mime_type: str = "image/jpeg", ): - try: - import PIL.Image - except ImportError: - raise InternalServerError( - "`Pillow` is required to use {__name__}\n Instructions: `pip install -U Pillow`" - ) PIL.Image.init() self.MIME_EXT_MAPPING.update({v: k for k, v in PIL.Image.MIME.items()}) @@ -197,8 +197,7 @@ async def from_http_request(self, request: Request) -> ImageType: bytes_ = await request.body() else: raise BadInput( - f"{self.__class__.__name__} should get `multipart/form-data`, " - f"`{self._mime_type}` or `image/*`, got {content_type} instead" + f"{self.__class__.__name__} should get 'multipart/form-data', '{self._mime_type}' or 'image/*', got '{content_type}' instead." ) return PIL.Image.open(io.BytesIO(bytes_)) @@ -211,8 +210,7 @@ async def to_http_response( image = obj else: raise InternalServerError( - f"Unsupported Image type received: {type(obj)}, `{self.__class__.__name__}`" " only supports `np.ndarray` and `PIL.Image`" + f"Unsupported Image type received: '{type(obj)}', '{self.__class__.__name__}' only supports 'np.ndarray' and 'PIL.Image'."
) filename = f"output.{self._format.lower()}" @@ -246,11 +244,80 @@ async def to_http_response( headers={"content-disposition": content_disposition}, ) - def generate_protobuf(self): - pass + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> ImageType: + from ..utils.grpc import get_field + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + from ..utils.grpc.mapping import filetype_pb_to_mimetype_map + + if self._mime_type.startswith("multipart"): + raise_grpc_exception( + "'multipart' Content-Type is not yet supported for parsing files in gRPC. Use Multipart() instead.", + context=context, + exception_cls=UnprocessableEntity, + ) - async def from_grpc_request(self, request, context) -> t.Any: - pass + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + # check if the request message has the correct field + field = get_field(request, self) + mapping = filetype_pb_to_mimetype_map() + + if field.kind: + try: + mime_type = mapping[field.kind] + if mime_type != self._mime_type: + raise_grpc_exception( + f"Inferred mime_type from 'kind' is '{mime_type}', while '{repr(self)}' is expecting '{self._mime_type}'", + context=context, + ) + except KeyError: + raise_grpc_exception( + f"{field.kind} is not a valid File kind. Accepted file kind: {set(mapping)}", + context=context, + ) + + return PIL.Image.open(io.BytesIO(field.content)) + + async def to_grpc_response( + self, obj: ImageType, context: BentoServicerContext + ) -> pb.Response: + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc.mapping import mimetype_to_filetype_pb_map + + if self._mime_type.startswith("multipart"): + raise_grpc_exception( + "'multipart' Content-Type is not yet supported for parsing files in gRPC. 
Use Multipart() instead.", + context=context, + exception_cls=UnprocessableEntity, + ) - async def to_grpc_response(self, obj, context) -> t.Any: - pass + if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(obj): + image = PIL.Image.fromarray(obj, mode=self._pilmode) + elif LazyType[PIL.Image.Image]("PIL.Image.Image").isinstance(obj): + image = obj + else: + raise_grpc_exception( + f"Unsupported Image type received: '{type(obj)}', '{self.__class__.__name__}' only supports 'np.ndarray' and 'PIL.Image'.", + context=context, + exception_cls=InternalServerError, + ) + ret = io.BytesIO() + image.save(ret, format=self._format) + + context.set_trailing_metadata((("content-type", self.grpc_content_type),)) + + mapping = mimetype_to_filetype_pb_map() + + try: + kind = mapping[self._mime_type] + except KeyError: + raise_grpc_exception( + f"{self._mime_type} doesn't have a corresponding File 'kind'", + context=context, + ) + + return pb.Response(file=pb.File(kind=kind, content=ret.getvalue())) diff --git a/bentoml/_internal/io_descriptors/json.py b/bentoml/_internal/io_descriptors/json.py index 8ed104ef86f..e8a855eecc5 100644 --- a/bentoml/_internal/io_descriptors/json.py +++ b/bentoml/_internal/io_descriptors/json.py @@ -10,12 +10,14 @@ from starlette.requests import Request from starlette.responses import Response +from bentoml.exceptions import BadInput +from bentoml.exceptions import UnprocessableEntity + from .base import IODescriptor from ..types import LazyType from ..utils import LazyLoader from ..utils import bentoml_cattr from ..utils.http import set_cookies -from ...exceptions import BadInput from ..service.openapi import REF_PREFIX from ..service.openapi import SUCCESS_DESCRIPTION from ..service.openapi.specification import Schema @@ -29,25 +31,30 @@ import pydantic import pydantic.schema as schema + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext + from .. import external_typing as ext from ..context import InferenceApiContext as Context - _Serializable = ext.NpNDArray | ext.PdDataFrame | t.Type[pydantic.BaseModel] | type else: _exc_msg = "'pydantic' must be installed to use 'pydantic_model'. Install with 'pip install pydantic'." pydantic = LazyLoader("pydantic", globals(), "pydantic", exc_msg=_exc_msg) schema = LazyLoader("schema", globals(), "pydantic.schema", exc_msg=_exc_msg) + # lazy load our generated proto module. + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") + # lazy load numpy for processing ndarray.
+ np = LazyLoader("np", globals(), "numpy") -JSONType = t.Union[str, t.Dict[str, t.Any], "pydantic.BaseModel", None] -MIME_TYPE_JSON = "application/json" +JSONType = t.Union[str, t.Dict[str, t.Any], "pydantic.BaseModel", None] logger = logging.getLogger(__name__) class DefaultJsonEncoder(json.JSONEncoder): - def default(self, o: _Serializable) -> t.Any: + def default(self, o: type) -> t.Any: if dataclasses.is_dataclass(o): return dataclasses.asdict(o) if LazyType["ext.NpNDArray"]("numpy.ndarray").isinstance(o): @@ -63,13 +70,13 @@ def default(self, o: _Serializable) -> t.Any: if "__root__" in obj_dict: obj_dict = obj_dict.get("__root__") return obj_dict - if attr.has(o): # type: ignore (trivial case) + if attr.has(o): return bentoml_cattr.unstructure(o) return super().default(o) -class JSON(IODescriptor[JSONType], proto_fields=["map_value", "raw_value"]): +class JSON(IODescriptor[JSONType], proto_field="json"): """ :obj:`JSON` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from a JSON representation @@ -168,14 +175,13 @@ def classify(input_data: IrisFeatures) -> NDArray[Any]: :obj:`JSON`: IO Descriptor that represents JSON format. """ - _mime_type: str = MIME_TYPE_JSON - def __init__( self, *, pydantic_model: t.Type[pydantic.BaseModel] | None = None, validate_json: bool | None = None, json_encoder: t.Type[json.JSONEncoder] = DefaultJsonEncoder, + _chunk_workers: int = 10, ): if pydantic_model: assert issubclass( @@ -185,6 +191,8 @@ def __init__( self._pydantic_model = pydantic_model self._json_encoder = json_encoder + self._chunk_workers = _chunk_workers + # Remove validate_json in version 1.0.2 if validate_json is not None: logger.warning( @@ -229,19 +237,21 @@ def openapi_responses(self) -> OpenAPIResponse: content={self._mime_type: MediaType(schema=self.openapi_schema())}, ) - async def from_http_request(self, request: Request) -> JSONType: + async def from_http_request( + self, request: Request + ) -> JSONType | pydantic.BaseModel: json_str = await request.body() try: json_obj = json.loads(json_str) except json.JSONDecodeError as e: - raise BadInput(f"Invalid JSON input received: {e}") from None + raise BadInput(f"Invalid JSON input received: {e}") from e - if self._pydantic_model is not None: + if self._pydantic_model: try: pydantic_model = self._pydantic_model.parse_obj(json_obj) return pydantic_model except pydantic.ValidationError as e: - raise BadInput(f"Invalid JSON input received: {e}") from None + raise BadInput(f"Invalid JSON input received: {e}") from e else: return json_obj @@ -267,20 +277,75 @@ async def to_http_response( if ctx is not None: res = Response( json_str, - media_type=MIME_TYPE_JSON, + media_type=self._mime_type, headers=ctx.response.metadata, # type: ignore (bad starlette types) status_code=ctx.response.status_code, ) set_cookies(res, ctx.response.cookies) return res else: - return Response(json_str, media_type=MIME_TYPE_JSON) + return Response(json_str, media_type=self._mime_type) + + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> JSONType | pydantic.BaseModel: + from google.protobuf.json_format import MessageToDict + + from ..utils.grpc import get_field + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + json_obj = MessageToDict( + get_field(request, self), 
preserving_proto_field_name=True + ) + + if self._pydantic_model: + try: + return self._pydantic_model.parse_obj(json_obj) + except pydantic.ValidationError as e: + raise_grpc_exception( + f"Invalid JSON input received: {e}", + context=context, + exception_cls=UnprocessableEntity, + ) + + return json_obj - def generate_protobuf(self): - pass + async def to_grpc_response( + self, obj: JSONType | pydantic.BaseModel, context: BentoServicerContext + ) -> pb.Response: + from google.protobuf.struct_pb2 import Value + from google.protobuf.json_format import Parse + + from ..utils.grpc import raise_grpc_exception + + # use pydantic model for validation. + if self._pydantic_model: + try: + self._pydantic_model.parse_obj(obj) + except pydantic.ValidationError as e: + raise_grpc_exception( + f"Invalid JSON input received: {e}", + context=context, + exception_cls=BadInput, + ) + + if isinstance(obj, pydantic.BaseModel): + obj = obj.dict() + + if obj: + json_str = json.dumps( + obj, + cls=self._json_encoder, + ensure_ascii=False, + allow_nan=False, + indent=None, + separators=(",", ":"), + ) - async def from_grpc_request(self, request, context) -> t.Any: - pass + return pb.Response(json=Parse(json_str, Value())) - async def to_grpc_response(self, obj, context) -> t.Any: - pass + return pb.Response(json=Value()) diff --git a/bentoml/_internal/io_descriptors/multipart.py b/bentoml/_internal/io_descriptors/multipart.py index de4f76a162b..58ab364babb 100644 --- a/bentoml/_internal/io_descriptors/multipart.py +++ b/bentoml/_internal/io_descriptors/multipart.py @@ -21,11 +21,14 @@ if TYPE_CHECKING: from types import UnionType + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext + from ..types import LazyType from ..context import InferenceApiContext as Context -class Multipart(IODescriptor[t.Any]): +class Multipart(IODescriptor[t.Any], proto_field="multipart"): """ :obj:`Multipart` defines API specification for the inputs/outputs of a Service, where inputs/outputs of a Service can receive/send a **multipart** request/responses as specified in your API function signature. @@ -154,12 +157,11 @@ async def predict( """ def __init__(self, **inputs: IODescriptor[t.Any]): - for descriptor in inputs.values(): - if isinstance(descriptor, Multipart): # pragma: no cover - raise InvalidArgument( - "Multipart IO can not contain nested Multipart IO descriptor" - ) - self._inputs: dict[str, t.Any] = inputs + if any(isinstance(descriptor, Multipart) for descriptor in inputs.values()): + raise InvalidArgument( + "Multipart IO can not contain nested Multipart IO descriptor" + ) + self._inputs = inputs self._mime_type = "multipart/form-data" def input_type( @@ -204,29 +206,69 @@ async def from_http_request(self, request: Request) -> dict[str, t.Any]: f"{self.__class__.__name__} only accepts `multipart/form-data` as Content-Type header, got {ctype} instead." 
) - res: dict[str, t.Any] = dict() reqs = await populate_multipart_requests(request) - for k, i in self._inputs.items(): - req = reqs[k] - v = await i.from_http_request(req) - res[k] = v - return res + return { + key: await io_.from_http_request(reqs[key]) + for key, io_ in self._inputs.items() + } async def to_http_response( self, obj: dict[str, t.Any], ctx: Context | None = None ) -> Response: - res_mapping: dict[str, Response] = {} - for k, io_ in self._inputs.items(): - data = obj[k] - res_mapping[k] = await io_.to_http_response(data, ctx) + res_mapping: dict[str, Response] = { + key: await io_.to_http_response(obj[key], ctx) + for key, io_ in self._inputs.items() + } return await concat_to_multipart_response(res_mapping, ctx) - def generate_protobuf(self): - raise RuntimeError("Multipart IO is not supported in gRPC.") + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> dict[str, t.Any]: + from ..utils.grpc import get_field + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + field = get_field(request, self) + + if len(set(field) - set(self._inputs)) != 0: + raise_grpc_exception( + f"'{self.__class__.__name__}' only accepts '{set(self._inputs)}' as input fields. Invalid fields are: {set(field) - set(self._inputs)}", + context=context, + ) + + return { + key: await self._inputs[key].from_grpc_request(input_pb, context) + for key, input_pb in field.items() + } - async def from_grpc_request(self, request, context) -> t.Any: - raise RuntimeError("Multipart IO is not supported in gRPC.") + async def to_grpc_response( + self, obj: dict[str, t.Any], context: BentoServicerContext + ) -> pb.Response: + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + context.set_trailing_metadata((("content-type", self.grpc_content_type),)) + + if len(set(obj) - set(self._inputs)) != 0: + raise_grpc_exception( + f"'{self.__class__.__name__}' only accepts '{set(self._inputs)}' as output fields. Invalid fields are: {set(obj) - set(self._inputs)}", + context=context, + exception_cls=InvalidArgument, + ) + multipart_map: dict[str, pb.Part] = {} + + for key in obj: + io_descriptor = self._inputs[key] + resp = await io_descriptor.to_grpc_response(obj[key], context) + part = pb.Part( + **{io_descriptor.proto_field: getattr(resp, io_descriptor.proto_field)} + ) + multipart_map[key] = part - async def to_grpc_response(self, obj, context) -> t.Any: - raise RuntimeError("Multipart IO is not supported in gRPC.") + return pb.Response(multipart=multipart_map) diff --git a/bentoml/_internal/io_descriptors/numpy.py b/bentoml/_internal/io_descriptors/numpy.py index 9c6c2792e91..4eb5e70272e 100644 --- a/bentoml/_internal/io_descriptors/numpy.py +++ b/bentoml/_internal/io_descriptors/numpy.py @@ -9,7 +9,6 @@ from starlette.responses import Response from .base import IODescriptor -from .json import MIME_TYPE_JSON from ..types import LazyType from ..utils import LazyLoader from ..utils.http import set_cookies @@ -26,50 +25,17 @@ if TYPE_CHECKING: import numpy as np - from bentoml.grpc.v1 import service_pb2 + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext from .. 
import external_typing as ext from ..context import InferenceApiContext as Context - from ..server.grpc.types import BentoServicerContext else: np = LazyLoader("np", globals(), "numpy") - service_pb2 = LazyLoader("service_pb2", globals(), "bentoml.grpc.v1.service_pb2") + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") logger = logging.getLogger(__name__) -# TODO: support the following types for for protobuf message: -# - support complex64, complex128, object and struct types -# - BFLOAT16, QINT32, QINT16, QUINT16, QINT8, QUINT8 -# -# For int16, uint16, int8, uint8 -> specify types in NumpyNdarray + using int_values. -# -# For bfloat16, half (float16) -> specify types in NumpyNdarray + using float_values. -# -# for string_values, use {"float_contents": [1, 2, 3]} -def get_array_proto(array: dict[str, t.Any]) -> tuple[str, list[t.Any]]: - # returns the array contents with whether the result is using bytes. - accepted_fields = list(service_pb2.Array.DESCRIPTOR.fields_by_name) - if len(set(array) - set(accepted_fields)) > 0: - raise UnprocessableEntity("Given array has unsupported fields.") - if len(array) != 1: - raise BadInput( - f"Array contents can only be one of {accepted_fields} as key. Use one of {list(array)} only." - ) - return tuple(array.items())[0] - def _is_matched_shape(left: tuple[int, ...], right: tuple[int, ...]) -> bool: if (left is None) or (right is None): @@ -88,10 +54,7 @@ def _is_matched_shape(left: tuple[int, ...], right: tuple[int, ...]) -> bool: # TODO: when updating docs, add examples with gRPCurl -class NumpyNdarray( - IODescriptor["ext.NpNDArray"], - proto_fields=["multi_dimensional_array_value", "array_value", "raw_value"], -): +class NumpyNdarray(IODescriptor["ext.NpNDArray"], proto_field="ndarray"): """ :obj:`NumpyNdarray` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from type @@ -180,7 +143,6 @@ def __init__( enforce_dtype: bool = False, shape: tuple[int, ...] | None = None, enforce_shape: bool = False, - packed: bool = False, bytesorder: t.Literal["C", "F", "A", None] = None, ): if dtype and not isinstance(dtype, np.dtype): @@ -188,9 +150,7 @@ def __init__( try: dtype = np.dtype(dtype) except TypeError as e: - raise UnprocessableEntity( - f'NumpyNdarray: Invalid dtype "{dtype}": {e}' - ) from e + raise UnprocessableEntity(f'Invalid dtype "{dtype}": {e}') from e self._dtype = dtype self._shape = shape @@ -199,18 +159,25 @@ def __init__( self._sample_input = None - # whether to use packed representation of numpy while sending protobuf - # this means users should be using raw_value instead of array_value or multi_dimensional_array_value - self._packed = packed if bytesorder and bytesorder not in ["C", "F", "A"]: raise BadInput( f"'bytesorder' must be one of ['C', 'F', 'A'], got {bytesorder} instead." ) if not bytesorder: - bytesorder = "C" # default from numpy (C-order) # https://numpy.org/doc/stable/user/basics.byteswapping.html#introduction-to-byte-ordering-and-ndarrays + bytesorder = "C" # default from numpy (C-order) + self._bytesorder: t.Literal["C", "F", "A"] = bytesorder + if self._enforce_dtype and not self._dtype: + raise UnprocessableEntity( + "'dtype' must be specified when 'enforce_dtype=True'" + ) + if self._enforce_shape and not self._shape: + raise UnprocessableEntity( + "'shape' must be specified when 'enforce_shape=True'" + ) + def _openapi_types(self) -> str: # convert numpy dtypes to openapi compatible types. 
var_type = "integer" @@ -270,35 +237,35 @@ def openapi_responses(self) -> OpenAPIResponse: }, ) - def _verify_ndarray( + def validate_array( self, - obj: ext.NpNDArray, - dtype: ext.NpDTypeLike | None, - shape: tuple[int, ...] | None, + arr: ext.NpNDArray, + dtype: ext.NpDTypeLike | None = None, + shape: tuple[int, ...] | None = None, exception_cls: t.Type[Exception] = BadInput, ) -> ext.NpNDArray: - if dtype is not None and dtype != obj.dtype: + if dtype is not None and dtype != arr.dtype: # ‘same_kind’ means only safe casts or casts within a kind, like float64 # to float32, are allowed. - if np.can_cast(obj.dtype, dtype, casting="same_kind"): - obj = obj.astype(dtype, casting="same_kind") # type: ignore + if np.can_cast(arr.dtype, dtype, casting="same_kind"): + arr = arr.astype(dtype, casting="same_kind") # type: ignore else: - msg = f'{self.__class__.__name__}: Expecting ndarray of dtype "{dtype}", but "{obj.dtype}" was received.' + msg = f'{self.__class__.__name__}: Expecting ndarray of dtype "{dtype}", but "{arr.dtype}" was received.' if self._enforce_dtype: raise exception_cls(msg) else: logger.debug(msg) - if shape is not None and not _is_matched_shape(shape, obj.shape): - msg = f'{self.__class__.__name__}: Expecting ndarray of shape "{shape}", but "{obj.shape}" was received.' + if shape is not None and not _is_matched_shape(shape, arr.shape): + msg = f'{self.__class__.__name__}: Expecting ndarray of shape "{shape}", but "{arr.shape}" was received.' if self._enforce_shape: raise exception_cls(msg) try: - obj = obj.reshape(shape) + arr = arr.reshape(shape) except ValueError as e: logger.debug(f"{msg} Failed to reshape: {e}.") - return obj + return arr async def from_http_request(self, request: Request) -> ext.NpNDArray: """ @@ -316,7 +283,7 @@ async def from_http_request(self, request: Request) -> ext.NpNDArray: res = np.array(obj, dtype=self._dtype) except ValueError: res = np.array(obj) - return self._verify_ndarray(res, dtype=self._dtype, shape=self._shape) + return self.validate_array(res, dtype=self._dtype, shape=self._shape) async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None): """ @@ -330,174 +297,20 @@ async def to_http_response(self, obj: ext.NpNDArray, ctx: Context | None = None) HTTP Response of type ``starlette.responses.Response``. This can be accessed via cURL or any external web traffic. """ - obj = self._verify_ndarray( + obj = self.validate_array( obj, dtype=self._dtype, shape=self._shape, exception_cls=InternalServerError ) if ctx is not None: res = Response( json.dumps(obj.tolist()), - media_type=MIME_TYPE_JSON, + media_type=self._mime_type, headers=ctx.response.metadata, # type: ignore (bad starlette types) status_code=ctx.response.status_code, ) set_cookies(res, ctx.response.cookies) return res else: - return Response(json.dumps(obj.tolist()), media_type=MIME_TYPE_JSON) - - async def from_grpc_request( - self, request: service_pb2.Request, context: BentoServicerContext - ) -> ext.NpNDArray: - """ - Process incoming protobuf request and convert it to `numpy.ndarray` - - Args: - request: Incoming Requests - - Returns: - a `numpy.ndarray` object. This can then be used - inside users defined logics. - """ - import grpc - - from ..utils.grpc import deserialize_proto - - # TODO: deserialize is pretty inefficient, but ok for first pass. - field, serialized = deserialize_proto(self, request) - - if self._packed: - if field != "raw_value": - raise BentoMLException( - f"'packed={self._packed}' requires to use 'raw_value' instead of {field}." 
- ) - if not self._shape: - raise UnprocessableEntity("'shape' is required when 'packed' is set.") - - metadata = serialized["metadata"] - if not self._dtype: - if "dtype" not in metadata: - raise BentoMLException( - f"'dtype' is not found in both {repr(self)} and {metadata}. Set either 'dtype' in {self.__class__.__name__} or add 'dtype' to metadata for 'raw_value' message." - ) - dtype = metadata["dtype"] - else: - dtype = self._dtype - obj = np.frombuffer(serialized["content"], dtype=dtype) - - return np.reshape(obj, self._shape) - - if field == "multi_dimensional_array_value": - # {'shape': [2, 3], 'array': {'dtype': 'DT_FLOAT', ...}} - if "array" not in serialized: - msg = "'array' cannot be None." - context.set_code(grpc.StatusCode.INVALID_ARGUMENT) - context.set_details(msg) - raise BadInput(msg) - - shape = tuple(serialized["shape"]) - if self._shape: - if not self._enforce_shape: - logger.warning( - f"'shape={self._shape},enforce_shape={self._enforce_shape}' is set with {self.__class__.__name__}, while 'shape' field is present in request message. To avoid this warning, set 'enforce_shape=True'. Using 'shape={shape}' from request message." - ) - else: - logger.debug( - f"'enforce_shape={self._enforce_shape}', ignoring 'shape' field in request message." - ) - shape = self._shape - - array = serialized["array"] - else: - # {'float_contents': [1.0, 2.0, 3.0]} - array = serialized - shape = self._shape - - dtype_string, content = get_array_proto(array) - dtype = np.dtype(_VALUES_TO_NP_DTYPE_MAP[dtype_string]) - if self._dtype: - if not self._enforce_dtype: - logger.warning( - f"'dtype={self._dtype},enforce_dtype={self._enforce_dtype}' is set with {self.__class__.__name__}, while 'dtype' field is present in request message. To avoid this warning, set 'enforce_dtype=True'. Using 'dtype={dtype}' from request message." - ) - else: - logger.debug( - f"'enforce_dtype={self._enforce_dtype}', ignoring 'dtype' field in request message." - ) - dtype = self._dtype - - try: - res = np.array(content, dtype=dtype) - except ValueError: - res = np.array(content) - - return self._verify_ndarray(res, dtype=dtype, shape=shape) - - async def to_grpc_response( - self, obj: ext.NpNDArray, context: BentoServicerContext - ) -> service_pb2.Response: - """ - Process given objects and convert it to grpc protobuf response. - - Args: - obj: `np.ndarray` that will be serialized to protobuf - context: grpc.aio.ServicerContext from grpc.aio.Server - Returns: - `io_descriptor_pb2.Array`: - Protobuf representation of given `np.ndarray` - """ - from ..utils.grpc import grpc_status_code - from ..configuration import get_debug_mode - - _NP_TO_VALUE_MAP = {np.dtype(v): k for k, v in _VALUES_TO_NP_DTYPE_MAP.items()} - value_key = _NP_TO_VALUE_MAP[obj.dtype] - - try: - obj = self._verify_ndarray( - obj, - dtype=self._dtype, - shape=self._shape, - exception_cls=InternalServerError, - ) - except InternalServerError as e: - context.set_code(grpc_status_code(e)) - context.set_details(e.message) - raise - - response = service_pb2.Response() - value = service_pb2.Value() - - if self._packed: - raw = service_pb2.Raw( - metadata={"dtype": str(obj.dtype)}, - content=obj.tobytes(order=self._bytesorder), - ) - value.raw_value.CopyFrom(raw) - else: - if self._bytesorder and self._bytesorder != "C": - logger.warning( - f"'bytesorder={self._bytesorder}' is ignored when 'packed={self._packed}'." - ) - # we just need a view of the array, instead of copy it to contiguous memory. 
- array = service_pb2.Array(**{value_key: obj.ravel().tolist()}) - if obj.ndim != 1: - ndarray = service_pb2.MultiDimensionalArray( - shape=tuple(obj.shape), array=array - ) - value.multi_dimensional_array_value.CopyFrom(ndarray) - else: - value.array_value.CopyFrom(array) - - response.output.CopyFrom(value) - - if get_debug_mode(): - logger.debug( - f"Response proto: {response.SerializeToString(deterministic=True)}" - ) - - return response - - def generate_protobuf(self): - pass + return Response(json.dumps(obj.tolist()), media_type=self._mime_type) @classmethod def from_sample( @@ -546,15 +359,176 @@ async def predict(input: NDArray[np.int16]) -> NDArray[Any]: """ if isinstance(sample_input, np.generic): raise BentoMLException( - "NumpyNdarray.from_sample() expects a numpy.array, not numpy.generic." + "'NumpyNdarray.from_sample()' expects a 'numpy.array', not 'numpy.generic'." ) - inst = cls( + klass = cls( dtype=sample_input.dtype, shape=sample_input.shape, enforce_dtype=enforce_dtype, enforce_shape=enforce_shape, ) - inst.sample_input = sample_input + klass.sample_input = sample_input + + return klass - return inst + async def from_grpc_request( + self, request: pb.Request, context: BentoServicerContext + ) -> ext.NpNDArray: + """ + Process incoming protobuf request and convert it to ``numpy.ndarray`` + + Args: + request: Incoming RPC request message. + context: grpc.ServicerContext + + Returns: + a ``numpy.ndarray`` object. This can then be used + inside user-defined logic. + """ + from ..utils.grpc import get_field + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc import validate_content_type + from ..utils.grpc.mapping import dtypepb_to_fieldpb_map + from ..utils.grpc.mapping import dtypepb_to_npdtype_map + from ..utils.grpc.mapping import fieldpb_to_npdtype_map + from ..utils.grpc.mapping import npdtype_to_fieldpb_map + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + field = get_field(request, self) + + # The logic behind processing shape is as follows: + # 1. If 'shape' is specified in the request message, then: + # - if 'shape' is specified inside IO Descriptor, we will omit this value unless 'enforce_shape' is True. + # - Otherwise, use 'shape' from message + # + # 2. If the 'shape' is not specified inside the request message, then: + # - If 'shape' is not defined at the IO descriptor level, 'shape' value will be omitted unless 'enforce_shape' is True. + # - Otherwise, we will use shape from the IO descriptor. + if self._enforce_shape: + logger.debug( + f"'enforce_shape={self._enforce_shape}', using '{self._shape}'..." + ) + shape = self._shape + else: + shapepb = field.shape # this can be an empty list ([]) + if shapepb: + shape = tuple(shapepb) + if self._shape: + logger.warning( + f"'shape={self._shape}' is ignored when 'shape' is specified in request message. Using '{shape}' from request message..." + ) + else: + shape = self._shape + + # The same logic for 'shape' applies to 'dtype', with one addition: + # + # 2a. If the 'dtype' message is not specified, then we will infer the numpy dtype from the entry. + if self._enforce_dtype: + logger.debug( + f"'enforce_dtype={self._enforce_dtype}', using '{self._dtype}'..."
+ ) + dtype = self._dtype + field_values = npdtype_to_fieldpb_map()[dtype] + + values_arr = getattr(field, field_values, None) + if not values_arr: + raise_grpc_exception( + f"field '{field_values}' (required for '{dtype}') is missing.", + context=context, + exception_cls=UnprocessableEntity, + ) + else: + dtypepb = field.dtype + if dtypepb: + if dtypepb == pb.NDArray.DTYPE_UNSPECIFIED: + dtype = None + else: + dtype = dtypepb_to_npdtype_map()[dtypepb] + + if self._dtype: + logger.warning( + f"'dtype={self._dtype}' is ignored when 'dtype' is specified in request message. Using '{dtype}' from request message..." + ) + + values_arr = getattr(field, dtypepb_to_fieldpb_map()[dtypepb]) + + if not dtype: + # This is the case where 'dtype' proto uses DTYPE_UNSPECIFIED + return np.empty(shape=shape or 0) + else: + fieldpb = [ + f.name for f, _ in field.ListFields() if f.name.endswith("_values") + ] + if len(fieldpb) != 1: + raise_grpc_exception( + f"Array contents can only be one of given values key. Use one of {fieldpb} instead.", + context=context, + exception_cls=BadInput, + ) + dtype = fieldpb_to_npdtype_map()[fieldpb[0]] + values_arr = getattr(field, fieldpb[0]) + + if shape and dtype in [np.float32, np.double, np.bool_]: + buffer = field.SerializeToString() + num_entries = np.prod(shape) + return np.frombuffer( + memoryview(buffer[-(dtype.itemsize * num_entries) :]), + dtype=dtype, + count=num_entries, + offset=0, + ).reshape(shape) + else: + try: + res = np.array(values_arr, dtype=dtype) + except ValueError: + res = np.array(values_arr) + + return self.validate_array(res, dtype=dtype, shape=shape) + + async def to_grpc_response( + self, obj: ext.NpNDArray, context: BentoServicerContext + ) -> pb.Response: + """ + Process given objects and convert it to grpc protobuf response. + + Args: + obj: `np.ndarray` that will be serialized to protobuf + context: grpc.aio.ServicerContext from grpc.aio.Server + Returns: + `io_descriptor_pb2.Array`: + Protobuf representation of given `np.ndarray` + """ + from ..utils.grpc import raise_grpc_exception + from ..utils.grpc.mapping import npdtype_to_dtypepb_map + from ..utils.grpc.mapping import npdtype_to_fieldpb_map + + try: + obj = self.validate_array( + obj, + dtype=self._dtype, + shape=self._shape, + exception_cls=InternalServerError, + ) + except InternalServerError as e: + raise_grpc_exception(e.message, context=context, exception_cls=e.__class__) + + context.set_trailing_metadata((("content-type", self.grpc_content_type),)) + + try: + fieldpb = npdtype_to_fieldpb_map()[obj.dtype] + dtypepb = npdtype_to_dtypepb_map()[obj.dtype] + return pb.Response( + ndarray=pb.NDArray( + dtype=dtypepb, + shape=tuple(obj.shape), + **{fieldpb: obj.ravel(order=self._bytesorder).tolist()}, + ) + ) + except KeyError: + raise_grpc_exception( + f"Unsupported dtype '{obj.dtype}' for response message.", + context=context, + ) diff --git a/bentoml/_internal/io_descriptors/pandas.py b/bentoml/_internal/io_descriptors/pandas.py index 0fb77f49bd1..558b4fbd7f9 100644 --- a/bentoml/_internal/io_descriptors/pandas.py +++ b/bentoml/_internal/io_descriptors/pandas.py @@ -12,7 +12,6 @@ from starlette.responses import Response from .base import IODescriptor -from .json import MIME_TYPE_JSON from ..types import LazyType from ..utils.http import set_cookies from ...exceptions import BadInput @@ -36,7 +35,7 @@ "pd", globals(), "pandas", - exc_msg="`pandas` is required to use PandasDataFrame or PandasSeries. 
Install with `pip install -U pandas`", + exc_msg="'pandas' is required to use PandasDataFrame or PandasSeries. Install with 'pip install -U pandas'", ) logger = logging.getLogger(__name__) @@ -111,15 +110,12 @@ def _infer_serialization_format_from_request( return SerializationFormat.CSV elif content_type: logger.debug( - "Unknown content-type (%s), falling back to %s serialization format.", - content_type, - default_format, + f"Unknown content-type ('{content_type}'), falling back to '{default_format}' serialization format.", ) return default_format else: logger.debug( - "Content-type not specified, falling back to %s serialization format.", - default_format, + f"Content-type not specified, falling back to '{default_format}' serialization format.", ) return default_format @@ -134,9 +130,7 @@ def _validate_serialization_format(serialization_format: SerializationFormat): ) -class PandasDataFrame( - IODescriptor["ext.PdDataFrame"], proto_fields=["map_value", "raw_value"] -): +class PandasDataFrame(IODescriptor["ext.PdDataFrame"], proto_field="dataframe"): """ :obj:`PandasDataFrame` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from type @@ -205,7 +199,7 @@ def predict(input_arr): - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` -> {``index`` ↠ ``value``}} + - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays columns: List of columns name that users wish to update. apply_column_names: Whether to update incoming DataFrame columns. If :code:`apply_column_names=True`, @@ -415,15 +409,6 @@ async def to_http_response( else: return Response(resp, media_type=serialization_format.mime_type) - def generate_protobuf(self): - pass - - async def from_grpc_request(self, request, context) -> t.Any: - pass - - async def to_grpc_response(self, obj, context) -> t.Any: - pass - @classmethod def from_sample( cls, @@ -446,7 +431,7 @@ def from_sample( - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` -> {``index`` ↠ ``value``}} + - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays apply_column_names: Update incoming DataFrame columns. ``columns`` must be specified at function signature. If you don't want to enforce a specific columns @@ -496,10 +481,14 @@ def predict(inputs: pd.DataFrame) -> pd.DataFrame: ... 
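As an aside on the `orient` options documented above: the serialized layouts are easiest to see by rendering one small frame under each orient. A quick illustration using plain pandas (not part of this diff):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# The same frame under three of the orients listed above.
print(df.to_json(orient="records"))
# [{"a":1,"b":3},{"a":2,"b":4}]
print(df.to_json(orient="split"))
# {"columns":["a","b"],"index":[0,1],"data":[[1,3],[2,4]]}
print(df.to_json(orient="columns"))
# {"a":{"0":1,"1":2},"b":{"0":3,"1":4}}
```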
return inst + async def from_grpc_request(self, request, context) -> t.Any: + pass + + async def to_grpc_response(self, obj, context) -> t.Any: + pass + -class PandasSeries( - IODescriptor["ext.PdSeries"], proto_fields=["map_value", "raw_value"] -): +class PandasSeries(IODescriptor["ext.PdSeries"], proto_field="series"): """ :code:`PandasSeries` defines API specification for the inputs/outputs of a Service, where either inputs will be converted to or outputs will be converted from type @@ -564,7 +553,7 @@ def predict(input_arr): - :obj:`split` - :code:`dict[str, Any]` ↦ {``idx`` ↠ ``[idx]``, ``columns`` ↠ ``[columns]``, ``data`` ↠ ``[values]``} - :obj:`records` - :code:`list[Any]` ↦ [{``column`` ↠ ``value``}, ..., {``column`` ↠ ``value``}] - :obj:`index` - :code:`dict[str, Any]` ↦ {``idx`` ↠ {``column`` ↠ ``value``}} - - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` -> {``index`` ↠ ``value``}} + - :obj:`columns` - :code:`dict[str, Any]` ↦ {``column`` ↠ {``index`` ↠ ``value``}} - :obj:`values` - :code:`dict[str, Any]` ↦ Values arrays columns: List of columns name that users wish to update. apply_column_names (`bool`, `optional`, default to :code:`False`): @@ -595,8 +584,6 @@ def infer(input_df: pd.DataFrame) -> pd.DataFrame: :obj:`PandasSeries`: IO Descriptor that represents a :code:`pd.Series`. """ - _mime_type: str = MIME_TYPE_JSON - def __init__( self, orient: ext.SeriesOrient = "records", @@ -686,17 +673,16 @@ async def to_http_response( if ctx is not None: res = Response( obj.to_json(orient=self._orient), - media_type=MIME_TYPE_JSON, + media_type=self._mime_type, headers=ctx.response.headers, # type: ignore (bad starlette types) status_code=ctx.response.status_code, ) set_cookies(res, ctx.response.cookies) return res else: - return Response(obj.to_json(orient=self._orient), media_type=MIME_TYPE_JSON) - - def generate_protobuf(self): - pass + return Response( + obj.to_json(orient=self._orient), media_type=self._mime_type + ) async def from_grpc_request(self, request, context) -> t.Any: pass diff --git a/bentoml/_internal/io_descriptors/text.py b/bentoml/_internal/io_descriptors/text.py index f7718b1a0a5..9c30497ac5d 100644 --- a/bentoml/_internal/io_descriptors/text.py +++ b/bentoml/_internal/io_descriptors/text.py @@ -12,24 +12,23 @@ from ..utils.http import set_cookies from ..service.openapi import SUCCESS_DESCRIPTION from ..utils.lazy_loader import LazyLoader +from ..service.openapi.specification import Schema +from ..service.openapi.specification import Response as OpenAPIResponse from ..service.openapi.specification import MediaType +from ..service.openapi.specification import RequestBody if TYPE_CHECKING: - from bentoml.grpc.v1 import service_pb2 as _service_pb2 + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import BentoServicerContext from ..context import InferenceApiContext as Context - from ...server.grpc.types import BentoServicerContext else: - _service_pb2 = LazyLoader("_service_pb2", globals(), "bentoml.grpc.v1.service_pb2") - -from ..service.openapi.specification import Schema -from ..service.openapi.specification import Response as OpenAPIResponse -from ..service.openapi.specification import RequestBody + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") MIME_TYPE = "text/plain" -class Text(IODescriptor[str], proto_fields=["string_value", "raw_value"]): +class Text(IODescriptor[str], proto_field="text"): """ :obj:`Text` defines API specification for the inputs/outputs of a Service. 
:obj:`Text` represents strings for all incoming requests/outcoming responses as specified in @@ -139,16 +138,29 @@ async def to_http_response(self, obj: str, ctx: Context | None = None) -> Respon return Response(obj, media_type=MIME_TYPE) async def from_grpc_request( - self, - request: _service_pb2.Request, - context: BentoServicerContext, # pylint: disable=unused-argument + self, request: pb.Request, context: BentoServicerContext ) -> str: - return str(request.input.string_value) + import ast + + from ..utils.grpc import get_field + from ..utils.grpc import validate_content_type + + # validate gRPC content type if content type is specified + validate_content_type(context, self) + + field = ast.literal_eval(get_field(request, self)) + + try: + return bytes(field, "ascii").decode("utf-8") + except TypeError: + import base64 + + return base64.b64decode(field).decode("utf-8") async def to_grpc_response( - self, obj: str, context: BentoServicerContext # pylint: disable=unused-argument - ) -> _service_pb2.Response: - return _service_pb2.Response(output=_service_pb2.Value(string_value=obj)) + self, obj: str, context: BentoServicerContext + ) -> pb.Response: - def generate_protobuf(self): - pass + context.set_trailing_metadata((("content-type", self.grpc_content_type),)) + + return pb.Response(text=obj) diff --git a/bentoml/_internal/server/grpc/interceptors/__init__.py b/bentoml/_internal/server/grpc/interceptors/__init__.py index bf93cd5c827..263dc39515e 100644 --- a/bentoml/_internal/server/grpc/interceptors/__init__.py +++ b/bentoml/_internal/server/grpc/interceptors/__init__.py @@ -7,19 +7,17 @@ from grpc import aio from ....utils import LazyLoader -from ....utils.grpc import ProtoCodec from ....utils.grpc import wrap_rpc_handler -from ....utils.grpc import get_grpc_content_type +from ....utils.grpc import GRPC_CONTENT_TYPE if TYPE_CHECKING: - from ..types import Request - from ..types import Response - from ..types import RpcMethodHandler - from ..types import AsyncHandlerMethod - from ..types import HandlerCallDetails - from ..types import BentoServicerContext - from ....utils.grpc.codec import Codec + from bentoml.grpc.types import Request + from bentoml.grpc.types import Response + from bentoml.grpc.types import RpcMethodHandler + from bentoml.grpc.types import AsyncHandlerMethod + from bentoml.grpc.types import HandlerCallDetails + from bentoml.grpc.types import BentoServicerContext else: service_pb2 = LazyLoader("service_pb2", globals(), "bentoml.grpc.v1.service_pb2") @@ -30,11 +28,11 @@ class GenericHeadersServerInterceptor(aio.ServerInterceptor): Refers to https://chromium.googlesource.com/external/github.com/grpc/grpc/+/HEAD/doc/PROTOCOL-HTTP2.md """ - def __init__(self, *, codec: Codec | None = None): - if not codec: - # By default, we use ProtoCodec. - codec = ProtoCodec() - self._codec = codec + def __init__(self, *, message_format: str | None = None): + if not message_format: + # By default, we are sending proto message. + message_format = "proto" + self._content_type = f"{GRPC_CONTENT_TYPE}+{message_format}" def set_trailing_metadata(self, context: BentoServicerContext): # We want to send some initial metadata to the client. @@ -42,8 +40,7 @@ def set_trailing_metadata(self, context: BentoServicerContext): # of the current request. gRPC instead uses trailers for this purpose, and # trailers are sent during `send_trailing_metadata` call # For now we are sending over the content-type header. 
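Since the server reports content-type via trailers, a client has to wait for the RPC to finish before it can read that header. A rough sketch of how a `grpc.aio` client could inspect those trailers (the address, port, and `api_name` here are placeholders, not values taken from this diff):

```python
import asyncio

import grpc

from bentoml.grpc.v1 import service_pb2 as pb
from bentoml.grpc.v1 import service_pb2_grpc as services


async def main() -> None:
    async with grpc.aio.insecure_channel("localhost:3000") as channel:
        stub = services.BentoServiceStub(channel)
        call = stub.Call(pb.Request(api_name="predict", text="hello"))
        print(await call)  # the unary response message
        # Trailing metadata is only available once the RPC has completed.
        for key, value in await call.trailing_metadata():
            print(key, value)  # e.g. ('content-type', 'application/grpc+proto')


asyncio.run(main())
```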
- headers = [("content-type", get_grpc_content_type(codec=self._codec))] - context.set_trailing_metadata(headers) + context.set_trailing_metadata((("content-type", self._content_type),)) async def intercept_service( self, @@ -64,11 +61,8 @@ async def new_behaviour( self.set_trailing_metadata(context) # for the rpc itself. - resp = behaviour(request, context) - if not hasattr(resp, "__aiter__"): - resp = await resp - return resp + return await behaviour(request, context) return new_behaviour - return wrap_rpc_handler(wrapper, handler) + return t.cast("RpcMethodHandler", wrap_rpc_handler(wrapper, handler)) diff --git a/bentoml/_internal/server/grpc/interceptors/access.py b/bentoml/_internal/server/grpc/interceptors/access.py index ba18c7ccace..5c76f0552be 100644 --- a/bentoml/_internal/server/grpc/interceptors/access.py +++ b/bentoml/_internal/server/grpc/interceptors/access.py @@ -12,24 +12,21 @@ from ....utils import LazyLoader from ....utils.grpc import to_http_status from ....utils.grpc import wrap_rpc_handler -from ....utils.grpc.codec import GRPC_CONTENT_TYPE +from ....utils.grpc import GRPC_CONTENT_TYPE if TYPE_CHECKING: from grpc.aio._typing import MetadataType from bentoml.grpc.v1 import service_pb2 - - from ..types import Request - from ..types import Response - from ..types import RpcMethodHandler - from ..types import AsyncHandlerMethod - from ..types import HandlerCallDetails - from ..types import BentoServicerContext + from bentoml.grpc.types import Request + from bentoml.grpc.types import Response + from bentoml.grpc.types import RpcMethodHandler + from bentoml.grpc.types import AsyncHandlerMethod + from bentoml.grpc.types import HandlerCallDetails + from bentoml.grpc.types import BentoServicerContext else: service_pb2 = LazyLoader("service_pb2", globals(), "bentoml.grpc.v1.service_pb2") -logger = logging.getLogger(__name__) - class AccessLogServerInterceptor(aio.ServerInterceptor): """ @@ -66,13 +63,13 @@ async def new_behaviour( start = default_timer() try: response = await behaviour(request, context) - except Exception as e: + except Exception as e: # pylint: disable=broad-except context.set_code(grpc.StatusCode.INTERNAL) context.set_details(str(e)) finally: if TYPE_CHECKING: assert response - latency = max(default_timer() - start, 0) + latency = max(default_timer() - start, 0) * 1000 req = [ "scheme=http", # TODO: support https when ssl is added @@ -96,4 +93,4 @@ async def new_behaviour( return new_behaviour - return wrap_rpc_handler(wrapper, handler) + return t.cast("RpcMethodHandler", wrap_rpc_handler(wrapper, handler)) diff --git a/bentoml/_internal/server/grpc/interceptors/opentelemetry.py b/bentoml/_internal/server/grpc/interceptors/opentelemetry.py index 8f50d8c8af4..9a8a1d416a2 100644 --- a/bentoml/_internal/server/grpc/interceptors/opentelemetry.py +++ b/bentoml/_internal/server/grpc/interceptors/opentelemetry.py @@ -20,8 +20,8 @@ from ....utils.pkg import get_pkg_version from ....utils.grpc import wrap_rpc_handler +from ....utils.grpc import GRPC_CONTENT_TYPE from ....utils.grpc import parse_method_name -from ....utils.grpc.codec import GRPC_CONTENT_TYPE from ....configuration.containers import BentoMLContainer if TYPE_CHECKING: @@ -31,12 +31,12 @@ from opentelemetry.trace import Span from opentelemetry.sdk.trace import TracerProvider - from ..types import Request - from ..types import Response - from ..types import RpcMethodHandler - from ..types import AsyncHandlerMethod - from ..types import HandlerCallDetails - from ..types import BentoServicerContext + from 
bentoml.grpc.types import Request + from bentoml.grpc.types import Response + from bentoml.grpc.types import RpcMethodHandler + from bentoml.grpc.types import AsyncHandlerMethod + from bentoml.grpc.types import HandlerCallDetails + from bentoml.grpc.types import BentoServicerContext logger = logging.getLogger(__name__) @@ -221,7 +221,7 @@ def start_span( if ip in ("[::1]", "127.0.0.1"): attributes[SpanAttributes.NET_PEER_NAME] = "localhost" except IndexError: - logger.warning("Failed to parse peer address '%s'", context.peer()) + logger.warning(f"Failed to parse peer address '{context.peer()}'") return self._tracer.start_as_current_span( name=method_name, @@ -265,4 +265,4 @@ async def new_behaviour( return new_behaviour - return wrap_rpc_handler(wrapper, handler) + return t.cast("RpcMethodHandler", wrap_rpc_handler(wrapper, handler)) diff --git a/bentoml/_internal/server/grpc/interceptors/prometheus.py b/bentoml/_internal/server/grpc/interceptors/prometheus.py index 14afb270388..e3a556b1b9b 100644 --- a/bentoml/_internal/server/grpc/interceptors/prometheus.py +++ b/bentoml/_internal/server/grpc/interceptors/prometheus.py @@ -21,13 +21,13 @@ if TYPE_CHECKING: from bentoml.grpc.v1 import service_pb2 + from bentoml.grpc.types import Request + from bentoml.grpc.types import Response + from bentoml.grpc.types import RpcMethodHandler + from bentoml.grpc.types import AsyncHandlerMethod + from bentoml.grpc.types import HandlerCallDetails + from bentoml.grpc.types import BentoServicerContext - from ..types import Request - from ..types import Response - from ..types import RpcMethodHandler - from ..types import AsyncHandlerMethod - from ..types import HandlerCallDetails - from ..types import BentoServicerContext from ....service import Service from ...metrics.prometheus import PrometheusClient else: @@ -50,7 +50,7 @@ def __init__(self, bento_service: Service): def _setup( self, metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client], - ): + ): # pylint: disable=attribute-defined-outside-init # a valid tag name may includes invalid characters, so we need to escape them # ref: https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels @@ -138,4 +138,4 @@ async def new_behavior( return new_behavior - return wrap_rpc_handler(wrapper, handler) + return t.cast("RpcMethodHandler", wrap_rpc_handler(wrapper, handler)) diff --git a/bentoml/_internal/server/grpc/server.py b/bentoml/_internal/server/grpc/server.py index b00c337fcf8..ca062bc0178 100644 --- a/bentoml/_internal/server/grpc/server.py +++ b/bentoml/_internal/server/grpc/server.py @@ -22,6 +22,8 @@ from bentoml.grpc.v1 import service_pb2 from bentoml.grpc.v1 import service_pb2_grpc + from bentoml.grpc.types import AddServicerFn + from bentoml.grpc.types import ServicerClass else: service_pb2 = LazyLoader("service_pb2", globals(), "bentoml.grpc.v1.service_pb2") service_pb2_grpc = LazyLoader( @@ -42,6 +44,7 @@ def __init__( server: aio.Server, on_startup: t.Sequence[t.Callable[[], t.Any]] | None = None, on_shutdown: t.Sequence[t.Callable[[], t.Any]] | None = None, + mount_servicers: t.Sequence[tuple[ServicerClass, AddServicerFn]] | None = None, *, _grace_period: int | None = None, _bento_servicer: service_pb2_grpc.BentoServiceServicer, @@ -58,6 +61,7 @@ def __init__( self.on_startup = [] if on_startup is None else list(on_startup) self.on_shutdown = [] if on_shutdown is None else list(on_shutdown) + self.mount_servicers = [] if mount_servicers is None else list(mount_servicers) @cached_property def _loop(self) -> 
asyncio.AbstractEventLoop: @@ -105,6 +109,11 @@ async def startup(self) -> None: ) health_pb2_grpc.add_HealthServicer_to_server(self._health_servicer, self.server) + # register custom servicer + for servicer, add_servicer_fn in self.mount_servicers: + # TODO: Annotated types are not contravariant + add_servicer_fn(servicer(), self.server) + services = tuple( service.full_name for service in service_pb2.DESCRIPTOR.services_by_name.values() diff --git a/bentoml/_internal/server/grpc/servicer.py b/bentoml/_internal/server/grpc/servicer.py index 533171ac421..3bbdf8403d9 100644 --- a/bentoml/_internal/server/grpc/servicer.py +++ b/bentoml/_internal/server/grpc/servicer.py @@ -22,8 +22,7 @@ from bentoml.grpc.v1 import service_pb2 as _service_pb2 from bentoml.grpc.v1 import service_pb2_grpc as _service_pb2_grpc - - from .types import BentoServicerContext + from bentoml.grpc.types import BentoServicerContext else: _service_pb2 = LazyLoader("_service_pb2", globals(), "bentoml.grpc.v1.service_pb2") _service_pb2_grpc = LazyLoader( @@ -45,7 +44,7 @@ def create_bento_servicer(service: Service) -> _service_pb2_grpc.BentoServiceSer class BentoServiceServicer(_service_pb2_grpc.BentoServiceServicer): """An asyncio implementation of BentoService servicer.""" - async def Call( # type: ignore (no async types) + async def Call( # type: ignore (no async types) # pylint: disable=invalid-overridden-method self, request: _service_pb2.Request, context: BentoServicerContext, @@ -59,12 +58,12 @@ async def Call( # type: ignore (no async types) response = _service_pb2.Response() try: - input = await api.input.from_grpc_request(request, context) + input_ = await api.input.from_grpc_request(request, context) if asyncio.iscoroutinefunction(api.func): - output = await api.func(input) + output = await api.func(input_) else: - output = await anyio.to_thread.run_sync(api.func, input) + output = await anyio.to_thread.run_sync(api.func, input_) response = await api.output.to_grpc_response(output, context) except BentoMLException as e: @@ -76,7 +75,7 @@ async def Call( # type: ignore (no async types) code=grpc.StatusCode.INTERNAL, details="An internal runtime error has occurred, check out error details in server logs.", ) - except Exception: # type: ignore (generic exception) + except Exception: # pylint: disable=broad-except log_exception(request, sys.exc_info()) await context.abort( code=grpc.StatusCode.UNKNOWN, diff --git a/bentoml/_internal/server/grpc_app.py b/bentoml/_internal/server/grpc_app.py index 69db313be53..003390f13a2 100644 --- a/bentoml/_internal/server/grpc_app.py +++ b/bentoml/_internal/server/grpc_app.py @@ -108,6 +108,7 @@ def __call__(self) -> GRPCServer: server=server, on_startup=self.on_startup, on_shutdown=self.on_shutdown, + mount_servicers=self.bento_service.mount_servicers, _health_servicer=health_servicer, _bento_servicer=create_bento_servicer(self.bento_service), ) diff --git a/bentoml/_internal/server/http/access.py b/bentoml/_internal/server/http/access.py index a6ba4e5ac0b..b6612281993 100644 --- a/bentoml/_internal/server/http/access.py +++ b/bentoml/_internal/server/http/access.py @@ -114,11 +114,7 @@ async def wrapped_send(message: "ext.ASGIMessage") -> None: latency = max(default_timer() - start, 0) self.logger.info( - "%s (%s) (%s) %.3fms", - address, - ",".join(request), - ",".join(response), - latency, + f"{address} ({','.join(request)}) ({','.join(response)}) {latency:.3f}ms" ) await send(message) diff --git a/bentoml/_internal/server/http/instruments.py 
b/bentoml/_internal/server/http/instruments.py index 96f4f3628b4..1eddd76dd3a 100644 --- a/bentoml/_internal/server/http/instruments.py +++ b/bentoml/_internal/server/http/instruments.py @@ -29,7 +29,7 @@ def __init__(self, app: ext.ASGIApp, bento_service: Service): def _setup( self, metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client], - ): + ): # pylint: disable=attribute-defined-outside-init self.metrics_client = metrics_client service_name = self.bento_service.name diff --git a/bentoml/_internal/server/http_app.py b/bentoml/_internal/server/http_app.py index c8e8830337f..27bf2d1981a 100644 --- a/bentoml/_internal/server/http_app.py +++ b/bentoml/_internal/server/http_app.py @@ -63,7 +63,7 @@ def log_exception(request: Request, exc_info: t.Any) -> None: :attr:`logger`. """ logger.error( - "Exception on %s [%s]", request.url.path, request.method, exc_info=exc_info + f"Exception on {request.url.path} [{request.method}]", exc_info=exc_info ) diff --git a/bentoml/_internal/service/service.py b/bentoml/_internal/service/service.py index 62f4a6bf460..8e291666ab3 100644 --- a/bentoml/_internal/service/service.py +++ b/bentoml/_internal/service/service.py @@ -18,6 +18,9 @@ if TYPE_CHECKING: import grpc + from bentoml.grpc.types import AddServicerFn + from bentoml.grpc.types import ServicerClass + from .. import external_typing as ext from ..bento import Bento from ..server.grpc import GRPCServer @@ -88,6 +91,7 @@ class Service: runners: t.List[Runner] models: t.List[Model] + # starlette related mount_apps: t.List[t.Tuple[ext.ASGIApp, str, str | None]] = attr.field( init=False, factory=list ) @@ -96,11 +100,15 @@ class Service: ] = attr.field(init=False, factory=list) # gRPC related + mount_servicers: list[tuple[ServicerClass, AddServicerFn]] = attr.field( + init=False, factory=list + ) interceptors: list[t.Type[grpc.aio.ServerInterceptor]] = attr.field( init=False, factory=list ) grpc_handlers: list[grpc.GenericRpcHandler] = attr.field(init=False, factory=list) + # list of APIs from @svc.api apis: t.Dict[str, InferenceAPI] = attr.field(init=False, factory=dict) # Tag/Bento are only set when the service was loaded from a bento @@ -251,6 +259,11 @@ def add_asgi_middleware( ) -> None: self.middlewares.append((middleware_cls, options)) + def mount_grpc_servicer( + self, servicer_cls: ServicerClass, add_servicer_fn: AddServicerFn + ) -> None: + self.mount_servicers.append((servicer_cls, add_servicer_fn)) + def add_grpc_interceptor( self, interceptor_cls: t.Type[grpc.aio.ServerInterceptor] ) -> None: diff --git a/bentoml/_internal/utils/grpc/__init__.py b/bentoml/_internal/utils/grpc/__init__.py index a355a485e7b..af4ba96506b 100644 --- a/bentoml/_internal/utils/grpc/__init__.py +++ b/bentoml/_internal/utils/grpc/__init__.py @@ -4,90 +4,186 @@ import typing as t import logging from http import HTTPStatus +from typing import overload from typing import TYPE_CHECKING from dataclasses import dataclass -import grpc - +from bentoml.exceptions import InvalidArgument from bentoml.exceptions import BentoMLException from bentoml.exceptions import UnprocessableEntity -from .codec import ProtoCodec -from .codec import get_grpc_content_type +from .mapping import grpc_status_to_http_status_map +from .mapping import http_status_to_grpc_status_map from ..lazy_loader import LazyLoader if TYPE_CHECKING: - + import grpc + from grpc import aio + from google.protobuf.struct_pb2 import Value + + from bentoml.io import File + from bentoml.io import JSON + from bentoml.io import Text + from 
bentoml.io import Image + from bentoml.io import Multipart from bentoml.io import IODescriptor - from bentoml.grpc.v1 import service_pb2 - - from ...server.grpc.types import RpcMethodHandler + from bentoml.io import NumpyNdarray + from bentoml.io import PandasSeries + from bentoml.io import PandasDataFrame + from bentoml.grpc.v1 import service_pb2 as pb + from bentoml.grpc.types import MessageType + from bentoml.grpc.types import RpcMethodHandler + from bentoml.grpc.types import BentoServicerContext else: - service_pb2 = LazyLoader("service_pb2", globals(), "bentoml.grpc.v1.service_pb2") + exc_msg = "'grpc' is required. Install with 'pip install grpcio'." + grpc = LazyLoader("grpc", globals(), "grpc", exc_msg=exc_msg) + aio = LazyLoader("aio", globals(), "grpc.aio", exc_msg=exc_msg) + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") __all__ = [ "grpc_status_code", "parse_method_name", - "deserialize_proto", "to_http_status", - "get_grpc_content_type", - "ProtoCodec", + "get_field", + "serialize_proto", + "raise_grpc_exception", + "GRPC_CONTENT_TYPE", + "validate_content_type", ] logger = logging.getLogger(__name__) -def deserialize_proto( - io_descriptor: IODescriptor[t.Any], - req: service_pb2.Request, - **kwargs: t.Any, -) -> tuple[str, dict[str, t.Any]]: - # Deserialize a service_pb2.Request to dict. - from google.protobuf.json_format import MessageToDict +# content-type is always application/grpc +GRPC_CONTENT_TYPE = "application/grpc" + + +def validate_content_type( + context: BentoServicerContext, descriptor: IODescriptor[t.Any] +) -> None: + """ + Validate 'content-type' from invocation metadata. + """ + metadata = context.invocation_metadata() + if metadata: + if TYPE_CHECKING: + from grpc.aio._typing import MetadatumType + + metadata = t.cast(tuple[MetadatumType], metadata) + + metas = aio.Metadata.from_tuple(metadata) + maybe_content_type = metas.get_all("content-type") + if maybe_content_type: + if len(maybe_content_type) > 1: + raise_grpc_exception( + f"{maybe_content_type} should only contain one 'Content-Type' header.", + context=context, + exception_cls=InvalidArgument, + ) + + content_type = str(maybe_content_type[0]) + + if not content_type.startswith(GRPC_CONTENT_TYPE): + raise_grpc_exception( + f"{content_type} should start with {GRPC_CONTENT_TYPE}.", + context=context, + exception_cls=InvalidArgument, + ) + if content_type != descriptor.grpc_content_type: + raise_grpc_exception( + f"'{content_type}' is found while '{repr(descriptor)}' requires '{descriptor.grpc_content_type}'.", + context=context, + exception_cls=InvalidArgument, + ) + + +@overload +def get_field(req: pb.Request, descriptor: File) -> MessageType[pb.File]: + ... + + +@overload +def get_field(req: pb.Request, descriptor: Image) -> MessageType[pb.File]: + ... - if "preserving_proto_field_name" not in kwargs: - kwargs.setdefault("preserving_proto_field_name", True) - kind = req.input.WhichOneof("kind") - if kind not in io_descriptor.accepted_proto_fields: +@overload +def get_field(req: pb.Request, descriptor: JSON) -> MessageType[Value]: + ... + + +@overload +def get_field( + req: pb.Request, descriptor: Multipart +) -> MessageType[dict[str, pb.Part]]: + ... + + +@overload +def get_field(req: pb.Request, descriptor: NumpyNdarray) -> MessageType[pb.NDArray]: + ... + + +@overload +def get_field( + req: pb.Request, descriptor: PandasDataFrame +) -> MessageType[pb.DataFrame]: + ... + + +@overload +def get_field(req: pb.Request, descriptor: PandasSeries) -> MessageType[pb.Series]: + ... 
+ + +@overload +def get_field(req: pb.Request, descriptor: Text) -> MessageType[str]: + ... + + +def get_field(req: pb.Request, descriptor: IODescriptor[t.Any]) -> MessageType[t.Any]: + try: + _ = req.HasField(descriptor.proto_field) + except KeyError as e: raise UnprocessableEntity( - f"{kind} is not supported for {io_descriptor.__class__.__name__}. Supported protobuf message fields are: {io_descriptor.accepted_proto_fields}" - ) + f"Missing required '{descriptor.proto_field}' for {descriptor.__class__.__name__}: {str(e)}" + ) from e + return getattr(req, descriptor.proto_field) + - return kind, MessageToDict(getattr(req.input, kind), **kwargs) +def serialize_proto(output: dict[str, t.Any], **kwargs: t.Any) -> pb.Response: + from google.protobuf.json_format import ParseDict + return ParseDict(output, pb.Response(), **kwargs) -# Maps HTTP status code to grpc.StatusCode -_STATUS_CODE_MAPPING = { - HTTPStatus.OK: grpc.StatusCode.OK, - HTTPStatus.UNAUTHORIZED: grpc.StatusCode.UNAUTHENTICATED, - HTTPStatus.FORBIDDEN: grpc.StatusCode.PERMISSION_DENIED, - HTTPStatus.NOT_FOUND: grpc.StatusCode.UNIMPLEMENTED, - HTTPStatus.TOO_MANY_REQUESTS: grpc.StatusCode.UNAVAILABLE, - HTTPStatus.BAD_GATEWAY: grpc.StatusCode.UNAVAILABLE, - HTTPStatus.SERVICE_UNAVAILABLE: grpc.StatusCode.UNAVAILABLE, - HTTPStatus.GATEWAY_TIMEOUT: grpc.StatusCode.DEADLINE_EXCEEDED, - HTTPStatus.BAD_REQUEST: grpc.StatusCode.INVALID_ARGUMENT, - HTTPStatus.INTERNAL_SERVER_ERROR: grpc.StatusCode.INTERNAL, - HTTPStatus.UNPROCESSABLE_ENTITY: grpc.StatusCode.FAILED_PRECONDITION, -} + +def raise_grpc_exception( + msg: str, + context: BentoServicerContext, + exception_cls: t.Type[BentoMLException] = BentoMLException, +): + code = http_status_to_grpc_status_map().get( + exception_cls.error_code, grpc.StatusCode.UNKNOWN + ) + context.set_code(code) + context.set_details(msg) + raise exception_cls(msg) def grpc_status_code(err: BentoMLException) -> grpc.StatusCode: """ Convert BentoMLException.error_code to grpc.StatusCode. """ - return _STATUS_CODE_MAPPING.get(err.error_code, grpc.StatusCode.UNKNOWN) + return http_status_to_grpc_status_map().get(err.error_code, grpc.StatusCode.UNKNOWN) def to_http_status(status_code: grpc.StatusCode) -> int: """ Convert grpc.StatusCode to HTTPStatus. """ - try: - status = {v: k for k, v in _STATUS_CODE_MAPPING.items()}[status_code] - except KeyError: - status = HTTPStatus.INTERNAL_SERVER_ERROR + status = grpc_status_to_http_status_map().get( + status_code, HTTPStatus.INTERNAL_SERVER_ERROR + ) return status.value @@ -143,7 +239,7 @@ def wrap_rpc_handler( # The reason we are using TYPE_CHECKING for assert here # is that if the following bool request_streaming and response_streaming - # are set, then it is guaranteed that RpcMethodHandler are not None. + # are set, then it is guaranteed that one of the RpcMethodHandler fields is not None. 
if not handler.request_streaming and not handler.response_streaming: if TYPE_CHECKING: assert handler.unary_unary diff --git a/bentoml/_internal/utils/grpc/codec.py b/bentoml/_internal/utils/grpc/codec.py deleted file mode 100644 index cef98769b11..00000000000 --- a/bentoml/_internal/utils/grpc/codec.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import annotations - -import typing as t -from abc import ABC -from abc import abstractmethod -from typing import TYPE_CHECKING - -from typing_extensions import Self - -if TYPE_CHECKING: - from google.protobuf.message import Message - -# content-type is always application/grpc -GRPC_CONTENT_TYPE = "application/grpc" - - -class Codec(ABC): - _content_subtype: str - - def __new__(cls: type[Self]) -> Self: - obj = object.__new__(cls) - if not cls._content_subtype: - raise TypeError(f"{cls} should have a '_content_subtype' attribute") - obj.__setattr__("_content_subtype", cls._content_subtype) - return obj - - @property - def content_type(self) -> str: - return self._content_subtype - - @abstractmethod - def encode(self, message: t.Any, message_type: t.Type[Message]) -> bytes: - # TODO: We will want to use this to encode headers message. - pass - - @abstractmethod - def decode(self, data: bytes, message_type: t.Type[Message]) -> t.Any: - # TODO: We will want to use this to decode headers message. - pass - - -class ProtoCodec(Codec): - _content_subtype: str = "proto" - - def encode(self, message: t.Any, message_type: t.Type[Message]) -> bytes: - if not isinstance(message, message_type): - raise TypeError(f"message should be a {message_type}, got {type(message)}.") - return message.SerializeToString() - - def decode(self, data: bytes, message_type: t.Type[Message]) -> t.Any: - return message_type.FromString(data) - - -def get_grpc_content_type(codec: Codec | None = None) -> str: - return f"{GRPC_CONTENT_TYPE}" + f"+{codec.content_type}" if codec else "" diff --git a/bentoml/_internal/utils/grpc/mapping.py b/bentoml/_internal/utils/grpc/mapping.py new file mode 100644 index 00000000000..c58dc068a8b --- /dev/null +++ b/bentoml/_internal/utils/grpc/mapping.py @@ -0,0 +1,145 @@ +""" +Static mappings from BentoML protobuf messages to values. + +For all functions in this module, make sure to lazy load the generated protobuf. +""" +from __future__ import annotations + +from typing import TYPE_CHECKING +from functools import lru_cache + +from ..lazy_loader import LazyLoader + +if TYPE_CHECKING: + from enum import Enum + + import grpc + import numpy as np + + from bentoml.grpc.v1 import service_pb2 as pb + + from ... import external_typing as ext +else: + grpc = LazyLoader( + "grpc", + globals(), + "grpc", + exc_msg="'grpc' is required. 
Install with 'pip install grpcio'.", + ) + pb = LazyLoader("pb", globals(), "bentoml.grpc.v1.service_pb2") + np = LazyLoader("np", globals(), "numpy") + + +@lru_cache(maxsize=1) +def http_status_to_grpc_status_map() -> dict[Enum, grpc.StatusCode]: + # Maps HTTP status code to grpc.StatusCode + from http import HTTPStatus + + return { + HTTPStatus.OK: grpc.StatusCode.OK, + HTTPStatus.UNAUTHORIZED: grpc.StatusCode.UNAUTHENTICATED, + HTTPStatus.FORBIDDEN: grpc.StatusCode.PERMISSION_DENIED, + HTTPStatus.NOT_FOUND: grpc.StatusCode.UNIMPLEMENTED, + HTTPStatus.TOO_MANY_REQUESTS: grpc.StatusCode.UNAVAILABLE, + HTTPStatus.BAD_GATEWAY: grpc.StatusCode.UNAVAILABLE, + HTTPStatus.SERVICE_UNAVAILABLE: grpc.StatusCode.UNAVAILABLE, + HTTPStatus.GATEWAY_TIMEOUT: grpc.StatusCode.DEADLINE_EXCEEDED, + HTTPStatus.BAD_REQUEST: grpc.StatusCode.INVALID_ARGUMENT, + HTTPStatus.INTERNAL_SERVER_ERROR: grpc.StatusCode.INTERNAL, + HTTPStatus.UNPROCESSABLE_ENTITY: grpc.StatusCode.FAILED_PRECONDITION, + } + + +@lru_cache(maxsize=1) +def grpc_status_to_http_status_map() -> dict[grpc.StatusCode, Enum]: + return {v: k for k, v in http_status_to_grpc_status_map().items()} + + +@lru_cache(maxsize=1) +def filetype_pb_to_mimetype_map() -> dict[pb.File.FileType.ValueType, str]: + return { + pb.File.FILE_TYPE_CSV: "text/csv", + pb.File.FILE_TYPE_PLAINTEXT: "text/plain", + pb.File.FILE_TYPE_JSON: "application/json", + pb.File.FILE_TYPE_BYTES: "application/octet-stream", + pb.File.FILE_TYPE_PDF: "application/pdf", + pb.File.FILE_TYPE_PNG: "image/png", + pb.File.FILE_TYPE_JPEG: "image/jpeg", + pb.File.FILE_TYPE_GIF: "image/gif", + pb.File.FILE_TYPE_TIFF: "image/tiff", + pb.File.FILE_TYPE_BMP: "image/bmp", + pb.File.FILE_TYPE_WEBP: "image/webp", + pb.File.FILE_TYPE_SVG: "image/svg+xml", + } + + +@lru_cache(maxsize=1) +def mimetype_to_filetype_pb_map() -> dict[str, pb.File.FileType.ValueType]: + return {v: k for k, v in filetype_pb_to_mimetype_map().items()} + + +# TODO: support the following types for protobuf messages: +# - support complex64, complex128, object and struct types +# - BFLOAT16, QINT32, QINT16, QUINT16, QINT8, QUINT8 +# +# For int16, uint16, int8, uint8 -> specify types in NumpyNdarray + using int_values. +# +# For bfloat16, half (float16) -> specify types in NumpyNdarray + using float_values. 
+# +# for string_values, use <U. +@lru_cache(maxsize=1) +def dtypepb_to_fieldpb_map() -> dict[pb.NDArray.DType.ValueType, str]: + return { + pb.NDArray.DTYPE_FLOAT: "float_values", + pb.NDArray.DTYPE_DOUBLE: "double_values", + pb.NDArray.DTYPE_INT32: "int32_values", + pb.NDArray.DTYPE_INT64: "int64_values", + pb.NDArray.DTYPE_UINT32: "uint32_values", + pb.NDArray.DTYPE_UINT64: "uint64_values", + pb.NDArray.DTYPE_BOOL: "bool_values", + pb.NDArray.DTYPE_STRING: "string_values", + } + + +@lru_cache(maxsize=1) +def fieldpb_to_dtypepb_map() -> dict[str, pb.NDArray.DType.ValueType]: + return {v: k for k, v in dtypepb_to_fieldpb_map().items()} + + +@lru_cache(maxsize=1) +def dtypepb_to_npdtype_map() -> dict[pb.NDArray.DType.ValueType, ext.NpDTypeLike]: + # pb.NDArray.Dtype -> np.dtype + return { + k: np.dtype(FIELDPB_TO_NPDTYPE_NAME_MAP[v]) + for k, v in dtypepb_to_fieldpb_map().items() + } + + +@lru_cache(maxsize=1) +def npdtype_to_dtypepb_map() -> dict[ext.NpDTypeLike, pb.NDArray.DType.ValueType]: + # np.dtype -> pb.NDArray.Dtype + return {v: k for k, v in dtypepb_to_npdtype_map().items()} + + +@lru_cache(maxsize=1) +def fieldpb_to_npdtype_map() -> dict[str, ext.NpDTypeLike]: + # str -> np.dtype + return {k: np.dtype(v) for k, v in FIELDPB_TO_NPDTYPE_NAME_MAP.items()} + + +@lru_cache(maxsize=1) +def npdtype_to_fieldpb_map() -> dict[ext.NpDTypeLike, str]: + # np.dtype -> str + return {v: k for k, v in fieldpb_to_npdtype_map().items()} diff --git a/bentoml/_internal/server/grpc/types.py b/bentoml/grpc/types.py similarity index 71% rename from bentoml/_internal/server/grpc/types.py rename to bentoml/grpc/types.py index e73fa629604..c711a861a8f 100644 --- a/bentoml/_internal/server/grpc/types.py +++ b/bentoml/grpc/types.py @@ -3,19 +3,29 @@ """ from __future__ import annotations -from typing import TypeVar -from typing import Callable -from typing import Optional -from typing import Awaitable -from typing import NamedTuple from typing import TYPE_CHECKING if TYPE_CHECKING: + from typing import Any + from typing import Type + from typing import Literal + from typing import TypeVar + from typing import Callable + from typing import Optional + from typing import Annotated + from typing import Awaitable + from typing import NamedTuple + import grpc from grpc import aio + from bentoml.grpc.v1.service_pb2 import File + from bentoml.grpc.v1.service_pb2 import Part + from bentoml.grpc.v1.service_pb2 import Series + from bentoml.grpc.v1.service_pb2 import NDArray from bentoml.grpc.v1.service_pb2 import Request from bentoml.grpc.v1.service_pb2 import Response + from bentoml.grpc.v1.service_pb2 import DataFrame from bentoml.grpc.v1.service_pb2_grpc import BentoServiceServicer P = TypeVar("P") @@ -68,6 +78,18 @@ class HandlerCallDetails( method: str invocation_metadata: aio.Metadata + ServicerImpl = TypeVar("ServicerImpl") + Servicer = Annotated[ServicerImpl, object] + ServicerClass = Type[Servicer[Any]] + AddServicerFn = Callable[[Servicer[Any], aio.Server | grpc.Server], None] + + ProtoField = Literal["dataframe", "file", "json", "ndarray", "series"] + + ProtoType = TypeVar("ProtoType") + MessageType = Annotated[ + ProtoType, str, NDArray, DataFrame, Series, File, dict[str, Part] + ] + __all__ = [ "Request", "Response", diff --git a/bentoml/grpc/v1/service.proto b/bentoml/grpc/v1/service.proto index bb145a321ab..a8486b27559 100644 --- a/bentoml/grpc/v1/service.proto +++ b/bentoml/grpc/v1/service.proto @@ -2,6 +2,8 @@ syntax = "proto3"; package bentoml.grpc.v1; +import "google/protobuf/struct.proto"; + // cc_enable_arenas pre-allocate memory for given message to 
improve speed. (C++ only) option cc_enable_arenas = true; option go_package = "github.com/bentoml/grpc/v1"; @@ -13,83 +15,243 @@ option py_generic_services = true; // a gRPC BentoServer. service BentoService { - // Infer handles unary API. + // Call handles a method call for a given API entrypoint. rpc Call(Request) returns (Response) {} } -// Request for Call. +// Request message for incoming Call. message Request { - // a given API route the rpc request is sent to. + // api_name defines the API entrypoint to call. + // api_name is the name of the function defined in bentoml.Service. + // Example: + // + // @svc.api(input=NumpyNdarray(), output=File()) + // def predict(input: NDArray[float]) -> bytes: + // ... + // + // api_name is "predict" in this case. string api_name = 1; - Value input = 2; + // NDArray represents an n-dimensional array of arbitrary type. + NDArray ndarray = 2; + + // Tensor is similar to ndarray but with a name + // We are reserving it for future use. + // repeated Tensor tensors = 3; + reserved 3, 10 to 15; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation of tabular data. + DataFrame dataframe = 4; + + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 5; + + // File represents any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 6; + + // Text represents string input. + optional string text = 7; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 8; + + // Multipart represents a multipart message. + // It comprises a mapping from a given type name to a subset of the aforementioned types. + map<string, Part> multipart = 9; } -// Response from Call. +// Response message for incoming Call. message Response { - // representation of the output value. - Value output = 1; -} + // NDArray represents an n-dimensional array of arbitrary type. + NDArray ndarray = 1; + + // Tensor is similar to ndarray but with a name + // We are reserving it for future use. + // repeated Tensor tensors = 3; + reserved 3, 10 to 15; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation of tabular data. + DataFrame dataframe = 4; -// Represents an n-dimensional array. -message MultiDimensionalArray { - // The shape of the array. - repeated int32 shape = 1; - // The flattened contents of the nd array. - optional Array array = 2; + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 5; + + // File represents any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 2; + + // Text represents string input. + optional string text = 6; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 7; + + // Multipart represents a multipart message. + // It comprises a mapping from a given type name to a subset of the aforementioned types. + map<string, Part> multipart = 8; } -// This represents a 1-d array. 
-message Array { - repeated bool bool_values = 5 [packed = true]; - repeated float float_values = 1 [packed = true]; - repeated string string_values = 4; - repeated double double_values = 2 [packed = true]; - repeated int32 int_values = 3 [packed = true]; - repeated int64 long_values = 6 [packed = true]; - repeated uint32 uint32_values = 7 [packed = true]; - repeated uint64 uint64_values = 8 [packed = true]; - - // TODO: supports the following: - // - arbitrary structs - // - single/double precision complex value type. - // - quantized value type - // - // repeated google.protobuf.Struct struct_contents = 10; +// Part represents possible value types for a multipart message. +// These are the same as the types in the Request message. +message Part { + oneof representation { + // NDArray represents an n-dimensional array of arbitrary type. + NDArray ndarray = 2; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation of tabular data. + DataFrame dataframe = 4; + + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 5; - reserved 10 to 15; + // File represents any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 6; + + // Text represents string input. + string text = 7; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 8; + } + + // Tensor is similar to ndarray but with a name + // We are reserving it for future use. + // repeated Tensor tensors = 3; + reserved 3, 10 to 15; } -// Represents raw bytes types. -message Raw { - // type of file, let it be csv, text ,parquet, tensor type, etc. - optional string kind = 1; - map<string, string> metadata = 2; +// File represents any arbitrary file type. This can be +// plaintext, image, video, audio, etc. +message File { + // FileType represents possible file types to be handled by BentoML. + // Currently, we only support plaintext (Text()), image (Image()), and file (File()). + // TODO: support audio and video streaming file types. + enum FileType { + FILE_TYPE_UNSPECIFIED = 0; + + // file types + FILE_TYPE_CSV = 1; + FILE_TYPE_PLAINTEXT = 2; + FILE_TYPE_JSON = 3; + FILE_TYPE_BYTES = 4; + FILE_TYPE_PDF = 5; + + // image types + FILE_TYPE_PNG = 6; + FILE_TYPE_JPEG = 7; + FILE_TYPE_GIF = 8; + FILE_TYPE_BMP = 9; + FILE_TYPE_TIFF = 10; + FILE_TYPE_WEBP = 11; + FILE_TYPE_SVG = 12; + } + + // optional type of file, be it csv, text, parquet, etc. + optional FileType kind = 1; + + // contents of file as bytes. - bytes content = 3; + bytes content = 2; } -// Represents a map value. -// This can be used for PandasDataFrame IO Descriptor. -message MapValue { - map<string, Value> fields = 1; +// DataFrame represents any tabular data type. We are using +// DataFrame as a trivial representation of tabular data. +// This message carries a given representation of tabular data based on its orientation. +// TODO: support index, records, etc. +message DataFrame { + // column names + repeated string column_names = 1; + + // columns orientation. + // { column ↠ { index ↠ value } } + repeated Series columns = 2; } -// Representing contents of a given rpc call. -message Value { - oneof kind { - // Text() - string string_value = 1; - // File(), Image(), raw byte forms of ndarray, dataframe - Raw raw_value = 2; - // NDArray(), etc. - Array array_value = 3; - // NDArray(), DataFrame(), etc. 
- MultiDimensionalArray multi_dimensional_array_value = 4; - // DataFrame() - MapValue map_value = 5; +// Series portrays a series of values. This can be used for +// representing Series types in tabular data. +message Series { + // A bool parameter value + repeated bool bool_values = 1 [packed = true]; + + // A float parameter value + repeated float float_values = 2 [packed = true]; + + // An int32 parameter value + repeated int32 int32_values = 3 [packed = true]; + + // An int64 parameter value + repeated int64 int64_values = 4 [packed = true]; +} + +// NDArray represents an n-dimensional array of arbitrary type. +message NDArray { + // Represents the data type of a given array. + enum DType { + // Represents a None type. + DTYPE_UNSPECIFIED = 0; + + // Represents a float type. + DTYPE_FLOAT = 1; + + // Represents a double type. + DTYPE_DOUBLE = 2; + + // Represents a bool type. + DTYPE_BOOL = 3; + + // Represents an int32 type. + DTYPE_INT32 = 4; + + // Represents an int64 type. + DTYPE_INT64 = 5; + + // Represents a uint32 type. + DTYPE_UINT32 = 6; + + // Represents a uint64 type. + DTYPE_UINT64 = 7; + + // Represents a string type. + DTYPE_STRING = 8; } - // We want to reserve these for future uses. - reserved 56 to 100; + // dtype is the data type of the given array. + DType dtype = 1; + + // shape is the shape of the given array. + repeated int32 shape = 2; + + // represents a string parameter value. + repeated string string_values = 5; + + // represents a float parameter value. + repeated float float_values = 3 [packed = true]; + + // represents a double parameter value. + repeated double double_values = 4 [packed = true]; + + // represents a bool parameter value. + repeated bool bool_values = 6 [packed = true]; + + // represents an int32 parameter value. + repeated int32 int32_values = 7 [packed = true]; + + // represents an int64 parameter value. + repeated int64 int64_values = 8 [packed = true]; + + // represents a uint32 parameter value. + repeated uint32 uint32_values = 9 [packed = true]; + + // represents a uint64 parameter value. 
+ repeated uint64 uint64_values = 10 [packed = true]; } diff --git a/bentoml/serve.py b/bentoml/serve.py index 12f4d1604d1..9b04cfd0faa 100644 --- a/bentoml/serve.py +++ b/bentoml/serve.py @@ -5,6 +5,8 @@ import json import math import shutil +import socket +import typing as t import logging import tempfile import contextlib @@ -89,6 +91,26 @@ def ensure_prometheus_dir( return alternative +@contextlib.contextmanager +def enable_so_reuseport(host: str, port: int) -> t.Generator[int, None, None]: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if psutil.WINDOWS: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + elif psutil.MACOS or psutil.FREEBSD: + sock.setsockopt(socket.SOL_SOCKET, 0x10000, 1) # SO_REUSEPORT_LB + else: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) + + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 0: + raise RuntimeError("Failed to set SO_REUSEPORT.") + + sock.bind((host, port)) + try: + yield sock.getsockname()[1] + finally: + sock.close() + + def create_watcher( name: str, args: list[str], @@ -130,6 +152,37 @@ def log_grpcui_message(port: int) -> None: ) +def ssl_args( + ssl_certfile: str | None, + ssl_keyfile: str | None, + ssl_keyfile_password: str | None, + ssl_version: int | None, + ssl_cert_reqs: int | None, + ssl_ca_certs: str | None, + ssl_ciphers: str | None, +) -> list[str | int]: + args: list[str | int] = [] + + # Add optional SSL args if they exist + if ssl_certfile: + args.extend(["--ssl-certfile", str(ssl_certfile)]) + if ssl_keyfile: + args.extend(["--ssl-keyfile", str(ssl_keyfile)]) + if ssl_keyfile_password: + args.extend(["--ssl-keyfile-password", ssl_keyfile_password]) + if ssl_ca_certs: + args.extend(["--ssl-ca-certs", str(ssl_ca_certs)]) + + # match with default uvicorn values. + if ssl_version: + args.extend(["--ssl-version", int(ssl_version)]) + if ssl_cert_reqs: + args.extend(["--ssl-cert-reqs", int(ssl_cert_reqs)]) + if ssl_ciphers: + args.extend(["--ssl-ciphers", ssl_ciphers]) + return args + + @inject def serve_development( bento_identifier: str, @@ -162,27 +215,30 @@ def serve_development( circus_sockets: list[CircusSocket] = [] if grpc: - watchers.append( - create_watcher( - name="grpc_dev_api_server", - args=[ - "-m", - SCRIPT_GRPC_DEV_API_SERVER, - bento_identifier, - "--bind", - f"tcp://{host}:{port}", - "--working-dir", - working_dir, - "--prometheus-dir", - prometheus_dir, - ], - use_sockets=False, - working_dir=working_dir, - # we don't want to close stdin for child process in case user use debugger. - # See https://circus.readthedocs.io/en/latest/for-ops/configuration/ - close_child_stdin=False, + with contextlib.ExitStack() as port_stack: + api_port = port_stack.enter_context(enable_so_reuseport(host, port)) + watchers.append( + create_watcher( + name="grpc_dev_api_server", + args=[ + "-m", + SCRIPT_GRPC_DEV_API_SERVER, + bento_identifier, + "--bind", + f"tcp://{host}:{api_port}", + "--working-dir", + working_dir, + "--prometheus-dir", + prometheus_dir, + ], + use_sockets=False, + working_dir=working_dir, + # we don't want to close stdin for child process in case user use debugger. 
+ # See https://circus.readthedocs.io/en/latest/for-ops/configuration/ + close_child_stdin=False, + ) ) - ) + if BentoMLContainer.api_server_config.metrics.enabled.get(): metrics_host = BentoMLContainer.grpc.metrics_host.get() metrics_port = BentoMLContainer.grpc.metrics_port.get() @@ -242,6 +298,15 @@ def serve_development( working_dir, "--prometheus-dir", prometheus_dir, + *ssl_args( + ssl_certfile=ssl_certfile, + ssl_keyfile=ssl_keyfile, + ssl_keyfile_password=ssl_keyfile_password, + ssl_version=ssl_version, + ssl_cert_reqs=ssl_cert_reqs, + ssl_ca_certs=ssl_ca_certs, + ssl_ciphers=ssl_ciphers, + ), ], working_dir=working_dir, # we don't want to close stdin for child process in case user use debugger. @@ -257,87 +322,6 @@ def serve_development( ) ) - args: list[str | int] = [ - "-m", - SCRIPT_DEV_API_SERVER, - bento_identifier, - "--bind", - "fd://$(circus.sockets._bento_api_server)", - "--working-dir", - working_dir, - "--prometheus-dir", - prometheus_dir, - ] - - # Add optional SSL args if they exist - if ssl_certfile: - args.extend(["--ssl-certfile", str(ssl_certfile)]) - if ssl_keyfile: - args.extend(["--ssl-keyfile", str(ssl_keyfile)]) - if ssl_keyfile_password: - args.extend(["--ssl-keyfile-password", ssl_keyfile_password]) - if ssl_ca_certs: - args.extend(["--ssl-ca-certs", str(ssl_ca_certs)]) - - # match with default uvicorn values. - if ssl_version: - args.extend(["--ssl-version", int(ssl_version)]) - if ssl_cert_reqs: - args.extend(["--ssl-cert-reqs", int(ssl_cert_reqs)]) - if ssl_ciphers: - args.extend(["--ssl-ciphers", ssl_ciphers]) - - if grpc: - watchers.append( - Watcher( - name="grpc_dev_api_server", - cmd=sys.executable, - args=[ - "-m", - SCRIPT_GRPC_DEV_API_SERVER, - bento_identifier, - "--bind", - "fd://$(circus.sockets._bento_api_server)", - "--working-dir", - working_dir, - "--port", - str(port), - ], - copy_env=True, - stop_children=True, - use_sockets=True, - working_dir=working_dir, - # we don't want to close stdin for child process in case user use debugger. - # See https://circus.readthedocs.io/en/latest/for-ops/configuration/ - close_child_stdin=False, - ) - ) - else: - watchers.append( - Watcher( - name="dev_api_server", - cmd=sys.executable, - args=[ - "-m", - SCRIPT_DEV_API_SERVER, - bento_identifier, - "--bind", - "fd://$(circus.sockets._bento_api_server)", - "--working-dir", - working_dir, - "--prometheus-dir", - prometheus_dir, - ], - copy_env=True, - stop_children=True, - use_sockets=True, - working_dir=working_dir, - # we don't want to close stdin for child process in case user use debugger. 
- # See https://circus.readthedocs.io/en/latest/for-ops/configuration/ - close_child_stdin=False, - ) - ) - plugins = [] if reload: if psutil.WINDOWS: @@ -493,68 +477,35 @@ def serve_production( else: raise NotImplementedError("Unsupported platform: {}".format(sys.platform)) - logger.debug("Runner map: %s", runner_bind_map) - - args: list[str | int] = [ - "-m", - SCRIPT_API_SERVER, - bento_identifier, - "--bind", - "fd://$(circus.sockets._bento_api_server)", - "--runner-map", - json.dumps(runner_bind_map), - "--working-dir", - working_dir, - "--backlog", - f"{backlog}", - "--worker-id", - "$(CIRCUS.WID)", - "--prometheus-dir", - prometheus_dir, - ] - - # Add optional SSL args if they exist - if ssl_certfile: - args.extend(["--ssl-certfile", str(ssl_certfile)]) - if ssl_keyfile: - args.extend(["--ssl-keyfile", str(ssl_keyfile)]) - if ssl_keyfile_password: - args.extend(["--ssl-keyfile-password", ssl_keyfile_password]) - if ssl_ca_certs: - args.extend(["--ssl-ca-certs", str(ssl_ca_certs)]) - - # match with default uvicorn values. - if ssl_version: - args.extend(["--ssl-version", int(ssl_version)]) - if ssl_cert_reqs: - args.extend(["--ssl-cert-reqs", int(ssl_cert_reqs)]) - if ssl_ciphers: - args.extend(["--ssl-ciphers", ssl_ciphers]) + logger.debug(f"Runner map: {runner_bind_map}") if grpc: - watchers.append( - create_watcher( - name="grpc_api_server", - args=[ - "-m", - SCRIPT_GRPC_API_SERVER, - bento_identifier, - "--bind", - f"tcp://{host}:{port}", - "--runner-map", - json.dumps(runner_bind_map), - "--working-dir", - working_dir, - "--worker-id", - "$(CIRCUS.WID)", - "--prometheus-dir", - prometheus_dir, - ], - use_sockets=False, - working_dir=working_dir, - numprocesses=api_workers or math.ceil(CpuResource.from_system()), + with contextlib.ExitStack() as port_stack: + api_port = port_stack.enter_context(enable_so_reuseport(host, port)) + + watchers.append( + create_watcher( + name="grpc_api_server", + args=[ + "-m", + SCRIPT_GRPC_API_SERVER, + bento_identifier, + "--bind", + f"tcp://{host}:{api_port}", + "--runner-map", + json.dumps(runner_bind_map), + "--working-dir", + working_dir, + "--worker-id", + "$(CIRCUS.WID)", + "--prometheus-dir", + prometheus_dir, + ], + use_sockets=False, + working_dir=working_dir, + numprocesses=api_workers or math.ceil(CpuResource.from_system()), + ) ) - ) log_grpcui_message(port) @@ -621,6 +572,15 @@ def serve_production( "$(CIRCUS.WID)", "--prometheus-dir", prometheus_dir, + *ssl_args( + ssl_certfile=ssl_certfile, + ssl_keyfile=ssl_keyfile, + ssl_keyfile_password=ssl_keyfile_password, + ssl_version=ssl_version, + ssl_cert_reqs=ssl_cert_reqs, + ssl_ca_certs=ssl_ca_certs, + ssl_ciphers=ssl_ciphers, + ), ], working_dir=working_dir, numprocesses=api_workers or math.ceil(CpuResource.from_system()), diff --git a/bentoml_cli/utils.py b/bentoml_cli/utils.py index baa826eee43..3efb38f2efd 100644 --- a/bentoml_cli/utils.py +++ b/bentoml_cli/utils.py @@ -287,7 +287,6 @@ def unparse_click_params( logger.warning( f"{command_params} is a prompt, skip parsing it for now." 
) - pass if command_param.is_flag: args.append(command_param.opts[-1]) else: diff --git a/pyproject.toml b/pyproject.toml index df87afcbd3a..37daaefc05a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,21 +1,164 @@ [build-system] requires = [ - # sync with setup.cfg until we discard non-pep-517/518 "grpcio-tools==1.47.0", "mypy-protobuf==3.2.0", "protobuf==3.19.4", - "setuptools>=59.0", - "setuptools-scm[toml]>=6.2.3", + "setuptools==63.4.0", + "setuptools-scm[toml]>=7.0.0", "wheel", ] build-backend = "setuptools.build_meta" +[project] +name = "bentoml" +description = "BentoML: The Unified Model Serving Framework" +readme = "README.md" +requires-python = ">=3.7" +keywords = ["MLOps", "AI", "BentoML", "Model Serving", "Model Deployment"] +license = { file = "LICENSE" } +authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries", +] +dependencies = [ + "Jinja2>=3.0.1", + "PyYAML>=5.0", + "aiohttp", + "attrs>=21.1.0", + "cattrs>=22.1.0", + "certifi", + "circus", + # minimum Click support for python 3.7 + "click>=8.1.0", + "cloudpickle", + "deepmerge", + "fs", + "numpy", + "opentelemetry-api==1.9.0", + "opentelemetry-instrumentation==0.28b0", + "opentelemetry-instrumentation-aiohttp-client==0.28b0", + "opentelemetry-instrumentation-asgi==0.28b0", + "opentelemetry-sdk==1.9.0", + "opentelemetry-semantic-conventions==0.28b0", + "opentelemetry-util-http==0.28b0", + "packaging>=20.0", + "pathspec", + "pip-tools>=6.6.2", + "prometheus-client>=0.10.0,<0.14.0", + "psutil", + "pynvml<12", + "python-dateutil", + "python-dotenv>=0.20.0", + "python-multipart", + "requests", + "rich>=11.2.0", + "schema", + "simple-di>=0.1.4", + "starlette", + "uvicorn", + "watchfiles>=0.15.0", + 'backports.cached-property;python_version<"3.8"', + 'importlib-metadata;python_version<"3.8"', +] +dynamic = ["version"] + +[project.scripts] +bentoml = "bentoml_cli.cli:cli" + +[project.urls] +"Documentation" = "https://docs.bentoml.org/en/latest/" +"Bug Reports" = "https://github.com/bentoml/BentoML/issues" +"BentoML Community Slack" = "https://bit.ly/2N5IpbB" +"BentoML Official Blog" = "https://modelserving.com" +"BentoML Twitter" = "https://twitter.com/bentomlai" + +[project.optional-dependencies] +grpc = [ + # Restrict maximum version due to breaking protobuf 4.21.0 changes + # (see https://github.com/protocolbuffers/protobuf/issues/10051) + "protobuf>=3.5.0, <3.20", + # There is memory leak in later Python GRPC (1.43.0 to be specific), + # use known working version until the memory leak is resolved in the future + # (see https://github.com/grpc/grpc/issues/28513) + # TODO: lock 1.41.0 for non M1 builds, otherwise use latest + "grpcio", + "grpcio-health-checking", + "grpcio-reflection", + "opentelemetry-instrumentation-grpc==0.28b0", +] +tracing = ["opentelemetry-exporter-jaeger", "opentelemetry-exporter-zipkin"] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = [ + "bentoml", + "bentoml.grpc*", + "bentoml.testing", + 
"bentoml._internal*", + "bentoml_cli", + "bentoml_cli.server", +] +exclude = ["tests"] + +[tool.setuptools.package-data] +"*" = ["py.typed", "*.pyi", "*_pb2*.py", "*.j2"] +"bentoml._internal.configuration" = ["*.yaml", "*.py"] + [tool.setuptools_scm] write_to = "bentoml/_version.py" git_describe_command = "git describe --dirty --tags --long --first-parent" version_scheme = "post-release" fallback_version = "0.0.0" +[tool.coverage.paths] +source = ["bentoml"] + +[tool.coverage.run] +branch = true +source = ["bentoml", "bentoml_cli"] +omit = [ + "bentoml/**/*_pb2.py", + "bentoml/__main__.py", + "bentoml/_internal/types.py", + "bentoml/_internal/external_typing/*", + "bentoml/testing/*", + "bentoml/io.py", +] + +[tool.coverage.report] +show_missing = true +precision = 2 +omit = [ + "bentoml/**/*_pb2*.py", + "bentoml/_internal/external_typing/*", + "bentoml/_internal/types.py", + "bentoml/testing/*", + 'bentoml/__main__.py', + "bentoml/io.py", +] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "raise MissingDependencyException", + "except ImportError", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + [tool.black] line-length = 88 exclude = ''' @@ -38,81 +181,130 @@ extend-exclude = "(_pb2.py$|_pb2_grpc.py$)" [tool.pytest.ini_options] addopts = "-rfEX -p pytester -p no:warnings -x" -python_files = ["test_*.py","*_test.py"] +python_files = ["test_*.py", "*_test.py"] testpaths = ["tests"] markers = ["gpus"] -[tool.pylint.master] -ignore=".ipynb_checkpoints,typings,bentoml/_internal/external_typing,bentoml/grpc" -ignore-paths=".*.pyi" -unsafe-load-any-extension="no" -extension-pkg-whitelist="numpy,tensorflow,torch,paddle,keras,pydantic" -jobs=4 -persistent="yes" -suggestion-mode="yes" -max-line-length=88 - -[tool.pylint.messages_control] -disable="import-error,print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,raw-checker-failed,bad-inline-option,locally-disabled,file-ignored,suppressed-message,useless-suppression,deprecated-pragma,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,div-method,idiv-method,rdiv-method,exception-message-attribute,invalid-str-codec,sys-max-int,bad-python3-import,deprecated-string-function,deprecated-str-translate-call,deprecated-itertools-function,deprecated-types-field,next-method-defined,dict-items-not-iterating,dict-keys-not-iterating,dict-values-not-iterating,deprecated-operator-function,deprecated-urllib-function,xreadlines-attribute,deprecated-sys-function,exception-escape,comprehension-escape,logging-fstring-interpolation,logging-format-interpolation,logging-not-lazy,C,R,fixme,protected-access,no-member,unsubscriptable-object,raise-missing-from,isinstance-second-argument-not-valid-type" -enable="c-extension-no-member" - -[tool.pylint.reports] -evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)" 
-msg-template="{msg_id}:{symbol} [{line:0>3d}:{column:0>2d}] {obj}: {msg}" -output-format="colorized" -reports="no" -score="yes" +[tool.pylint.main] +recursive = true +extension-pkg-allow-list = [ + "numpy", + "tensorflow", + "torch", + "paddle", + "onnxruntime", + "onnx", + "pydantic.schema", +] +ignore-paths = ["typings", "bentoml/_internal/external_typing", "bentoml/grpc"] +disable = [ + "import-error", + "print-statement", + "parameter-unpacking", + "unpacking-in-except", + "old-raise-syntax", + "backtick", + "raw-checker-failed", + "bad-inline-option", + "locally-disabled", + "file-ignored", + "suppressed-message", + "useless-suppression", + "deprecated-pragma", + "apply-builtin", + "basestring-builtin", + "buffer-builtin", + "cmp-builtin", + "coerce-builtin", + "execfile-builtin", + "file-builtin", + "long-builtin", + "raw_input-builtin", + "reduce-builtin", + "standarderror-builtin", + "coerce-method", + "delslice-method", + "getslice-method", + "setslice-method", + "no-absolute-import", + "old-division", + "dict-iter-method", + "dict-view-method", + "next-method-called", + "metaclass-assignment", + "indexing-exception", + "raising-string", + "reload-builtin", + "oct-method", + "hex-method", + "nonzero-method", + "cmp-method", + "input-builtin", + "round-builtin", + "intern-builtin", + "unichr-builtin", + "map-builtin-not-iterating", + "zip-builtin-not-iterating", + "range-builtin-not-iterating", + "filter-builtin-not-iterating", + "using-cmp-argument", + "exception-message-attribute", + "invalid-str-codec", + "sys-max-int", + "bad-python3-import", + "deprecated-string-function", + "deprecated-str-translate-call", + "deprecated-itertools-function", + "deprecated-types-field", + "next-method-defined", + "dict-items-not-iterating", + "dict-keys-not-iterating", + "dict-values-not-iterating", + "deprecated-operator-function", + "deprecated-urllib-function", + "xreadlines-attribute", + "deprecated-sys-function", + "exception-escape", + "comprehension-escape", + "logging-fstring-interpolation", + "logging-format-interpolation", + "logging-not-lazy", + "C", + "R", + "fixme", + "protected-access", + "no-member", + "unsubscriptable-object", + "raise-missing-from", + "isinstance-second-argument-not-valid-type", +] +enable = ["c-extension-no-member"] +evaluation = "10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)" +msg-template = "{msg_id}:{symbol} [{line:0>3d}:{column:0>2d}] {obj}: {msg}" +output-format = "colorized" +score = true -[tool.pylint.refactoring] -max-nested-blocks=5 -never-returning-functions="optparse.Values,sys.exit" +[tool.pylint.classes] +valid-metaclass-classmethod-first-arg = ["cls", "mcls", "kls"] -[tool.pylint.miscellaneous] -notes="FIXME,XXX,TODO,NOTE" +[tool.pylint.logging] +logging-format-style = "new" # using f-string formatter for logging. 
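
Since the packaging metadata and tool settings now all live in `pyproject.toml` as structured TOML rather than one-line INI strings, the migration is easy to sanity-check mechanically. A minimal sketch, run from the repo root; `tomllib` is stdlib from Python 3.11, substitute `tomli` on older interpreters:

```python
import tomllib  # Python 3.11+; on older interpreters: `import tomli as tomllib`

with open("pyproject.toml", "rb") as f:
    cfg = tomllib.load(f)

project = cfg["project"]
print(project["name"], project["requires-python"])  # bentoml >=3.7
print(project["scripts"]["bentoml"])                # bentoml_cli.cli:cli

pylint_main = cfg["tool"]["pylint"]["main"]
print(pylint_main["output-format"])                 # colorized
print(len(pylint_main["disable"]), "pylint messages disabled")
```
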
-[tool.pylint.typecheck] -ignored-classes="Namespace" -contextmanager-decorators="contextlib.contextmanager" +[tool.pylint.miscellaneous] +notes = ["FIXME", "XXX", "TODO", "NOTE", "WARNING"] -[tool.pylint.format] -indent-after-paren=4 -indent-string=' ' -max-line-length=100 -max-module-lines=1000 -single-line-class-stmt="no" -single-line-if-stmt="no" +[tool.pylint.refactoring] +never-returning-functions = [ + "sys.exit", + "raise_grpc_exception", +] # specify functions that should not return -[tool.pylint.imports] -allow-wildcard-with-all="no" -analyse-fallback-blocks="no" +[tool.pylint.spelling] +spelling-ignore-comment-directives = "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:,pylint:,type:" -[tool.pylint.classes] -defining-attr-methods="__init__,__new__,setUp" -exclude-protected="_asdict,_fields,_replace,_source,_make" -valid-classmethod-first-arg="cls" -valid-metaclass-classmethod-first-arg="mcs" - -[tool.pylint.design] -# Maximum number of arguments for function / method -max-args=5 -# Maximum number of attributes for a class (see R0902). -max-attributes=7 -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 -# Maximum number of branch for function / method body -max-branches=12 -# Maximum number of locals for function / method body -max-locals=15 -# Maximum number of parents for a class (see R0901). -max-parents=7 -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 -# Maximum number of return / yield for function / method body -max-returns=6 -# Maximum number of statements in function / method body -max-statements=50 -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 +[tool.pylint.variables] +allowed-redefined-builtins = ["create_bento_servicer.BentoServiceServicer.Call"] +init-import = true [tool.isort] profile = "black" @@ -121,12 +313,19 @@ length_sort = true force_single_line = true order_by_type = true force_alphabetical_sort_within_sections = true -skip_glob = ["typings/*","test/*", "**/*_pb2.py", "**/*_pb2_grpc.py"] +skip_glob = ["typings/*", "test/*", "**/*_pb2.py", "**/*_pb2_grpc.py"] [tool.pyright] pythonVersion = "3.10" include = ["bentoml"] -exclude = ['bentoml/_version.py','bentoml/__main__.py','bentoml/_internal/external_typing','bentoml/grpc', '**/*_pb2.py', "**/*_pb2_grpc.py"] +exclude = [ + 'bentoml/_version.py', + 'bentoml/__main__.py', + 'bentoml/_internal/external_typing', + 'bentoml/grpc', + '**/*_pb2.py', + "**/*_pb2_grpc.py", +] analysis.useLibraryCodeForTypes = true strictListInference = true strictDictionaryInference = true diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt index cf6a6b9b4f0..6a6a481f781 100644 --- a/requirements/dev-requirements.txt +++ b/requirements/dev-requirements.txt @@ -1,5 +1,4 @@ # Dev dependencies -r tests-requirements.txt -setup-cfg-fmt twine wheel diff --git a/requirements/docs-requirements.txt b/requirements/docs-requirements.txt index c6a7d734ac9..cad2d878530 100644 --- a/requirements/docs-requirements.txt +++ b/requirements/docs-requirements.txt @@ -1,7 +1,8 @@ # Docs dependencies -r tests-requirements.txt mypy-protobuf -setuptools>=62.0.0 +# locking according to pyproject.toml +setuptools==63.4.0 sphinx==4.5.0 myst-parser sphinx-click>=3.0.2 diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh index 63e77177bf4..12c1892c222 100755 --- a/scripts/ci/run_tests.sh +++ b/scripts/ci/run_tests.sh @@ -6,7 +6,6 @@ # pip install -e . 
# pip install requirements/tests-requirements.txt - fname=$(basename "$0") dname=$(dirname "$0") @@ -26,32 +25,31 @@ SKIP_DEPS=0 cd "$GIT_ROOT" || exit run_yq() { - need_cmd yq - yq "$@"; + need_cmd yq + yq "$@" } -getval(){ - run_yq eval "$@" "$CONFIG_FILE"; +getval() { + run_yq eval "$@" "$CONFIG_FILE" } validate_yaml() { - # validate YAML file - if ! [ -f "$CONFIG_FILE" ]; then - FAIL "$CONFIG_FILE does not exists" - exit 1 - fi - - if ! (run_yq e --exit-status 'tag == "!!map" or tag== "!!seq"' "$CONFIG_FILE"> /dev/null); then - FAIL "Invalid YAML file" - exit 1 - fi + # validate YAML file + if ! [ -f "$CONFIG_FILE" ]; then + FAIL "$CONFIG_FILE does not exists" + exit 1 + fi + + if ! (run_yq e --exit-status 'tag == "!!map" or tag== "!!seq"' "$CONFIG_FILE" >/dev/null); then + FAIL "Invalid YAML file" + exit 1 + fi } - usage() { - need_cmd cat + need_cmd cat - cat <"$REQ_FILE" || exit + target=$@ + test_dir= + is_dir= + override_name_or_path= + external_scripts= + type_tests= + + test_dir=$(getval ".$target.root_test_dir") + is_dir=$(getval ".$target.is_dir") + override_name_or_path=$(getval ".$target.override_name_or_path") + external_scripts=$(getval ".$target.external_scripts") + type_tests=$(getval ".$target.type_tests") + + # processing file name + if [[ "$override_name_or_path" != "" ]]; then + fname="$override_name_or_path" + elif [[ "$is_dir" == "false" ]]; then + fname="test_""$target""_impl.py" + elif [[ "$is_dir" == "true" ]]; then + fname="" + shift + else + fname="$target" + fi + + # processing dependencies + run_yq eval '.'"$target"'.dependencies[]' "$CONFIG_FILE" >"$REQ_FILE" || exit } install_yq() { - set -ex - target_dir="$HOME/.local/bin" - - mkdir -p "$target_dir" - export PATH=$target_dir:$PATH - - YQ_VERSION=4.16.1 - echo "Trying to install yq..." - shell=$(uname | tr '[:upper:]' '[:lower:]') - extensions=".tar.gz" - if [[ "$shell" =~ "mingw64" ]]; then - shell="windows" - extensions=".zip" - fi - - YQ_BINARY=yq_"$shell"_amd64 - YQ_EXTRACT="./$YQ_BINARY" - if [[ "$shell" == "windows" ]]; then - YQ_EXTRACT="$YQ_BINARY.exe" - fi - curl -fsSLO https://github.com/mikefarah/yq/releases/download/v"$YQ_VERSION"/"$YQ_BINARY""$extensions" - echo "tar $YQ_BINARY$extensions and move to /usr/bin/yq..." - if [[ $(uname | tr '[:upper:]' '[:lower:]') =~ "mingw64" ]]; then - unzip -qq "$YQ_BINARY$extensions" -d yq_dir && cd yq_dir - mv "$YQ_EXTRACT" "$target_dir"/yq && cd .. - rm -rf yq_dir - else - tar -zvxf "$YQ_BINARY$extensions" "$YQ_EXTRACT" && mv "$YQ_EXTRACT" "$target_dir"/yq - fi - rm -f ./"$YQ_BINARY""$extensions" + set -ex + target_dir="$HOME/.local/bin" + + mkdir -p "$target_dir" + export PATH=$target_dir:$PATH + + YQ_VERSION=4.16.1 + echo "Trying to install yq..." + shell=$(uname | tr '[:upper:]' '[:lower:]') + extensions=".tar.gz" + if [[ "$shell" =~ "mingw64" ]]; then + shell="windows" + extensions=".zip" + fi + + YQ_BINARY=yq_"$shell"_amd64 + YQ_EXTRACT="./$YQ_BINARY" + if [[ "$shell" == "windows" ]]; then + YQ_EXTRACT="$YQ_BINARY.exe" + fi + curl -fsSLO https://github.com/mikefarah/yq/releases/download/v"$YQ_VERSION"/"$YQ_BINARY""$extensions" + echo "tar $YQ_BINARY$extensions and move to /usr/bin/yq..." + if [[ $(uname | tr '[:upper:]' '[:lower:]') =~ "mingw64" ]]; then + unzip -qq "$YQ_BINARY$extensions" -d yq_dir && cd yq_dir + mv "$YQ_EXTRACT" "$target_dir"/yq && cd .. 
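
For readers who do not speak `yq`: `parse_config` above pulls per-target settings (`root_test_dir`, `is_dir`, `override_name_or_path`, `external_scripts`, `type_tests`, `dependencies`) out of the test-matrix YAML. A rough Python equivalent of `getval`, assuming PyYAML and the same config layout; the file path and the `sklearn` target are illustrative only, since `$CONFIG_FILE` is resolved elsewhere in the script:

```python
import yaml  # PyYAML, standing in for what the script does with yq

def getval(config: dict, target: str, key: str):
    # Python twin of `run_yq eval ".$target.$key" "$CONFIG_FILE"`
    return (config.get(target) or {}).get(key)

with open("scripts/ci/config.yml") as f:  # hypothetical $CONFIG_FILE location
    cfg = yaml.safe_load(f)

for key in ("root_test_dir", "is_dir", "override_name_or_path", "type_tests"):
    print(key, "=", getval(cfg, "sklearn", key))
```
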
+ rm -rf yq_dir + else + tar -zvxf "$YQ_BINARY$extensions" "$YQ_EXTRACT" && mv "$YQ_EXTRACT" "$target_dir"/yq + fi + rm -f ./"$YQ_BINARY""$extensions" } - main() { - parse_args "$@" - - need_cmd make - need_cmd curl - need_cmd tr - (need_cmd yq && echo "Using yq via $(which yq)...";) || install_yq - - pip install -U "setuptools<60" - - - for args in "$@"; do - if [[ "$args" != "-"* ]]; then - argv="$args" - break; - else - shift; - fi - done - - # validate_yaml - parse_config "$argv" - - OPTS=(--cov=bentoml --cov-config="$GIT_ROOT"/setup.cfg --cov-report=xml:"$target.xml" --cov-report=term-missing -x) - - if [ -n "${PYTESTARGS[*]}" ]; then - # shellcheck disable=SC2206 - OPTS=( ${OPTS[@]} ${PYTESTARGS[@]} ) - fi - - if [ "$fname" == "test_frameworks.py" ]; then - OPTS=( "--framework" "$target" ${OPTS[@]} ) - fi - - if [ "$SKIP_DEPS" -eq 0 ]; then - # setup tests environment - if [ -f "$REQ_FILE" ]; then - pip install -r "$REQ_FILE" || exit 1 - fi - fi - - if [ -n "$external_scripts" ]; then - eval "$external_scripts" || exit 1 - fi - - if [ "$type_tests" == 'e2e' ]; then - cd "$GIT_ROOT"/"$test_dir"/"$fname" || exit 1 - path="." - else - path="$GIT_ROOT"/"$test_dir"/"$fname" - fi - - # run pytest - python -m pytest "$path" "${OPTS[@]}" || ERR=1 - - # Return non-zero if pytest failed - if ! test $ERR = 0; then - FAIL "$args $type_tests tests failed!" - exit 1 - fi - - PASS "$args $type_tests tests passed!" + parse_args "$@" + + need_cmd make + need_cmd curl + need_cmd tr + (need_cmd yq && echo "Using yq via $(which yq)...") || install_yq + + pip install -U "setuptools==63.4.0" + + for args in "$@"; do + if [[ "$args" != "-"* ]]; then + argv="$args" + break + else + shift + fi + done + + # validate_yaml + parse_config "$argv" + + OPTS=(--cov=bentoml --cov-config="$GIT_ROOT"/pyproject.toml --cov-report=xml:"$target.xml" --cov-report=term-missing -x --capture=tee-sys) + + if [ -n "${PYTESTARGS[*]}" ]; then + # shellcheck disable=SC2206 + OPTS=(${OPTS[@]} ${PYTESTARGS[@]}) + fi + + if [ "$fname" == "test_frameworks.py" ]; then + OPTS=("--framework" "$target" ${OPTS[@]}) + fi + + if [ "$SKIP_DEPS" -eq 0 ]; then + # setup tests environment + if [ -f "$REQ_FILE" ]; then + pip install -r "$REQ_FILE" || exit 1 + fi + fi + + if [ -n "$external_scripts" ]; then + eval "$external_scripts" || exit 1 + fi + + if [ "$type_tests" == 'e2e' ]; then + cd "$GIT_ROOT"/"$test_dir"/"$fname" || exit 1 + path="." + else + path="$GIT_ROOT"/"$test_dir"/"$fname" + fi + + # run pytest + python -m pytest "$path" "${OPTS[@]}" || ERR=1 + + # Return non-zero if pytest failed + if ! test $ERR = 0; then + FAIL "$args $type_tests tests failed!" + exit 1 + fi + + PASS "$args $type_tests tests passed!" } main "$@" || exit 1 diff --git a/scripts/ci/style/proto_check.sh b/scripts/ci/style/proto_check.sh deleted file mode 100755 index 40cd1729bb0..00000000000 --- a/scripts/ci/style/proto_check.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -GIT_ROOT=$(git rev-parse --show-toplevel) - -cd "$GIT_ROOT" || exit 1 - -source ./scripts/ci/helpers.sh - -set_on_failed_callback "[FAIL] proto check failed" - -echo "Running proto format check..." - -if ! (git diff --name-only --diff-filter=d "origin/$GITHUB_BASE_REF" -z -- '*.proto' | xargs -0 --no-run-if-empty buf format --config "./bentoml/grpc/buf.yaml" -d --exit-code); then - FAIL "proto format check failed" - echo "Format incorrectly. Make sure to run \`make format\`" - exit 1 -fi - -PASS "proto format check passed!" - -echo "Running proto lint check..." - -if ! 
(git diff --name-only --diff-filter=d "origin/$GITHUB_BASE_REF" -z -- '*.proto' | xargs -0 --no-run-if-empty buf lint --config "./bentoml/grpc/buf.yaml" --error-format msvs); then - FAIL "proto lint check failed" - echo "Lint error. Make sure to run \`make lint\`" - exit 1 -fi - -PASS "proto lint check passed!" - -PASS "proto check passed!" -exit 0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2d14b9c9f9b..00000000000 --- a/setup.cfg +++ /dev/null @@ -1,162 +0,0 @@ -[metadata] -name = bentoml -version = attr: bentoml.__version__ -description = Bentoml: The Unified Model Serving Framework -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/bentoml/BentoML -author = BentoML Team -author_email = contact@bentoml.ai -license = Apache-2.0 -license_files = LICENSE -classifiers = - License :: OSI Approved :: Apache Software License - Operating System :: OS Independent - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Topic :: Scientific/Engineering :: Artificial Intelligence - Topic :: Software Development :: Libraries -keywords = MLOps, AI, BentoML, Model Serving, Model Deployment -project_urls = - Documentation = https://docs.bentoml.org/en/latest/ - Bug Reports = https://github.com/bentoml/BentoML/issues - BentoML Community Slack = https://bit.ly/2N5IpbB - BentoML Official Blog = https://modelserving.com - BentoML Twitter = https://twitter.com/bentomlai - -[options] -packages = find_namespace: -install_requires = - Jinja2>=3.0.1 - PyYAML>=5.0 - aiohttp - attrs>=21.1.0 - cattrs>=22.1.0 - certifi - circus - click>=7.0 - cloudpickle - deepmerge - fs - numpy - opentelemetry-api==1.9.0 - opentelemetry-instrumentation==0.28b0 - opentelemetry-instrumentation-aiohttp-client==0.28b0 - opentelemetry-instrumentation-asgi==0.28b0 - opentelemetry-sdk==1.9.0 - opentelemetry-semantic-conventions==0.28b0 - opentelemetry-util-http==0.28b0 - packaging>=20.0 - pathspec - pip-tools>=6.6.2 - prometheus-client>=0.10.0,<0.14.0 - psutil - pynvml<12 - python-dateutil - python-dotenv>=0.20.0 - python-multipart - requests - rich>=11.2.0 - schema - simple-di>=0.1.4 - starlette - uvicorn - watchfiles>=0.15.0 - backports.cached-property;python_version<"3.8" - importlib-metadata;python_version<"3.8" -python_requires = >=3.7 -include_package_data = True -setup_requires = - grpcio-tools==1.47.0 - mypy-protobuf==3.2.0 # generating stubs for type checking - protobuf==3.19.4 - setuptools>=59.0 - setuptools-scm>=6.2.3 - wheel -zip_safe = False - -[options.packages.find] -include = - # include bentoml packages - bentoml - bentoml.grpc* - bentoml.testing - bentoml._internal* - # include bentoml_cli packages - bentoml_cli - bentoml_cli.server - -[options.entry_points] -console_scripts = - bentoml = bentoml_cli.cli:cli - -[options.extras_require] -grpc = - # Restrict maximum version due to breaking protobuf 4.21.0 changes - # (see https://github.com/protocolbuffers/protobuf/issues/10051) - protobuf>=3.5.0, <3.20 - # There is memory leak in later Python GRPC (1.43.0 to be specific), - # use known working version until the memory leak is resolved in the future - # (see https://github.com/grpc/grpc/issues/28513) - # TODO: lock 1.41.0 for non M1 builds, otherwise use latest - grpcio - grpcio-health-checking - 
grpcio-reflection - grpcio-status - opentelemetry-instrumentation-grpc==0.28b0 -tracing = - opentelemetry-exporter-jaeger - opentelemetry-exporter-zipkin - -[options.package_data] -* = *._pb2*.py, *.pyi -bentoml = - py.typed - bentoml/* -bentoml_cli = - py.typed - bentoml_cli/* - -[global] -quiet = true - -[bdist_wheel] -universal = false -keep_temp = false - -[sdist] -formats = gztar - -[coverage:run] -omit = - bentoml/**/*_pb2.py - bentoml/__main__.py - bentoml/_internal/types.py - bentoml/_internal/external_typing/* - bentoml/testing/* - bentoml/io.py - -[coverage:report] -show_missing = true -precision = 2 -omit = - bentoml/**/*_pb2*.py - bentoml/_internal/external_typing/* - bentoml/_internal/types.py - bentoml/testing/* - bentoml/__main__.py - bentoml/io.py -exclude_lines = - pragma: no cover - def __repr__ - raise AssertionError - raise NotImplementedError - raise MissingDependencyException - except ImportError - if __name__ == .__main__.: - if TYPE_CHECKING: diff --git a/tests/unit/_internal/io/test_numpy.py b/tests/unit/_internal/io/test_numpy.py index 811891af763..48055ef5429 100644 --- a/tests/unit/_internal/io/test_numpy.py +++ b/tests/unit/_internal/io/test_numpy.py @@ -32,7 +32,7 @@ def test_invalid_dtype(): generic = ExampleGeneric("asdf") with pytest.raises(BentoMLException) as e: _ = NumpyNdarray.from_sample(generic) # type: ignore (test exception) - assert "expects a numpy.array" in str(e.value) + assert "expects a 'numpy.array'" in str(e.value) @pytest.mark.parametrize("dtype, expected", [("float", "number"), (">U8", "integer")]) @@ -83,21 +83,24 @@ def test_numpy_openapi_responses(): def test_verify_numpy_ndarray(caplog: LogCaptureFixture): partial_check = partial( - from_example._verify_ndarray, exception_cls=BentoMLException # type: ignore (test internal check) + from_example.validate_array, + dtype=from_example._dtype, + shape=from_example._shape, + exception_cls=BentoMLException, ) with pytest.raises(BentoMLException) as ex: partial_check(np.array(["asdf"])) - assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) # type: ignore (testing message) + assert f'Expecting ndarray of dtype "{from_example._dtype}"' in str(ex.value) with pytest.raises(BentoMLException) as e: partial_check(np.array([[1]])) - assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) # type: ignore (testing message) + assert f'Expecting ndarray of shape "{from_example._shape}"' in str(e.value) - # test cases whwere reshape is failed + # test cases where reshape is failed example = NumpyNdarray.from_sample(np.ones((2, 2, 3))) - example._enforce_shape = False # type: ignore (test internal check) - example._enforce_dtype = False # type: ignore (test internal check) + example._enforce_shape = False + example._enforce_dtype = False with caplog.at_level(logging.DEBUG): - example._verify_ndarray(np.array("asdf")) + example.validate_array(np.array("asdf"), shape=(2, 2, 3)) assert "Failed to reshape" in caplog.text
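
The updated tests call the public `validate_array` with explicit `dtype`/`shape` keywords where they previously reached into the private `_verify_ndarray`, so the `# type: ignore` pragmas can go. A minimal stand-in showing the contract the assertions above rely on; the real method lives on `NumpyNdarray`, and this standalone version is an assumption for illustration only:

```python
import functools
import numpy as np

def validate_array(arr: np.ndarray, *, dtype=None, shape=None, exception_cls=ValueError):
    # Raise with the same messages the tests assert on.
    if dtype is not None and arr.dtype != np.dtype(dtype):
        raise exception_cls(f'Expecting ndarray of dtype "{np.dtype(dtype)}"')
    if shape is not None and arr.shape != tuple(shape):
        raise exception_cls(f'Expecting ndarray of shape "{tuple(shape)}"')
    return arr

# partial() binds the expected dtype/shape once, as the test does:
partial_check = functools.partial(validate_array, dtype="float64", shape=(1,))
partial_check(np.array([1.0]))       # passes
# partial_check(np.array(["asdf"]))  # raises: Expecting ndarray of dtype "float64"
```
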