feature(frameworks): bentoml.onnx runner accept kwargs #3561

Status: Open · wants to merge 3 commits into base: main

Changes from all commits
3 changes: 2 additions & 1 deletion .github/workflows/frameworks.yml
@@ -96,6 +96,7 @@ jobs:
       - *related
       - src/bentoml/onnx.py
       - src/bentoml/_internal/frameworks/onnx.py
+      - src/bentoml/_internal/frameworks/utils/onnx.py
       - tests/integration/frameworks/models/onnx.py
     picklable_model:
       - *related
@@ -498,7 +499,7 @@ jobs:
       - name: Install dependencies
         run: |
           pip install .
-          pip install onnx onnxruntime skl2onnx
+          pip install onnx onnxruntime skl2onnx transformers[onnx] torch
           pip install -r requirements/tests-requirements.txt

       - name: Run tests and generate coverage report
48 changes: 35 additions & 13 deletions src/bentoml/_internal/frameworks/onnx.py
@@ -26,6 +26,7 @@

 from .utils.onnx import ONNXArgType
 from .utils.onnx import ONNXArgCastedType
+from .utils.onnx import ONNXArgCastingFuncType

 ProvidersType = list[str | tuple[str, dict[str, t.Any]]]

@@ -65,7 +66,8 @@ class ONNXOptions(ModelOptions):
     input_specs: dict[str, list[dict[str, t.Any]]] = attr.field(factory=dict)
     output_specs: dict[str, list[dict[str, t.Any]]] = attr.field(factory=dict)
     providers: ProvidersType = attr.field(default=None)
-    session_options: t.Optional["ort.SessionOptions"] = attr.field(default=None)
+    session_options: "ort.SessionOptions" | None = attr.field(default=None)
+    use_kwargs_inputs: bool = attr.field(default=False)


 def get(tag_like: str | Tag) -> bentoml.Model:
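For context, a minimal sketch of how the new option might be enabled from user code — assuming the standard with_options() flow and a hypothetical model tag, neither of which appears in this diff:

    import bentoml

    # Hypothetical tag; assumes a model previously saved with bentoml.onnx.save_model().
    bento_model = bentoml.onnx.get("bert_tiny_classifier:latest")

    # with_options() rebuilds the stored ONNXOptions; with use_kwargs_inputs=True the
    # generated runner method takes named inputs instead of positional ones.
    runner = bento_model.with_options(use_kwargs_inputs=True).to_runner()
    runner.init_local()  # in-process mode, for local experimentation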
@@ -407,7 +409,13 @@ def add_runnable_method(
     output_specs: list[dict[str, t.Any]],
 ):

-    casting_funcs = [gen_input_casting_func(spec) for spec in input_specs]
+    casting_funcs: list[ONNXArgCastingFuncType] = []
+    input_name2casting_func: dict[str, ONNXArgCastingFuncType] = {}
+
+    for spec in input_specs:
+        casting_f: ONNXArgCastingFuncType = gen_input_casting_func(spec)
+        casting_funcs.append(casting_f)
+        input_name2casting_func[spec["name"]] = casting_f

     if len(output_specs) > 1:
@@ -419,17 +427,31 @@ def _process_output(outs):
         def _process_output(outs):
             return outs[0]

-    def _run(self: ONNXRunnable, *args: ONNXArgType) -> t.Any:
-        casted_args = [
-            casting_funcs[idx](args[idx]) for idx in range(len(casting_funcs))
-        ]
-
-        input_names: dict[str, ONNXArgCastedType] = {
-            i.name: val for i, val in zip(self.model.get_inputs(), casted_args)
-        }
-        output_names: list[str] = [o.name for o in self.model.get_outputs()]
-        raw_outs = self.predict_fns[method_name](output_names, input_names)
-        return _process_output(raw_outs)
+    if options.use_kwargs_inputs:
+
+        def _run(self: ONNXRunnable, **kwargs: ONNXArgType) -> t.Any:
Review thread on this line:

@bojiang (Member) commented on Feb 16, 2023:
    Does this mean we cannot call onnx_runner.run(arg1, arg2, kwarg1=xxx), i.e. use args and kwargs at the same time?

(Member) replied:
    No, I don't think so.

(Contributor) replied:
    Yes, it does.

(Member Author) replied:
    Every ONNX model has a fixed list of inputs, each with its own name. Calling by args means referring to each argument by its position, while calling by kwargs means referring to each argument by its name. If we mixed the two styles, it would be very easy for a user to miss some arguments.

    Calling by kwargs is useful for NLP models, where the tokenizer outputs a dictionary that is passed to the model as kwargs. In that use case there are no additional positional arguments to pass.

(Member) replied:
    Do we need more discussion about this?

(Member Author) replied:
    Let's discuss this at the engineering meeting.
+            input_names: dict[str, ONNXArgCastedType] = {
+                name: input_name2casting_func[name](val)
+                for name, val in kwargs.items()
+            }
+            output_names: list[str] = [o.name for o in self.model.get_outputs()]
+            raw_outs = self.predict_fns[method_name](output_names, input_names)
+            return _process_output(raw_outs)
+
+    else:
+
+        def _run(self: ONNXRunnable, *args: ONNXArgType) -> t.Any:
+            casted_args = [
+                casting_funcs[idx](args[idx]) for idx in range(len(casting_funcs))
+            ]
+
+            input_names: dict[str, ONNXArgCastedType] = {
+                i.name: val for i, val in zip(self.model.get_inputs(), casted_args)
+            }
+            output_names: list[str] = [o.name for o in self.model.get_outputs()]
+            raw_outs = self.predict_fns[method_name](output_names, input_names)
+            return _process_output(raw_outs)

     ONNXRunnable.add_method(
         _run,
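To make the two calling conventions discussed in the review thread concrete, a hedged sketch (the runner, tokenizer, and input arrays below are illustrative, not part of this diff):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")

    # Default positional style: arguments are matched to the model's declared
    # inputs by order, mirroring ort.InferenceSession.get_inputs().
    logits = runner.run.run(input_ids, attention_mask, token_type_ids)

    # With use_kwargs_inputs=True: arguments are matched by ONNX input name, so a
    # tokenizer's dict-like output can be splatted straight into the call.
    encoded = tokenizer("This is a sample", return_tensors="np")
    logits = runner.run.run(**encoded)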
93 changes: 92 additions & 1 deletion tests/integration/frameworks/models/onnx.py
@@ -12,6 +12,8 @@
 import torch.nn as nn
 import onnxruntime as ort
 from skl2onnx import convert_sklearn
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
 from sklearn.datasets import load_iris
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.preprocessing import LabelEncoder
@@ -279,4 +281,93 @@ def _check(
     onnx_le_models.append(onnx_le_model)


-models: list[FrameworkTestModel] = [onnx_pytorch_model, onnx_rf_model] + onnx_le_models
+# tiny bert model
+TINY_BERT_MODEL_ID = "prajjwal1/bert-tiny"
+
+
+def make_bert_onnx_model(tmpdir) -> tuple[onnx.ModelProto, t.Any]:
+    model_id = TINY_BERT_MODEL_ID
+    bert_model = AutoModelForSequenceClassification.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    sample_text = "This is a sample"
+    sample_input = tokenizer(sample_text, return_tensors="pt")
+    model_path = os.path.join(tmpdir, "bert-tiny.onnx")
+
+    torch.onnx.export(
+        bert_model,
+        tuple(sample_input.values()),
+        f=model_path,
+        input_names=["input_ids", "attention_mask", "token_type_ids"],
+        output_names=["logits"],
+        dynamic_axes={
+            "input_ids": {0: "batch_size", 1: "sequence"},
+            "attention_mask": {0: "batch_size", 1: "sequence"},
+            "logits": {0: "batch_size", 1: "sequence"},
+        },
+        do_constant_folding=True,
+        opset_version=13,
+    )
+
+    onnx_model = onnx.load(model_path)
+
+    expected_input = tokenizer(sample_text, return_tensors="np")
+    model_output = bert_model(**sample_input)
+    expected_output = model_output.logits.detach().to("cpu").numpy()
+    expected_data = (expected_input, expected_output)
+    return (onnx_model, expected_data)
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    onnx_bert_raw_model, _expected_data = make_bert_onnx_model(tmpdir)
+    bert_input, bert_expected_output = _expected_data
+
+
+def method_caller_kwargs(
+    framework_test_model: FrameworkTestModel,
+    method: str,
+    args: list[t.Any],
+    kwargs: dict[str, t.Any],
+):
+    with tempfile.NamedTemporaryFile() as temp:
+        onnx.save(framework_test_model.model, temp.name)
+        ort_sess = ort.InferenceSession(temp.name, providers=["CPUExecutionProvider"])
+
+    def to_numpy(item):
+        if isinstance(item, np.ndarray):
+            pass
+        elif isinstance(item, torch.Tensor):
+            item = item.detach().to("cpu").numpy()
+        return item
+    # Cast each named input to a numpy array and build the onnxruntime feed dict
+    # (iterating kwargs directly would yield only keys, so use .items()).
+    input_names = {k: to_numpy(v) for k, v in kwargs.items()}
+    output_names = [o.name for o in ort_sess.get_outputs()]
+    out = getattr(ort_sess, method)(output_names, input_names)[0]
+    return out


+onnx_bert_model = FrameworkTestModel(
+    name="onnx_bert_model",
+    model=onnx_bert_raw_model,
+    model_method_caller=method_caller_kwargs,
+    model_signatures={"run": {"batchable": True}},
+    configurations=[
+        Config(
+            test_inputs={
+                "run": [
+                    Input(
+                        input_args=[],
+                        input_kwargs=bert_input,
+                        expected=close_to(bert_expected_output),
+                    ),
+                ],
+            },
+            load_kwargs={"use_kwargs_inputs": True},
+            check_model=check_model,
+        ),
+    ],
+)
+
+models: list[FrameworkTestModel] = (
+    [onnx_pytorch_model, onnx_rf_model] + onnx_le_models + [onnx_bert_model]
+)
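For reference, a minimal illustration of the bare onnxruntime call that both method_caller_kwargs above and the runner's _run wrappers reduce to — the model path and token ids here are dummies:

    import numpy as np
    import onnxruntime as ort

    sess = ort.InferenceSession("bert-tiny.onnx", providers=["CPUExecutionProvider"])

    # onnxruntime takes a list of output names plus a name->array feed dict,
    # which is why kwargs-style inputs map onto it so directly.
    feed = {
        "input_ids": np.array([[101, 102]], dtype=np.int64),
        "attention_mask": np.ones((1, 2), dtype=np.int64),
        "token_type_ids": np.zeros((1, 2), dtype=np.int64),
    }
    (logits,) = sess.run(["logits"], feed)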