Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Python API to load model input examples #5212

Merged
merged 5 commits into from Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions mlflow/models/model.py
Expand Up @@ -68,6 +68,24 @@ def get_input_schema(self):
def get_output_schema(self):
    """
    Return the output schema recorded in the model signature.

    :return: ``self.signature.outputs``, or None if the model has no signature.
    """
    signature = self.signature
    if signature is None:
        return None
    return signature.outputs

def load_input_example(self, path: str):
    """
    Load the input example that was saved alongside a model.

    :param path: Path to the model directory.

    :return: Input example (NumPy ndarray, SciPy csc_matrix, SciPy csr_matrix,
             pandas DataFrame, dict), or None if there is no example metadata
             (i.e. the model was saved without an example).
    :raises FileNotFoundError: If there is model metadata but the example file is missing.
    """
    # Deferred import: the example-parsing libraries (e.g. numpy, pandas, etc.) are only
    # loaded when an example is actually requested.
    from mlflow.models.utils import _read_example

    example = _read_example(self, path)
    return example

def add_flavor(self, name, **params):
"""Add an entry for how to serve the model in a given format."""
self.flavors[name] = params
Expand Down
4 changes: 2 additions & 2 deletions mlflow/models/utils.py
Expand Up @@ -192,8 +192,8 @@ def _save_example(mlflow_model: Model, input_example: ModelInputExample, path: s
def _read_example(mlflow_model: Model, path: str):
"""
Read example from a model directory. Returns None if there is no example metadata (i.e. the
model was saved without example). Raises IO Exception if there is model metadata but the example
file is missing.
model was saved without example). Raises FileNotFoundError if there is model metadata but the
example file is missing.

:param mlflow_model: Model metadata.
:param path: Path to the model directory.
Expand Down
80 changes: 79 additions & 1 deletion tests/models/test_model.py
@@ -1,4 +1,5 @@
import os
import pytest
from datetime import date

import mlflow
Expand All @@ -9,11 +10,12 @@
from mlflow.models import Model
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import _save_example
from mlflow.types.schema import Schema, ColSpec
from mlflow.types.schema import Schema, ColSpec, TensorSpec
from mlflow.utils.file_utils import TempDir
from mlflow.utils.proto_json_utils import _dataframe_from_json

from unittest import mock
from scipy.sparse import csc_matrix


def test_model_save_load():
Expand Down Expand Up @@ -104,6 +106,10 @@ def test_model_log():
assert x.to_dict(orient="records")[0] == input_example
assert not hasattr(loaded_model, "databricks_runtime")

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.to_dict(orient="records")[0] == input_example


def test_model_log_with_databricks_runtime():
dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
Expand Down Expand Up @@ -165,6 +171,78 @@ def test_model_log_with_input_example_succeeds():
input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
assert x.equals(input_example)

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.equals(input_example)
maitre-matt marked this conversation as resolved.
Show resolved Hide resolved


def test_model_load_input_example_numpy():
    """Check that a NumPy input example logged with a model is loaded back as an equal ndarray."""
    with TempDir(chdr=True) as tmp:
        expected = np.array([[3, 4, 5]], dtype=np.int32)
        input_schema = Schema([TensorSpec(type=expected.dtype, shape=expected.shape)])
        output_schema = Schema([ColSpec(name=None, type="double")])
        signature = ModelSignature(inputs=input_schema, outputs=output_schema)

        model_dir, _ = _log_model_with_signature_and_example(tmp, signature, expected)
        model = Model.load(os.path.join(model_dir, "MLmodel"))
        actual = model.load_input_example(model_dir)

        assert isinstance(actual, np.ndarray)
        assert np.array_equal(expected, actual)


def test_model_load_input_example_scipy():
    """
    Check that a SciPy CSC input example logged with a model is loaded back unchanged.

    Besides the stored values, the shape and the dense contents are compared so that a
    round trip which corrupts the sparsity structure is detected as well.
    """
    with TempDir(chdr=True) as tmp:
        input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
        sig = ModelSignature(
            inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        loaded_example = loaded_model.load_input_example(local_path)

        assert isinstance(loaded_example, csc_matrix)
        assert np.array_equal(input_example.data, loaded_example.data)
        # Equal ``data`` alone does not prove equal matrices: the same values could sit at
        # different positions if indices/indptr/shape were altered during the round trip.
        assert loaded_example.shape == input_example.shape
        assert np.array_equal(input_example.toarray(), loaded_example.toarray())


def test_model_load_input_example_failures():
    """
    Check that ``load_input_example`` raises FileNotFoundError for a missing model directory
    and for a model whose example file has been deleted.
    """
    missing_file_error = "No such file or directory"

    with TempDir(chdr=True) as tmp:
        example = np.array([[3, 4, 5]], dtype=np.int32)
        input_schema = Schema([TensorSpec(type=example.dtype, shape=example.shape)])
        output_schema = Schema([ColSpec(name=None, type="double")])
        signature = ModelSignature(inputs=input_schema, outputs=output_schema)

        model_dir, _ = _log_model_with_signature_and_example(tmp, signature, example)
        model = Model.load(os.path.join(model_dir, "MLmodel"))

        # Sanity check: the example is loadable before anything is tampered with.
        assert model.load_input_example(model_dir) is not None

        # Case 1: the directory passed as model path does not exist.
        bogus_dir = os.path.join(model_dir, "folder_which_does_not_exist")
        with pytest.raises(FileNotFoundError, match=missing_file_error):
            model.load_input_example(bogus_dir)

        # Case 2: the metadata still references the example, but the file itself is gone.
        example_file = os.path.join(model_dir, model.saved_input_example_info["artifact_path"])
        os.remove(example_file)
        with pytest.raises(FileNotFoundError, match=missing_file_error):
            model.load_input_example(model_dir)


def test_model_load_input_example_no_signature():
    """
    Check that ``load_input_example`` returns None for a model logged without an input example.

    NOTE: despite the test name, a signature is logged here; only the example is omitted.
    """
    with TempDir(chdr=True) as tmp:
        # The array is only used to derive the tensor dtype/shape for the signature.
        tensor = np.array([[3, 4, 5]], dtype=np.int32)
        input_schema = Schema([TensorSpec(type=tensor.dtype, shape=tensor.shape)])
        output_schema = Schema([ColSpec(name=None, type="double")])
        signature = ModelSignature(inputs=input_schema, outputs=output_schema)

        model_dir, _ = _log_model_with_signature_and_example(tmp, signature, input_example=None)
        model = Model.load(os.path.join(model_dir, "MLmodel"))
        result = model.load_input_example(model_dir)
        assert result is None


def _is_valid_uuid(val):
import uuid
Expand Down