diff --git a/mlflow/models/model.py b/mlflow/models/model.py
index 806357c10ab4a..1e75c95dd19f9 100644
--- a/mlflow/models/model.py
+++ b/mlflow/models/model.py
@@ -68,6 +68,24 @@ def get_input_schema(self):
     def get_output_schema(self):
         return self.signature.outputs if self.signature is not None else None
 
+    def load_input_example(self, path: str):
+        """
+        Load the input example saved along with a model. Returns None if there is no example
+        metadata (i.e. the model was saved without an example). Raises FileNotFoundError if there
+        is model metadata but the example file is missing.
+
+        :param path: Path to the model directory.
+
+        :return: Input example (NumPy ndarray, SciPy csc_matrix, SciPy csr_matrix,
+                 pandas DataFrame, dict) or None if the model has no example.
+        """
+
+        # Just-in-time import to only load example-parsing libraries (e.g. numpy, pandas, etc.) if
+        # example is requested.
+        from mlflow.models.utils import _read_example
+
+        return _read_example(self, path)
+
     def add_flavor(self, name, **params):
         """Add an entry for how to serve the model in a given format."""
         self.flavors[name] = params
diff --git a/mlflow/models/utils.py b/mlflow/models/utils.py
index 137a1eac0f0ab..206d0de6a128b 100644
--- a/mlflow/models/utils.py
+++ b/mlflow/models/utils.py
@@ -192,8 +192,8 @@ def _save_example(mlflow_model: Model, input_example: ModelInputExample, path: s
 def _read_example(mlflow_model: Model, path: str):
     """
     Read example from a model directory. Returns None if there is no example metadata (i.e. the
-    model was saved without example). Raises IO Exception if there is model metadata but the example
-    file is missing.
+    model was saved without example). Raises FileNotFoundError if there is model metadata but the
+    example file is missing.
 
     :param mlflow_model: Model metadata.
     :param path: Path to the model directory.
diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index 131a70004b54a..67be4d7fb21c9 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -1,4 +1,5 @@
 import os
+import pytest
 from datetime import date
 
 import mlflow
@@ -9,11 +10,12 @@
 from mlflow.models import Model
 from mlflow.models.signature import ModelSignature
 from mlflow.models.utils import _save_example
-from mlflow.types.schema import Schema, ColSpec
+from mlflow.types.schema import Schema, ColSpec, TensorSpec
 from mlflow.utils.file_utils import TempDir
 from mlflow.utils.proto_json_utils import _dataframe_from_json
 
 from unittest import mock
+from scipy.sparse import csc_matrix
 
 
 def test_model_save_load():
@@ -104,6 +106,10 @@ def test_model_log():
         assert x.to_dict(orient="records")[0] == input_example
         assert not hasattr(loaded_model, "databricks_runtime")
 
+        loaded_example = loaded_model.load_input_example(local_path)
+        assert isinstance(loaded_example, pd.DataFrame)
+        assert loaded_example.to_dict(orient="records")[0] == input_example
+
 
 def test_model_log_with_databricks_runtime():
     dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
@@ -165,6 +171,80 @@ def test_model_log_with_input_example_succeeds():
         input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
         assert x.equals(input_example)
 
+        loaded_example = loaded_model.load_input_example(local_path)
+        assert isinstance(loaded_example, pd.DataFrame)
+        assert loaded_example.equals(input_example)
+
+
+def test_model_load_input_example_numpy():
+    with TempDir(chdr=True) as tmp:
+        input_example = np.array([[3, 4, 5]], dtype=np.int32)
+        sig = ModelSignature(
+            inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
+            outputs=Schema([ColSpec(name=None, type="double")]),
+        )
+
+        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
+        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
+        loaded_example = loaded_model.load_input_example(local_path)
+
+        assert isinstance(loaded_example, np.ndarray)
+        assert np.array_equal(input_example, loaded_example)
+
+
+def test_model_load_input_example_scipy():
+    with TempDir(chdr=True) as tmp:
+        input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
+        sig = ModelSignature(
+            inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
+            outputs=Schema([ColSpec(name=None, type="double")]),
+        )
+
+        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
+        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
+        loaded_example = loaded_model.load_input_example(local_path)
+
+        assert isinstance(loaded_example, csc_matrix)
+        assert np.array_equal(input_example.data, loaded_example.data)
+        # Compare dense forms too: .data alone ignores indices, indptr and shape.
+        assert np.array_equal(input_example.toarray(), loaded_example.toarray())
+
+
+def test_model_load_input_example_failures():
+    with TempDir(chdr=True) as tmp:
+        input_example = np.array([[3, 4, 5]], dtype=np.int32)
+        sig = ModelSignature(
+            inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
+            outputs=Schema([ColSpec(name=None, type="double")]),
+        )
+
+        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
+        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
+        loaded_example = loaded_model.load_input_example(local_path)
+        assert loaded_example is not None
+
+        with pytest.raises(FileNotFoundError, match="No such file or directory"):
+            loaded_model.load_input_example(os.path.join(local_path, "folder_which_does_not_exist"))
+
+        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
+        os.remove(path)
+        with pytest.raises(FileNotFoundError, match="No such file or directory"):
+            loaded_model.load_input_example(local_path)
+
+
+def test_model_load_input_example_no_example():
+    with TempDir(chdr=True) as tmp:
+        input_example = np.array([[3, 4, 5]], dtype=np.int32)
+        sig = ModelSignature(
+            inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
+            outputs=Schema([ColSpec(name=None, type="double")]),
+        )
+
+        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
+        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
+        loaded_example = loaded_model.load_input_example(local_path)
+        assert loaded_example is None
+
 
 def _is_valid_uuid(val):
     import uuid