From e2dc085ec9010b031a2a59c2a8972be398255fa3 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 15 Dec 2021 14:09:08 +0800 Subject: [PATCH 1/6] init Signed-off-by: Weichen Xu --- mlflow/models/model.py | 5 +++-- ...st_model_export_with_loader_module_and_data_path.py | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/mlflow/models/model.py b/mlflow/models/model.py index 1ada7241bd406..9cc27457f4304 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -54,7 +54,7 @@ def __init__( self.flavors = flavors if flavors is not None else {} self.signature = signature self.saved_input_example_info = saved_input_example_info - self.model_uuid = uuid.uuid4().hex if model_uuid is None else model_uuid + self.model_uuid = model_uuid self.__dict__.update(kwargs) def __eq__(self, other): @@ -186,7 +186,8 @@ def log( with TempDir() as tmp: local_path = tmp.path("model") run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id - mlflow_model = cls(artifact_path=artifact_path, run_id=run_id) + model_uuid = uuid.uuid4().hex + mlflow_model = cls(artifact_path=artifact_path, run_id=run_id, model_uuid=model_uuid) flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs) mlflow.tracking.fluent.log_artifacts(local_path, artifact_path) try: diff --git a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py index 6e9baca1e20df..038243d96d7d6 100644 --- a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py +++ b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py @@ -566,16 +566,6 @@ def _is_valid_uuid(val): return False -def test_model_uuid(): - m = Model() - assert m.model_uuid is not None - assert _is_valid_uuid(m.model_uuid) - m_dict = m.to_dict() - assert m_dict["model_uuid"] == m.model_uuid - m2 = Model.from_dict(m_dict) - assert m2.model_uuid == m.model_uuid - - def test_tensor_schema_enforcement_no_col_names(): m = Model() input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))]) From 00db8ac4d24c73aadbcca399b785e6214c0a553f Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 15 Dec 2021 14:22:13 +0800 Subject: [PATCH 2/6] add test Signed-off-by: Weichen Xu --- ...export_with_loader_module_and_data_path.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py index 038243d96d7d6..ddd251b5ebe20 100644 --- a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py +++ b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py @@ -85,6 +85,16 @@ def model_path(tmpdir): return os.path.join(str(tmpdir), "model") +def _is_valid_uuid(val): + import uuid + + try: + uuid.UUID(str(val)) + return True + except ValueError: + return False + + @pytest.mark.large def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): sk_model_path = os.path.join(str(tmpdir), "knn.pkl") @@ -102,6 +112,7 @@ def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): reloaded_model_config = Model.load(os.path.join(model_path, "MLmodel")) assert model_config.__dict__ == reloaded_model_config.__dict__ + assert model_config.model_uuid is not None and _is_valid_uuid(model_config) assert mlflow.pyfunc.FLAVOR_NAME in reloaded_model_config.flavors assert mlflow.pyfunc.PY_VERSION in reloaded_model_config.flavors[mlflow.pyfunc.FLAVOR_NAME] reloaded_model = mlflow.pyfunc.load_pyfunc(model_path) @@ -109,6 +120,9 @@ def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): sklearn_knn_model.predict(iris_data[0]), reloaded_model.predict(iris_data[0]) ) + reloaded_model_config2 = Model.load(os.path.join(model_path, "MLmodel")) + assert reloaded_model_config.model_uuid == reloaded_model_config2.model_uuid + @pytest.mark.large def test_signature_and_examples_are_saved_correctly(sklearn_knn_model, iris_data): @@ -556,16 +570,6 @@ def test_column_schema_enforcement_no_col_names(): assert pyfunc_model.predict(d).equals(pd.DataFrame(d)) -def _is_valid_uuid(val): - import uuid - - try: - uuid.UUID(str(val)) - return True - except ValueError: - return False - - def test_tensor_schema_enforcement_no_col_names(): m = Model() input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))]) From 1d279b6e910784f1aad838934ef9a2ccb7700245 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 15 Dec 2021 15:48:41 +0800 Subject: [PATCH 3/6] update Signed-off-by: Weichen Xu --- mlflow/models/model.py | 12 +++++---- mlflow/pyfunc/__init__.py | 1 + tests/models/test_model.py | 25 +++++++++++++++++++ ...export_with_loader_module_and_data_path.py | 14 ----------- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/mlflow/models/model.py b/mlflow/models/model.py index 9cc27457f4304..9d0d2dd8527a1 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -42,7 +42,6 @@ def __init__( flavors=None, signature=None, # ModelSignature saved_input_example_info: Dict[str, Any] = None, - model_uuid=None, **kwargs, ): # store model id instead of run_id and path to avoid confusion when model gets exported @@ -54,7 +53,7 @@ def __init__( self.flavors = flavors if flavors is not None else {} self.signature = signature self.saved_input_example_info = saved_input_example_info - self.model_uuid = model_uuid + self.model_uuid = uuid.uuid4().hex self.__dict__.update(kwargs) def __eq__(self, other): @@ -137,7 +136,11 @@ def from_dict(cls, model_dict): model_dict = model_dict.copy() model_dict["signature"] = ModelSignature.from_dict(model_dict["signature"]) - return cls(**model_dict) + model_dict = model_dict.copy() + model_uuid = model_dict.pop('model_uuid', None) + model = cls(**model_dict) + model.model_uuid = model_uuid # restore the saved model_uuid + return model @classmethod def log( @@ -186,8 +189,7 @@ def log( with TempDir() as tmp: local_path = tmp.path("model") run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id - model_uuid = uuid.uuid4().hex - mlflow_model = cls(artifact_path=artifact_path, run_id=run_id, model_uuid=model_uuid) + mlflow_model = cls(artifact_path=artifact_path, run_id=run_id) flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs) mlflow.tracking.fluent.log_artifacts(local_path, artifact_path) try: diff --git a/mlflow/pyfunc/__init__.py b/mlflow/pyfunc/__init__.py index a5d772a9d54fd..fd34ec40c3f89 100644 --- a/mlflow/pyfunc/__init__.py +++ b/mlflow/pyfunc/__init__.py @@ -213,6 +213,7 @@ import yaml from copy import deepcopy import logging +import uuid from typing import Any, Union, List, Dict import mlflow diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 3f058c765b87e..1953c90a4ece2 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -164,3 +164,28 @@ def test_model_log_with_input_example_succeeds(): # date column will get deserialized into string input_example["d"] = input_example["d"].apply(lambda x: x.isoformat()) assert x.equals(input_example) + + +def _is_valid_uuid(val): + import uuid + + try: + uuid.UUID(str(val)) + return True + except ValueError: + return False + + +def test_model_uuid(): + m = Model() + assert m.model_uuid is not None + assert _is_valid_uuid(m.model_uuid) + m_dict = m.to_dict() + print(m_dict) + assert m_dict["model_uuid"] == m.model_uuid + m2 = Model.from_dict(m_dict) + assert m2.model_uuid == m.model_uuid + + m_dict.pop("model_uuid") + m3 = Model.from_dict(m_dict) + assert m3.model_uuid is None diff --git a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py index ddd251b5ebe20..8b4f395096bc8 100644 --- a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py +++ b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py @@ -85,16 +85,6 @@ def model_path(tmpdir): return os.path.join(str(tmpdir), "model") -def _is_valid_uuid(val): - import uuid - - try: - uuid.UUID(str(val)) - return True - except ValueError: - return False - - @pytest.mark.large def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): sk_model_path = os.path.join(str(tmpdir), "knn.pkl") @@ -112,7 +102,6 @@ def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): reloaded_model_config = Model.load(os.path.join(model_path, "MLmodel")) assert model_config.__dict__ == reloaded_model_config.__dict__ - assert model_config.model_uuid is not None and _is_valid_uuid(model_config) assert mlflow.pyfunc.FLAVOR_NAME in reloaded_model_config.flavors assert mlflow.pyfunc.PY_VERSION in reloaded_model_config.flavors[mlflow.pyfunc.FLAVOR_NAME] reloaded_model = mlflow.pyfunc.load_pyfunc(model_path) @@ -120,9 +109,6 @@ def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path): sklearn_knn_model.predict(iris_data[0]), reloaded_model.predict(iris_data[0]) ) - reloaded_model_config2 = Model.load(os.path.join(model_path, "MLmodel")) - assert reloaded_model_config.model_uuid == reloaded_model_config2.model_uuid - @pytest.mark.large def test_signature_and_examples_are_saved_correctly(sklearn_knn_model, iris_data): From c45d16d4e2539b8ded158406589c4290308386f9 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 15 Dec 2021 15:54:26 +0800 Subject: [PATCH 4/6] update Signed-off-by: Weichen Xu --- mlflow/pyfunc/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlflow/pyfunc/__init__.py b/mlflow/pyfunc/__init__.py index fd34ec40c3f89..a5d772a9d54fd 100644 --- a/mlflow/pyfunc/__init__.py +++ b/mlflow/pyfunc/__init__.py @@ -213,7 +213,6 @@ import yaml from copy import deepcopy import logging -import uuid from typing import Any, Union, List, Dict import mlflow From fcba89d0ddc0af8332f178ee41c3e97885fa381d Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 15 Dec 2021 17:54:42 +0800 Subject: [PATCH 5/6] update Signed-off-by: Weichen Xu --- mlflow/models/model.py | 19 +++++++++++-------- tests/models/test_model.py | 13 ++++++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/mlflow/models/model.py b/mlflow/models/model.py index 9d0d2dd8527a1..73c3ac981927a 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -6,7 +6,7 @@ import os import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union, Callable import mlflow from mlflow.exceptions import MlflowException @@ -42,6 +42,7 @@ def __init__( flavors=None, signature=None, # ModelSignature saved_input_example_info: Dict[str, Any] = None, + model_uuid: Union[str, Callable, None] = lambda: uuid.uuid4().hex, **kwargs, ): # store model id instead of run_id and path to avoid confusion when model gets exported @@ -53,7 +54,10 @@ def __init__( self.flavors = flavors if flavors is not None else {} self.signature = signature self.saved_input_example_info = saved_input_example_info - self.model_uuid = uuid.uuid4().hex + if callable(model_uuid): + self.model_uuid = model_uuid() + else: + self.model_uuid = model_uuid self.__dict__.update(kwargs) def __eq__(self, other): @@ -132,15 +136,14 @@ def from_dict(cls, model_dict): from .signature import ModelSignature + model_dict = model_dict.copy() if "signature" in model_dict and isinstance(model_dict["signature"], dict): - model_dict = model_dict.copy() model_dict["signature"] = ModelSignature.from_dict(model_dict["signature"]) - model_dict = model_dict.copy() - model_uuid = model_dict.pop('model_uuid', None) - model = cls(**model_dict) - model.model_uuid = model_uuid # restore the saved model_uuid - return model + if "model_uuid" not in model_dict: + model_dict["model_uuid"] = None + + return cls(**model_dict) @classmethod def log( diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 1953c90a4ece2..131a70004b54a 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -180,12 +180,15 @@ def test_model_uuid(): m = Model() assert m.model_uuid is not None assert _is_valid_uuid(m.model_uuid) + + m2 = Model() + assert m.model_uuid != m2.model_uuid + m_dict = m.to_dict() - print(m_dict) assert m_dict["model_uuid"] == m.model_uuid - m2 = Model.from_dict(m_dict) - assert m2.model_uuid == m.model_uuid + m3 = Model.from_dict(m_dict) + assert m3.model_uuid == m.model_uuid m_dict.pop("model_uuid") - m3 = Model.from_dict(m_dict) - assert m3.model_uuid is None + m4 = Model.from_dict(m_dict) + assert m4.model_uuid is None From 2d4a3cb90f03914357dd565d7e0f0d20407b30a5 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Thu, 16 Dec 2021 10:13:04 +0800 Subject: [PATCH 6/6] updates Signed-off-by: Weichen Xu --- mlflow/models/model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mlflow/models/model.py b/mlflow/models/model.py index 73c3ac981927a..806357c10ab4a 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -54,10 +54,7 @@ def __init__( self.flavors = flavors if flavors is not None else {} self.signature = signature self.saved_input_example_info = saved_input_example_info - if callable(model_uuid): - self.model_uuid = model_uuid() - else: - self.model_uuid = model_uuid + self.model_uuid = model_uuid() if callable(model_uuid) else model_uuid self.__dict__.update(kwargs) def __eq__(self, other):