Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only generate model uuid when logging model #5167

Merged
merged 6 commits into from Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 6 additions & 3 deletions mlflow/models/model.py
Expand Up @@ -42,7 +42,6 @@ def __init__(
flavors=None,
signature=None, # ModelSignature
saved_input_example_info: Dict[str, Any] = None,
model_uuid=None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of removing model_uuid from the constructor and modifying the attribute during load(), can we define a third type of value for model_uuid?

  1. By default, model_uuid should be a function that generates a UUID, e.g. lambda: uuid.uuid4().hex
  2. Model uuid could be None, indicating that the model has no ID
  3. Model uuid could be a string, indicating that the model already has a UUID

In the constructor, we can check if the input is a function and, if it is, call it to generate an ID. Otherwise, set self.model_uuid = model_uuid.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good

**kwargs,
):
# store model id instead of run_id and path to avoid confusion when model gets exported
Expand All @@ -54,7 +53,7 @@ def __init__(
self.flavors = flavors if flavors is not None else {}
self.signature = signature
self.saved_input_example_info = saved_input_example_info
self.model_uuid = uuid.uuid4().hex if model_uuid is None else model_uuid
self.model_uuid = uuid.uuid4().hex
Copy link
Member

@harupy harupy Dec 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This basically means every Model instance has a different model_uuid. Is this really the desired behavior?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every newly constructed model has a different ID; this is desired.
We only need to keep the rule that a model loaded back from a saved model should restore the old ID, or have it set to None if the saved model doesn't have an ID.

self.__dict__.update(kwargs)

def __eq__(self, other):
Expand Down Expand Up @@ -137,7 +136,11 @@ def from_dict(cls, model_dict):
model_dict = model_dict.copy()
model_dict["signature"] = ModelSignature.from_dict(model_dict["signature"])

return cls(**model_dict)
model_dict = model_dict.copy()
model_uuid = model_dict.pop('model_uuid', None)
model = cls(**model_dict)
model.model_uuid = model_uuid # restore the saved model_uuid
return model

@classmethod
def log(
Expand Down
25 changes: 25 additions & 0 deletions tests/models/test_model.py
Expand Up @@ -164,3 +164,28 @@ def test_model_log_with_input_example_succeeds():
# date column will get deserialized into string
input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
assert x.equals(input_example)


def _is_valid_uuid(val):
import uuid

try:
uuid.UUID(str(val))
return True
except ValueError:
return False


def test_model_uuid():
    """Check model_uuid generation and its round trip through to_dict/from_dict."""
    # A freshly constructed Model gets a valid UUID.
    m = Model()
    assert m.model_uuid is not None
    assert _is_valid_uuid(m.model_uuid)

    # The UUID is serialized and restored unchanged.
    m_dict = m.to_dict()
    assert m_dict["model_uuid"] == m.model_uuid
    m2 = Model.from_dict(m_dict)
    assert m2.model_uuid == m.model_uuid

    # A dict without a saved UUID loads with model_uuid set to None
    # rather than a newly generated value.
    m_dict.pop("model_uuid")
    m3 = Model.from_dict(m_dict)
    assert m3.model_uuid is None
20 changes: 0 additions & 20 deletions tests/pyfunc/test_model_export_with_loader_module_and_data_path.py
Expand Up @@ -556,26 +556,6 @@ def test_column_schema_enforcement_no_col_names():
assert pyfunc_model.predict(d).equals(pd.DataFrame(d))


def _is_valid_uuid(val):
import uuid

try:
uuid.UUID(str(val))
return True
except ValueError:
return False


def test_model_uuid():
    """Check that a new Model has a valid UUID that survives to_dict/from_dict."""
    m = Model()
    assert m.model_uuid is not None
    assert _is_valid_uuid(m.model_uuid)

    # The UUID is serialized and restored unchanged.
    m_dict = m.to_dict()
    assert m_dict["model_uuid"] == m.model_uuid
    m2 = Model.from_dict(m_dict)
    assert m2.model_uuid == m.model_uuid


def test_tensor_schema_enforcement_no_col_names():
m = Model()
input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))])
Expand Down