Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add model_uuid to Java Model #5165

Merged
merged 8 commits into from Dec 15, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions mlflow/java/scoring/src/main/java/org/mlflow/models/Model.java
Expand Up @@ -6,6 +6,7 @@
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import org.mlflow.Flavor;
import org.mlflow.utils.FileUtils;
import org.mlflow.utils.SerializationUtils;
Expand Down Expand Up @@ -41,6 +42,9 @@ public static class Signature {
@JsonProperty("input_example")
private Map<String, Object> input_example;

@JsonProperty("model_uuid")
private String modelUuid;

private String rootPath;

/**
Expand All @@ -61,6 +65,11 @@ public static Model fromRootPath(String modelRootPath) throws IOException {
public static Model fromConfigPath(String configPath) throws IOException {
File configFile = new File(configPath);
Model model = SerializationUtils.parseYamlFromFile(configFile, Model.class);
// Set the model uuid if it's absent.
if (!model.getModelUuid().isPresent()) {
String uuid = UUID.randomUUID().toString().replace("-", "");
model.setModelUuid(uuid);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we set a new ID in this case? I would prefer to keep it empty, because the ID should be generated when the model is logged (and once the model has been logged, the ID should be immutable).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we set a newly generated ID here and then load an old-version model twice, we will get two in-memory models with different model IDs, which breaks the rule that the model ID is immutable.

Copy link
Member Author

@harupy harupy Dec 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we set a newly generated ID here and then load an old-version model twice, we will get two in-memory models with different model IDs, which breaks the rule that the model ID is immutable.

@WeichenXu123 If so, I think we need to fix the python code.

# Reproduction script: write an MLmodel file that has no model_uuid entry,
# load it twice, and print the model_uuid reported by each load.
import tempfile
import os

from mlflow.models import Model

with tempfile.TemporaryDirectory() as tmp_dir:
    path = os.path.join(tmp_dir, "MLmodel")

    # Write a minimal MLmodel config; note that it contains no model_uuid key.
    with open(path, "w") as f:
        f.write(
            """
artifact_path: model
flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.7.9
  sklearn:
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 0.24.1
"""
        )

    # Load the same file twice; each call builds a separate Model object.
    print(Model.load(path).model_uuid)
    print(Model.load(path).model_uuid)

output:

cac277097e54410c8d191a717e640cf6
560075c0351f43dca8554f8ab6eb96f4

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I created a PR to fix the Python side:
#5167

}
// Set the root path to the directory containing the configuration file.
// This will be used to create an absolute path to the serialized model
model.setRootPath(configFile.getParentFile().getAbsolutePath());
Expand All @@ -82,6 +91,11 @@ public Optional<String> getRunId() {
return Optional.ofNullable(this.runId);
}

/** @return The MLflow model's uuid */
public Optional<String> getModelUuid() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: <4 The acronym naming convention in Java typically preserves the upper-case acronym, particularly if the standard library preserves it.

public Optional<String> getModelUUID() {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BenWilson2 Can we use getModelUuid since we already have getRunUuid?

return Optional.ofNullable(this.modelUuid);
}

/** @return The path to the root directory of the MLflow model */
public Optional<String> getRootPath() {
return Optional.ofNullable(this.rootPath);
Expand All @@ -104,4 +118,8 @@ public <T extends Flavor> Optional<T> getFlavor(String flavorName, Class<T> flav
/**
 * Stores the given path in this model's {@code rootPath} field.
 *
 * @param rootPath the value to record as the model's root directory path
 */
private void setRootPath(String rootPath) {
  this.rootPath = rootPath;
}

/**
 * Stores the given identifier in this model's {@code modelUuid} field.
 *
 * @param modelUuid the value to record as the model's uuid
 */
private void setModelUuid(String modelUuid) {
  this.modelUuid = modelUuid;
}
}
13 changes: 13 additions & 0 deletions mlflow/java/scoring/src/test/java/org/mlflow/ModelTest.java
Expand Up @@ -17,6 +17,19 @@ public void testModelIsLoadedFromYamlUsingConfigPathCorrectly() {
Model model = Model.fromConfigPath(configPath);
Assert.assertTrue(model.getFlavor(MLeapFlavor.FLAVOR_NAME, MLeapFlavor.class).isPresent());
Assert.assertTrue(model.getUtcTimeCreated().isPresent());
Assert.assertTrue(model.getModelUuid().isPresent());
} catch (IOException e) {
e.printStackTrace();
Assert.fail("Encountered an exception while reading the model from a configuration path!");
}
}

@Test
public void testModelIsLoadedCorrectlyWhenModelUuidDoesNotExist() {
String configPath = getClass().getResource("sample_model_root/MLmodel.no.model_uuid").getFile();
try {
Model model = Model.fromConfigPath(configPath);
Assert.assertTrue(model.getModelUuid().isPresent());
} catch (IOException e) {
e.printStackTrace();
Assert.fail("Encountered an exception while reading the model from a configuration path!");
Expand Down
Expand Up @@ -5,7 +5,7 @@ flavors:
utc_time_created: '2018-08-10 18:34:28.720095'
run_id: c228016dea284522882b657d91eeffa6
artifact_path: model

model_uuid: 4ab08bf9d91e4535bc2719f9210840c3
signature:
inputs: '[{"name": "fixed acidity", "type": "double"}, {"name": "volatile acidity",
"type": "double"}, {"name": "citric acid", "type": "double"}, {"name": "residual
Expand Down
@@ -0,0 +1,7 @@
# Sample MLmodel configuration that intentionally has no model_uuid entry.
flavors:
  mleap:
    mleap_version: 0.8.1
    model_data: mleap/model
utc_time_created: "2018-08-10 18:34:28.720095"
run_id: c228016dea284522882b657d91eeffa6
artifact_path: model
2 changes: 0 additions & 2 deletions mlflow/mleap.py
Expand Up @@ -182,8 +182,6 @@ def save_model(
model. The given example will be converted to a Pandas DataFrame and then
serialized to json using the Pandas split-oriented format. Bytes are
base64-encoded.


"""
if mlflow_model is None:
mlflow_model = Model()
Expand Down
21 changes: 21 additions & 0 deletions tests/models/test_model.py
Expand Up @@ -164,3 +164,24 @@ def test_model_log_with_input_example_succeeds():
# date column will get deserialized into string
input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
assert x.equals(input_example)


def test_model_can_be_loaded_without_model_uuid(tmpdir):
    """Check that loading an MLmodel file lacking ``model_uuid`` still yields a non-None ID.

    Writes a minimal MLmodel configuration without a ``model_uuid`` key into
    ``tmpdir``, loads it with ``Model.load`` and asserts that the loaded
    model's ``model_uuid`` attribute is not ``None``.
    """
    ml_model_file = tmpdir.join("MLmodel")
    # The YAML body starts at column 0 on purpose: leading whitespace inside the
    # literal would become part of the document and change its nesting.
    ml_model_file.write(
        """
artifact_path: model
flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.7.9
  sklearn:
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 0.24.1
"""
    )
    model = Model.load(ml_model_file)
    assert model.model_uuid is not None