Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional experiment_id parameter to mlflow.set_experiment #5012

Merged
merged 5 commits into from Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 54 additions & 15 deletions mlflow/tracking/fluent.py
Expand Up @@ -14,6 +14,10 @@
from mlflow.entities import Experiment, Run, RunInfo, RunStatus, Param, RunTag, Metric, ViewType
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import (
INVALID_PARAMETER_VALUE,
RESOURCE_DOES_NOT_EXIST,
)
from mlflow.tracking.client import MlflowClient
from mlflow.tracking import artifact_utils, _get_store
from mlflow.tracking.context import registry as context_registry
Expand Down Expand Up @@ -50,12 +54,19 @@
_logger = logging.getLogger(__name__)


def set_experiment(experiment_name: str) -> None:
def set_experiment(experiment_name: str = None, experiment_id: str = None) -> None:
"""
Set given experiment as active experiment. If experiment does not exist, create an experiment
with provided name.

:param experiment_name: Case sensitive name of an experiment to be activated.
Set the given experiment as the active experiment. The experiment must either be specified by
name via `experiment_name` or by ID via `experiment_id`. The experiment name and ID cannot
both be specified.

:param experiment_name: Case sensitive name of the experiment to be activated. If an experiment
with this name does not exist, a new experiment wth this name is
created.
:param experiment_id: ID of the experiment to be activated. If an experiment with this ID
does not exist, an exception is thrown.
:return: An instance of :py:class:`mlflow.entities.Experiment` representing the new active
experiment.

.. code-block:: python
:caption: Example
Expand All @@ -80,20 +91,48 @@ def set_experiment(experiment_name: str) -> None:
Tags: {}
Lifecycle_stage: active
"""
if (experiment_name is not None and experiment_id is not None) or (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is empty string legit for either name or id? I guess not. In that case, it might be better to verify
if not (experiment_name and experiment_id)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious on why do we need this condition? We just need experiment_name is None and experiment_id is None right? Let me know if I am missing something.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't the guarded exclusivity check protect against conflicting submission behavior. For an invalid id but a valid name it would raise an Exception based on the id validation. Could be super confusing for users.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is empty string legit for either name or id? I guess not. In that case, it might be better to verify
if not (experiment_name and experiment_id)

I would imagine that it's invalid on most if not all backends, but I'm hesitant to enforce this on the off chance that someone's third-party backend has a legitimate use case for this. For example, MLflow's default experiment used to be a falsey integer value (0) before the change to string representations.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the same logic applies for experiment names, I've removed the test case asserting that an empty string name is invalid, as that's backend-dependent.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM.

experiment_name is None and experiment_id is None
):
raise MlflowException(
message="Must specify exactly one of: `experiment_id` or `experiment_name`.",
error_code=INVALID_PARAMETER_VALUE,
)

client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
exp_id = experiment.experiment_id if experiment else None
if exp_id is None: # id can be 0
print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
exp_id = client.create_experiment(experiment_name)
elif experiment.lifecycle_stage == LifecycleStage.DELETED:
if experiment_id is None:
experiment = client.get_experiment_by_name(experiment_name)
if not experiment:
_logger.info(
"Experiment with name '%s' does not exist. Creating a new experiment.",
experiment_name,
)
# NB: If two simultaneous threads or processes attempt to set the same experiment
# simultaneously, a race condition may be encountered here wherein experiment creation
# fails
experiment_id = client.create_experiment(experiment_name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a potential race condition here between client.get_experiment_by_name() and client.create_experiment(experiment_name). It could cause issues in distributed logging situation. I don't think you need to fix it here, but might be good to annotate the code to make future life easier.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good call-out. It's worth noting that the MLflow fluent API in general is not meant to be safe across threads or processes. I've added a note here nonetheless.

experiment = client.get_experiment(experiment_id)
else:
experiment = client.get_experiment(experiment_id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this function throw if experiment_id doesn't exist?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

404 in REST and throws in file_store and sqlalchemy_store

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @BenWilson2 ! Yes, while this throws for MLflow-native stores, it may not always throw for alternative stores. I don't think the None check is particularly problematic here as an extra layer of defense.

if experiment is None:
raise MlflowException(
message=f"Experiment with ID '{experiment_id}' does not exist.",
error_code=RESOURCE_DOES_NOT_EXIST,
)

if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
raise MlflowException(
"Cannot set a deleted experiment '%s' as the active experiment."
" You can restore the experiment, or permanently delete the "
" experiment to create a new one." % experiment.name
message=(
"Cannot set a deleted experiment '%s' as the active experiment."
" You can restore the experiment, or permanently delete the "
" experiment to create a new one." % experiment.name
),
error_code=INVALID_PARAMETER_VALUE,
)

global _active_experiment_id
_active_experiment_id = exp_id
_active_experiment_id = experiment.experiment_id
return experiment
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This actually simplifies some MLOps pipeline flows! A bonus feature is always a great thing



class ActiveRun(Run): # pylint: disable=W0223
Expand Down
63 changes: 47 additions & 16 deletions tests/tracking/test_tracking.py
Expand Up @@ -16,7 +16,7 @@
from mlflow.entities import RunStatus, LifecycleStage, Metric, Param, RunTag, ViewType
from mlflow.exceptions import MlflowException
from mlflow.store.tracking.file_store import FileStore
from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE, RESOURCE_DOES_NOT_EXIST
from mlflow.tracking.client import MlflowClient
from mlflow.tracking.fluent import start_run
from mlflow.utils.file_utils import local_file_uri_to_path
Expand Down Expand Up @@ -77,39 +77,70 @@ def test_create_experiments_with_bad_name_types(name):


@pytest.mark.usefixtures("reset_active_experiment")
def test_set_experiment():
with pytest.raises(TypeError):
mlflow.set_experiment() # pylint: disable=no-value-for-parameter
def test_set_experiment_by_name():
name = "random_exp"
exp_id = mlflow.create_experiment(name)
exp1 = mlflow.set_experiment(name)
assert exp1.experiment_id == exp_id
with start_run() as run:
assert run.info.experiment_id == exp_id

with pytest.raises(Exception):
mlflow.set_experiment(None)
another_name = "another_experiment"
exp2 = mlflow.set_experiment(another_name)
with start_run() as another_run:
assert another_run.info.experiment_id == exp2.experiment_id

with pytest.raises(Exception):
mlflow.set_experiment("")

@pytest.mark.usefixtures("reset_active_experiment")
def test_set_experiment_by_id():
name = "random_exp"
exp_id = mlflow.create_experiment(name)
mlflow.set_experiment(name)
active_exp = mlflow.set_experiment(experiment_id=exp_id)
assert active_exp.experiment_id == exp_id
with start_run() as run:
assert run.info.experiment_id == exp_id

another_name = "another_experiment"
mlflow.set_experiment(another_name)
exp_id2 = mlflow.tracking.MlflowClient().get_experiment_by_name(another_name)
with start_run() as another_run:
assert another_run.info.experiment_id == exp_id2.experiment_id
nonexistent_id = "-1337"
with pytest.raises(MlflowException) as exc:
mlflow.set_experiment(experiment_id=nonexistent_id)
assert exc.value.error_code == ErrorCode.Name(RESOURCE_DOES_NOT_EXIST)
with start_run() as run:
assert run.info.experiment_id == exp_id


def test_set_experiment_parameter_validation():
with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
mlflow.set_experiment()
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
mlflow.set_experiment(None)
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
mlflow.set_experiment(None, None)
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
mlflow.set_experiment("name", "id")
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

def test_set_experiment_with_deleted_experiment_name():

def test_set_experiment_with_deleted_experiment():
name = "dead_exp"
mlflow.set_experiment(name)
with start_run() as run:
exp_id = run.info.experiment_id

tracking.MlflowClient().delete_experiment(exp_id)

with pytest.raises(MlflowException):
with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc:
mlflow.set_experiment(name)
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc:
mlflow.set_experiment(experiment_id=exp_id)
assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)


def test_list_experiments():
Expand Down