Add optional experiment_id parameter to mlflow.set_experiment (#5012)

* Impl Signed-off-by: dbczumar <corey.zumar@databricks.com> * PR Signed-off-by: dbczumar <corey.zumar@databricks.com> * Format Signed-off-by: dbczumar <corey.zumar@databricks.com> * Address comments Signed-off-by: dbczumar <corey.zumar@databricks.com> * Return experiment Signed-off-by: dbczumar <corey.zumar@databricks.com>
mlflow · Nov 5, 2021 · 0f9d1e0 · 0f9d1e0
1 parent 1942d78
commit 0f9d1e0
Show file tree

Hide file tree

Showing 2 changed files with 101 additions and 31 deletions.
diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py
@@ -14,6 +14,10 @@
 from mlflow.entities import Experiment, Run, RunInfo, RunStatus, Param, RunTag, Metric, ViewType
 from mlflow.entities.lifecycle_stage import LifecycleStage
 from mlflow.exceptions import MlflowException
+from mlflow.protos.databricks_pb2 import (
+    INVALID_PARAMETER_VALUE,
+    RESOURCE_DOES_NOT_EXIST,
+)
 from mlflow.tracking.client import MlflowClient
 from mlflow.tracking import artifact_utils, _get_store
 from mlflow.tracking.context import registry as context_registry
@@ -50,12 +54,19 @@
 _logger = logging.getLogger(__name__)
 
 
-def set_experiment(experiment_name: str) -> None:
+def set_experiment(experiment_name: str = None, experiment_id: str = None) -> None:
     """
-    Set given experiment as active experiment. If experiment does not exist, create an experiment
-    with provided name.
-
-    :param experiment_name: Case sensitive name of an experiment to be activated.
+    Set the given experiment as the active experiment. The experiment must either be specified by
+    name via `experiment_name` or by ID via `experiment_id`. The experiment name and ID cannot
+    both be specified.
+
+    :param experiment_name: Case sensitive name of the experiment to be activated. If an experiment
+                            with this name does not exist, a new experiment wth this name is
+                            created.
+    :param experiment_id: ID of the experiment to be activated. If an experiment with this ID
+                          does not exist, an exception is thrown.
+    :return: An instance of :py:class:`mlflow.entities.Experiment` representing the new active
+             experiment.
 
     .. code-block:: python
         :caption: Example
@@ -80,20 +91,48 @@ def set_experiment(experiment_name: str) -> None:
         Tags: {}
         Lifecycle_stage: active
     """
+    if (experiment_name is not None and experiment_id is not None) or (
+        experiment_name is None and experiment_id is None
+    ):
+        raise MlflowException(
+            message="Must specify exactly one of: `experiment_id` or `experiment_name`.",
+            error_code=INVALID_PARAMETER_VALUE,
+        )
+
     client = MlflowClient()
-    experiment = client.get_experiment_by_name(experiment_name)
-    exp_id = experiment.experiment_id if experiment else None
-    if exp_id is None:  # id can be 0
-        print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
-        exp_id = client.create_experiment(experiment_name)
-    elif experiment.lifecycle_stage == LifecycleStage.DELETED:
+    if experiment_id is None:
+        experiment = client.get_experiment_by_name(experiment_name)
+        if not experiment:
+            _logger.info(
+                "Experiment with name '%s' does not exist. Creating a new experiment.",
+                experiment_name,
+            )
+            # NB: If two simultaneous threads or processes attempt to set the same experiment
+            # simultaneously, a race condition may be encountered here wherein experiment creation
+            # fails
+            experiment_id = client.create_experiment(experiment_name)
+            experiment = client.get_experiment(experiment_id)
+    else:
+        experiment = client.get_experiment(experiment_id)
+        if experiment is None:
+            raise MlflowException(
+                message=f"Experiment with ID '{experiment_id}' does not exist.",
+                error_code=RESOURCE_DOES_NOT_EXIST,
+            )
+
+    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
-            "Cannot set a deleted experiment '%s' as the active experiment."
-            " You can restore the experiment, or permanently delete the "
-            " experiment to create a new one." % experiment.name
+            message=(
+                "Cannot set a deleted experiment '%s' as the active experiment."
+                " You can restore the experiment, or permanently delete the "
+                " experiment to create a new one." % experiment.name
+            ),
+            error_code=INVALID_PARAMETER_VALUE,
         )
+
     global _active_experiment_id
-    _active_experiment_id = exp_id
+    _active_experiment_id = experiment.experiment_id
+    return experiment
 
 
 class ActiveRun(Run):  # pylint: disable=W0223

diff --git a/tests/tracking/test_tracking.py b/tests/tracking/test_tracking.py
@@ -16,7 +16,7 @@
 from mlflow.entities import RunStatus, LifecycleStage, Metric, Param, RunTag, ViewType
 from mlflow.exceptions import MlflowException
 from mlflow.store.tracking.file_store import FileStore
-from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
+from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE, RESOURCE_DOES_NOT_EXIST
 from mlflow.tracking.client import MlflowClient
 from mlflow.tracking.fluent import start_run
 from mlflow.utils.file_utils import local_file_uri_to_path
@@ -77,39 +77,70 @@ def test_create_experiments_with_bad_name_types(name):
 
 
 @pytest.mark.usefixtures("reset_active_experiment")
-def test_set_experiment():
-    with pytest.raises(TypeError):
-        mlflow.set_experiment()  # pylint: disable=no-value-for-parameter
+def test_set_experiment_by_name():
+    name = "random_exp"
+    exp_id = mlflow.create_experiment(name)
+    exp1 = mlflow.set_experiment(name)
+    assert exp1.experiment_id == exp_id
+    with start_run() as run:
+        assert run.info.experiment_id == exp_id
 
-    with pytest.raises(Exception):
-        mlflow.set_experiment(None)
+    another_name = "another_experiment"
+    exp2 = mlflow.set_experiment(another_name)
+    with start_run() as another_run:
+        assert another_run.info.experiment_id == exp2.experiment_id
 
-    with pytest.raises(Exception):
-        mlflow.set_experiment("")
 
+@pytest.mark.usefixtures("reset_active_experiment")
+def test_set_experiment_by_id():
     name = "random_exp"
     exp_id = mlflow.create_experiment(name)
-    mlflow.set_experiment(name)
+    active_exp = mlflow.set_experiment(experiment_id=exp_id)
+    assert active_exp.experiment_id == exp_id
     with start_run() as run:
         assert run.info.experiment_id == exp_id
 
-    another_name = "another_experiment"
-    mlflow.set_experiment(another_name)
-    exp_id2 = mlflow.tracking.MlflowClient().get_experiment_by_name(another_name)
-    with start_run() as another_run:
-        assert another_run.info.experiment_id == exp_id2.experiment_id
+    nonexistent_id = "-1337"
+    with pytest.raises(MlflowException) as exc:
+        mlflow.set_experiment(experiment_id=nonexistent_id)
+    assert exc.value.error_code == ErrorCode.Name(RESOURCE_DOES_NOT_EXIST)
+    with start_run() as run:
+        assert run.info.experiment_id == exp_id
+
+
+def test_set_experiment_parameter_validation():
+    with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
+        mlflow.set_experiment()
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
 
+    with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
+        mlflow.set_experiment(None)
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
+
+    with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
+        mlflow.set_experiment(None, None)
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
+
+    with pytest.raises(MlflowException, match="Must specify exactly one") as exc:
+        mlflow.set_experiment("name", "id")
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
 
-def test_set_experiment_with_deleted_experiment_name():
+
+def test_set_experiment_with_deleted_experiment():
     name = "dead_exp"
     mlflow.set_experiment(name)
     with start_run() as run:
         exp_id = run.info.experiment_id
 
     tracking.MlflowClient().delete_experiment(exp_id)
 
-    with pytest.raises(MlflowException):
+    with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc:
         mlflow.set_experiment(name)
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
+
+    with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc:
+        mlflow.set_experiment(experiment_id=exp_id)
+    assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
 
 
 def test_list_experiments():