From 11283dc79072d877e630ceb1937023b66e7a2b20 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Fri, 5 Nov 2021 00:24:32 -0700 Subject: [PATCH 1/5] Impl Signed-off-by: dbczumar --- mlflow/tracking/fluent.py | 62 +++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index 060c9f3437c34..e2f2a859c65ba 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -14,6 +14,10 @@ from mlflow.entities import Experiment, Run, RunInfo, RunStatus, Param, RunTag, Metric, ViewType from mlflow.entities.lifecycle_stage import LifecycleStage from mlflow.exceptions import MlflowException +from mlflow.protos.databricks_pb2 import ( + INVALID_PARAMETER_VALUE, + RESOURCE_DOES_NOT_EXIST, +) from mlflow.tracking.client import MlflowClient from mlflow.tracking import artifact_utils, _get_store from mlflow.tracking.context import registry as context_registry @@ -50,12 +54,17 @@ _logger = logging.getLogger(__name__) -def set_experiment(experiment_name: str) -> None: +def set_experiment(experiment_name: str = None, experiment_id: str = None) -> None: """ - Set given experiment as active experiment. If experiment does not exist, create an experiment - with provided name. + Set the given experiment as the active experiment. The experiment must either be specified by + name via `experiment_name` or by ID via `experiment_id`. The experiment name and ID cannot + both be specified. - :param experiment_name: Case sensitive name of an experiment to be activated. + :param experiment_name: Case sensitive name of the experiment to be activated. If an experiment + with this name does not exist, a new experiment wth this name is + created. + :param experiment_id: ID of the experiment to be activated. If an experiment with this ID + does not exist, an exception is thrown. .. code-block:: python :caption: Example @@ -80,20 +89,43 @@ def set_experiment(experiment_name: str) -> None: Tags: {} Lifecycle_stage: active """ - client = MlflowClient() - experiment = client.get_experiment_by_name(experiment_name) - exp_id = experiment.experiment_id if experiment else None - if exp_id is None: # id can be 0 - print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name)) - exp_id = client.create_experiment(experiment_name) - elif experiment.lifecycle_stage == LifecycleStage.DELETED: + if (experiment_name is not None and experiment_id is not None) or (experiment_name is None and experiment_id is None): raise MlflowException( - "Cannot set a deleted experiment '%s' as the active experiment." - " You can restore the experiment, or permanently delete the " - " experiment to create a new one." % experiment.name + message="Must specify exactly one of: `experiment_id` or `experiment_name`.", + error_code=INVALID_PARAMETER_VALUE, ) + + + client = MlflowClient() + if experiment_id is None: + experiment = client.get_experiment_by_name(experiment_name) + if experiment: + experiment_id = experiment.experiment_id + else: + _logger.info( + "Experiment with name '%s' does not exist. Creating a new experiment.", + experiment_name, + ) + experiment_id = client.create_experiment(experiment_name) + else: + experiment = client.get_experiment(experiment_id) + if experiment is None: + raise MlflowException( + message=f"Experiment with ID '{experiment_id}' does not exist.", + error_code=RESOURCE_DOES_NOT_EXIST, + ) + elif experiment.lifecycle_stage == LifecycleStage.DELETED: + raise MlflowException( + message=( + "Cannot set a deleted experiment '%s' as the active experiment." + " You can restore the experiment, or permanently delete the " + " experiment to create a new one." % experiment.name + ), + error_code=INVALID_PARAMETER_VALUE, + ) + global _active_experiment_id - _active_experiment_id = exp_id + _active_experiment_id = experiment_id class ActiveRun(Run): # pylint: disable=W0223 From bb3ee5b513d9384800dd7b075e99a8ea9fa225c4 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Fri, 5 Nov 2021 00:47:09 -0700 Subject: [PATCH 2/5] PR Signed-off-by: dbczumar --- mlflow/tracking/fluent.py | 21 ++++++----- tests/tracking/test_tracking.py | 67 ++++++++++++++++++++++++++------- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index e2f2a859c65ba..14837a5a32256 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -95,11 +95,22 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No error_code=INVALID_PARAMETER_VALUE, ) + def verify_experiment_active(experiment): + if experiment.lifecycle_stage == LifecycleStage.DELETED: + raise MlflowException( + message=( + "Cannot set a deleted experiment '%s' as the active experiment." + " You can restore the experiment, or permanently delete the " + " experiment to create a new one." % experiment.name + ), + error_code=INVALID_PARAMETER_VALUE, + ) client = MlflowClient() if experiment_id is None: experiment = client.get_experiment_by_name(experiment_name) if experiment: + verify_experiment_active(experiment) experiment_id = experiment.experiment_id else: _logger.info( @@ -114,15 +125,7 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No message=f"Experiment with ID '{experiment_id}' does not exist.", error_code=RESOURCE_DOES_NOT_EXIST, ) - elif experiment.lifecycle_stage == LifecycleStage.DELETED: - raise MlflowException( - message=( - "Cannot set a deleted experiment '%s' as the active experiment." - " You can restore the experiment, or permanently delete the " - " experiment to create a new one." % experiment.name - ), - error_code=INVALID_PARAMETER_VALUE, - ) + verify_experiment_active(experiment) global _active_experiment_id _active_experiment_id = experiment_id diff --git a/tests/tracking/test_tracking.py b/tests/tracking/test_tracking.py index 26f558f8e35a6..04679c8814432 100644 --- a/tests/tracking/test_tracking.py +++ b/tests/tracking/test_tracking.py @@ -16,7 +16,7 @@ from mlflow.entities import RunStatus, LifecycleStage, Metric, Param, RunTag, ViewType from mlflow.exceptions import MlflowException from mlflow.store.tracking.file_store import FileStore -from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE +from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE, RESOURCE_DOES_NOT_EXIST from mlflow.tracking.client import MlflowClient from mlflow.tracking.fluent import start_run from mlflow.utils.file_utils import local_file_uri_to_path @@ -77,16 +77,7 @@ def test_create_experiments_with_bad_name_types(name): @pytest.mark.usefixtures("reset_active_experiment") -def test_set_experiment(): - with pytest.raises(TypeError): - mlflow.set_experiment() # pylint: disable=no-value-for-parameter - - with pytest.raises(Exception): - mlflow.set_experiment(None) - - with pytest.raises(Exception): - mlflow.set_experiment("") - +def test_set_experiment_by_name(): name = "random_exp" exp_id = mlflow.create_experiment(name) mlflow.set_experiment(name) @@ -100,7 +91,50 @@ def test_set_experiment(): assert another_run.info.experiment_id == exp_id2.experiment_id -def test_set_experiment_with_deleted_experiment_name(): +@pytest.mark.usefixtures("reset_active_experiment") +def test_set_experiment_by_id(): + name = "random_exp" + exp_id = mlflow.create_experiment(name) + mlflow.set_experiment(experiment_id=exp_id) + with start_run() as run: + assert run.info.experiment_id == exp_id + + nonexistent_id = "-1337" + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment(experiment_id=nonexistent_id) + assert exc.value.error_code == ErrorCode.Name(RESOURCE_DOES_NOT_EXIST) + with start_run() as run: + assert run.info.experiment_id == exp_id + + +def test_set_experiment_parameter_validation(): + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment() + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Must specify exactly one" in str(exc.value) + + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment(None) + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Must specify exactly one" in str(exc.value) + + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment(None, None) + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Must specify exactly one" in str(exc.value) + + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment("name", "id") + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Must specify exactly one" in str(exc.value) + + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment("") + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Invalid experiment name" in str(exc.value) + + +def test_set_experiment_with_deleted_experiment(): name = "dead_exp" mlflow.set_experiment(name) with start_run() as run: @@ -108,8 +142,15 @@ def test_set_experiment_with_deleted_experiment_name(): tracking.MlflowClient().delete_experiment(exp_id) - with pytest.raises(MlflowException): + with pytest.raises(MlflowException) as exc: mlflow.set_experiment(name) + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Cannot set a deleted experiment" in str(exc.value) + + with pytest.raises(MlflowException) as exc: + mlflow.set_experiment(experiment_id=exp_id) + assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + assert "Cannot set a deleted experiment" in str(exc.value) def test_list_experiments(): From 614a0f8a60b51eed374dace314a179df6babb041 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Fri, 5 Nov 2021 00:48:11 -0700 Subject: [PATCH 3/5] Format Signed-off-by: dbczumar --- mlflow/tracking/fluent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index 14837a5a32256..1e25dc0c0d1d8 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -89,7 +89,9 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No Tags: {} Lifecycle_stage: active """ - if (experiment_name is not None and experiment_id is not None) or (experiment_name is None and experiment_id is None): + if (experiment_name is not None and experiment_id is not None) or ( + experiment_name is None and experiment_id is None + ): raise MlflowException( message="Must specify exactly one of: `experiment_id` or `experiment_name`.", error_code=INVALID_PARAMETER_VALUE, From fee73f9deee9c7932e62de44bed43216f44cbd32 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Fri, 5 Nov 2021 13:21:47 -0700 Subject: [PATCH 4/5] Address comments Signed-off-by: dbczumar --- mlflow/tracking/fluent.py | 5 ++++- tests/tracking/test_tracking.py | 23 ++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index 1e25dc0c0d1d8..b7a10f5141edf 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -98,7 +98,7 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No ) def verify_experiment_active(experiment): - if experiment.lifecycle_stage == LifecycleStage.DELETED: + if experiment.lifecycle_stage != LifecycleStage.ACTIVE: raise MlflowException( message=( "Cannot set a deleted experiment '%s' as the active experiment." @@ -119,6 +119,9 @@ def verify_experiment_active(experiment): "Experiment with name '%s' does not exist. Creating a new experiment.", experiment_name, ) + # NB: If two simultaneous threads or processes attempt to set the same experiment + # simultaneously, a race condition may be encountered here wherein experiment creation + # fails experiment_id = client.create_experiment(experiment_name) else: experiment = client.get_experiment(experiment_id) diff --git a/tests/tracking/test_tracking.py b/tests/tracking/test_tracking.py index 04679c8814432..40aa6d4733866 100644 --- a/tests/tracking/test_tracking.py +++ b/tests/tracking/test_tracking.py @@ -108,30 +108,21 @@ def test_set_experiment_by_id(): def test_set_experiment_parameter_validation(): - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Must specify exactly one") as exc: mlflow.set_experiment() assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Must specify exactly one" in str(exc.value) - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Must specify exactly one") as exc: mlflow.set_experiment(None) assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Must specify exactly one" in str(exc.value) - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Must specify exactly one") as exc: mlflow.set_experiment(None, None) assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Must specify exactly one" in str(exc.value) - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Must specify exactly one") as exc: mlflow.set_experiment("name", "id") assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Must specify exactly one" in str(exc.value) - - with pytest.raises(MlflowException) as exc: - mlflow.set_experiment("") - assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Invalid experiment name" in str(exc.value) def test_set_experiment_with_deleted_experiment(): @@ -142,15 +133,13 @@ def test_set_experiment_with_deleted_experiment(): tracking.MlflowClient().delete_experiment(exp_id) - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc: mlflow.set_experiment(name) assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Cannot set a deleted experiment" in str(exc.value) - with pytest.raises(MlflowException) as exc: + with pytest.raises(MlflowException, match="Cannot set a deleted experiment") as exc: mlflow.set_experiment(experiment_id=exp_id) assert exc.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) - assert "Cannot set a deleted experiment" in str(exc.value) def test_list_experiments(): From 34ad0a07d6a77265d115c89af15f5a3250cc8b37 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Fri, 5 Nov 2021 13:34:41 -0700 Subject: [PATCH 5/5] Return experiment Signed-off-by: dbczumar --- mlflow/tracking/fluent.py | 33 ++++++++++++++++----------------- tests/tracking/test_tracking.py | 11 ++++++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index b7a10f5141edf..d528759d170ba 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -65,6 +65,8 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No created. :param experiment_id: ID of the experiment to be activated. If an experiment with this ID does not exist, an exception is thrown. + :return: An instance of :py:class:`mlflow.entities.Experiment` representing the new active + experiment. .. code-block:: python :caption: Example @@ -97,24 +99,10 @@ def set_experiment(experiment_name: str = None, experiment_id: str = None) -> No error_code=INVALID_PARAMETER_VALUE, ) - def verify_experiment_active(experiment): - if experiment.lifecycle_stage != LifecycleStage.ACTIVE: - raise MlflowException( - message=( - "Cannot set a deleted experiment '%s' as the active experiment." - " You can restore the experiment, or permanently delete the " - " experiment to create a new one." % experiment.name - ), - error_code=INVALID_PARAMETER_VALUE, - ) - client = MlflowClient() if experiment_id is None: experiment = client.get_experiment_by_name(experiment_name) - if experiment: - verify_experiment_active(experiment) - experiment_id = experiment.experiment_id - else: + if not experiment: _logger.info( "Experiment with name '%s' does not exist. Creating a new experiment.", experiment_name, @@ -123,6 +111,7 @@ def verify_experiment_active(experiment): # simultaneously, a race condition may be encountered here wherein experiment creation # fails experiment_id = client.create_experiment(experiment_name) + experiment = client.get_experiment(experiment_id) else: experiment = client.get_experiment(experiment_id) if experiment is None: @@ -130,10 +119,20 @@ def verify_experiment_active(experiment): message=f"Experiment with ID '{experiment_id}' does not exist.", error_code=RESOURCE_DOES_NOT_EXIST, ) - verify_experiment_active(experiment) + + if experiment.lifecycle_stage != LifecycleStage.ACTIVE: + raise MlflowException( + message=( + "Cannot set a deleted experiment '%s' as the active experiment." + " You can restore the experiment, or permanently delete the " + " experiment to create a new one." % experiment.name + ), + error_code=INVALID_PARAMETER_VALUE, + ) global _active_experiment_id - _active_experiment_id = experiment_id + _active_experiment_id = experiment.experiment_id + return experiment class ActiveRun(Run): # pylint: disable=W0223 diff --git a/tests/tracking/test_tracking.py b/tests/tracking/test_tracking.py index 40aa6d4733866..c83f2bdefb5cd 100644 --- a/tests/tracking/test_tracking.py +++ b/tests/tracking/test_tracking.py @@ -80,22 +80,23 @@ def test_create_experiments_with_bad_name_types(name): def test_set_experiment_by_name(): name = "random_exp" exp_id = mlflow.create_experiment(name) - mlflow.set_experiment(name) + exp1 = mlflow.set_experiment(name) + assert exp1.experiment_id == exp_id with start_run() as run: assert run.info.experiment_id == exp_id another_name = "another_experiment" - mlflow.set_experiment(another_name) - exp_id2 = mlflow.tracking.MlflowClient().get_experiment_by_name(another_name) + exp2 = mlflow.set_experiment(another_name) with start_run() as another_run: - assert another_run.info.experiment_id == exp_id2.experiment_id + assert another_run.info.experiment_id == exp2.experiment_id @pytest.mark.usefixtures("reset_active_experiment") def test_set_experiment_by_id(): name = "random_exp" exp_id = mlflow.create_experiment(name) - mlflow.set_experiment(experiment_id=exp_id) + active_exp = mlflow.set_experiment(experiment_id=exp_id) + assert active_exp.experiment_id == exp_id with start_run() as run: assert run.info.experiment_id == exp_id