From 6719acbc668bf22872116f3f060968eb3492ffdf Mon Sep 17 00:00:00 2001 From: Kr4is Date: Wed, 9 Mar 2022 18:41:16 +0100 Subject: [PATCH 01/11] specify mlflow exception type for mlflow get_run call --- pytorch_lightning/loggers/mlflow.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index f43d8358e1d43..19582533872a9 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -87,7 +87,8 @@ def any_lightning_module_function_or_hook(self): self.logger.experiment.whatever_ml_flow_supports(...) Args: - experiment_name: The name of the experiment + run_id: The run identifier of the experiment. If not provided, a new run is started. + experiment_name: The name of the experiment. run_name: Name of the new run. The `run_name` is internally stored as a ``mlflow.runName`` tag. If the ``mlflow.runName`` tag has already been set in `tags`, the value is overridden by the `run_name`. tracking_uri: Address of local or remote tracking server. @@ -110,6 +111,7 @@ def any_lightning_module_function_or_hook(self): def __init__( self, + run_id: Optional[str] = None, experiment_name: str = "lightning_logs", run_name: Optional[str] = None, tracking_uri: Optional[str] = os.getenv("MLFLOW_TRACKING_URI"), @@ -130,7 +132,7 @@ def __init__( self._experiment_id = None self._tracking_uri = tracking_uri self._run_name = run_name - self._run_id = None + self._run_id = run_id self.tags = tags self._prefix = prefix self._artifact_location = artifact_location @@ -149,6 +151,15 @@ def experiment(self) -> MlflowClient: self.logger.experiment.some_mlflow_function() """ + + if self._run_id: + try: + self._mlflow_client.get_run(self._run_id) + return self._mlflow_client + except mlflow.exceptions.MlflowException: + log.warning(f"Run id with name {self._run_id} not found. Creating a new one.") + self._run_id = None + if self._experiment_id is None: expt = self._mlflow_client.get_experiment_by_name(self._experiment_name) if expt is not None: From 0a52a8a0e5274a2d495c551476f5817f40a72a9b Mon Sep 17 00:00:00 2001 From: "bruno.cabado" Date: Thu, 10 Mar 2022 12:14:28 +0100 Subject: [PATCH 02/11] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4efa7677191a7..0488e4e705512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -151,6 +151,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- Allow log to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/PyTorchLightning/pytorch-lightning/pull/12290)) + - Drop PyTorch 1.7 support ([#12191](https://github.com/PyTorchLightning/pytorch-lightning/pull/12191)) From 9d7517680ae747abbd913dbb7326c4505425e49b Mon Sep 17 00:00:00 2001 From: Kr4is Date: Fri, 11 Mar 2022 08:58:17 +0100 Subject: [PATCH 03/11] place run_id parameter at constructor end --- pytorch_lightning/loggers/mlflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index 19582533872a9..d6bbb0c536fa8 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -111,7 +111,6 @@ def any_lightning_module_function_or_hook(self): def __init__( self, - run_id: Optional[str] = None, experiment_name: str = "lightning_logs", run_name: Optional[str] = None, tracking_uri: Optional[str] = os.getenv("MLFLOW_TRACKING_URI"), @@ -119,6 +118,7 @@ def __init__( save_dir: Optional[str] = "./mlruns", prefix: str = "", artifact_location: Optional[str] = None, + run_id: Optional[str] = None, ): if mlflow is None: raise ModuleNotFoundError( From e98c39071df62befc5a026d9b116f3c23c4c3f8c Mon Sep 17 00:00:00 2001 From: Kr4is Date: Fri, 11 Mar 2022 09:00:25 +0100 Subject: [PATCH 04/11] move changelog description from changed to added --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0488e4e705512..c140198939898 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- Allow log to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/PyTorchLightning/pytorch-lightning/pull/12290)) + + - Enable gradient accumulation using Horovod's `backward_passes_per_step` ([#11911](https://github.com/PyTorchLightning/pytorch-lightning/pull/11911)) @@ -151,8 +154,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- Allow log to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/PyTorchLightning/pytorch-lightning/pull/12290)) - - Drop PyTorch 1.7 support ([#12191](https://github.com/PyTorchLightning/pytorch-lightning/pull/12191)) From c050e86489738c4fd6ea037749d2b2814582df79 Mon Sep 17 00:00:00 2001 From: Bruno Cabado Date: Sat, 12 Mar 2022 10:40:46 +0100 Subject: [PATCH 05/11] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c140198939898..ae3ce7ec9bea9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- Allow log to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/PyTorchLightning/pytorch-lightning/pull/12290)) +- Allow logging to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/PyTorchLightning/pytorch-lightning/pull/12290)) - Enable gradient accumulation using Horovod's `backward_passes_per_step` ([#11911](https://github.com/PyTorchLightning/pytorch-lightning/pull/11911)) From 28d606d9090c389a7ae56586cfe429e967bd7e6f Mon Sep 17 00:00:00 2001 From: Kr4is Date: Sat, 12 Mar 2022 12:07:05 +0100 Subject: [PATCH 06/11] set existing run experiment_id if run_if already exists --- pytorch_lightning/loggers/mlflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index d6bbb0c536fa8..25fdcbcc26238 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -154,7 +154,8 @@ def experiment(self) -> MlflowClient: if self._run_id: try: - self._mlflow_client.get_run(self._run_id) + run = self._mlflow_client.get_run(self._run_id) + self._experiment_id = run.info.experiment_id return self._mlflow_client except mlflow.exceptions.MlflowException: log.warning(f"Run id with name {self._run_id} not found. Creating a new one.") From bd11951ac2fa6e6417f1356fb94bc838a949d564 Mon Sep 17 00:00:00 2001 From: Kr4is Date: Sat, 12 Mar 2022 12:07:31 +0100 Subject: [PATCH 07/11] add mlflow logger tests --- tests/loggers/test_mlflow.py | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/loggers/test_mlflow.py b/tests/loggers/test_mlflow.py index 5ce5ceb75a0b1..1b14f068f2b80 100644 --- a/tests/loggers/test_mlflow.py +++ b/tests/loggers/test_mlflow.py @@ -113,6 +113,50 @@ def test_mlflow_run_name_setting(client, mlflow, tmpdir): client.return_value.create_run.assert_called_with(experiment_id="exp-id", tags=default_tags) +@mock.patch("pytorch_lightning.loggers.mlflow.mlflow") +@mock.patch("pytorch_lightning.loggers.mlflow.MlflowClient") +def test_mlflow_run_id_setting(client, mlflow, tmpdir): + """Test that the run_id argument uses the provided run_id.""" + + run = MagicMock() + run.info.run_id = "run-id" + run.info.experiment_id = "experiment-id" + + # simulate existing run + client.return_value.get_run = MagicMock(return_value=run.info.run_id) + + # run_id exists uses the existing run + logger = MLFlowLogger("test", run_id=run.info.run_id, save_dir=tmpdir) + _ = logger.experiment + client.return_value.get_run.assert_called_with(run.info.run_id) + assert logger.experiment_id == run.info.experiment_id + assert logger.run_id == run.info.run_id + client.reset_mock(return_value=True) + + # simulate not existing run + client.return_value.create_experiment = MagicMock(return_value=run.info.experiment_id) + client.return_value.create_run = MagicMock(return_value=run) + client.return_value.get_run = MagicMock(side_effect=mlflow.exceptions.MlflowException) + + # run_id doesn't exists create a new run + logger = MLFlowLogger("test", run_id="not-existing-run-id", save_dir=tmpdir) + _ = logger.experiment + assert logger.experiment_id == run.info.experiment_id + assert logger.run_id == run.info.run_id + client.reset_mock(return_value=True) + + # simulate not run + client.return_value.create_experiment = MagicMock(return_value=run.info.experiment_id) + client.return_value.create_run = MagicMock(return_value=run) + + # default run_id (= None) create a new run + logger = MLFlowLogger("test", save_dir=tmpdir) + _ = logger.experiment + assert logger.experiment_id == run.info.experiment_id + assert logger.run_id == run.info.run_id + client.reset_mock(return_value=True) + + @mock.patch("pytorch_lightning.loggers.mlflow.mlflow") @mock.patch("pytorch_lightning.loggers.mlflow.MlflowClient") def test_mlflow_log_dir(client, mlflow, tmpdir): From df7e23e73102be850f906aa185807d0df6b11dd5 Mon Sep 17 00:00:00 2001 From: Kr4is Date: Fri, 18 Mar 2022 15:34:32 +0100 Subject: [PATCH 08/11] solve logger initialization and tests --- pytorch_lightning/loggers/mlflow.py | 19 +++++++++++-------- tests/loggers/test_mlflow.py | 27 +++------------------------ 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index 25fdcbcc26238..ab445051bf131 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -137,6 +137,8 @@ def __init__( self._prefix = prefix self._artifact_location = artifact_location + self._initialized = False + self._mlflow_client = MlflowClient(tracking_uri) @property @@ -152,14 +154,14 @@ def experiment(self) -> MlflowClient: """ - if self._run_id: - try: - run = self._mlflow_client.get_run(self._run_id) - self._experiment_id = run.info.experiment_id - return self._mlflow_client - except mlflow.exceptions.MlflowException: - log.warning(f"Run id with name {self._run_id} not found. Creating a new one.") - self._run_id = None + if self._initialized: + return self._mlflow_client + + if self._run_id is not None: + run = self._mlflow_client.get_run(self._run_id) + self._experiment_id = run.info.experiment_id + self._initialized = True + return self._mlflow_client if self._experiment_id is None: expt = self._mlflow_client.get_experiment_by_name(self._experiment_name) @@ -181,6 +183,7 @@ def experiment(self) -> MlflowClient: self.tags[MLFLOW_RUN_NAME] = self._run_name run = self._mlflow_client.create_run(experiment_id=self._experiment_id, tags=resolve_tags(self.tags)) self._run_id = run.info.run_id + self._initialized = True return self._mlflow_client @property diff --git a/tests/loggers/test_mlflow.py b/tests/loggers/test_mlflow.py index 1b14f068f2b80..07b3b106c9b7f 100644 --- a/tests/loggers/test_mlflow.py +++ b/tests/loggers/test_mlflow.py @@ -40,6 +40,7 @@ def test_mlflow_logger_exists(client, mlflow, tmpdir): run1 = MagicMock() run1.info.run_id = "run-id-1" + run1.info.experiment_id = "exp-id-1" run2 = MagicMock() run2.info.run_id = "run-id-2" @@ -51,6 +52,7 @@ def test_mlflow_logger_exists(client, mlflow, tmpdir): client.return_value.get_experiment_by_name = MagicMock(return_value=None) client.return_value.create_experiment = MagicMock(return_value="exp-id-1") # experiment_id client.return_value.create_run = MagicMock(return_value=run1) + #client.return_value.get_run = MagicMock(return_value=run1) logger = MLFlowLogger("test", save_dir=tmpdir) assert logger._experiment_id is None @@ -123,7 +125,7 @@ def test_mlflow_run_id_setting(client, mlflow, tmpdir): run.info.experiment_id = "experiment-id" # simulate existing run - client.return_value.get_run = MagicMock(return_value=run.info.run_id) + client.return_value.get_run = MagicMock(return_value=run) # run_id exists uses the existing run logger = MLFlowLogger("test", run_id=run.info.run_id, save_dir=tmpdir) @@ -133,29 +135,6 @@ def test_mlflow_run_id_setting(client, mlflow, tmpdir): assert logger.run_id == run.info.run_id client.reset_mock(return_value=True) - # simulate not existing run - client.return_value.create_experiment = MagicMock(return_value=run.info.experiment_id) - client.return_value.create_run = MagicMock(return_value=run) - client.return_value.get_run = MagicMock(side_effect=mlflow.exceptions.MlflowException) - - # run_id doesn't exists create a new run - logger = MLFlowLogger("test", run_id="not-existing-run-id", save_dir=tmpdir) - _ = logger.experiment - assert logger.experiment_id == run.info.experiment_id - assert logger.run_id == run.info.run_id - client.reset_mock(return_value=True) - - # simulate not run - client.return_value.create_experiment = MagicMock(return_value=run.info.experiment_id) - client.return_value.create_run = MagicMock(return_value=run) - - # default run_id (= None) create a new run - logger = MLFlowLogger("test", save_dir=tmpdir) - _ = logger.experiment - assert logger.experiment_id == run.info.experiment_id - assert logger.run_id == run.info.run_id - client.reset_mock(return_value=True) - @mock.patch("pytorch_lightning.loggers.mlflow.mlflow") @mock.patch("pytorch_lightning.loggers.mlflow.MlflowClient") From c22e5642581d47cf052b62d635324329a36f08cd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Mar 2022 14:36:58 +0000 Subject: [PATCH 09/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/loggers/test_mlflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/loggers/test_mlflow.py b/tests/loggers/test_mlflow.py index 07b3b106c9b7f..7c1acc9d6bf69 100644 --- a/tests/loggers/test_mlflow.py +++ b/tests/loggers/test_mlflow.py @@ -52,7 +52,7 @@ def test_mlflow_logger_exists(client, mlflow, tmpdir): client.return_value.get_experiment_by_name = MagicMock(return_value=None) client.return_value.create_experiment = MagicMock(return_value="exp-id-1") # experiment_id client.return_value.create_run = MagicMock(return_value=run1) - #client.return_value.get_run = MagicMock(return_value=run1) + # client.return_value.get_run = MagicMock(return_value=run1) logger = MLFlowLogger("test", save_dir=tmpdir) assert logger._experiment_id is None From 250017317d390d68b169cf230f42f1e1356f82af Mon Sep 17 00:00:00 2001 From: Kr4is Date: Fri, 18 Mar 2022 17:49:55 +0100 Subject: [PATCH 10/11] remove comment --- tests/loggers/test_mlflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/loggers/test_mlflow.py b/tests/loggers/test_mlflow.py index 7c1acc9d6bf69..bf5301e45a6a9 100644 --- a/tests/loggers/test_mlflow.py +++ b/tests/loggers/test_mlflow.py @@ -52,7 +52,6 @@ def test_mlflow_logger_exists(client, mlflow, tmpdir): client.return_value.get_experiment_by_name = MagicMock(return_value=None) client.return_value.create_experiment = MagicMock(return_value="exp-id-1") # experiment_id client.return_value.create_run = MagicMock(return_value=run1) - # client.return_value.get_run = MagicMock(return_value=run1) logger = MLFlowLogger("test", save_dir=tmpdir) assert logger._experiment_id is None From 83c530456ccfec6a6f01d6fee613b826f242b399 Mon Sep 17 00:00:00 2001 From: Kr4is Date: Wed, 23 Mar 2022 10:16:14 +0100 Subject: [PATCH 11/11] move run_id to the end documentation args --- pytorch_lightning/loggers/mlflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index ab445051bf131..a74dda324e15f 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -87,7 +87,6 @@ def any_lightning_module_function_or_hook(self): self.logger.experiment.whatever_ml_flow_supports(...) Args: - run_id: The run identifier of the experiment. If not provided, a new run is started. experiment_name: The name of the experiment. run_name: Name of the new run. The `run_name` is internally stored as a ``mlflow.runName`` tag. If the ``mlflow.runName`` tag has already been set in `tags`, the value is overridden by the `run_name`. @@ -101,6 +100,7 @@ def any_lightning_module_function_or_hook(self): prefix: A string to put at the beginning of metric keys. artifact_location: The location to store run artifacts. If not provided, the server picks an appropriate default. + run_id: The run identifier of the experiment. If not provided, a new run is started. Raises: ModuleNotFoundError: