From f880c73cb8bbc656d639d74c33f8f069a3df47e5 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 10 Nov 2022 12:51:54 +0100 Subject: [PATCH 01/43] Fix restarting attribute for lr finder --- src/pytorch_lightning/tuner/lr_finder.py | 1 + tests/tests_pytorch/tuner/test_lr_finder.py | 37 +++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/pytorch_lightning/tuner/lr_finder.py b/src/pytorch_lightning/tuner/lr_finder.py index 29a5d47776a9e..4b39411e361d2 100644 --- a/src/pytorch_lightning/tuner/lr_finder.py +++ b/src/pytorch_lightning/tuner/lr_finder.py @@ -270,6 +270,7 @@ def lr_find( # Restore initial state of model trainer._checkpoint_connector.restore(ckpt_path) trainer.strategy.remove_checkpoint(ckpt_path) + trainer.fit_loop.restarting = False # reset restarting flag as checkpoint restoring sets it to True return lr_finder diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py index ed4d9d33430f0..01722dca86438 100644 --- a/tests/tests_pytorch/tuner/test_lr_finder.py +++ b/tests/tests_pytorch/tuner/test_lr_finder.py @@ -441,6 +441,43 @@ def test_if_lr_finder_callback_already_configured(): trainer.tune(model) +def test_lr_finder_callback_restarting(tmpdir): + """Test that `LearningRateFinder` does not set restarting=True when loading checkpoint.""" + + class MyBoringModel(BoringModel): + def __init__(self): + super().__init__() + self.learning_rate = 0.123 + + def configure_optimizers(self): + return torch.optim.SGD(self.parameters(), lr=self.learning_rate) + + class CustomLearningRateFinder(LearningRateFinder): + milestones = (1,) + + def lr_find(self, trainer, pl_module) -> None: + super().lr_find(trainer, pl_module) + assert not trainer.fit_loop.restarting + + def on_train_epoch_start(self, trainer, pl_module): + if trainer.current_epoch in self.milestones or trainer.current_epoch == 0: + self.lr_find(trainer, pl_module) + + model = MyBoringModel() + trainer = Trainer( + 
default_root_dir=tmpdir, + max_epochs=3, + callbacks=[CustomLearningRateFinder(early_stop_threshold=None, update_attr=True)], + limit_train_batches=10, + limit_val_batches=0, + limit_test_batches=00, + num_sanity_val_steps=0, + enable_model_summary=False, + ) + + trainer.fit(model) + + @mock.patch.dict(os.environ, os.environ.copy(), clear=True) @RunIf(standalone=True) def test_lr_finder_with_ddp(tmpdir): From c13c1a6812e3768838e241543964f2e9356f0f72 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:57:15 +0100 Subject: [PATCH 02/43] update lite executor --- .../components/multi_node/lite.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index 2a9b33b0880d1..db6c5087d36e3 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -1,4 +1,5 @@ import os +import warnings from dataclasses import dataclass from typing import Any, Callable, Type @@ -31,6 +32,7 @@ def run( nprocs: int, ): from lightning.lite import LightningLite + from lightning.lite.accelerators import MPSAccelerator from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy # Used to configure PyTorch progress group @@ -52,7 +54,18 @@ def run( def pre_fn(lite, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - kwargs["accelerator"] = "auto" + + if MPSAccelerator.is_available(): + old_acc_value = kwargs.get("accelerator", "auto") + kwargs["accelerator"] = "cpu" + + if old_acc_value != kwargs["accelerator"]: + warnings.warn( + "Forcing accelerator=cpu as other accelerators (specifically MPS) are not supported " + "by PyTorch for distributed training on mps capable devices" + ) + else: + kwargs["accelerator"] = "auto" strategy = kwargs.get("strategy", None) if strategy: if isinstance(strategy, str): @@ -61,14 +74,18 @@ def pre_fn(lite, *args, 
**kwargs): elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): - raise Exception("DDP Spawned strategies aren't supported yet.") + raise ValueError("DDP Spawned strategies aren't supported yet.") + + kwargs["strategy"] = strategy + return {}, args, kwargs tracer = Tracer() tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn) tracer._instrument() - work_run() + ret_val = work_run() tracer._restore() + return ret_val class LiteMultiNode(MultiNode): From 8fceed0588af89bc497ffd3bf09a66a2b9d77851 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:58:28 +0100 Subject: [PATCH 03/43] update trainer executor --- .../components/multi_node/trainer.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 222f71ce59557..a3cac945c1c7c 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -1,4 +1,5 @@ import os +import warnings from dataclasses import dataclass from typing import Any, Callable, Type @@ -30,8 +31,9 @@ def run( node_rank: int, nprocs: int, ): - from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy from lightning.pytorch import Trainer as LTrainer + from lightning.pytorch.accelerators import MPSAccelerator + from lightning.pytorch.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy from pytorch_lightning import Trainer as PLTrainer # Used to configure PyTorch progress group @@ -50,7 +52,15 @@ def run( def pre_fn(trainer, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - kwargs["accelerator"] = "auto" + if MPSAccelerator.is_available(): + old_acc_value = kwargs.get("accelerator", "auto") + kwargs["accelerator"] = "cpu" + + if old_acc_value != kwargs["accelerator"]: + warnings.warn( + 
"Forcing accelerator=cpu as other accelerators (specifically MPS) are not supported " + "by PyTorch for distributed training on mps capable devices" + ) strategy = kwargs.get("strategy", None) if strategy: if isinstance(strategy, str): @@ -59,15 +69,17 @@ def pre_fn(trainer, *args, **kwargs): elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): - raise Exception("DDP Spawned strategies aren't supported yet.") + raise ValueError("DDP Spawned strategies aren't supported yet.") + kwargs["strategy"] = strategy return {}, args, kwargs tracer = Tracer() tracer.add_traced(PLTrainer, "__init__", pre_fn=pre_fn) tracer.add_traced(LTrainer, "__init__", pre_fn=pre_fn) tracer._instrument() - work_run() + ret_val = work_run() tracer._restore() + return ret_val class LightningTrainerMultiNode(MultiNode): From 13cb379d99ac5f1aa68ace52ba2d3932457b6b46 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:58:44 +0100 Subject: [PATCH 04/43] update spawn executor --- src/lightning_app/components/multi_node/pytorch_spawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/components/multi_node/pytorch_spawn.py b/src/lightning_app/components/multi_node/pytorch_spawn.py index 3119ffc51e0b5..013bdbcaec347 100644 --- a/src/lightning_app/components/multi_node/pytorch_spawn.py +++ b/src/lightning_app/components/multi_node/pytorch_spawn.py @@ -88,7 +88,7 @@ def run( elif world_size > 1: raise Exception("Torch distributed should be available.") - work_run(world_size, node_rank, global_rank, local_rank) + return work_run(world_size, node_rank, global_rank, local_rank) class PyTorchSpawnMultiNode(MultiNode): From 49b20b39b94fefee4f5ad244436ca375b01ddd9a Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:59:06 +0100 Subject: [PATCH 05/43] add multinode component tests --- tests/tests_app/components/multi_node/__init__.py | 0 1 file changed, 0 
insertions(+), 0 deletions(-) create mode 100644 tests/tests_app/components/multi_node/__init__.py diff --git a/tests/tests_app/components/multi_node/__init__.py b/tests/tests_app/components/multi_node/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From f80f95be4bcf6cb5be3e4de9dc542539f47abeba Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:59:19 +0100 Subject: [PATCH 06/43] add testing helpers --- tests/tests_app/helpers/__init__.py | 0 tests/tests_app/helpers/utils.py | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/tests_app/helpers/__init__.py create mode 100644 tests/tests_app/helpers/utils.py diff --git a/tests/tests_app/helpers/__init__.py b/tests/tests_app/helpers/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tests_app/helpers/utils.py b/tests/tests_app/helpers/utils.py new file mode 100644 index 0000000000000..12619a9a4d3dc --- /dev/null +++ b/tests/tests_app/helpers/utils.py @@ -0,0 +1,27 @@ +import re +from contextlib import contextmanager +from typing import Optional, Type + +import pytest + + +@contextmanager +def no_warning_call(expected_warning: Type[Warning] = UserWarning, match: Optional[str] = None): + with pytest.warns(None) as record: + yield + + if match is None: + try: + w = record.pop(expected_warning) + except AssertionError: + # no warning raised + return + else: + for w in record.list: + if w.category is expected_warning and re.compile(match).search(w.message.args[0]): + break + else: + return + + msg = "A warning" if expected_warning is None else f"`{expected_warning.__name__}`" + raise AssertionError(f"{msg} was raised: {w}") From 07ad6aa92ce252b4061cc0c536ca3df31f4f0f2d Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 11:59:29 +0100 Subject: [PATCH 07/43] add lite tests --- .../components/multi_node/test_lite.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 
tests/tests_app/components/multi_node/test_lite.py diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py new file mode 100644 index 0000000000000..e1874e128cea6 --- /dev/null +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -0,0 +1,98 @@ +import os +from copy import deepcopy +from functools import partial +from unittest import mock + +import pytest +from lightning_utilities.core.imports import module_available +from tests_app.helpers.utils import no_warning_call + +import lightning as L +from lightning_app.components.multi_node.lite import _LiteRunExecutor + + +def dummy_callable(**kwargs): + ll = L.lite.LightningLite(**kwargs) + return ll._all_passed_kwargs + + +def dummy_init(self, **kwargs): + self._all_passed_kwargs = kwargs + + +def _get_args_after_tracer_injection(**kwargs): + with mock.patch.object(L.lite.LightningLite, "__init__", dummy_init): + ret_val = _LiteRunExecutor.run( + local_rank=0, + work_run=partial(dummy_callable, **kwargs), + main_address="1.2.3.4", + main_port=5, + node_rank=6, + num_nodes=7, + nprocs=8, + ) + env_vars = deepcopy(os.environ) + return ret_val, env_vars + + +@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") +@pytest.mark.skipif(not L.lite.accelerators.MPSAccelerator.is_available(), reason="mps not available") +@pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) +def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): + warning_str = ( + r"Forcing accelerator=cpu as other accelerators \(specifically MPS\) are not supported " + + "by PyTorch for distributed training on mps capable devices" + ) + if accelerator_expected != accelerator_given: + warning_context = pytest.warns(UserWarning, match=warning_str) + else: + warning_context = no_warning_call(match=warning_str + "*") + + with warning_context: + ret_val, env_vars 
= _get_args_after_tracer_injection(accelerator=accelerator_given) + assert ret_val["accelerator"] == accelerator_expected + + +@pytest.mark.parametrize( + "args_given,args_expected", + [ + ( + { + "devices": 1, + "num_nodes": 1, + "accelerator": "gpu", + }, + {"devices": 8, "num_nodes": 7, "accelerator": "auto"}, + ), + ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), + ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), + ], +) +def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): + + # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) + if L.lite.accelerators.MPSAccelerator.is_available(): + args_expected["accelerator"] = "cpu" + + ret_val, env_vars = _get_args_after_tracer_injection(**args_given) + + for k, v in args_expected.items(): + assert ret_val[k] == v + + assert env_vars["MASTER_ADDR"] == "1.2.3.4" + assert env_vars["MASTER_PORT"] == "5" + assert env_vars["GROUP_RANK"] == "6" + assert env_vars["RANK"] == str(0 + 6 * 8) + assert env_vars["LOCAL_RANK"] == "0" + assert env_vars["WORLD_SIZE"] == str(7 * 8) + assert env_vars["LOCAL_WORLD_SIZE"] == "8" + assert env_vars["TORCHELASTIC_RUN_ID"] == "1" + assert env_vars["LT_CLI_USED"] == "1" + + +def test_lite_run_executor_invalid_strategy_instances(): + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=L.lite.strategies.DDPSpawnStrategy()) + + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=L.lite.strategies.DDPSpawnShardedStrategy()) From a1b2e61f938b0cdf3bb76ae907af8f95cbf982a8 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 12:00:08 +0100 Subject: [PATCH 08/43] add trainer tests --- .../components/multi_node/test_trainer.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 
tests/tests_app/components/multi_node/test_trainer.py diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py new file mode 100644 index 0000000000000..df620cf040f7c --- /dev/null +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -0,0 +1,97 @@ +import os +from copy import deepcopy +from functools import partial +from unittest import mock + +import pytest +from lightning_utilities.core.imports import module_available +from tests_app.helpers.utils import no_warning_call + +import lightning as L +from lightning_app.components.multi_node.trainer import _LightningTrainerRunExecutor + + +def dummy_callable(**kwargs): + t = L.pytorch.Trainer(**kwargs) + return t._all_passed_kwargs + + +def dummy_init(self, **kwargs): + self._all_passed_kwargs = kwargs + + +def _get_args_after_tracer_injection(**kwargs): + with mock.patch.object(L.pytorch.Trainer, "__init__", dummy_init): + ret_val = _LightningTrainerRunExecutor.run( + local_rank=0, + work_run=partial(dummy_callable, **kwargs), + main_address="1.2.3.4", + main_port=5, + node_rank=6, + num_nodes=7, + nprocs=8, + ) + env_vars = deepcopy(os.environ) + return ret_val, env_vars + + +@pytest.mark.skipif(not module_available("lightning.pytorch")) +@pytest.mark.skipif(not L.pytorch.accelerators.MPSAccelerator.is_available()) +@pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) +def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): + warning_str = ( + r"Forcing accelerator=cpu as other accelerators \(specifically MPS\) are not supported " + + "by PyTorch for distributed training on mps capable devices" + ) + if accelerator_expected != accelerator_given: + warning_context = pytest.warns(UserWarning, match=warning_str) + else: + warning_context = no_warning_call(match=warning_str + "*") + + with warning_context: + ret_val, env_vars = 
_get_args_after_tracer_injection(accelerator=accelerator_given) + assert ret_val["accelerator"] == accelerator_expected + + +@pytest.mark.parametrize( + "args_given,args_expected", + [ + ( + { + "devices": 1, + "num_nodes": 1, + "accelerator": "gpu", + }, + {"devices": 8, "num_nodes": 7, "accelerator": "auto"}, + ), + ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), + ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), + ], +) +def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): + + # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) + if L.pytorch.accelerators.MPSAccelerator.is_available(): + args_expected["accelerator"] = "cpu" + + ret_val, env_vars = _get_args_after_tracer_injection(**args_given) + + for k, v in args_expected.items(): + assert ret_val[k] == v + + assert env_vars["MASTER_ADDR"] == "1.2.3.4" + assert env_vars["MASTER_PORT"] == "5" + assert env_vars["GROUP_RANK"] == "6" + assert env_vars["RANK"] == str(0 + 6 * 8) + assert env_vars["LOCAL_RANK"] == "0" + assert env_vars["WORLD_SIZE"] == str(7 * 8) + assert env_vars["LOCAL_WORLD_SIZE"] == "8" + assert env_vars["TORCHELASTIC_RUN_ID"] == "1" + + +def test_trainer_run_executor_invalid_strategy_instances(): + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnStrategy()) + + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnShardedStrategy()) From 21231c1f39bfeca25c77284d2e4bed4f68e89aea Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 10 Nov 2022 12:51:54 +0100 Subject: [PATCH 09/43] Revert "Fix restarting attribute for lr finder" This reverts commit f880c73cb8bbc656d639d74c33f8f069a3df47e5. 
--- src/pytorch_lightning/tuner/lr_finder.py | 1 - tests/tests_pytorch/tuner/test_lr_finder.py | 37 --------------------- 2 files changed, 38 deletions(-) diff --git a/src/pytorch_lightning/tuner/lr_finder.py b/src/pytorch_lightning/tuner/lr_finder.py index 4b39411e361d2..29a5d47776a9e 100644 --- a/src/pytorch_lightning/tuner/lr_finder.py +++ b/src/pytorch_lightning/tuner/lr_finder.py @@ -270,7 +270,6 @@ def lr_find( # Restore initial state of model trainer._checkpoint_connector.restore(ckpt_path) trainer.strategy.remove_checkpoint(ckpt_path) - trainer.fit_loop.restarting = False # reset restarting flag as checkpoint restoring sets it to True return lr_finder diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py index 01722dca86438..ed4d9d33430f0 100644 --- a/tests/tests_pytorch/tuner/test_lr_finder.py +++ b/tests/tests_pytorch/tuner/test_lr_finder.py @@ -441,43 +441,6 @@ def test_if_lr_finder_callback_already_configured(): trainer.tune(model) -def test_lr_finder_callback_restarting(tmpdir): - """Test that `LearningRateFinder` does not set restarting=True when loading checkpoint.""" - - class MyBoringModel(BoringModel): - def __init__(self): - super().__init__() - self.learning_rate = 0.123 - - def configure_optimizers(self): - return torch.optim.SGD(self.parameters(), lr=self.learning_rate) - - class CustomLearningRateFinder(LearningRateFinder): - milestones = (1,) - - def lr_find(self, trainer, pl_module) -> None: - super().lr_find(trainer, pl_module) - assert not trainer.fit_loop.restarting - - def on_train_epoch_start(self, trainer, pl_module): - if trainer.current_epoch in self.milestones or trainer.current_epoch == 0: - self.lr_find(trainer, pl_module) - - model = MyBoringModel() - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=3, - callbacks=[CustomLearningRateFinder(early_stop_threshold=None, update_attr=True)], - limit_train_batches=10, - limit_val_batches=0, - limit_test_batches=00, - 
num_sanity_val_steps=0, - enable_model_summary=False, - ) - - trainer.fit(model) - - @mock.patch.dict(os.environ, os.environ.copy(), clear=True) @RunIf(standalone=True) def test_lr_finder_with_ddp(tmpdir): From 0b3157ad51953b9567cb28cf4016b57838b74c3f Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 21 Nov 2022 12:11:49 +0100 Subject: [PATCH 10/43] update changelog --- src/lightning_app/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index e14c5fcfe748a..54afc5818674f 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -46,6 +46,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed race condition to over-write the frontend with app infos ([#15398](https://github.com/Lightning-AI/lightning/pull/15398)) +- Fixed MPS error for multinode component (defaults to cpu on mps devices now as distributed operations are not supported by pytorch on mps) ([#15748](https://github.com/Lightning-AI/lightning/pull/15748)) + + - From cb98589c78c9ad76d25f21ab734b8219b5cb4dfa Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Tue, 22 Nov 2022 11:27:27 +0100 Subject: [PATCH 11/43] update skip reasons --- tests/tests_app/components/multi_node/test_trainer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index df620cf040f7c..45e1aaa2d8bd1 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -35,8 +35,10 @@ def _get_args_after_tracer_injection(**kwargs): return ret_val, env_vars -@pytest.mark.skipif(not module_available("lightning.pytorch")) -@pytest.mark.skipif(not L.pytorch.accelerators.MPSAccelerator.is_available()) +@pytest.mark.skipif(not module_available("lightning.pytorch"), reason="lightning.pytorch not available") 
+@pytest.mark.skipif( + not L.pytorch.accelerators.MPSAccelerator.is_available(), reason="MPS not available but required for this test" +) @pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): warning_str = ( From 0e7acf0733272327068855397046591d700d7fc2 Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 22 Nov 2022 20:42:39 +0100 Subject: [PATCH 12/43] skipif --- tests/tests_app/components/multi_node/test_lite.py | 2 ++ tests/tests_app/components/multi_node/test_trainer.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index e1874e128cea6..f9992085df757 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -68,6 +68,7 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) +@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) @@ -90,6 +91,7 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["LT_CLI_USED"] == "1" +@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") def test_lite_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): _, _ = _get_args_after_tracer_injection(strategy=L.lite.strategies.DDPSpawnStrategy()) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py 
index 45e1aaa2d8bd1..73a6cdf802bf1 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -70,6 +70,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) +@pytest.mark.skipif(not module_available("lightning.pytorch"), reason="lightning.pytorch not available") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) From e4dde73b072a5d76f0f737a4ac0830edc156ed0a Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Wed, 23 Nov 2022 09:50:31 +0100 Subject: [PATCH 13/43] update skip conditions to only use L.lite and L.pytorch if available --- .../tests_app/components/multi_node/test_lite.py | 11 ++++++++--- .../components/multi_node/test_trainer.py | 15 +++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index f9992085df757..eba88037a9002 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -35,8 +35,13 @@ def _get_args_after_tracer_injection(**kwargs): return ret_val, env_vars -@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") -@pytest.mark.skipif(not L.lite.accelerators.MPSAccelerator.is_available(), reason="mps not available") +def check_lightning_lite_mps(): + if module_available("lightning_lite"): + return L.lite.accelerators.MPSAccelerator.is_avalable() + return False + + +@pytest.mark.skipif(not check_lightning_lite_mps(), reason="Lightning.lite not available or mps not available") @pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) def 
test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): warning_str = ( @@ -68,7 +73,7 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") +@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning Lite is required for this test") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 73a6cdf802bf1..5efe3d6193ee1 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -35,10 +35,13 @@ def _get_args_after_tracer_injection(**kwargs): return ret_val, env_vars -@pytest.mark.skipif(not module_available("lightning.pytorch"), reason="lightning.pytorch not available") -@pytest.mark.skipif( - not L.pytorch.accelerators.MPSAccelerator.is_available(), reason="MPS not available but required for this test" -) +def check_lightning_pytorch_and_mps(): + if module_available("lightning.pytorch"): + return L.pytorch.accelerators.MPSAccelerator.is_available() + return False + + +@pytest.mark.skipif(not check_lightning_pytorch_and_mps(), reason="lightning.pytorch and mps are required") @pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): warning_str = ( @@ -70,7 +73,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not 
module_available("lightning.pytorch"), reason="lightning.pytorch not available") +@pytest.mark.skipif(not module_available("lightning.pytorch")) def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) @@ -91,7 +94,7 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["LOCAL_WORLD_SIZE"] == "8" assert env_vars["TORCHELASTIC_RUN_ID"] == "1" - +@pytest.mark.skipif(not module_available('lightning.pytorch'), reason='lightning.pytorch not available') def test_trainer_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): _, _ = _get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnStrategy()) From b64b19eec2acb8248d2c44d1505399007849ef47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Nov 2022 08:54:42 +0000 Subject: [PATCH 14/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_app/components/multi_node/test_trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 5efe3d6193ee1..4b21c102360e4 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -94,7 +94,8 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["LOCAL_WORLD_SIZE"] == "8" assert env_vars["TORCHELASTIC_RUN_ID"] == "1" -@pytest.mark.skipif(not module_available('lightning.pytorch'), reason='lightning.pytorch not available') + +@pytest.mark.skipif(not module_available("lightning.pytorch"), reason="lightning.pytorch not available") def 
test_trainer_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): _, _ = _get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnStrategy()) From e0989d04551d9ff2a73c9ffca395d7dc2fcd757b Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Wed, 23 Nov 2022 10:07:21 +0100 Subject: [PATCH 15/43] typo --- tests/tests_app/components/multi_node/test_lite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index eba88037a9002..5f44d44de4aa6 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -37,7 +37,7 @@ def _get_args_after_tracer_injection(**kwargs): def check_lightning_lite_mps(): if module_available("lightning_lite"): - return L.lite.accelerators.MPSAccelerator.is_avalable() + return L.lite.accelerators.MPSAccelerator.is_available() return False From 29ae55302169b7711e990db67c6be09419e6415e Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Wed, 23 Nov 2022 19:02:32 +0100 Subject: [PATCH 16/43] fix ci --- .../components/multi_node/test_lite.py | 22 +++++++++---------- .../components/multi_node/test_trainer.py | 22 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index 5f44d44de4aa6..f8ed4b99b706d 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -7,13 +7,13 @@ from lightning_utilities.core.imports import module_available from tests_app.helpers.utils import no_warning_call -import lightning as L +import lightning_lite as ll from lightning_app.components.multi_node.lite import _LiteRunExecutor def dummy_callable(**kwargs): - ll = L.lite.LightningLite(**kwargs) - return 
ll._all_passed_kwargs + lite = ll.LightningLite(**kwargs) + return lite._all_passed_kwargs def dummy_init(self, **kwargs): @@ -21,7 +21,7 @@ def dummy_init(self, **kwargs): def _get_args_after_tracer_injection(**kwargs): - with mock.patch.object(L.lite.LightningLite, "__init__", dummy_init): + with mock.patch.object(ll.LightningLite, "__init__", dummy_init): ret_val = _LiteRunExecutor.run( local_rank=0, work_run=partial(dummy_callable, **kwargs), @@ -37,11 +37,11 @@ def _get_args_after_tracer_injection(**kwargs): def check_lightning_lite_mps(): if module_available("lightning_lite"): - return L.lite.accelerators.MPSAccelerator.is_available() + return ll.accelerators.MPSAccelerator.is_available() return False -@pytest.mark.skipif(not check_lightning_lite_mps(), reason="Lightning.lite not available or mps not available") +@pytest.mark.skipif(not check_lightning_lite_mps(), reason="Lightning lite not available or mps not available") @pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): warning_str = ( @@ -73,11 +73,11 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning Lite is required for this test") +@pytest.mark.skipif(not module_available("lightning_lite"), reason="Lightning Lite is required for this test") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) - if L.lite.accelerators.MPSAccelerator.is_available(): + if ll.accelerators.MPSAccelerator.is_available(): args_expected["accelerator"] = "cpu" ret_val, env_vars = _get_args_after_tracer_injection(**args_given) @@ -96,10 +96,10 @@ def 
test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["LT_CLI_USED"] == "1" -@pytest.mark.skipif(not module_available("lightning.lite"), reason="Lightning.lite not available") +@pytest.mark.skipif(not module_available("lightning_lite"), reason="Lightning lite not available") def test_lite_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): - _, _ = _get_args_after_tracer_injection(strategy=L.lite.strategies.DDPSpawnStrategy()) + _, _ = _get_args_after_tracer_injection(strategy=ll.strategies.DDPSpawnStrategy()) with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): - _, _ = _get_args_after_tracer_injection(strategy=L.lite.strategies.DDPSpawnShardedStrategy()) + _, _ = _get_args_after_tracer_injection(strategy=ll.strategies.DDPSpawnShardedStrategy()) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 4b21c102360e4..4255c565245d4 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -7,12 +7,12 @@ from lightning_utilities.core.imports import module_available from tests_app.helpers.utils import no_warning_call -import lightning as L +import pytorch_lightning as pl from lightning_app.components.multi_node.trainer import _LightningTrainerRunExecutor def dummy_callable(**kwargs): - t = L.pytorch.Trainer(**kwargs) + t = pl.Trainer(**kwargs) return t._all_passed_kwargs @@ -21,7 +21,7 @@ def dummy_init(self, **kwargs): def _get_args_after_tracer_injection(**kwargs): - with mock.patch.object(L.pytorch.Trainer, "__init__", dummy_init): + with mock.patch.object(pl.Trainer, "__init__", dummy_init): ret_val = _LightningTrainerRunExecutor.run( local_rank=0, work_run=partial(dummy_callable, **kwargs), @@ -36,12 +36,12 @@ def _get_args_after_tracer_injection(**kwargs): def 
check_lightning_pytorch_and_mps(): - if module_available("lightning.pytorch"): - return L.pytorch.accelerators.MPSAccelerator.is_available() + if module_available("pytorch_lightning"): + return pl.accelerators.MPSAccelerator.is_available() return False -@pytest.mark.skipif(not check_lightning_pytorch_and_mps(), reason="lightning.pytorch and mps are required") +@pytest.mark.skipif(not check_lightning_pytorch_and_mps(), reason="pytorch_lightning and mps are required") @pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): warning_str = ( @@ -73,11 +73,11 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("lightning.pytorch")) +@pytest.mark.skipif(not module_available("pytorch_lightning")) def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) - if L.pytorch.accelerators.MPSAccelerator.is_available(): + if pl.accelerators.MPSAccelerator.is_available(): args_expected["accelerator"] = "cpu" ret_val, env_vars = _get_args_after_tracer_injection(**args_given) @@ -95,10 +95,10 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["TORCHELASTIC_RUN_ID"] == "1" -@pytest.mark.skipif(not module_available("lightning.pytorch"), reason="lightning.pytorch not available") +@pytest.mark.skipif(not module_available("pytorch_lightning"), reason="pytorch_lightning not available") def test_trainer_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): - _, _ = 
_get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnStrategy()) + _, _ = _get_args_after_tracer_injection(strategy=pl.strategies.DDPSpawnStrategy()) with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): - _, _ = _get_args_after_tracer_injection(strategy=L.pytorch.strategies.DDPSpawnShardedStrategy()) + _, _ = _get_args_after_tracer_injection(strategy=pl.strategies.DDPSpawnShardedStrategy()) From be15c13175c34731a37108710c452294d628495b Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Thu, 24 Nov 2022 10:55:49 +0100 Subject: [PATCH 17/43] Update src/lightning_app/CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos MocholĂ­ --- src/lightning_app/CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index eafdccaa0e6ba..fa28f4c1c9859 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -60,7 +60,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- ## [1.8.1] - 2022-11-10 From 7b93a09a1366011e02b29c0562fb86b0e2003547 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Thu, 24 Nov 2022 10:58:01 +0100 Subject: [PATCH 18/43] test workflow --- .github/workflows/ci-app-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 8ddbf2a5ddb8d..7f5713fe6c84a 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -100,7 +100,7 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app" --target_import="lightning.app" + run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.pytorch,lightning.lite" - name: Adjust examples if: ${{ matrix.pkg-name != 'lightning' }} From 2ac55e477e68186744fcac06ead56bab2a9420f5 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 24 Nov 2022 10:59:44 +0100 Subject: [PATCH 19/43] update trainer --- src/lightning_app/components/multi_node/trainer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index a3cac945c1c7c..b3711207b579c 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -61,6 +61,9 @@ def pre_fn(trainer, *args, **kwargs): "Forcing accelerator=cpu as other accelerators (specifically MPS) are not supported " "by PyTorch for distributed training on mps capable devices" ) + else: + kwargs["accelerator"] = "auto" + strategy = kwargs.get("strategy", None) if strategy: if isinstance(strategy, str): From f87fc0e834c5ebbdd29cae77cbd74a472575380d Mon Sep 17 00:00:00 2001 From: 
Justus Schock Date: Thu, 24 Nov 2022 11:20:47 +0100 Subject: [PATCH 20/43] update workflow --- .github/workflows/ci-app-tests.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 7f5713fe6c84a..bdf0fc1693579 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -78,16 +78,16 @@ jobs: - name: Switch PyTorch URL run: python -c "print('TORCH_URL=https://download.pytorch.org/whl/' + str('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html'))" >> $GITHUB_ENV - - name: Install package - env: - PACKAGE_NAME: ${{ matrix.pkg-name }} + - name: Install dependencies run: | - pip install -e . pytest --upgrade --find-links ${TORCH_URL} + pip install -r requirements/app/devel.txt --quiet --find-links ${TORCH_URL} pip list - - name: Install dependencies + - name: Install package + env: + PACKAGE_NAME: ${{ matrix.pkg-name }} run: | - pip install -r requirements/app/devel.txt --quiet --find-links ${TORCH_URL} + pip install -e .[dev] pytest --upgrade --find-links ${TORCH_URL} pip list - name: Setup Node.js From e4911e6b0be1705f38f49f53989b3c3809930022 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 24 Nov 2022 14:03:47 +0100 Subject: [PATCH 21/43] update tests --- tests/tests_app/components/multi_node/test_lite.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index f8ed4b99b706d..cbd252434d8ad 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -11,8 +11,13 @@ from lightning_app.components.multi_node.lite import _LiteRunExecutor +class DummyLite(ll.LightningLite): + def run(self): + pass + + def dummy_callable(**kwargs): - lite = ll.LightningLite(**kwargs) + lite = DummyLite(**kwargs) return 
lite._all_passed_kwargs From 44fe4389b4ab6d8a7df8b9252d67c3cbfba7969a Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:10:28 +0100 Subject: [PATCH 22/43] Update ci-app-tests.yml --- .github/workflows/ci-app-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 0e691dc379dcb..2b431f1de5539 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -99,7 +99,7 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.pytorch,lightning.lite" + run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.lite,lightning.pytorch" - name: Adjust examples if: ${{ matrix.pkg-name != 'lightning' }} From 31ee30bbd14754788c364a43200c4380adf35cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Thu, 24 Nov 2022 10:23:32 -0500 Subject: [PATCH 23/43] Update tests/tests_app/components/multi_node/test_lite.py --- tests/tests_app/components/multi_node/test_lite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index cbd252434d8ad..53ea5175dee8d 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -7,7 +7,7 @@ from lightning_utilities.core.imports import module_available from tests_app.helpers.utils import no_warning_call -import lightning_lite as ll +import lightning.lite as ll from lightning_app.components.multi_node.lite import _LiteRunExecutor From 
f598ecbfb040cb5ee220e5e1b558544665b933d8 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Thu, 24 Nov 2022 16:25:28 +0100 Subject: [PATCH 24/43] debug --- src/lightning_app/components/multi_node/lite.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index db6c5087d36e3..b67071960d522 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -31,6 +31,7 @@ def run( node_rank: int, nprocs: int, ): + from lightning_lite import LightningLite as StandaloneLightningLite from lightning.lite import LightningLite from lightning.lite.accelerators import MPSAccelerator from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy @@ -81,6 +82,7 @@ def pre_fn(lite, *args, **kwargs): return {}, args, kwargs tracer = Tracer() + tracer.add_traced(StandaloneLightningLite, "__init__", pre_fn=pre_fn) tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn) tracer._instrument() ret_val = work_run() From 7914c335d640685101447b99b4c1d791f07ab163 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:28:12 +0000 Subject: [PATCH 25/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lightning_app/components/multi_node/lite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index b67071960d522..abd4000f39937 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -31,10 +31,10 @@ def run( node_rank: int, nprocs: int, ): - from lightning_lite import LightningLite as StandaloneLightningLite from lightning.lite import LightningLite from lightning.lite.accelerators import MPSAccelerator from 
lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy + from lightning_lite import LightningLite as StandaloneLightningLite # Used to configure PyTorch progress group os.environ["MASTER_ADDR"] = main_address From 1365d909dc11784b4cb6b4fc6a0dbff6795d164e Mon Sep 17 00:00:00 2001 From: awaelchli Date: Thu, 24 Nov 2022 16:36:58 +0100 Subject: [PATCH 26/43] debug --- tests/tests_app/utilities/packaging/test_build_spec.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index e1b9466dc0806..c89cfae7b87a9 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -27,7 +27,7 @@ def test_build_config_requirements_provided(): assert spec.requirements == [ "dask", "pandas", - "pytorch_" + "lightning==1.5.9", # ugly hack due to replacing `pytorch_lightning string` + "lightning.pytorch==1.5.9", "git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0", ] assert spec == BuildConfig.from_dict(spec.to_dict()) @@ -47,8 +47,7 @@ def test_build_config_invalid_requirements(): def test_build_config_dockerfile_provided(): spec = BuildConfig(dockerfile="./projects/Dockerfile.cpu") assert not spec.requirements - # ugly hack due to replacing `pytorch_lightning string - assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile[0] + assert "pytorchlightning/lightning.pytorch" in spec.dockerfile[0] class DockerfileLightningTestApp(LightningTestApp): From a7941049aa73589d2c6dc44eb21838272fad168d Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 24 Nov 2022 16:41:51 +0100 Subject: [PATCH 27/43] update executors to work with standalone and unified --- .../components/multi_node/lite.py | 23 ++++++++++++------- .../components/multi_node/trainer.py | 23 ++++++++++++------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git 
a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index abd4000f39937..213893d825a96 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -2,6 +2,7 @@ import warnings from dataclasses import dataclass from typing import Any, Callable, Type +import importlib from typing_extensions import Protocol, runtime_checkable @@ -31,10 +32,16 @@ def run( node_rank: int, nprocs: int, ): - from lightning.lite import LightningLite - from lightning.lite.accelerators import MPSAccelerator - from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy - from lightning_lite import LightningLite as StandaloneLightningLite + lites = [] + strategies = [] + mps_accelerators = [] + + for pkg_name in ("lightning.lite", "lightning_" + "lite"): + pkg = importlib.import_module(pkg_name) + lites.append(pkg.LightningLite) + strategies.append(pkg.strategies.DDPSpawnShardedStrategy) + strategies.append(pkg.strategies.DDPSpawnStrategy) + mps_accelerators.append(pkg.accelerators.MPSAccelerator) # Used to configure PyTorch progress group os.environ["MASTER_ADDR"] = main_address @@ -56,7 +63,7 @@ def pre_fn(lite, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - if MPSAccelerator.is_available(): + if any(x.is_available() for x in mps_accelerators): old_acc_value = kwargs.get("accelerator", "auto") kwargs["accelerator"] = "cpu" @@ -74,7 +81,7 @@ def pre_fn(lite, *args, **kwargs): strategy = "ddp" elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" - elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): + elif isinstance(strategy, tuple(strategies)): raise ValueError("DDP Spawned strategies aren't supported yet.") kwargs["strategy"] = strategy @@ -82,8 +89,8 @@ def pre_fn(lite, *args, **kwargs): return {}, args, kwargs tracer = Tracer() - tracer.add_traced(StandaloneLightningLite, "__init__", pre_fn=pre_fn) - 
tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn) + for ll in lites: + tracer.add_traced(ll, "__init__", pre_fn=pre_fn) tracer._instrument() ret_val = work_run() tracer._restore() diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index b3711207b579c..3e1b906d7cc56 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -1,3 +1,4 @@ +import importlib import os import warnings from dataclasses import dataclass @@ -31,10 +32,16 @@ def run( node_rank: int, nprocs: int, ): - from lightning.pytorch import Trainer as LTrainer - from lightning.pytorch.accelerators import MPSAccelerator - from lightning.pytorch.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy - from pytorch_lightning import Trainer as PLTrainer + trainers = [] + strategies = [] + mps_accelerators = [] + + for pkg_name in ("lightning.pytorch", "pytorch_" + "lightning"): + pkg = importlib.import_module(pkg_name) + trainers.append(pkg.Trainer) + strategies.append(pkg.strategies.DDPSpawnShardedStrategy) + strategies.append(pkg.strategies.DDPSpawnStrategy) + mps_accelerators.append(pkg.accelerators.MPSAccelerator) # Used to configure PyTorch progress group os.environ["MASTER_ADDR"] = main_address @@ -52,7 +59,7 @@ def run( def pre_fn(trainer, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - if MPSAccelerator.is_available(): + if any(x.is_available for x in mps_accelerators): old_acc_value = kwargs.get("accelerator", "auto") kwargs["accelerator"] = "cpu" @@ -71,14 +78,14 @@ def pre_fn(trainer, *args, **kwargs): strategy = "ddp" elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" - elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): + elif isinstance(strategy, tuple(strategies)): raise ValueError("DDP Spawned strategies aren't supported yet.") kwargs["strategy"] = strategy return {}, args, kwargs 
tracer = Tracer() - tracer.add_traced(PLTrainer, "__init__", pre_fn=pre_fn) - tracer.add_traced(LTrainer, "__init__", pre_fn=pre_fn) + for trainer in trainers: + tracer.add_traced(trainer, "__init__", pre_fn=pre_fn) tracer._instrument() ret_val = work_run() tracer._restore() From 4bb858034a22e1d82cef80d67093b90aecd30125 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:49:36 +0000 Subject: [PATCH 28/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lightning_app/components/multi_node/lite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index 213893d825a96..c67c986294af7 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -1,8 +1,8 @@ +import importlib import os import warnings from dataclasses import dataclass from typing import Any, Callable, Type -import importlib from typing_extensions import Protocol, runtime_checkable From bfd884581d9b4f7c8b25552b29a0ba64907240d4 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 24 Nov 2022 17:53:18 +0100 Subject: [PATCH 29/43] add reason for skipif --- tests/tests_app/components/multi_node/test_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 4255c565245d4..40a37ef420b97 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -73,7 +73,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("pytorch_lightning")) +@pytest.mark.skipif(not 
module_available("pytorch_lightning"), reason="Pytorch Lightning is not available") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) From 5295c45cf8683351c6566dfa4847a65a95edfaf8 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 10:45:58 +0100 Subject: [PATCH 30/43] update test --- tests/tests_app/components/multi_node/test_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 40a37ef420b97..4152fcd636d59 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -78,7 +78,7 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) if pl.accelerators.MPSAccelerator.is_available(): - args_expected["accelerator"] = "cpu" + args_expected.pop("accelerator") ret_val, env_vars = _get_args_after_tracer_injection(**args_given) From b9af23db1a9c05669c0ab88ce7f054020ee7ba6e Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 10:47:05 +0100 Subject: [PATCH 31/43] update test --- tests/tests_app/utilities/packaging/test_build_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index fc31249e04097..185bab0e363d7 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -29,7 +29,7 @@ def test_build_config_requirements_provided(): assert spec.requirements == [ "dask", "pandas", - "lightning.pytorch==1.5.9", + "pytorch_lightning==1.5.9", 
"git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0", ] assert spec == BuildConfig.from_dict(spec.to_dict()) From bb73075103215a0772c3ca58a03cc2bcfbd74856 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 10:49:47 +0100 Subject: [PATCH 32/43] update test --- tests/tests_app/utilities/packaging/test_build_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index 185bab0e363d7..eeb02df58315a 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -29,7 +29,7 @@ def test_build_config_requirements_provided(): assert spec.requirements == [ "dask", "pandas", - "pytorch_lightning==1.5.9", + "pytorch_" + "lightning==1.5.9", "git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0", ] assert spec == BuildConfig.from_dict(spec.to_dict()) From 7eeb77376a3755823eb85fb058e61f1483b4fee2 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 14:22:05 +0100 Subject: [PATCH 33/43] update test --- src/lightning_app/components/multi_node/trainer.py | 2 +- tests/tests_app/components/multi_node/test_trainer.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 3e1b906d7cc56..feada83f1808a 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -59,7 +59,7 @@ def run( def pre_fn(trainer, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - if any(x.is_available for x in mps_accelerators): + if any(x.is_available() for x in mps_accelerators): old_acc_value = kwargs.get("accelerator", "auto") kwargs["accelerator"] = "cpu" diff --git a/tests/tests_app/components/multi_node/test_trainer.py 
b/tests/tests_app/components/multi_node/test_trainer.py index 4152fcd636d59..616361ff97165 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -74,11 +74,13 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ], ) @pytest.mark.skipif(not module_available("pytorch_lightning"), reason="Pytorch Lightning is not available") -def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): +def test_trainer_run_executor_arguments_choices( + args_given: dict, + args_expected: dict, +): - # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) if pl.accelerators.MPSAccelerator.is_available(): - args_expected.pop("accelerator") + args_expected.pop("accelerator", None) # Cross platform tests -> MPS is tested separately ret_val, env_vars = _get_args_after_tracer_injection(**args_given) From cfe922642c782fb0e5eb80f62bd679772d86ea95 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 15:19:54 +0100 Subject: [PATCH 34/43] update test --- tests/tests_app/utilities/packaging/test_build_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index eeb02df58315a..185bab0e363d7 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -29,7 +29,7 @@ def test_build_config_requirements_provided(): assert spec.requirements == [ "dask", "pandas", - "pytorch_" + "lightning==1.5.9", + "pytorch_lightning==1.5.9", "git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0", ] assert spec == BuildConfig.from_dict(spec.to_dict()) From 4934ac18b5be8eab941150ed672597aa8230844f Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 25 Nov 2022 15:35:42 +0100 Subject: [PATCH 35/43] update test 
--- tests/tests_app/utilities/packaging/test_build_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index 185bab0e363d7..70c4a60374b67 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -50,7 +50,7 @@ def test_build_config_dockerfile_provided(): spec = BuildConfig(dockerfile="./projects/Dockerfile.cpu") assert not spec.requirements # ugly hack due to replacing `pytorch_lightning string - assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile.data[0] + assert "pytorchlightning/pytorch_lightning" in spec.dockerfile.data[0] class DockerfileLightningTestApp(LightningTestApp): From 9d69ce1c5e3f1407d8ad81dfe5d9bca2189a4e2c Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 7 Dec 2022 11:50:32 +0100 Subject: [PATCH 36/43] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos MocholĂ­ --- src/lightning_app/components/multi_node/lite.py | 3 +-- src/lightning_app/components/multi_node/trainer.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index c67c986294af7..c0b5f09498750 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -69,8 +69,7 @@ def pre_fn(lite, *args, **kwargs): if old_acc_value != kwargs["accelerator"]: warnings.warn( - "Forcing accelerator=cpu as other accelerators (specifically MPS) are not supported " - "by PyTorch for distributed training on mps capable devices" + "Forcing `accelerator=cpu` as MPS does not support distributed training." 
) else: kwargs["accelerator"] = "auto" diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index feada83f1808a..e2e5d2224cc05 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -65,8 +65,7 @@ def pre_fn(trainer, *args, **kwargs): if old_acc_value != kwargs["accelerator"]: warnings.warn( - "Forcing accelerator=cpu as other accelerators (specifically MPS) are not supported " - "by PyTorch for distributed training on mps capable devices" + "Forcing `accelerator=cpu` as MPS does not support distributed training." ) else: kwargs["accelerator"] = "auto" From ae285c937a502aa358d6f0ac8c84fddac3163363 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:51:59 +0000 Subject: [PATCH 37/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lightning_app/components/multi_node/lite.py | 4 +--- src/lightning_app/components/multi_node/trainer.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index c0b5f09498750..b07a757dde6b2 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -68,9 +68,7 @@ def pre_fn(lite, *args, **kwargs): kwargs["accelerator"] = "cpu" if old_acc_value != kwargs["accelerator"]: - warnings.warn( - "Forcing `accelerator=cpu` as MPS does not support distributed training." 
- ) + warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.") else: kwargs["accelerator"] = "auto" strategy = kwargs.get("strategy", None) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index e2e5d2224cc05..8f11537c7d3c8 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -64,9 +64,7 @@ def pre_fn(trainer, *args, **kwargs): kwargs["accelerator"] = "cpu" if old_acc_value != kwargs["accelerator"]: - warnings.warn( - "Forcing `accelerator=cpu` as MPS does not support distributed training." - ) + warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.") else: kwargs["accelerator"] = "auto" From 6e0e00b102fa3829e58d94c68430e0b21ee7960a Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 7 Dec 2022 12:19:21 +0100 Subject: [PATCH 38/43] ll --- tests/tests_app/components/multi_node/test_lite.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index 53ea5175dee8d..0f8bc1cf3b425 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -7,7 +7,7 @@ from lightning_utilities.core.imports import module_available from tests_app.helpers.utils import no_warning_call -import lightning.lite as ll +import lightning_lite as ll from lightning_app.components.multi_node.lite import _LiteRunExecutor @@ -66,14 +66,7 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte @pytest.mark.parametrize( "args_given,args_expected", [ - ( - { - "devices": 1, - "num_nodes": 1, - "accelerator": "gpu", - }, - {"devices": 8, "num_nodes": 7, "accelerator": "auto"}, - ), + ({"devices": 1, "num_nodes": 1, "accelerator": "gpu"}, {"devices": 8, "num_nodes": 7, "accelerator": "auto"}), 
({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], From 24ca54fe709ce7c2b5cc13df3dfd820267b36dfe Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 7 Dec 2022 13:20:44 +0100 Subject: [PATCH 39/43] switch skipif --- tests/tests_app/components/multi_node/test_lite.py | 2 +- tests/tests_app/components/multi_node/test_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index 0f8bc1cf3b425..5c91f21b4a25c 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -71,7 +71,7 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("lightning_lite"), reason="Lightning Lite is required for this test") +@pytest.mark.skipif(not module_available("lightning"), reason="Lightning Lite is required for this test") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 616361ff97165..dffec2979ec7d 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -73,7 +73,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("pytorch_lightning"), reason="Pytorch Lightning is not available") +@pytest.mark.skipif(not module_available("pytorch"), reason="Pytorch Lightning is not available") def 
test_trainer_run_executor_arguments_choices( args_given: dict, args_expected: dict, From 935a16275f03cf4b9a5acd423cc4d42839b4d57a Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 7 Dec 2022 14:02:43 +0100 Subject: [PATCH 40/43] . --- tests/tests_app/components/multi_node/test_lite.py | 2 +- tests/tests_app/components/multi_node/test_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index 5c91f21b4a25c..ed94a6923ee94 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -71,7 +71,7 @@ def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expecte ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("lightning"), reason="Lightning Lite is required for this test") +@pytest.mark.skipif(not module_available("lightning"), reason="Lightning is required for this test") def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index dffec2979ec7d..5dc62c8ccaff8 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -73,7 +73,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("pytorch"), reason="Pytorch Lightning is not available") +@pytest.mark.skipif(not module_available("pytorch"), reason="Lightning is not available") def test_trainer_run_executor_arguments_choices( args_given: dict, args_expected: dict, From 
0c0aa6d6561eb697e3f11ffb8a3f4ca3ba795166 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 7 Dec 2022 14:18:02 +0100 Subject: [PATCH 41/43] another --- tests/tests_app/components/multi_node/test_lite.py | 2 +- tests/tests_app/components/multi_node/test_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py index ed94a6923ee94..9b8aa29779fd2 100644 --- a/tests/tests_app/components/multi_node/test_lite.py +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -94,7 +94,7 @@ def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: assert env_vars["LT_CLI_USED"] == "1" -@pytest.mark.skipif(not module_available("lightning_lite"), reason="Lightning lite not available") +@pytest.mark.skipif(not module_available("lightning"), reason="Lightning not available") def test_lite_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): _, _ = _get_args_after_tracer_injection(strategy=ll.strategies.DDPSpawnStrategy()) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index 5dc62c8ccaff8..edd266aa14a1b 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -97,7 +97,7 @@ def test_trainer_run_executor_arguments_choices( assert env_vars["TORCHELASTIC_RUN_ID"] == "1" -@pytest.mark.skipif(not module_available("pytorch_lightning"), reason="pytorch_lightning not available") +@pytest.mark.skipif(not module_available("lightning"), reason="lightning not available") def test_trainer_run_executor_invalid_strategy_instances(): with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): _, _ = _get_args_after_tracer_injection(strategy=pl.strategies.DDPSpawnStrategy()) From 
c24e1a05436225bf1612b4ae8e9af19654d4c236 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 8 Dec 2022 08:03:20 +0100 Subject: [PATCH 42/43] Apply suggestions from code review --- src/lightning_app/components/multi_node/lite.py | 2 +- src/lightning_app/components/multi_node/trainer.py | 2 +- tests/tests_app/components/multi_node/test_trainer.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index b07a757dde6b2..36709d409e1a0 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -63,7 +63,7 @@ def pre_fn(lite, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - if any(x.is_available() for x in mps_accelerators): + if any(acc.is_available() for acc in mps_accelerators): old_acc_value = kwargs.get("accelerator", "auto") kwargs["accelerator"] = "cpu" diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 8f11537c7d3c8..8f25b71d622c1 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -59,7 +59,7 @@ def run( def pre_fn(trainer, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - if any(x.is_available() for x in mps_accelerators): + if any(acc.is_available() for acc in mps_accelerators): old_acc_value = kwargs.get("accelerator", "auto") kwargs["accelerator"] = "cpu" diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index edd266aa14a1b..e82ab9392f83e 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -65,9 +65,9 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe { 
"devices": 1, "num_nodes": 1, - "accelerator": "gpu", + "accelerator": "gpu" }, - {"devices": 8, "num_nodes": 7, "accelerator": "auto"}, + {"devices": 8, "num_nodes": 7, "accelerator": "auto"} ), ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), From ee0f19a624f24bb50700ae926e53200508578091 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Dec 2022 07:04:40 +0000 Subject: [PATCH 43/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_app/components/multi_node/test_trainer.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index e82ab9392f83e..c86e0968e2ab0 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -61,14 +61,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe @pytest.mark.parametrize( "args_given,args_expected", [ - ( - { - "devices": 1, - "num_nodes": 1, - "accelerator": "gpu" - }, - {"devices": 8, "num_nodes": 7, "accelerator": "auto"} - ), + ({"devices": 1, "num_nodes": 1, "accelerator": "gpu"}, {"devices": 8, "num_nodes": 7, "accelerator": "auto"}), ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ],