Avoid interactions through test artifacts #19821

Merged
merged 13 commits on Apr 28, 2024
12 changes: 12 additions & 0 deletions tests/tests_fabric/conftest.py
@@ -14,6 +14,7 @@
import os
import sys
import threading
from pathlib import Path
from typing import List
from unittest.mock import Mock

@@ -185,6 +186,17 @@ def caplog(caplog):
lightning_logger.propagate = propagate


@pytest.fixture(autouse=True)
def leave_no_artifacts_behind():
tests_root = Path(__file__).parent.parent
files_before = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
yield
files_after = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
difference = files_after - files_before
difference = {str(f.relative_to(tests_root)) for f in difference}
assert not difference, f"Test left artifacts behind: {difference}"


def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.Config) -> None:
"""An adaptation of `tests/tests_pytorch/conftest.py::pytest_collection_modifyitems`"""
initial_size = len(items)
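For context: this autouse fixture snapshots every path under `tests/` before a test runs, takes the same snapshot afterwards, and fails the test if any new files remain. A minimal sketch of the kind of test it is meant to catch, with hypothetical test names and not part of this diff:

# Hypothetical example, not part of this PR: a test that writes into the
# repository versus the tmp_path-based pattern used throughout this diff.
from lightning.pytorch import Trainer
from lightning.pytorch.demos.boring_classes import BoringModel


def test_writes_into_repo():  # would trip `leave_no_artifacts_behind`
    # Without `default_root_dir`, the Trainer writes `lightning_logs/` and
    # checkpoints into the current working directory, i.e. the repo.
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=1)
    trainer.fit(BoringModel())


def test_writes_into_tmp_path(tmp_path):  # passes the artifact check
    # `tmp_path` is pytest's per-test temporary directory; everything the
    # Trainer produces lands there and is cleaned up automatically.
    trainer = Trainer(
        default_root_dir=tmp_path,
        logger=False,
        max_epochs=1,
        limit_train_batches=1,
        limit_val_batches=1,
    )
    trainer.fit(BoringModel())

The second variant is the pattern applied to the tests touched below.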
17 changes: 6 additions & 11 deletions tests/tests_pytorch/__init__.py
@@ -13,24 +13,19 @@
# limitations under the License.
import os
import warnings
from pathlib import Path

import pytest

_TEST_ROOT = os.path.dirname(__file__)
_PROJECT_ROOT = os.path.dirname(_TEST_ROOT)
_TEMP_PATH = os.path.join(_PROJECT_ROOT, "test_temp")
_PATH_DATASETS = os.path.join(_PROJECT_ROOT, "Datasets")
_PATH_LEGACY = os.path.join(_PROJECT_ROOT, "legacy")
_TEST_ROOT = Path(__file__).parent.parent
_PROJECT_ROOT = _TEST_ROOT.parent
_PATH_DATASETS = _PROJECT_ROOT / "Datasets"
_PATH_LEGACY = _TEST_ROOT / "legacy"

# todo: this setting `PYTHONPATH` may not be used by other envs like Conda for importing packages
if _PROJECT_ROOT not in os.getenv("PYTHONPATH", ""):
if str(_PROJECT_ROOT) not in os.getenv("PYTHONPATH", ""):
splitter = ":" if os.environ.get("PYTHONPATH", "") else ""
os.environ["PYTHONPATH"] = f'{_PROJECT_ROOT}{splitter}{os.environ.get("PYTHONPATH", "")}'


if not os.path.isdir(_TEMP_PATH):
os.mkdir(_TEMP_PATH)


# Ignore cleanup warnings from pytest (rarely happens due to a race condition when executing pytest in parallel)
warnings.filterwarnings("ignore", category=pytest.PytestWarning, message=r".*\(rm_rf\) error removing.*")
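Dropping `_TEMP_PATH` also means that importing `tests_pytorch` no longer creates a `test_temp/` directory in the project root, which is exactly the kind of repository-level artifact the new fixture flags. If a test ever needs a directory shared across tests, pytest's built-in `tmp_path_factory` is the usual substitute; the following is only a sketch of that alternative, not code from this PR:

import pytest


@pytest.fixture(scope="session")
def shared_temp_dir(tmp_path_factory):
    # Session-scoped temporary directory managed by pytest. It lives under
    # pytest's base temp directory, not under the repository, so the
    # `leave_no_artifacts_behind` fixture never sees it.
    return tmp_path_factory.mktemp("shared")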
@@ -447,9 +447,10 @@ def test_rich_progress_bar_padding():


@RunIf(rich=True)
def test_rich_progress_bar_can_be_pickled():
def test_rich_progress_bar_can_be_pickled(tmp_path):
bar = RichProgressBar()
trainer = Trainer(
default_root_dir=tmp_path,
callbacks=[bar],
max_epochs=1,
limit_train_batches=1,
@@ -550,9 +550,10 @@ def test_tqdm_progress_bar_print_disabled(tqdm_write, mock_print, tmp_path):
tqdm_write.assert_not_called()


def test_tqdm_progress_bar_can_be_pickled():
def test_tqdm_progress_bar_can_be_pickled(tmp_path):
bar = TQDMProgressBar()
trainer = Trainer(
default_root_dir=tmp_path,
callbacks=[bar],
max_epochs=1,
limit_train_batches=1,
@@ -162,7 +162,7 @@ def test_device_stats_monitor_warning_when_psutil_not_available(monkeypatch, tmp

monkeypatch.setattr(imports, "_PSUTIL_AVAILABLE", False)
monitor = DeviceStatsMonitor()
trainer = Trainer(logger=CSVLogger(tmp_path))
trainer = Trainer(accelerator="cpu", logger=CSVLogger(tmp_path))
assert trainer.strategy.root_device == torch.device("cpu")
with pytest.raises(ModuleNotFoundError, match="psutil` is not installed"):
monitor.setup(trainer, Mock(), "fit")
4 changes: 2 additions & 2 deletions tests/tests_pytorch/callbacks/test_finetuning_callback.py
Expand Up @@ -113,7 +113,7 @@ def configure_optimizers(self):
trainer.fit(model)

assert model.backbone.has_been_used
trainer = Trainer(max_epochs=3)
trainer = Trainer(default_root_dir=tmp_path, max_epochs=3)
trainer.fit(model, ckpt_path=chk.last_model_path)


@@ -245,7 +245,7 @@ def configure_optimizers(self):

model = FreezeModel()
cb = OnEpochLayerFinetuning()
trainer = Trainer(max_epochs=10, callbacks=[cb])
trainer = Trainer(default_root_dir=tmp_path, max_epochs=10, callbacks=[cb])
with pytest.raises(IndexError, match="index 6 is out of range"):
trainer.fit(model, ckpt_path=chk.last_model_path)

18 changes: 9 additions & 9 deletions tests/tests_pytorch/callbacks/test_prediction_writer.py
@@ -35,7 +35,7 @@ def test_prediction_writer_invalid_write_interval():
DummyPredictionWriter("something")


def test_prediction_writer_hook_call_intervals():
def test_prediction_writer_hook_call_intervals(tmp_path):
"""Test that the `write_on_batch_end` and `write_on_epoch_end` hooks get invoked based on the defined interval."""
DummyPredictionWriter.write_on_batch_end = Mock()
DummyPredictionWriter.write_on_epoch_end = Mock()
@@ -44,7 +44,7 @@ def test_prediction_writer_hook_call_intervals():

model = BoringModel()
cb = DummyPredictionWriter("batch_and_epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=cb)
results = trainer.predict(model, dataloaders=dataloader)
assert len(results) == 4
assert cb.write_on_batch_end.call_count == 4
@@ -54,7 +54,7 @@ def test_prediction_writer_hook_call_intervals():
DummyPredictionWriter.write_on_epoch_end.reset_mock()

cb = DummyPredictionWriter("batch_and_epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=cb)
trainer.predict(model, dataloaders=dataloader, return_predictions=False)
assert cb.write_on_batch_end.call_count == 4
assert cb.write_on_epoch_end.call_count == 1
@@ -63,7 +63,7 @@ def test_prediction_writer_hook_call_intervals():
DummyPredictionWriter.write_on_epoch_end.reset_mock()

cb = DummyPredictionWriter("batch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=cb)
trainer.predict(model, dataloaders=dataloader, return_predictions=False)
assert cb.write_on_batch_end.call_count == 4
assert cb.write_on_epoch_end.call_count == 0
@@ -72,21 +72,21 @@ def test_prediction_writer_hook_call_intervals():
DummyPredictionWriter.write_on_epoch_end.reset_mock()

cb = DummyPredictionWriter("epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=cb)
trainer.predict(model, dataloaders=dataloader, return_predictions=False)
assert cb.write_on_batch_end.call_count == 0
assert cb.write_on_epoch_end.call_count == 1


@pytest.mark.parametrize("num_workers", [0, 2])
def test_prediction_writer_batch_indices(num_workers):
def test_prediction_writer_batch_indices(num_workers, tmp_path):
DummyPredictionWriter.write_on_batch_end = Mock()
DummyPredictionWriter.write_on_epoch_end = Mock()

dataloader = DataLoader(RandomDataset(32, 64), batch_size=4, num_workers=num_workers)
model = BoringModel()
writer = DummyPredictionWriter("batch_and_epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=writer)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=writer)
trainer.predict(model, dataloaders=dataloader)

writer.write_on_batch_end.assert_has_calls([
@@ -101,7 +101,7 @@ def test_prediction_writer_batch_indices(num_workers):
])


def test_batch_level_batch_indices():
def test_batch_level_batch_indices(tmp_path):
"""Test that batch_indices are returned when `return_predictions=False`."""
DummyPredictionWriter.write_on_batch_end = Mock()

@@ -112,7 +112,7 @@ def on_predict_epoch_end(self, *args, **kwargs):
writer = DummyPredictionWriter("batch")
model = CustomBoringModel()
dataloader = DataLoader(RandomDataset(32, 64), batch_size=4)
trainer = Trainer(limit_predict_batches=4, callbacks=writer)
trainer = Trainer(default_root_dir=tmp_path, logger=False, limit_predict_batches=4, callbacks=writer)
trainer.predict(model, dataloaders=dataloader, return_predictions=False)

writer.write_on_batch_end.assert_has_calls([
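The edits in this file all follow the same recipe: every `Trainer` gains `default_root_dir=tmp_path` and, where no log output is asserted on, `logger=False`, so checkpoints and `lightning_logs/` land in pytest's temporary directory instead of the repository. A hypothetical helper that bundles those defaults (an illustration only, not something this PR adds) could look like:

from lightning.pytorch import Trainer


def make_test_trainer(tmp_path, **kwargs):
    # Keep all Trainer output out of the repository by default; individual
    # tests still override anything they actually exercise.
    kwargs.setdefault("default_root_dir", tmp_path)
    kwargs.setdefault("logger", False)
    return Trainer(**kwargs)


# Example usage inside a test:
# trainer = make_test_trainer(tmp_path, limit_predict_batches=4, callbacks=writer)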
12 changes: 9 additions & 3 deletions tests/tests_pytorch/callbacks/test_pruning.py
@@ -190,7 +190,7 @@ def test_pruning_callback_ddp_cpu(tmp_path):


@pytest.mark.parametrize("resample_parameters", [False, True])
def test_pruning_lth_callable(tmp_path, resample_parameters: bool):
def test_pruning_lth_callable(tmp_path, resample_parameters):
model = TestModel()

class ModelPruningTestCallback(ModelPruning):
@@ -206,7 +206,7 @@ def apply_lottery_ticket_hypothesis(self):
curr, curr_name = self._parameters_to_prune[i]
assert name == curr_name
actual, expected = getattr(curr, name).data, getattr(copy, name).data
allclose = torch.allclose(actual, expected)
allclose = torch.allclose(actual.cpu(), expected)
assert not allclose if self._resample_parameters else allclose

pruning = ModelPruningTestCallback(
@@ -310,7 +310,13 @@ def on_save_checkpoint(self, trainer, pl_module, checkpoint):
ckpt_callback = ModelCheckpoint(
monitor="test", save_top_k=2, save_last=True, save_on_train_epoch_end=save_on_train_epoch_end
)
trainer = Trainer(callbacks=[pruning_callback, ckpt_callback], max_epochs=3, enable_progress_bar=False)
trainer = Trainer(
default_root_dir=tmp_path,
logger=False,
callbacks=[pruning_callback, ckpt_callback],
max_epochs=3,
enable_progress_bar=False,
)
with caplog.at_level(INFO):
trainer.fit(model)

2 changes: 2 additions & 0 deletions tests/tests_pytorch/callbacks/test_spike.py
@@ -213,6 +213,8 @@ def test_trainer_spike_detection_integration(tmp_path, global_rank_spike, num_de
cb.should_raise = spike_value is None or finite_only or spike_value == float("inf")

trainer = Trainer(
default_root_dir=tmp_path,
logger=False,
callbacks=[cb],
accelerator="cpu",
devices=num_devices,
8 changes: 4 additions & 4 deletions tests/tests_pytorch/callbacks/test_timer.py
@@ -26,24 +26,24 @@
from tests_pytorch.helpers.runif import RunIf


def test_trainer_flag(caplog):
def test_trainer_flag(caplog, tmp_path):
class TestModel(BoringModel):
def on_fit_start(self):
raise SystemExit()

trainer = Trainer(max_time={"seconds": 1337})
trainer = Trainer(default_root_dir=tmp_path, logger=False, max_time={"seconds": 1337})
with pytest.raises(SystemExit):
trainer.fit(TestModel())
timer = [c for c in trainer.callbacks if isinstance(c, Timer)][0]
assert timer._duration == 1337

trainer = Trainer(max_time={"seconds": 1337}, callbacks=[Timer()])
trainer = Trainer(default_root_dir=tmp_path, logger=False, max_time={"seconds": 1337}, callbacks=[Timer()])
with pytest.raises(SystemExit), caplog.at_level(level=logging.INFO):
trainer.fit(TestModel())
assert "callbacks list already contains a Timer" in caplog.text

# Make sure max_time still honored even if max_epochs == -1
trainer = Trainer(max_time={"seconds": 1}, max_epochs=-1)
trainer = Trainer(default_root_dir=tmp_path, logger=False, max_time={"seconds": 1}, max_epochs=-1)
with pytest.raises(SystemExit):
trainer.fit(TestModel())
timer = [c for c in trainer.callbacks if isinstance(c, Timer)][0]
@@ -24,7 +24,7 @@

def test_disabled_checkpointing():
# no callback
trainer = Trainer(max_epochs=3, enable_checkpointing=False)
trainer = Trainer(logger=False, max_epochs=3, enable_checkpointing=False)
assert not trainer.checkpoint_callbacks
trainer.fit(BoringModel())
assert not trainer.checkpoint_callbacks
11 changes: 11 additions & 0 deletions tests/tests_pytorch/conftest.py
@@ -308,6 +308,17 @@ def single_process_pg():
os.environ.update(orig_environ)


@pytest.fixture(autouse=True)
def leave_no_artifacts_behind():
tests_root = Path(__file__).parent.parent
files_before = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
yield
files_after = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
difference = files_after - files_before
difference = {str(f.relative_to(tests_root)) for f in difference}
assert not difference, f"Test left artifacts behind: {difference}"


def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.Config) -> None:
initial_size = len(items)
conditions = []
3 changes: 2 additions & 1 deletion tests/tests_pytorch/core/test_datamodules.py
@@ -452,11 +452,12 @@ class BoringDataModule2(LightningDataModule):


@RunIf(skip_windows=True) # TODO: all durations are 0 on Windows
def test_datamodule_hooks_are_profiled():
def test_datamodule_hooks_are_profiled(tmp_path):
"""Test that `LightningDataModule` hooks are profiled."""

def get_trainer():
return Trainer(
default_root_dir=tmp_path,
max_steps=1,
limit_val_batches=0,
profiler="simple",
3 changes: 3 additions & 0 deletions tests/tests_pytorch/core/test_lightning_optimizer.py
@@ -23,6 +23,8 @@
from lightning.pytorch.tuner.tuning import Tuner
from torch.optim import SGD, Adam, Optimizer

from tests_pytorch.helpers.runif import RunIf


@pytest.mark.parametrize("auto", [True, False])
def test_lightning_optimizer(tmp_path, auto):
@@ -232,6 +234,7 @@ def configure_optimizers(self):
assert sgd["zero_grad"].call_count == limit_train_batches


@RunIf(mps=False) # mps does not support LBFGS
def test_lightning_optimizer_automatic_optimization_lbfgs_zero_grad(tmp_path):
"""Test zero_grad is called the same number of times as LBFGS requires for reevaluation of the loss in
automatic_optimization."""
@@ -395,7 +395,7 @@ def on_train_epoch_end(self) -> None:
@pytest.mark.parametrize(
"kwargs",
[
{},
pytest.param({}, marks=RunIf(mps=False)),
pytest.param({"strategy": "ddp", "accelerator": "gpu", "devices": 1}, marks=RunIf(min_cuda_gpus=1)),
pytest.param(
{"strategy": "ddp", "accelerator": "gpu", "devices": 2}, marks=RunIf(min_cuda_gpus=2, standalone=True)
2 changes: 2 additions & 0 deletions tests/tests_pytorch/core/test_saving.py
@@ -13,6 +13,8 @@
def create_boring_checkpoint(tmp_path, model, accelerator="cuda"):
checkpoint_callback = ModelCheckpoint(dirpath=tmp_path, filename="checkpoint")
trainer = pl.Trainer(
default_root_dir=tmp_path,
logger=False,
devices=1,
accelerator=accelerator,
max_epochs=1,
17 changes: 0 additions & 17 deletions tests/tests_pytorch/helpers/datasets.py
@@ -39,14 +39,6 @@ class MNIST(Dataset):
download: If true, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.

Examples:
>>> dataset = MNIST(".", download=True)
>>> len(dataset)
60000
>>> torch.bincount(dataset.targets)
tensor([5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949])

"""

RESOURCES = (
@@ -141,15 +133,6 @@ class TrialMNIST(MNIST):
digits: list selected MNIST digits/classes
kwargs: Same as MNIST

Examples:
>>> dataset = TrialMNIST(".", download=True)
>>> len(dataset)
300
>>> sorted(set([d.item() for d in dataset.targets]))
[0, 1, 2]
>>> torch.bincount(dataset.targets)
tensor([100, 100, 100])

"""

def __init__(self, root: str, num_samples: int = 100, digits: Optional[Sequence] = (0, 1, 2), **kwargs):
14 changes: 14 additions & 0 deletions tests/tests_pytorch/helpers/test_datasets.py
@@ -15,11 +15,25 @@

import cloudpickle
import pytest
import torch

from tests_pytorch import _PATH_DATASETS
from tests_pytorch.helpers.datasets import MNIST, AverageDataset, TrialMNIST


def test_mnist(tmp_path):
dataset = MNIST(tmp_path, download=True)
assert len(dataset) == 60000
assert torch.bincount(dataset.targets).tolist() == [5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949]


def test_trial_mnist(tmp_path):
dataset = TrialMNIST(tmp_path, download=True)
assert len(dataset) == 300
assert set(dataset.targets.tolist()) == {0, 1, 2}
assert torch.bincount(dataset.targets).tolist() == [100, 100, 100]


@pytest.mark.parametrize(
("dataset_cls", "args"),
[(MNIST, {"root": _PATH_DATASETS}), (TrialMNIST, {"root": _PATH_DATASETS}), (AverageDataset, {})],