From 703c3b7f740a644d640e1b2a11738831e0c9773f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 15:21:38 +0100 Subject: [PATCH 01/11] Introduce {Work,Flow}.lightningignore --- .../run_app_on_cloud/cloud_files.rst | 3 + src/lightning_app/core/flow.py | 10 ++ src/lightning_app/core/work.py | 10 ++ src/lightning_app/runners/cloud.py | 40 ++++--- src/lightning_app/source_code/copytree.py | 6 +- src/lightning_app/source_code/local.py | 9 +- tests/tests_app/runners/test_cloud.py | 100 +++++++++++++++--- 7 files changed, 144 insertions(+), 34 deletions(-) diff --git a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst index 3130cd0f336b3..a0a9aebd22feb 100644 --- a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst +++ b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst @@ -56,3 +56,6 @@ We recommend your app contain the following files: ├── app.py ├── README.md (optional- a markdown description of your app) └── requirements.txt (optional- conatins all your app dependencies) + + +# FIXME: update this diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index 3773c7a0dd4f0..48bcea250c819 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -104,6 +104,7 @@ def __init__(self): self._layout: Union[List[Dict], Dict] = {} self._paths = {} self._backend: Optional[Backend] = None + self._lightningignore: List[str] = [] @property def name(self): @@ -286,6 +287,15 @@ def flows(self) -> Dict[str, "LightningFlow"]: flows.update(getattr(self, struct_name).flows) return flows + @property + def lightningignore(self) -> List[str]: + """Programmatic equivalent of the ``.lightningignore`` file.""" + return self._lightningignore + + @lightningignore.setter + def lightningignore(self, lightningignore: List[str]) -> None: + self._lightningignore = lightningignore + def works(self, recurse: bool = True) -> List[LightningWork]: """Return its :class:`~lightning_app.core.work.LightningWork`.""" works = [getattr(self, el) for el in sorted(self._works)] diff --git a/src/lightning_app/core/work.py b/src/lightning_app/core/work.py index ab0dc8426ac91..26804c1979183 100644 --- a/src/lightning_app/core/work.py +++ b/src/lightning_app/core/work.py @@ -148,6 +148,7 @@ def __init__( self._local_build_config = local_build_config or BuildConfig() self._cloud_build_config = cloud_build_config or BuildConfig() self._cloud_compute = cloud_compute or CloudCompute() + self._lightningignore: List[str] = [] self._backend: Optional[Backend] = None self._check_run_is_implemented() self._on_init_end() @@ -247,6 +248,15 @@ def cloud_compute(self, cloud_compute: CloudCompute) -> None: compute_store.remove(self.name) self._cloud_compute = cloud_compute + @property + def lightningignore(self) -> List[str]: + """Programmatic equivalent of the ``.lightningignore`` file.""" + return self._lightningignore + + @lightningignore.setter + def lightningignore(self, lightningignore: List[str]) -> None: + self._lightningignore = lightningignore + @property def status(self) -> WorkStatus: """Return the current status of the work. diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 0752a3b5be8a8..aa6fbfa851589 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -5,6 +5,7 @@ import sys import time from dataclasses import dataclass +from functools import partial from pathlib import Path from typing import Any, Callable, List, Optional, Union @@ -59,6 +60,7 @@ from lightning_app.runners.backends.cloud import CloudBackend from lightning_app.runners.runtime import Runtime from lightning_app.source_code import LocalSourceCodeDir +from lightning_app.source_code.copytree import _filter_ignored, _parse_lightningignore from lightning_app.storage import Drive, Mount from lightning_app.utilities.app_helpers import Logger from lightning_app.utilities.cloud import _get_project @@ -214,7 +216,18 @@ def dispatch( root = Path(self.entrypoint_file).absolute().parent cleanup_handle = _prepare_lightning_wheels_and_requirements(root) self.app._update_index_file() - repo = LocalSourceCodeDir(path=root) + + # gather and merge all lightningignores + children = self.app.flows + self.app.works + lightningignores = [c.lightningignore for c in children] + if lightningignores: + merged = sum(lightningignores, []) + patterns = _parse_lightningignore(merged) + ignore_function = partial(_filter_ignored, root, patterns) + else: + ignore_function = None + + repo = LocalSourceCodeDir(path=root, ignore_functions=[ignore_function]) self._check_uploaded_folder(root, repo) requirements_file = root / "requirements.txt" # The entry point file needs to be relative to the root of the uploaded source file directory, @@ -481,23 +494,26 @@ def _ensure_cluster_project_binding(self, project_id: str, cluster_id: str): def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: """This method is used to inform the users if their folder files are large and how to filter them.""" lightning_tar = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz")) - app_folder_size = sum(Path(p).stat().st_size for p in repo.files if p not in lightning_tar) - app_folder_size_in_mb = round(app_folder_size / (1000 * 1000), 5) + files = [Path(f) for f in repo.files] + file_sizes = {f: f.stat().st_size for f in files} + mb = 1000 * 1000 + app_folder_size_in_mb = sum(v for k, v in file_sizes.items() if k not in lightning_tar) / mb if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING: - path_sizes = [(p, Path(p).stat().st_size / (1000 * 1000)) for p in repo.files] - largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25] - largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths) + by_largest = dict(sorted(file_sizes.items(), key=lambda x: x[1], reverse=True)) + by_largest = dict(list(by_largest.items())[:25]) # trim + largest_paths_msg = "\n".join( + f"{round(s / mb, 5)} MB: {p.relative_to(root)}" for p, s in by_largest.items() + ) warning_msg = ( f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " - f"The total size is {app_folder_size_in_mb} MB\n" - f"Here are the largest files: \n{largest_paths_msg}\n" + f"The total size is {round(app_folder_size_in_mb, 5)} MB\n" + f"Here are the largest files:\n{largest_paths_msg}\n" "Perhaps you should try running the app in an empty directory." ) if not (root / DOT_IGNORE_FILENAME).is_file(): - warning_msg = ( - warning_msg - + "\nIn order to ignore some files or folder, " - + "create a `.lightningignore` file and add the paths to ignore." + warning_msg += ( + "\nIn order to ignore some files or folder, create a `.lightningignore` file and add the paths to" + " ignore. You can also set the `lightningingore` attribute in a Flow or Work." ) else: warning_msg += "\nYou can ignore some files or folders by adding them to `.lightningignore`." diff --git a/src/lightning_app/source_code/copytree.py b/src/lightning_app/source_code/copytree.py index 554537849f4cf..7435c332b50f6 100644 --- a/src/lightning_app/source_code/copytree.py +++ b/src/lightning_app/source_code/copytree.py @@ -3,18 +3,20 @@ from functools import partial from pathlib import Path from shutil import copy2, copystat, Error -from typing import Callable, List, Set, Union +from typing import Callable, List, Optional, Set, Union from lightning_app.core.constants import DOT_IGNORE_FILENAME from lightning_app.utilities.app_helpers import Logger logger = Logger(__name__) +_IGNORE_FUNCTION = Callable[[Path, List[Path]], List[Path]] + def _copytree( src: Union[Path, str], dst: Union[Path, str], - ignore_functions: List[Callable] = None, + ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None, dirs_exist_ok=False, dry_run=False, ) -> List[str]: diff --git a/src/lightning_app/source_code/local.py b/src/lightning_app/source_code/local.py index b461d3814e9db..79d655cefbc06 100644 --- a/src/lightning_app/source_code/local.py +++ b/src/lightning_app/source_code/local.py @@ -4,7 +4,7 @@ from shutil import rmtree from typing import List, Optional -from lightning_app.source_code.copytree import _copytree +from lightning_app.source_code.copytree import _copytree, _IGNORE_FUNCTION from lightning_app.source_code.hashing import _get_hash from lightning_app.source_code.tar import _tar_path from lightning_app.source_code.uploader import FileUploader @@ -15,8 +15,9 @@ class LocalSourceCodeDir: cache_location: Path = Path.home() / ".lightning" / "cache" / "repositories" - def __init__(self, path: Path): + def __init__(self, path: Path, ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None) -> None: self.path = path + self.ignore_functions = ignore_functions # cache checksum version self._version: Optional[str] = None @@ -33,7 +34,7 @@ def __init__(self, path: Path): def files(self) -> List[str]: """Returns a set of files that are not ignored by .lightningignore.""" if self._non_ignored_files is None: - self._non_ignored_files = _copytree(self.path, "", dry_run=True) + self._non_ignored_files = _copytree(self.path, "", ignore_functions=self.ignore_functions, dry_run=True) return self._non_ignored_files @property @@ -59,7 +60,7 @@ def packaging_session(self) -> Path: session_path = self.cache_location / "packaging_sessions" / self.version try: rmtree(session_path, ignore_errors=True) - _copytree(self.path, session_path) + _copytree(self.path, session_path, ignore_functions=self.ignore_functions) yield session_path finally: rmtree(session_path, ignore_errors=True) diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index d1a6f9daaffe1..e1c93694eb903 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -41,13 +41,15 @@ V1Work, ) -from lightning_app import BuildConfig, LightningApp, LightningWork +from lightning_app import BuildConfig, LightningApp, LightningFlow, LightningWork from lightning_app.runners import backends, cloud, CloudRuntime from lightning_app.runners.cloud import ( _generate_works_json_gallery, _generate_works_json_web, _validate_build_spec_and_compute, ) +from lightning_app.source_code.copytree import _copytree, _parse_lightningignore +from lightning_app.source_code.local import LocalSourceCodeDir from lightning_app.storage import Drive, Mount from lightning_app.testing.helpers import EmptyWork from lightning_app.utilities.cloud import _get_project @@ -1184,31 +1186,35 @@ def test_get_project(monkeypatch): assert ret.project_id == "test-project-id1" +def tmp_file_of_size(path, size): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "wb") as f: + f.seek(size) + f.write(b"\0") + + @mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock()) @mock.patch("lightning_app.runners.backends.cloud.LightningClient", MagicMock()) def test_check_uploaded_folder(monkeypatch, tmpdir, caplog): - - monkeypatch.setattr(cloud, "logger", logging.getLogger()) - app = MagicMock() - repo = MagicMock() + root = Path(tmpdir) + repo = LocalSourceCodeDir(root) backend = cloud.CloudRuntime(app) with caplog.at_level(logging.WARN): - backend._check_uploaded_folder(Path(tmpdir), repo) + backend._check_uploaded_folder(root, repo) assert caplog.messages == [] - mock = MagicMock() - mock.st_mode = 33188 - mock.st_size = 5 * 1000 * 1000 - repo.files = [str(Path("./a.png"))] - monkeypatch.setattr(Path, "stat", MagicMock(return_value=mock)) + tmp_file_of_size(root / "a.png", 4 * 1000 * 1000) + tmp_file_of_size(root / "b.txt", 5 * 1000 * 1000) + tmp_file_of_size(root / "c.jpg", 6 * 1000 * 1000) - path = Path(".") + repo._non_ignored_files = None # force reset with caplog.at_level(logging.WARN): - backend._check_uploaded_folder(path, repo) - assert caplog.messages[0].startswith( - f"Your application folder '{path.absolute()}' is more than 2 MB. The total size is 5.0 MB" - ) + backend._check_uploaded_folder(root, repo) + assert (f"Your application folder '{root.absolute()}' is more than 2 MB. The total size is 15.0 MB") in caplog.text + assert "6.0 MB: c.jpg\n5.0 MB: b.txt\n4.0 MB: a.png" in caplog.text + assert "create a `.lightningignore` file" in caplog.text + assert "lightningingore` attribute in a Flow or Work" in caplog.text @mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock()) @@ -1368,3 +1374,65 @@ def run(self): with pytest.raises(ValueError, match="You requested a custom base image for the Work with name"): _validate_build_spec_and_compute(Work()) + + +def test_programmatic_lightningignore(monkeypatch, caplog, tmpdir): + mock_client = mock.MagicMock() + mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( + memberships=[V1Membership(name="test-project", project_id="test-project-id")] + ) + mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( + V1ListLightningappInstancesResponse(lightningapps=[]) + ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( + cluster_id="test" + ) + cloud_backend = mock.MagicMock(client=mock_client) + monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend)) + + class MyWork(LightningWork): + def __init__(self): + super().__init__() + self.lightningignore = ["foo", "lightning_logs"] + + def run(self): + # this is ignored + self.lightningignore.append("foobar") + + class MyFlow(LightningFlow): + def __init__(self): + super().__init__() + self.lightningignore = ["foo"] + self.w = MyWork() + + def run(self): + # this is ignored + self.lightningignore.append("baz") + self.w.run() + + root = MyFlow() + app = LightningApp(root) + monkeypatch.setattr(app, "_update_index_file", mock.MagicMock()) + + path = Path(tmpdir) + cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=path / "entrypoint.py") + monkeypatch.setattr(LocalSourceCodeDir, "upload", mock.MagicMock()) + + tmp_file_of_size(path / "a.txt", 5 * 1000 * 1000) + tmp_file_of_size(path / "foo.png", 4 * 1000 * 1000) + tmp_file_of_size(path / "lightning_logs" / "foo.ckpt", 6 * 1000 * 1000) + + with mock.patch( + "lightning_app.runners.cloud._parse_lightningignore", wraps=_parse_lightningignore + ) as parse_mock, mock.patch( + "lightning_app.source_code.local._copytree", wraps=_copytree + ) as copy_mock, caplog.at_level( + logging.WARN + ): + cloud_runtime.dispatch() + + parse_mock.assert_called_once_with(["foo", "foo", "lightning_logs"]) + assert copy_mock.mock_calls[0].kwargs["ignore_functions"][0].args[1] == {"lightning_logs", "foo"} + + assert (f"Your application folder '{path.absolute()}' is more than 2 MB. The total size is 9.0 MB") in caplog.text + assert "files:\n5.0 MB: a.txt\n4.0 MB: foo.png\nPerhaps" in caplog.text # only these 2 files show From a074cf75fa497b4842c4820e9699bed67d038abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 15:55:49 +0100 Subject: [PATCH 02/11] Docs --- .../run_app_on_cloud/cloud_files.rst | 16 +++++++--- src/lightning_app/CHANGELOG.md | 2 ++ src/lightning_app/runners/cloud.py | 22 +++++++++----- tests/tests_app/runners/test_cloud.py | 30 ++++++++++++------- 4 files changed, 47 insertions(+), 23 deletions(-) diff --git a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst index a0a9aebd22feb..2e9cfe9b62387 100644 --- a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst +++ b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst @@ -30,7 +30,6 @@ For example, the source code directory below with the ``.lightningignore`` file ├── requirements.txt └── model.pt - .. code:: bash ~/project/home ❯ cat .lightningignore @@ -39,6 +38,18 @@ For example, the source code directory below with the ``.lightningignore`` file A sample ``.lightningignore`` file can be found `here `_. +If you are a component author and your components creates local files that you want to ignore, you can do: + +.. code-block:: python + + class MyComponent(L.LightningWork): # or L.LightningFlow + def __init__(self): + super().__init__() + self.lightningignore = ["model.pt", "data_dir"] + + +This has the benefit that the files will be ignored automatically for all the component users, making an easier +transition between running locally vs in the cloud. ---- @@ -56,6 +67,3 @@ We recommend your app contain the following files: ├── app.py ├── README.md (optional- a markdown description of your app) └── requirements.txt (optional- conatins all your app dependencies) - - -# FIXME: update this diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 4c5da2c96e2e4..9f8fe0368f57e 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -13,6 +13,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Show a message when `BuildConfig(requirements=[...])` is passed but a `requirements.txt` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799)) - Show a message when `BuildConfig(dockerfile="...")` is passed but a `Dockerfile` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799)) +- Added `Lightning{Flow,Work}.lightningignores` attributes to programmatically ignore files before uploading to the cloud ([#15818](https://github.com/Lightning-AI/lightning/pull/15818)) + - Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800)) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index aa6fbfa851589..bc2840a4c71ab 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -499,16 +499,22 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: mb = 1000 * 1000 app_folder_size_in_mb = sum(v for k, v in file_sizes.items() if k not in lightning_tar) / mb if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING: - by_largest = dict(sorted(file_sizes.items(), key=lambda x: x[1], reverse=True)) - by_largest = dict(list(by_largest.items())[:25]) # trim - largest_paths_msg = "\n".join( - f"{round(s / mb, 5)} MB: {p.relative_to(root)}" for p, s in by_largest.items() - ) + # filter out files under 0.01mb or special files + relevant_files = {f: s for f, s in file_sizes.items() if f.name != ".lightningignore" and s > 0.01 * mb} + if relevant_files: + by_largest = dict(sorted(relevant_files.items(), key=lambda x: x[1], reverse=True)) + by_largest = dict(list(by_largest.items())[:25]) # trim + largest_paths_msg = "\n".join( + f"{round(s / mb, 5)} MB: {p.relative_to(root)}" for p, s in by_largest.items() + ) + largest_paths_msg = f"Here are the largest files:\n{largest_paths_msg}\n" + else: + largest_paths_msg = "" warning_msg = ( f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " - f"The total size is {round(app_folder_size_in_mb, 5)} MB\n" - f"Here are the largest files:\n{largest_paths_msg}\n" - "Perhaps you should try running the app in an empty directory." + f"The total size is {round(app_folder_size_in_mb, 2)} MB. {len(files)} files were uploaded.\n" + + largest_paths_msg + + "Perhaps you should try running the app in an empty directory." ) if not (root / DOT_IGNORE_FILENAME).is_file(): warning_msg += ( diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index e1c93694eb903..ee59a7b84c254 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1186,7 +1186,7 @@ def test_get_project(monkeypatch): assert ret.project_id == "test-project-id1" -def tmp_file_of_size(path, size): +def write_file_of_size(path, size): os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "wb") as f: f.seek(size) @@ -1204,15 +1204,18 @@ def test_check_uploaded_folder(monkeypatch, tmpdir, caplog): backend._check_uploaded_folder(root, repo) assert caplog.messages == [] - tmp_file_of_size(root / "a.png", 4 * 1000 * 1000) - tmp_file_of_size(root / "b.txt", 5 * 1000 * 1000) - tmp_file_of_size(root / "c.jpg", 6 * 1000 * 1000) + # write some files to assert the message below. + write_file_of_size(root / "a.png", 4 * 1000 * 1000) + write_file_of_size(root / "b.txt", 5 * 1000 * 1000) + write_file_of_size(root / "c.jpg", 6 * 1000 * 1000) repo._non_ignored_files = None # force reset with caplog.at_level(logging.WARN): backend._check_uploaded_folder(root, repo) - assert (f"Your application folder '{root.absolute()}' is more than 2 MB. The total size is 15.0 MB") in caplog.text - assert "6.0 MB: c.jpg\n5.0 MB: b.txt\n4.0 MB: a.png" in caplog.text + assert f"Your application folder '{root.absolute()}' is more than 2 MB" in caplog.text + assert "The total size is 15.0 MB" in caplog.text + assert "3 files were uploaded" in caplog.text + assert "files:\n6.0 MB: c.jpg\n5.0 MB: b.txt\n4.0 MB: a.png\nPerhaps" in caplog.text # tests the order assert "create a `.lightningignore` file" in caplog.text assert "lightningingore` attribute in a Flow or Work" in caplog.text @@ -1418,9 +1421,12 @@ def run(self): cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=path / "entrypoint.py") monkeypatch.setattr(LocalSourceCodeDir, "upload", mock.MagicMock()) - tmp_file_of_size(path / "a.txt", 5 * 1000 * 1000) - tmp_file_of_size(path / "foo.png", 4 * 1000 * 1000) - tmp_file_of_size(path / "lightning_logs" / "foo.ckpt", 6 * 1000 * 1000) + # write some files + write_file_of_size(path / "a.txt", 5 * 1000 * 1000) + write_file_of_size(path / "foo.png", 4 * 1000 * 1000) + write_file_of_size(path / "lightning_logs" / "foo.ckpt", 6 * 1000 * 1000) + # also an actual .lightningignore file + (path / ".lightningignore").write_text("foo.png") with mock.patch( "lightning_app.runners.cloud._parse_lightningignore", wraps=_parse_lightningignore @@ -1434,5 +1440,7 @@ def run(self): parse_mock.assert_called_once_with(["foo", "foo", "lightning_logs"]) assert copy_mock.mock_calls[0].kwargs["ignore_functions"][0].args[1] == {"lightning_logs", "foo"} - assert (f"Your application folder '{path.absolute()}' is more than 2 MB. The total size is 9.0 MB") in caplog.text - assert "files:\n5.0 MB: a.txt\n4.0 MB: foo.png\nPerhaps" in caplog.text # only these 2 files show + assert f"Your application folder '{path.absolute()}' is more than 2 MB" in caplog.text + assert "The total size is 5.0 MB" in caplog.text + assert "2 files were uploaded" # a.txt and .lightningignore + assert "files:\n5.0 MB: a.txt\nPerhaps" in caplog.text # only this file appears From 5578998f61f9e4ab5e470a8764aad17b00ba8d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 17:36:57 +0100 Subject: [PATCH 03/11] Debug --- .github/workflows/ci-app-tests.yml | 4 ++++ tests/tests_app/runners/test_cloud.py | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 0f8d63e70d3b8..5e2e8b6cf1891 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -109,6 +109,10 @@ jobs: - name: Switch coverage scope run: python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_app'))" >> $GITHUB_ENV + - name: Setup tmate session + if: ${{ matrix.pkg-name == 'lightning' && matrix.os == 'ubuntu-20.04' }} + uses: mxschmitt/action-tmate@v3 + - name: Tests working-directory: ./tests env: diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index ee59a7b84c254..059b855a1e491 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1413,8 +1413,7 @@ def run(self): self.lightningignore.append("baz") self.w.run() - root = MyFlow() - app = LightningApp(root) + app = LightningApp(MyFlow()) monkeypatch.setattr(app, "_update_index_file", mock.MagicMock()) path = Path(tmpdir) From 81deecb28d2e9a4d59a13b2d59c75e5e224f39e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 17:54:44 +0100 Subject: [PATCH 04/11] Fix --- src/lightning_app/components/multi_node/trainer.py | 3 +++ src/lightning_app/runners/cloud.py | 1 + 2 files changed, 4 insertions(+) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 222f71ce59557..7a76b25182333 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -92,3 +92,6 @@ def __init__( cloud_compute=cloud_compute, **work_kwargs, ) + + # the Trainer enables TensorBoard by default, so this is often an undesired directory to upload to the cloud + self.lightningignore.append("lightning_logs") diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index bc2840a4c71ab..16373001ce9a9 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -222,6 +222,7 @@ def dispatch( lightningignores = [c.lightningignore for c in children] if lightningignores: merged = sum(lightningignores, []) + logger.debug(f"Found the following lightningignores: {merged}") patterns = _parse_lightningignore(merged) ignore_function = partial(_filter_ignored, root, patterns) else: From 7893239e43d9818e999b1217058d840785218814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 18:11:09 +0100 Subject: [PATCH 05/11] Revert debug --- .github/workflows/ci-app-tests.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 5e2e8b6cf1891..0f8d63e70d3b8 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -109,10 +109,6 @@ jobs: - name: Switch coverage scope run: python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_app'))" >> $GITHUB_ENV - - name: Setup tmate session - if: ${{ matrix.pkg-name == 'lightning' && matrix.os == 'ubuntu-20.04' }} - uses: mxschmitt/action-tmate@v3 - - name: Tests working-directory: ./tests env: From 22a5c42a7d87b8d7c1deb78191b530d67d2b4a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 18:16:43 +0100 Subject: [PATCH 06/11] Fix --- src/lightning_app/runners/cloud.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 16373001ce9a9..b99461a944079 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -494,14 +494,15 @@ def _ensure_cluster_project_binding(self, project_id: str, cluster_id: str): @staticmethod def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: """This method is used to inform the users if their folder files are large and how to filter them.""" - lightning_tar = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz")) - files = [Path(f) for f in repo.files] + excludes = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz")) + excludes.update(fnmatch.filter(repo.files, ".lightningignore")) + files = [Path(f) for f in repo.files if f not in excludes] file_sizes = {f: f.stat().st_size for f in files} mb = 1000 * 1000 - app_folder_size_in_mb = sum(v for k, v in file_sizes.items() if k not in lightning_tar) / mb + app_folder_size_in_mb = sum(file_sizes.values()) / mb if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING: - # filter out files under 0.01mb or special files - relevant_files = {f: s for f, s in file_sizes.items() if f.name != ".lightningignore" and s > 0.01 * mb} + # filter out files under 0.01mb + relevant_files = {f: s for f, s in file_sizes.items() if s > 0.01 * mb} if relevant_files: by_largest = dict(sorted(relevant_files.items(), key=lambda x: x[1], reverse=True)) by_largest = dict(list(by_largest.items())[:25]) # trim From 81ca2af1e4912268eec763b26902d50919ea0ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 25 Nov 2022 18:17:57 +0100 Subject: [PATCH 07/11] Simpler --- src/lightning_app/runners/cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index b99461a944079..c8de4aa9a058e 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -224,11 +224,11 @@ def dispatch( merged = sum(lightningignores, []) logger.debug(f"Found the following lightningignores: {merged}") patterns = _parse_lightningignore(merged) - ignore_function = partial(_filter_ignored, root, patterns) + ignore_functions = [partial(_filter_ignored, root, patterns)] else: - ignore_function = None + ignore_functions = None - repo = LocalSourceCodeDir(path=root, ignore_functions=[ignore_function]) + repo = LocalSourceCodeDir(path=root, ignore_functions=ignore_functions) self._check_uploaded_folder(root, repo) requirements_file = root / "requirements.txt" # The entry point file needs to be relative to the root of the uploaded source file directory, From 2ca1710b22317e451e0469cb1715ecbaa61cb120 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Dec 2022 14:07:47 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_app/runners/test_cloud.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index 40fec1d2e7f5a..f7ea154e32227 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1528,4 +1528,3 @@ def test_get_app_url(lightning_app_instance, lightning_cloud_url, expected_url): "lightning_app.runners.cloud.get_lightning_cloud_url", mock.MagicMock(return_value=lightning_cloud_url) ): assert CloudRuntime._get_app_url(lightning_app_instance) == expected_url - From 2f721dbe8a5ed3aeea03cd638eed348340609d49 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 7 Dec 2022 15:29:15 +0100 Subject: [PATCH 09/11] Apply suggestions from code review --- src/lightning_app/runners/cloud.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 9bcbc3acb67b8..9a062aca90d81 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -511,16 +511,16 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: excludes.update(fnmatch.filter(repo.files, ".lightningignore")) files = [Path(f) for f in repo.files if f not in excludes] file_sizes = {f: f.stat().st_size for f in files} - mb = 1000 * 1000 + mb = 1000_000 app_folder_size_in_mb = sum(file_sizes.values()) / mb if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING: # filter out files under 0.01mb - relevant_files = {f: s for f, s in file_sizes.items() if s > 0.01 * mb} + relevant_files = {f: sz for f, sz in file_sizes.items() if sz > 0.01 * mb} if relevant_files: by_largest = dict(sorted(relevant_files.items(), key=lambda x: x[1], reverse=True)) by_largest = dict(list(by_largest.items())[:25]) # trim largest_paths_msg = "\n".join( - f"{round(s / mb, 5)} MB: {p.relative_to(root)}" for p, s in by_largest.items() + f"{round(sz / mb, 5)} MB: {p.relative_to(root)}" for p, sz in by_largest.items() ) largest_paths_msg = f"Here are the largest files:\n{largest_paths_msg}\n" else: @@ -529,7 +529,7 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " f"The total size is {round(app_folder_size_in_mb, 2)} MB. {len(files)} files were uploaded.\n" + largest_paths_msg - + "Perhaps you should try running the app in an empty directory." + + " Perhaps you should try running the app in an empty directory." ) if not (root / DOT_IGNORE_FILENAME).is_file(): warning_msg += ( From 4b33c35bf3be4a5b39edf2223f9ed1278fd7d62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 12 Dec 2022 18:57:42 +0100 Subject: [PATCH 10/11] Revert broken change --- src/lightning_app/runners/cloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 9a062aca90d81..0a6d1505550c1 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -529,7 +529,7 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " f"The total size is {round(app_folder_size_in_mb, 2)} MB. {len(files)} files were uploaded.\n" + largest_paths_msg - + " Perhaps you should try running the app in an empty directory." + + "Perhaps you should try running the app in an empty directory." ) if not (root / DOT_IGNORE_FILENAME).is_file(): warning_msg += ( From 50d8eb5b36e6227305d231576216c837f3ff6b12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 12 Dec 2022 19:32:17 +0100 Subject: [PATCH 11/11] Thomas' suggestion --- .../run_app_on_cloud/cloud_files.rst | 2 +- .../components/multi_node/trainer.py | 2 +- src/lightning_app/core/flow.py | 20 ++++++++++++---- src/lightning_app/core/work.py | 21 ++++++++++++---- src/lightning_app/runners/cloud.py | 2 +- src/lightning_app/utilities/app_helpers.py | 6 ++++- tests/tests_app/runners/test_cloud.py | 24 ++++++++++++------- 7 files changed, 56 insertions(+), 21 deletions(-) diff --git a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst index 2e9cfe9b62387..dfef0dc1c13aa 100644 --- a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst +++ b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst @@ -45,7 +45,7 @@ If you are a component author and your components creates local files that you w class MyComponent(L.LightningWork): # or L.LightningFlow def __init__(self): super().__init__() - self.lightningignore = ["model.pt", "data_dir"] + self.lightningignore = ("model.pt", "data_dir") This has the benefit that the files will be ignored automatically for all the component users, making an easier diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index b651067df8042..e3f738abad329 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -116,4 +116,4 @@ def __init__( ) # the Trainer enables TensorBoard by default, so this is often an undesired directory to upload to the cloud - self.lightningignore.append("lightning_logs") + self.lightningignore += ("lightning_logs",) diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index 720b1197058e9..a098dc4823f46 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -10,7 +10,13 @@ from lightning_app.frontend import Frontend from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive -from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden +from lightning_app.utilities.app_helpers import ( + _is_json_serializable, + _lightning_dispatched, + _LightningAppRef, + _set_child_name, + is_overridden, +) from lightning_app.utilities.component import _sanitize_state from lightning_app.utilities.exceptions import ExitAppException from lightning_app.utilities.introspection import _is_init_context, _is_run_context @@ -104,7 +110,8 @@ def __init__(self): self._layout: Union[List[Dict], Dict] = {} self._paths = {} self._backend: Optional[Backend] = None - self._lightningignore: List[str] = [] + # tuple instead of a list so that it cannot be modified without using the setter + self._lightningignore: Tuple[str, ...] = tuple() @property def name(self): @@ -312,12 +319,17 @@ def flows(self) -> Dict[str, "LightningFlow"]: return flows @property - def lightningignore(self) -> List[str]: + def lightningignore(self) -> Tuple[str, ...]: """Programmatic equivalent of the ``.lightningignore`` file.""" return self._lightningignore @lightningignore.setter - def lightningignore(self, lightningignore: List[str]) -> None: + def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: + if _lightning_dispatched(): + raise RuntimeError( + f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" + " effect" + ) self._lightningignore = lightningignore def works(self, recurse: bool = True) -> List[LightningWork]: diff --git a/src/lightning_app/core/work.py b/src/lightning_app/core/work.py index 57d8f0dad7b32..43ffc0006d5ea 100644 --- a/src/lightning_app/core/work.py +++ b/src/lightning_app/core/work.py @@ -3,7 +3,7 @@ import warnings from copy import deepcopy from functools import partial, wraps -from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TYPE_CHECKING, Union from deepdiff import DeepHash, Delta @@ -11,7 +11,12 @@ from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive from lightning_app.storage.payload import Payload -from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, is_overridden +from lightning_app.utilities.app_helpers import ( + _is_json_serializable, + _lightning_dispatched, + _LightningAppRef, + is_overridden, +) from lightning_app.utilities.component import _is_flow_context, _sanitize_state from lightning_app.utilities.enum import ( CacheCallsKeys, @@ -154,7 +159,8 @@ def __init__( self._local_build_config = local_build_config or BuildConfig() self._cloud_build_config = cloud_build_config or BuildConfig() self._cloud_compute = cloud_compute or CloudCompute() - self._lightningignore: List[str] = [] + # tuple instead of a list so that it cannot be modified without using the setter + self._lightningignore: Tuple[str, ...] = tuple() self._backend: Optional[Backend] = None self._check_run_is_implemented() self._on_init_end() @@ -255,12 +261,17 @@ def cloud_compute(self, cloud_compute: CloudCompute) -> None: self._cloud_compute = cloud_compute @property - def lightningignore(self) -> List[str]: + def lightningignore(self) -> Tuple[str, ...]: """Programmatic equivalent of the ``.lightningignore`` file.""" return self._lightningignore @lightningignore.setter - def lightningignore(self, lightningignore: List[str]) -> None: + def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: + if _lightning_dispatched(): + raise RuntimeError( + f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" + " effect" + ) self._lightningignore = lightningignore @property diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 0a6d1505550c1..36d39bac1f4b8 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -224,7 +224,7 @@ def dispatch( children = self.app.flows + self.app.works lightningignores = [c.lightningignore for c in children] if lightningignores: - merged = sum(lightningignores, []) + merged = sum(lightningignores, tuple()) logger.debug(f"Found the following lightningignores: {merged}") patterns = _parse_lightningignore(merged) ignore_functions = [partial(_filter_ignored, root, patterns)] diff --git a/src/lightning_app/utilities/app_helpers.py b/src/lightning_app/utilities/app_helpers.py index 83b78e1929aa5..3b152786b682a 100644 --- a/src/lightning_app/utilities/app_helpers.py +++ b/src/lightning_app/utilities/app_helpers.py @@ -511,11 +511,15 @@ def is_static_method(klass_or_instance, attr) -> bool: return isinstance(inspect.getattr_static(klass_or_instance, attr), staticmethod) +def _lightning_dispatched() -> bool: + return bool(int(os.getenv("LIGHTNING_DISPATCHED", 0))) + + def _should_dispatch_app() -> bool: return ( __debug__ and "_pytest.doctest" not in sys.modules - and not bool(int(os.getenv("LIGHTNING_DISPATCHED", "0"))) + and not _lightning_dispatched() and "LIGHTNING_APP_STATE_URL" not in os.environ ) diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index f7ea154e32227..7ce4ea397d95b 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1443,6 +1443,8 @@ def run(self): def test_programmatic_lightningignore(monkeypatch, caplog, tmpdir): + monkeypatch.setenv("LIGHTNING_DISPATCHED", "0") # this is not cleaned up + mock_client = mock.MagicMock() mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( memberships=[V1Membership(name="test-project", project_id="test-project-id")] @@ -1459,24 +1461,26 @@ def test_programmatic_lightningignore(monkeypatch, caplog, tmpdir): class MyWork(LightningWork): def __init__(self): super().__init__() - self.lightningignore = ["foo", "lightning_logs"] + self.lightningignore += ("foo", "lightning_logs") def run(self): - # this is ignored - self.lightningignore.append("foobar") + with pytest.raises(RuntimeError, match="w.lightningignore` does not"): + self.lightningignore += ("foobar",) class MyFlow(LightningFlow): def __init__(self): super().__init__() - self.lightningignore = ["foo"] + self.lightningignore = ("foo",) self.w = MyWork() def run(self): - # this is ignored - self.lightningignore.append("baz") + with pytest.raises(RuntimeError, match="root.lightningignore` does not"): + self.lightningignore = ("baz",) self.w.run() - app = LightningApp(MyFlow()) + flow = MyFlow() + app = LightningApp(flow) + monkeypatch.setattr(app, "_update_index_file", mock.MagicMock()) path = Path(tmpdir) @@ -1499,7 +1503,7 @@ def run(self): ): cloud_runtime.dispatch() - parse_mock.assert_called_once_with(["foo", "foo", "lightning_logs"]) + parse_mock.assert_called_once_with(("foo", "foo", "lightning_logs")) assert copy_mock.mock_calls[0].kwargs["ignore_functions"][0].args[1] == {"lightning_logs", "foo"} assert f"Your application folder '{path.absolute()}' is more than 2 MB" in caplog.text @@ -1507,6 +1511,10 @@ def run(self): assert "2 files were uploaded" # a.txt and .lightningignore assert "files:\n5.0 MB: a.txt\nPerhaps" in caplog.text # only this file appears + # replicate how the app would dispatch the app, and call `run` + monkeypatch.setenv("LIGHTNING_DISPATCHED", "1") + flow.run() + @pytest.mark.parametrize( "lightning_app_instance, lightning_cloud_url, expected_url",