diff --git a/pyproject.toml b/pyproject.toml index 5e8a2bfa0e4819..8f16fe6bc15c85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,7 +135,6 @@ module = [ "lightning_app.utilities.name_generator", "lightning_app.utilities.network", "lightning_app.utilities.openapi", - "lightning_app.utilities.packaging.build_config", "lightning_app.utilities.packaging.cloud_compute", "lightning_app.utilities.packaging.docker", "lightning_app.utilities.packaging.lightning_utils", diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 69f8e323d4f70b..1b635b8aa5e84d 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - +- Show a message when `BuildConfig(requirements=[...])` is passed but a `requirements.txt` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799)) +- Show a message when `BuildConfig(dockerfile="...")` is passed but a `Dockerfile` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799)) ### Changed diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index ae5bb811496a55..e6a474ac921ce0 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -1,6 +1,5 @@ import fnmatch import json -import os import random import string import sys @@ -50,6 +49,7 @@ CLOUD_UPLOAD_WARNING, DEFAULT_NUMBER_OF_EXPOSED_PORTS, DISABLE_DEPENDENCY_CACHE, + DOT_IGNORE_FILENAME, ENABLE_APP_COMMENT_COMMAND_EXECUTION, ENABLE_MULTIPLE_WORKS_IN_DEFAULT_CONTAINER, ENABLE_MULTIPLE_WORKS_IN_NON_DEFAULT_CONTAINER, @@ -488,12 +488,12 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25] largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths) 
warning_msg = ( - f"Your application folder {root} is more than {CLOUD_UPLOAD_WARNING} MB. " - f"Found {app_folder_size_in_mb} MB \n" - "Here are the largest files: \n" - f"{largest_paths_msg}" + f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " + f"The total size is {app_folder_size_in_mb} MB\n" + f"Here are the largest files: \n{largest_paths_msg}\n" + "Perhaps you should try running the app in an empty directory." ) - if not os.path.exists(os.path.join(root, ".lightningignore")): + if not (root / DOT_IGNORE_FILENAME).is_file(): warning_msg = ( warning_msg + "\nIn order to ignore some files or folder, " diff --git a/src/lightning_app/source_code/copytree.py b/src/lightning_app/source_code/copytree.py index 81eed3c127f2ef..554537849f4cf9 100644 --- a/src/lightning_app/source_code/copytree.py +++ b/src/lightning_app/source_code/copytree.py @@ -1,5 +1,6 @@ import fnmatch import os +from functools import partial from pathlib import Path from shutil import copy2, copystat, Error from typing import Callable, List, Set, Union @@ -58,8 +59,10 @@ def _copytree( _ignore_filename_spell_check(src) src = Path(src) dst = Path(dst) - if src.joinpath(DOT_IGNORE_FILENAME).exists(): - ignore_fn = _get_ignore_function(src) + ignore_filepath = src / DOT_IGNORE_FILENAME + if ignore_filepath.is_file(): + patterns = _read_lightningignore(ignore_filepath) + ignore_fn = partial(_filter_ignored, src, patterns) # creating new list so we won't modify the original ignore_functions = [*ignore_functions, ignore_fn] @@ -108,19 +111,22 @@ def _copytree( return files_copied -def _get_ignore_function(src: Path) -> Callable: - patterns = _read_lightningignore(src / DOT_IGNORE_FILENAME) +def _filter_ignored(src: Path, patterns: Set[str], current_dir: Path, entries: List[Path]) -> List[Path]: + relative_dir = current_dir.relative_to(src) + names = [str(relative_dir / entry.name) for entry in entries] + ignored_names = set() + for pattern in patterns: + 
ignored_names.update(fnmatch.filter(names, pattern)) + return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names] - def filter_ignored(current_dir: Path, entries: List[Path]) -> List[Path]: - relative_dir = current_dir.relative_to(src) - names = [str(relative_dir / entry.name) for entry in entries] - ignored_names = [] - for pattern in patterns: - ignored_names.extend(fnmatch.filter(names, pattern)) - ignored_names_set = set(ignored_names) - return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names_set] - return filter_ignored +def _parse_lightningignore(lines: List[str]) -> Set[str]: + """Create a set of sanitized patterns, skipping empty lines and comments.""" + lines = [ln.strip() for ln in lines] + # strips leading `/` characters for posix and `\\` for windows + lines = [ln.lstrip("/").lstrip("\\") for ln in lines if ln != "" and not ln.startswith("#")] + # convert to path and back to string to sanitize the pattern + return {str(Path(ln)) for ln in lines} def _read_lightningignore(path: Path) -> Set[str]: @@ -137,14 +143,8 @@ def _read_lightningignore(path: Path) -> Set[str]: Set[str] Set of unique lines.
""" - raw_lines = [ln.strip() for ln in path.open().readlines()] - - # creates a set that removes empty lines and comments - lines = {ln for ln in raw_lines if ln != "" and ln is not None and not ln.startswith("#")} - - # removes first `/` character for posix and `\\` for windows - # also converting to path and converting back to string to sanitize the pattern - return {str(Path(ln.lstrip("/").lstrip("\\"))) for ln in lines} + raw_lines = path.open().readlines() + return _parse_lightningignore(raw_lines) def _ignore_filename_spell_check(src: Path): diff --git a/src/lightning_app/utilities/packaging/build_config.py b/src/lightning_app/utilities/packaging/build_config.py index bc29a75882f699..7c20853bec1578 100644 --- a/src/lightning_app/utilities/packaging/build_config.py +++ b/src/lightning_app/utilities/packaging/build_config.py @@ -1,16 +1,16 @@ import inspect import os import re -from dataclasses import asdict, dataclass -from types import FrameType -from typing import cast, List, Optional, TYPE_CHECKING, Union +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Dict, List, Optional, Union +from typing_extensions import Self + +import lightning_app as L from lightning_app.utilities.app_helpers import Logger from lightning_app.utilities.packaging.cloud_compute import CloudCompute -if TYPE_CHECKING: - from lightning_app import LightningWork - logger = Logger(__name__) @@ -19,11 +19,10 @@ def load_requirements( ) -> List[str]: """Load requirements from a file. - .. code-block:: python - - path_req = os.path.join(_PROJECT_ROOT, "requirements") - requirements = load_requirements(path_req) - print(requirements) # ['numpy...', 'torch...', ...] + >>> from lightning_app import _PROJECT_ROOT + >>> path_req = os.path.join(_PROJECT_ROOT, "requirements") + >>> load_requirements(path_req, "docs.txt") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE +SKIP + ['sphinx>=4.0', ...] 
# TODO: remove SKIP, fails on python 3.7 """ path = os.path.join(path_dir, file_name) if not os.path.isfile(path): @@ -49,12 +48,19 @@ def load_requirements( return reqs +@dataclass +class _Dockerfile: + path: str + data: List[str] + + @dataclass class BuildConfig: """The Build Configuration describes how the environment a LightningWork runs in should be set up. Arguments: - requirements: List of requirements or paths to requirement files. + requirements: List of requirements or list of paths to requirement files. If not passed, they will be + automatically extracted from a `requirements.txt` if it exists. dockerfile: The path to a dockerfile to be used to build your container. You need to add those lines to ensure your container works in the cloud. @@ -64,20 +70,19 @@ class BuildConfig: WORKDIR /gridai/project COPY . . - - Learn more by checking out: - https://docs.grid.ai/features/runs/running-experiments-with-a-dockerfile image: The base image that the work runs on. This should be a publicly accessible image from a registry that doesn't enforce rate limits (such as DockerHub) to pull this image, otherwise your application will not start. 
""" - requirements: Optional[Union[str, List[str]]] = None - dockerfile: Optional[str] = None + requirements: List[str] = field(default_factory=list) + dockerfile: Optional[Union[str, Path, _Dockerfile]] = None image: Optional[str] = None - def __post_init__(self): - self._call_dir = os.path.dirname(cast(FrameType, inspect.currentframe()).f_back.f_back.f_code.co_filename) + def __post_init__(self) -> None: + current_frame = inspect.currentframe() + co_filename = current_frame.f_back.f_back.f_code.co_filename # type: ignore[union-attr] + self._call_dir = os.path.dirname(co_filename) self._prepare_requirements() self._prepare_dockerfile() @@ -101,76 +106,90 @@ def build_commands(self): """ return [] - def on_work_init(self, work, cloud_compute: Optional["CloudCompute"] = None): + def on_work_init(self, work: "L.LightningWork", cloud_compute: Optional["CloudCompute"] = None) -> None: """Override with your own logic to load the requirements or dockerfile.""" - try: - self.requirements = sorted(self.requirements or self._find_requirements(work) or []) - self.dockerfile = self.dockerfile or self._find_dockerfile(work) - except TypeError: - logger.debug("The provided work couldn't be found.") + found_requirements = self._find_requirements(work) + if self.requirements: + if found_requirements and self.requirements != found_requirements: + # notify the user of this silent behaviour + logger.info( + f"A 'requirements.txt' exists with {found_requirements} but {self.requirements} was passed to" + f" the `{type(self).__name__}` in {work.name!r}. The `requirements.txt` file will be ignored." + ) + else: + self.requirements = found_requirements + self._prepare_requirements() - def _find_requirements(self, work: "LightningWork") -> List[str]: - # 1. 
Get work file file = inspect.getfile(work.__class__) + found_dockerfile = self._find_dockerfile(work) + if self.dockerfile: + if found_dockerfile and self.dockerfile != found_dockerfile: + # notify the user of this silent behaviour + logger.info( + f"A Dockerfile exists at {found_dockerfile!r} but {self.dockerfile!r} was passed to" + f" the `{type(self).__name__}` in {work.name!r}. {found_dockerfile!r} will be ignored." + ) + else: + self.dockerfile = found_dockerfile + self._prepare_dockerfile() - # 2. Try to find a requirement file associated the file. - dirname = os.path.dirname(file) or "." - requirement_files = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "requirements.txt"] - if not requirement_files: + def _find_requirements(self, work: "L.LightningWork", filename: str = "requirements.txt") -> List[str]: + # 1. Get work file + file = _get_work_file(work) + if file is None: return [] - dirname, basename = os.path.dirname(requirement_files[0]), os.path.basename(requirement_files[0]) + # 2. Try to find a requirement file associated with the file. + dirname = os.path.dirname(file) try: - requirements = load_requirements(dirname, basename) + requirements = load_requirements(dirname, filename) except NotADirectoryError: - requirements = [] + return [] return [r for r in requirements if r != "lightning"] - def _find_dockerfile(self, work: "LightningWork") -> List[str]: + def _find_dockerfile(self, work: "L.LightningWork", filename: str = "Dockerfile") -> Optional[str]: # 1. Get work file - file = inspect.getfile(work.__class__) - - # 2. Check for Dockerfile. - dirname = os.path.dirname(file) or "." - dockerfiles = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "Dockerfile"] - - if not dockerfiles: - return [] - - # 3.
Read the dockerfile - with open(dockerfiles[0]) as f: - dockerfile = list(f.readlines()) - return dockerfile - - def _prepare_requirements(self) -> Optional[Union[str, List[str]]]: - if not self.requirements: + file = _get_work_file(work) + if file is None: return None + # 2. Check for Dockerfile. + dirname = os.path.dirname(file) + dockerfile = os.path.join(dirname, filename) + if os.path.isfile(dockerfile): + return dockerfile + def _prepare_requirements(self) -> None: requirements = [] for req in self.requirements: # 1. Check for relative path path = os.path.join(self._call_dir, req) - if os.path.exists(path): + if os.path.isfile(path): try: - requirements.extend( - load_requirements(os.path.dirname(path), os.path.basename(path)), - ) + new_requirements = load_requirements(self._call_dir, req) except NotADirectoryError: - pass + continue + requirements.extend(new_requirements) else: requirements.append(req) - self.requirements = requirements - def _prepare_dockerfile(self): - if self.dockerfile: - dockerfile_path = os.path.join(self._call_dir, self.dockerfile) - if os.path.exists(dockerfile_path): - with open(dockerfile_path) as f: - self.dockerfile = list(f.readlines()) + def _prepare_dockerfile(self) -> None: + if isinstance(self.dockerfile, (str, Path)): + path = os.path.join(self._call_dir, self.dockerfile) + if os.path.exists(path): + with open(path) as f: + self.dockerfile = _Dockerfile(path, f.readlines()) - def to_dict(self): + def to_dict(self) -> Dict: return {"__build_config__": asdict(self)} @classmethod - def from_dict(cls, d): + def from_dict(cls, d: Dict) -> Self: # type: ignore[valid-type] return cls(**d["__build_config__"]) + + +def _get_work_file(work: "L.LightningWork") -> Optional[str]: + cls = work.__class__ + try: + return inspect.getfile(cls) + except TypeError: + logger.debug(f"The {cls.__name__} file couldn't be found.") + return None diff --git a/tests/tests_app/cli/test_run_app.py b/tests/tests_app/cli/test_run_app.py index 
96df275fab049e..f4d90c9ed58fc6 100644 --- a/tests/tests_app/cli/test_run_app.py +++ b/tests/tests_app/cli/test_run_app.py @@ -11,11 +11,9 @@ from lightning_app import LightningApp from lightning_app.cli.lightning_cli import _run_app, run_app from lightning_app.runners.runtime_type import RuntimeType -from lightning_app.testing.helpers import _RunIf from lightning_app.utilities.app_helpers import convert_print_to_logger_info -@_RunIf(skip_linux=True) @mock.patch("click.launch") @pytest.mark.parametrize("open_ui", (True, False)) def test_lightning_run_app(lauch_mock: mock.MagicMock, open_ui, caplog, monkeypatch): @@ -51,7 +49,7 @@ def _lightning_app_run_and_logging(self, *args, **kwargs): else: lauch_mock.assert_not_called() assert result.exit_code == 0 - assert len(caplog.messages) == 2 + assert len(caplog.messages) == 4 assert bool(int(caplog.messages[0])) is open_ui diff --git a/tests/tests_app/conftest.py b/tests/tests_app/conftest.py index 7b9d56850c84d3..6f74feb8a360cb 100644 --- a/tests/tests_app/conftest.py +++ b/tests/tests_app/conftest.py @@ -73,3 +73,30 @@ def another_tmpdir(tmp_path: Path) -> py.path.local: random_dir = datetime.now().strftime("%m-%d-%Y-%H-%M-%S") tmp_path = os.path.join(tmp_path, random_dir) return py.path.local(tmp_path) + + +@pytest.fixture +def caplog(caplog): + """Workaround for https://github.com/pytest-dev/pytest/issues/3697. + + Setting ``filterwarnings`` with pytest breaks ``caplog`` when ``not logger.propagate``. 
+ """ + import logging + + root_logger = logging.getLogger() + root_propagate = root_logger.propagate + root_logger.propagate = True + + propagation_dict = { + name: logging.getLogger(name).propagate + for name in logging.root.manager.loggerDict + if name.startswith("lightning_app") + } + for name in propagation_dict.keys(): + logging.getLogger(name).propagate = True + + yield caplog + + root_logger.propagate = root_propagate + for name, propagate in propagation_dict.items(): + logging.getLogger(name).propagate = propagate diff --git a/tests/tests_app/core/test_lightning_app.py b/tests/tests_app/core/test_lightning_app.py index 8bc6c6418e81c2..a5fac6bf0da419 100644 --- a/tests/tests_app/core/test_lightning_app.py +++ b/tests/tests_app/core/test_lightning_app.py @@ -974,11 +974,16 @@ def on_after_run_once(self): def test_non_updated_flow(caplog): - """This tests validate the app can run 3 times and call the flow only once.""" + """Validate that the app can run 3 times and calls the flow only once.""" + app = NonUpdatedLightningTestApp(FlowUpdated()) + runtime = MultiProcessRuntime(app, start_server=False) with caplog.at_level(logging.INFO): - app = NonUpdatedLightningTestApp(FlowUpdated()) - MultiProcessRuntime(app, start_server=False).dispatch() - assert caplog.messages == ["Hello World"] + runtime.dispatch() + assert caplog.messages == [ + "Hello World", + "Your Lightning App is being stopped. 
This won't take long.", + "Your Lightning App has been stopped successfully!", + ] assert app.counter == 3 diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index cec1f34c0e011b..d1a6f9daaffe13 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1198,13 +1198,17 @@ def test_check_uploaded_folder(monkeypatch, tmpdir, caplog): assert caplog.messages == [] mock = MagicMock() + mock.st_mode = 33188 mock.st_size = 5 * 1000 * 1000 repo.files = [str(Path("./a.png"))] monkeypatch.setattr(Path, "stat", MagicMock(return_value=mock)) + path = Path(".") with caplog.at_level(logging.WARN): - backend._check_uploaded_folder(Path("."), repo) - assert caplog.messages[0].startswith("Your application folder . is more than 2 MB. Found 5.0 MB") + backend._check_uploaded_folder(path, repo) + assert caplog.messages[0].startswith( + f"Your application folder '{path.absolute()}' is more than 2 MB. The total size is 5.0 MB" + ) @mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock()) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index e1b9466dc0806d..ba497a5efbdb45 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -1,5 +1,7 @@ +import logging import os import sys +from unittest.mock import Mock from tests_app import _TESTS_ROOT @@ -48,14 +50,14 @@ def test_build_config_dockerfile_provided(): spec = BuildConfig(dockerfile="./projects/Dockerfile.cpu") assert not spec.requirements # ugly hack due to replacing `pytorch_lightning string - assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile[0] + assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile.data[0] class DockerfileLightningTestApp(LightningTestApp): def on_after_run_once(self): print(self.root.work.local_build_config.dockerfile) # ugly hack due to 
replacing `pytorch_lightning string - assert "pytorchlightning/pytorch_" + "lightning" in self.root.work.local_build_config.dockerfile[0] + assert "pytorchlightning/pytorch_" + "lightning" in self.root.work.local_build_config.dockerfile.data[0] return super().on_after_run_once() @@ -79,3 +81,25 @@ def test_build_config_requirements(): command_line = [os.path.join(_TESTS_ROOT, "utilities/packaging/projects/req/app.py")] application_testing(RequirementsLightningTestApp, command_line + EXTRAS_ARGS) sys.path = sys.path[:-1] + + +def test_build_config_requirements_warns(monkeypatch, caplog): + requirements = ["foo", "bar"] + bc = BuildConfig(requirements=requirements) + monkeypatch.setattr(bc, "_find_requirements", lambda *_, **__: ["baz"]) + work = Mock() + with caplog.at_level(logging.INFO): + bc.on_work_init(work) + assert "requirements.txt' exists with ['baz'] but ['foo', 'bar']" in caplog.text + assert bc.requirements == requirements # they are not merged or replaced + + +def test_build_config_dockerfile_warns(monkeypatch, caplog): + dockerfile = "foo" + bc = BuildConfig(dockerfile=dockerfile) + monkeypatch.setattr(bc, "_find_dockerfile", lambda *_, **__: "bar") + work = Mock() + with caplog.at_level(logging.INFO): + bc.on_work_init(work) + assert "exists at 'bar' but 'foo' was passed" in caplog.text + assert bc.dockerfile == dockerfile # they are not merged or replaced diff --git a/tests/tests_app/utilities/test_load_app.py b/tests/tests_app/utilities/test_load_app.py index 87894e7e8c6112..573f73a670aadb 100644 --- a/tests/tests_app/utilities/test_load_app.py +++ b/tests/tests_app/utilities/test_load_app.py @@ -37,8 +37,8 @@ def test_extract_metadata_from_component(): "cls_name": "WorkA", "module": "__main__", "docstring": "WorkA.", - "local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, - "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, + "local_build_config": 
{"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, + "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, "cloud_compute": { "type": "__cloud_compute__", "name": "default", @@ -60,8 +60,8 @@ def test_extract_metadata_from_component(): "cls_name": "WorkA", "module": "__main__", "docstring": "WorkA.", - "local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, - "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, + "local_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, + "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, "cloud_compute": { "type": "__cloud_compute__", "name": "default", @@ -78,8 +78,8 @@ def test_extract_metadata_from_component(): "cls_name": "WorkB", "module": "__main__", "docstring": "WorkB.", - "local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, - "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}}, + "local_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, + "cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}}, "cloud_compute": { "type": "__cloud_compute__", "name": "gpu",