Notify the user of ignored requirements (#15799)
(cherry picked from commit 9e43604)
carmocca authored and Borda committed Nov 30, 2022
1 parent 6ae458a commit f557e46
Showing 11 changed files with 189 additions and 111 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
@@ -135,7 +135,6 @@ module = [
"lightning_app.utilities.name_generator",
"lightning_app.utilities.network",
"lightning_app.utilities.openapi",
"lightning_app.utilities.packaging.build_config",
"lightning_app.utilities.packaging.cloud_compute",
"lightning_app.utilities.packaging.docker",
"lightning_app.utilities.packaging.lightning_utils",
2 changes: 2 additions & 0 deletions src/lightning_app/CHANGELOG.md
@@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

-

- Show a message when `BuildConfig(requirements=[...])` is passed but a `requirements.txt` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799))
- Show a message when `BuildConfig(dockerfile="...")` is passed but a `Dockerfile` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799))

### Changed

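
To illustrate the behaviour described in the two changelog entries above, here is a minimal, hedged sketch assuming the `cloud_build_config` argument of `LightningWork`; the `MyWork` class, the requirement pin, and the neighbouring `requirements.txt` are invented for this example. When both an explicit list and a `requirements.txt` next to the Work's file exist, the explicit list wins and an info message reports that the file is ignored.

# Hypothetical Work illustrating the new notification; the requirement pin is made up.
from lightning_app import LightningWork
from lightning_app.utilities.packaging.build_config import BuildConfig


class MyWork(LightningWork):
    def __init__(self):
        # An explicit requirements list takes precedence over a requirements.txt
        # located next to this file; on init an info message says the file is ignored.
        super().__init__(cloud_build_config=BuildConfig(requirements=["numpy>=1.21"]))

    def run(self):
        print("running")
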
12 changes: 6 additions & 6 deletions src/lightning_app/runners/cloud.py
@@ -1,6 +1,5 @@
import fnmatch
import json
import os
import random
import string
import sys
@@ -50,6 +49,7 @@
CLOUD_UPLOAD_WARNING,
DEFAULT_NUMBER_OF_EXPOSED_PORTS,
DISABLE_DEPENDENCY_CACHE,
DOT_IGNORE_FILENAME,
ENABLE_APP_COMMENT_COMMAND_EXECUTION,
ENABLE_MULTIPLE_WORKS_IN_DEFAULT_CONTAINER,
ENABLE_MULTIPLE_WORKS_IN_NON_DEFAULT_CONTAINER,
@@ -488,12 +488,12 @@ def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None:
largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25]
largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths)
warning_msg = (
f"Your application folder {root} is more than {CLOUD_UPLOAD_WARNING} MB. "
f"Found {app_folder_size_in_mb} MB \n"
"Here are the largest files: \n"
f"{largest_paths_msg}"
f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. "
f"The total size is {app_folder_size_in_mb} MB\n"
f"Here are the largest files: \n{largest_paths_msg}\n"
"Perhaps you should try running the app in an empty directory."
)
if not os.path.exists(os.path.join(root, ".lightningignore")):
if not (root / DOT_IGNORE_FILENAME).is_file():
warning_msg = (
warning_msg
+ "\nIn order to ignore some files or folder, "
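
For context, a standalone sketch of the check being modified above; `check_upload_size` and the 2 MB limit are made up for this example, while the real implementation lives in `_check_uploaded_folder` and uses the constants imported at the top of `cloud.py`.

# Illustrative only: `check_upload_size` and the 2 MB limit are not part of lightning_app.
from pathlib import Path

DOT_IGNORE_FILENAME = ".lightningignore"


def check_upload_size(root: Path, limit_mb: float = 2.0) -> None:
    files = [p for p in root.rglob("*") if p.is_file()]
    total_mb = sum(p.stat().st_size for p in files) / (1024 * 1024)
    if total_mb <= limit_mb:
        return
    largest = sorted(files, key=lambda p: p.stat().st_size, reverse=True)[:5]
    lines = "\n".join(f"{round(p.stat().st_size / (1024 * 1024), 5)} MB: {p}" for p in largest)
    msg = (
        f"Your application folder '{root.absolute()}' is more than {limit_mb} MB. "
        f"The total size is {round(total_mb, 2)} MB\n"
        f"Here are the largest files: \n{lines}\n"
        "Perhaps you should try running the app in an empty directory."
    )
    # The .lightningignore hint is only appended when no such file exists at the root.
    if not (root / DOT_IGNORE_FILENAME).is_file():
        msg += "\nIn order to ignore some files or folders, create a '.lightningignore' file."
    print(msg)
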
42 changes: 21 additions & 21 deletions src/lightning_app/source_code/copytree.py
@@ -1,5 +1,6 @@
import fnmatch
import os
from functools import partial
from pathlib import Path
from shutil import copy2, copystat, Error
from typing import Callable, List, Set, Union
@@ -58,8 +59,10 @@ def _copytree(
_ignore_filename_spell_check(src)
src = Path(src)
dst = Path(dst)
if src.joinpath(DOT_IGNORE_FILENAME).exists():
ignore_fn = _get_ignore_function(src)
ignore_filepath = src / DOT_IGNORE_FILENAME
if ignore_filepath.is_file():
patterns = _read_lightningignore(ignore_filepath)
ignore_fn = partial(_filter_ignored, src, patterns)
# creating new list so we won't modify the original
ignore_functions = [*ignore_functions, ignore_fn]

@@ -108,19 +111,22 @@ def _copytree(
return files_copied


def _get_ignore_function(src: Path) -> Callable:
patterns = _read_lightningignore(src / DOT_IGNORE_FILENAME)
def _filter_ignored(src: Path, patterns: Set[str], current_dir: Path, entries: List[Path]) -> List[Path]:
relative_dir = current_dir.relative_to(src)
names = [str(relative_dir / entry.name) for entry in entries]
ignored_names = set()
for pattern in patterns:
ignored_names.update(fnmatch.filter(names, pattern))
return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names]

def filter_ignored(current_dir: Path, entries: List[Path]) -> List[Path]:
relative_dir = current_dir.relative_to(src)
names = [str(relative_dir / entry.name) for entry in entries]
ignored_names = []
for pattern in patterns:
ignored_names.extend(fnmatch.filter(names, pattern))
ignored_names_set = set(ignored_names)
return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names_set]

return filter_ignored
def _parse_lightningignore(lines: List[str]) -> Set[str]:
"""Creates a set that removes empty lines and comments."""
lines = [ln.strip() for ln in lines]
# removes first `/` character for posix and `\\` for windows
lines = [ln.lstrip("/").lstrip("\\") for ln in lines if ln != "" and not ln.startswith("#")]
# convert to path and converting back to string to sanitize the pattern
return {str(Path(ln)) for ln in lines}


def _read_lightningignore(path: Path) -> Set[str]:
@@ -137,14 +143,8 @@ def _read_lightningignore(path: Path) -> Set[str]:
Set[str]
Set of unique lines.
"""
raw_lines = [ln.strip() for ln in path.open().readlines()]

# creates a set that removes empty lines and comments
lines = {ln for ln in raw_lines if ln != "" and ln is not None and not ln.startswith("#")}

# removes first `/` character for posix and `\\` for windows
# also converting to path and converting back to string to sanitize the pattern
return {str(Path(ln.lstrip("/").lstrip("\\"))) for ln in lines}
raw_lines = path.open().readlines()
return _parse_lightningignore(raw_lines)


def _ignore_filename_spell_check(src: Path):
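
The refactor above replaces the closure returned by `_get_ignore_function` with a module-level `_filter_ignored` bound via `functools.partial`, and splits the parsing step into `_parse_lightningignore`. A self-contained sketch of that pattern follows; the sample ignore lines are invented.

# Standalone sketch of the .lightningignore parsing and filtering used above.
import fnmatch
from functools import partial
from pathlib import Path
from typing import List, Set


def _parse_lightningignore(lines: List[str]) -> Set[str]:
    """Creates a set that removes empty lines and comments."""
    lines = [ln.strip() for ln in lines]
    # removes first `/` character for posix and `\\` for windows
    lines = [ln.lstrip("/").lstrip("\\") for ln in lines if ln != "" and not ln.startswith("#")]
    # convert to path and back to string to sanitize the pattern
    return {str(Path(ln)) for ln in lines}


def _filter_ignored(src: Path, patterns: Set[str], current_dir: Path, entries: List[Path]) -> List[Path]:
    relative_dir = current_dir.relative_to(src)
    names = [str(relative_dir / entry.name) for entry in entries]
    ignored_names = set()
    for pattern in patterns:
        ignored_names.update(fnmatch.filter(names, pattern))
    return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names]


if __name__ == "__main__":
    src = Path(".")
    patterns = _parse_lightningignore(["# a comment", "", "*.log", "/build"])
    ignore_fn = partial(_filter_ignored, src, patterns)  # same shape as the entries in ignore_functions
    kept = ignore_fn(src, list(src.iterdir()))
    print(sorted(p.name for p in kept))
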
151 changes: 85 additions & 66 deletions src/lightning_app/utilities/packaging/build_config.py
@@ -1,16 +1,16 @@
import inspect
import os
import re
from dataclasses import asdict, dataclass
from types import FrameType
from typing import cast, List, Optional, TYPE_CHECKING, Union
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Union

from typing_extensions import Self

import lightning_app as L
from lightning_app.utilities.app_helpers import Logger
from lightning_app.utilities.packaging.cloud_compute import CloudCompute

if TYPE_CHECKING:
from lightning_app import LightningWork

logger = Logger(__name__)


@@ -19,11 +19,10 @@ def load_requirements(
) -> List[str]:
"""Load requirements from a file.
.. code-block:: python
path_req = os.path.join(_PROJECT_ROOT, "requirements")
requirements = load_requirements(path_req)
print(requirements) # ['numpy...', 'torch...', ...]
>>> from lightning_app import _PROJECT_ROOT
>>> path_req = os.path.join(_PROJECT_ROOT, "requirements")
>>> load_requirements(path_req, "docs.txt") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE +SKIP
['sphinx>=4.0', ...] # TODO: remove SKIP, fails on python 3.7
"""
path = os.path.join(path_dir, file_name)
if not os.path.isfile(path):
@@ -49,12 +48,19 @@ def load_requirements(
return reqs


@dataclass
class _Dockerfile:
path: str
data: List[str]


@dataclass
class BuildConfig:
"""The Build Configuration describes how the environment a LightningWork runs in should be set up.
Arguments:
requirements: List of requirements or paths to requirement files.
requirements: List of requirements or list of paths to requirement files. If not passed, they will be
automatically extracted from a `requirements.txt` if it exists.
dockerfile: The path to a dockerfile to be used to build your container.
You need to add those lines to ensure your container works in the cloud.
@@ -64,20 +70,19 @@ class BuildConfig:
WORKDIR /gridai/project
COPY . .
Learn more by checking out:
https://docs.grid.ai/features/runs/running-experiments-with-a-dockerfile
image: The base image that the work runs on. This should be a publicly accessible image from a registry that
doesn't enforce rate limits (such as DockerHub) to pull this image, otherwise your application will not
start.
"""

requirements: Optional[Union[str, List[str]]] = None
dockerfile: Optional[str] = None
requirements: List[str] = field(default_factory=list)
dockerfile: Optional[Union[str, Path, _Dockerfile]] = None
image: Optional[str] = None

def __post_init__(self):
self._call_dir = os.path.dirname(cast(FrameType, inspect.currentframe()).f_back.f_back.f_code.co_filename)
def __post_init__(self) -> None:
current_frame = inspect.currentframe()
co_filename = current_frame.f_back.f_back.f_code.co_filename # type: ignore[union-attr]
self._call_dir = os.path.dirname(co_filename)
self._prepare_requirements()
self._prepare_dockerfile()

@@ -101,76 +106,90 @@ def build_commands(self):
"""
return []

def on_work_init(self, work, cloud_compute: Optional["CloudCompute"] = None):
def on_work_init(self, work: "L.LightningWork", cloud_compute: Optional["CloudCompute"] = None) -> None:
"""Override with your own logic to load the requirements or dockerfile."""
try:
self.requirements = sorted(self.requirements or self._find_requirements(work) or [])
self.dockerfile = self.dockerfile or self._find_dockerfile(work)
except TypeError:
logger.debug("The provided work couldn't be found.")
found_requirements = self._find_requirements(work)
if self.requirements:
if found_requirements and self.requirements != found_requirements:
# notify the user of this silent behaviour
logger.info(
f"A 'requirements.txt' exists with {found_requirements} but {self.requirements} was passed to"
f" the `{type(self).__name__}` in {work.name!r}. The `requirements.txt` file will be ignored."
)
else:
self.requirements = found_requirements
self._prepare_requirements()

def _find_requirements(self, work: "LightningWork") -> List[str]:
# 1. Get work file
file = inspect.getfile(work.__class__)
found_dockerfile = self._find_dockerfile(work)
if self.dockerfile:
if found_dockerfile and self.dockerfile != found_dockerfile:
# notify the user of this silent behaviour
logger.info(
f"A Dockerfile exists at {found_dockerfile!r} but {self.dockerfile!r} was passed to"
f" the `{type(self).__name__}` in {work.name!r}. {found_dockerfile!r}` will be ignored."
)
else:
self.dockerfile = found_dockerfile
self._prepare_dockerfile()

# 2. Try to find a requirement file associated the file.
dirname = os.path.dirname(file) or "."
requirement_files = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "requirements.txt"]
if not requirement_files:
def _find_requirements(self, work: "L.LightningWork", filename: str = "requirements.txt") -> List[str]:
# 1. Get work file
file = _get_work_file(work)
if file is None:
return []
dirname, basename = os.path.dirname(requirement_files[0]), os.path.basename(requirement_files[0])
# 2. Try to find a requirement file associated the file.
dirname = os.path.dirname(file)
try:
requirements = load_requirements(dirname, basename)
requirements = load_requirements(dirname, filename)
except NotADirectoryError:
requirements = []
return []
return [r for r in requirements if r != "lightning"]

def _find_dockerfile(self, work: "LightningWork") -> List[str]:
def _find_dockerfile(self, work: "L.LightningWork", filename: str = "Dockerfile") -> Optional[str]:
# 1. Get work file
file = inspect.getfile(work.__class__)

# 2. Check for Dockerfile.
dirname = os.path.dirname(file) or "."
dockerfiles = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "Dockerfile"]

if not dockerfiles:
return []

# 3. Read the dockerfile
with open(dockerfiles[0]) as f:
dockerfile = list(f.readlines())
return dockerfile

def _prepare_requirements(self) -> Optional[Union[str, List[str]]]:
if not self.requirements:
file = _get_work_file(work)
if file is None:
return None
# 2. Check for Dockerfile.
dirname = os.path.dirname(file)
dockerfile = os.path.join(dirname, filename)
if os.path.isfile(dockerfile):
return dockerfile

def _prepare_requirements(self) -> None:
requirements = []
for req in self.requirements:
# 1. Check for relative path
path = os.path.join(self._call_dir, req)
if os.path.exists(path):
if os.path.isfile(path):
try:
requirements.extend(
load_requirements(os.path.dirname(path), os.path.basename(path)),
)
new_requirements = load_requirements(self._call_dir, req)
except NotADirectoryError:
pass
continue
requirements.extend(new_requirements)
else:
requirements.append(req)

self.requirements = requirements

def _prepare_dockerfile(self):
if self.dockerfile:
dockerfile_path = os.path.join(self._call_dir, self.dockerfile)
if os.path.exists(dockerfile_path):
with open(dockerfile_path) as f:
self.dockerfile = list(f.readlines())
def _prepare_dockerfile(self) -> None:
if isinstance(self.dockerfile, (str, Path)):
path = os.path.join(self._call_dir, self.dockerfile)
if os.path.exists(path):
with open(path) as f:
self.dockerfile = _Dockerfile(path, f.readlines())

def to_dict(self):
def to_dict(self) -> Dict:
return {"__build_config__": asdict(self)}

@classmethod
def from_dict(cls, d):
def from_dict(cls, d: Dict) -> Self: # type: ignore[valid-type]
return cls(**d["__build_config__"])


def _get_work_file(work: "L.LightningWork") -> Optional[str]:
cls = work.__class__
try:
return inspect.getfile(cls)
except TypeError:
logger.debug(f"The {cls.__name__} file couldn't be found.")
return None
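
The heart of the change in `on_work_init` above is a precedence rule: a value passed explicitly to the `BuildConfig` always wins over one discovered next to the Work's file, and the user is informed when both exist and differ. A generic, hedged sketch of that rule follows; `resolve_with_notice` is an illustrative name, not a lightning_app API.

# Generic sketch of the precedence-with-notification rule; not part of lightning_app.
import logging
from typing import Optional, TypeVar

logger = logging.getLogger(__name__)
T = TypeVar("T")


def resolve_with_notice(passed: Optional[T], found: Optional[T], what: str) -> Optional[T]:
    if passed:
        if found and passed != found:
            # Explicit configuration wins; tell the user the discovered one is ignored.
            logger.info(f"A {what} was found ({found!r}) but {passed!r} was passed explicitly; {found!r} will be ignored.")
        return passed
    return found


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    reqs = resolve_with_notice(["numpy>=1.21"], ["torch"], "requirements.txt")
    dockerfile = resolve_with_notice(None, "./Dockerfile", "Dockerfile")
    print(reqs, dockerfile)
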
4 changes: 1 addition & 3 deletions tests/tests_app/cli/test_run_app.py
@@ -11,11 +11,9 @@
from lightning_app import LightningApp
from lightning_app.cli.lightning_cli import _run_app, run_app
from lightning_app.runners.runtime_type import RuntimeType
from lightning_app.testing.helpers import _RunIf
from lightning_app.utilities.app_helpers import convert_print_to_logger_info


@_RunIf(skip_linux=True)
@mock.patch("click.launch")
@pytest.mark.parametrize("open_ui", (True, False))
def test_lightning_run_app(lauch_mock: mock.MagicMock, open_ui, caplog, monkeypatch):
@@ -51,7 +49,7 @@ def _lightning_app_run_and_logging(self, *args, **kwargs):
else:
lauch_mock.assert_not_called()
assert result.exit_code == 0
assert len(caplog.messages) == 2
assert len(caplog.messages) == 4
assert bool(int(caplog.messages[0])) is open_ui


27 changes: 27 additions & 0 deletions tests/tests_app/conftest.py
@@ -73,3 +73,30 @@ def another_tmpdir(tmp_path: Path) -> py.path.local:
random_dir = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
tmp_path = os.path.join(tmp_path, random_dir)
return py.path.local(tmp_path)


@pytest.fixture
def caplog(caplog):
"""Workaround for https://github.com/pytest-dev/pytest/issues/3697.
Setting ``filterwarnings`` with pytest breaks ``caplog`` when ``not logger.propagate``.
"""
import logging

root_logger = logging.getLogger()
root_propagate = root_logger.propagate
root_logger.propagate = True

propagation_dict = {
name: logging.getLogger(name).propagate
for name in logging.root.manager.loggerDict
if name.startswith("lightning_app")
}
for name in propagation_dict.keys():
logging.getLogger(name).propagate = True

yield caplog

root_logger.propagate = root_propagate
for name, propagate in propagation_dict.items():
logging.getLogger(name).propagate = propagate
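
A hedged sketch of a test that relies on this fixture override: because propagation on the `lightning_app.*` loggers is temporarily re-enabled, their records reach pytest's capture handler and can be asserted on. The test body and the logged message are illustrative.

# Illustrative test; the logger name matches the build_config module touched by this commit.
import logging


def test_requirements_conflict_is_reported(caplog):
    logger = logging.getLogger("lightning_app.utilities.packaging.build_config")
    with caplog.at_level(logging.INFO):
        # Stand-in for the info message emitted by BuildConfig.on_work_init.
        logger.info("The `requirements.txt` file will be ignored.")
    assert "will be ignored" in caplog.text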
