From 6a39743d8639f27604781cd0849fcf78c31645a0 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 9 Dec 2022 17:16:27 +0100 Subject: [PATCH] Releasing/1.8.4.post (#15988) * Apply dynamo to training_step, validation_step, test_step, predict_step (#15957) * Apply dynamo to training_step, validation_step, test_step, predict_step * Add entry to CHANGELOG.md (cherry picked from commit edc998608464f27be8b9c05385cd464d2f0fc73e) * [App] Resolve run installation (#15974) (cherry picked from commit dd83587102bf9650babf2af407af79d623934a84) * App: Move AutoScaler dependency to extra requirements (#15971) * Make autoscaler dependency optional * update chglog * dont directly import aiohttp (cherry picked from commit 346e93665102f4faebce388a8e556f9394b7804f) # Conflicts: # requirements/app/base.txt # src/lightning_app/CHANGELOG.md * Avoid using the same port number for autoscaler works (#15966) * dont hardcode port in python server * add another chglog (cherry picked from commit a72d268a51534331d388b0dbd16a716c5805af0f) * Fix `action_name` usage in `XLAProfiler` (#15886) * Fix `action_name` usage in `XLAProfiler` * add changelog * Update src/pytorch_ligh * Update xla.py Co-authored-by: awaelchli Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit c748f828e5758465b295dc9aace208368f88c44c) * Fix multinode cloud component (#15965) * fix multinode cloud component * add tests (cherry picked from commit d21b8992eead8f544a41792e4ef40a2710423a62) * ci: update signaling (#15981) * ci: update signaling * config (cherry picked from commit e56e7f11b0e337c5b8c110887f5fb351697666ca) * Fix cloudcomputes registration for structures (#15964) * fix cloudcomputes * updates cloudcompute registration * changelog (cherry picked from commit 90a4c0289d6b404939a4cf59fe6e722b97abe441) * Document running dev lightning on the cloud (#15962) * document running dev lightning on the cloud * document running dev 
lightning on the cloud * Update .github/CONTRIBUTING.md Co-authored-by: Noha Alon * document running dev lightning on the cloud * git clone & pip install -e * Update .github/CONTRIBUTING.md Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Noha Alon Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit cfd00d3e47853ff45d3259056f2bbc42b9b764cb) * [App] Install exact version whn upgrading and not when testing (#15984) * [App] Install exact version whn upgrading and not when testing * Update CHANGELOG.md Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 1657ea8a39c3795e1276c491eff4f13b06a06e61) * releasing 1.8.4.post0 Co-authored-by: Luca Antiga Co-authored-by: thomas chaton Co-authored-by: Akihiro Nitta Co-authored-by: Liyang90 Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: Ethan Harris --- .github/CONTRIBUTING.md | 14 ++++++++ .github/workflows/release-pypi.yml | 36 +++++++++---------- examples/app_server_with_auto_scaler/app.py | 21 ++++++----- requirements/app/base.txt | 2 -- requirements/app/cloud.txt | 2 ++ requirements/app/components.txt | 1 + src/lightning/__version__.py | 2 +- src/lightning_app/CHANGELOG.md | 10 +++++- src/lightning_app/__version__.py | 2 +- src/lightning_app/cli/cmd_install.py | 15 ++++---- src/lightning_app/cli/lightning_cli.py | 5 +-- src/lightning_app/components/auto_scaler.py | 11 ++++-- .../components/multi_node/base.py | 2 +- .../components/serve/python_server.py | 6 +--- src/lightning_app/core/flow.py | 19 ++++++---- src/lightning_app/testing/testing.py | 1 + src/lightning_app/utilities/cli_helpers.py | 4 +-- src/lightning_app/utilities/imports.py | 4 +++ .../utilities/packaging/cloud_compute.py | 10 +++++- src/lightning_lite/__version__.py | 2 +- src/pytorch_lightning/CHANGELOG.md | 6 +++- src/pytorch_lightning/__version__.py | 2 +- 
src/pytorch_lightning/core/module.py | 12 +++++++ src/pytorch_lightning/profilers/xla.py | 6 ++-- tests/tests_app/cli/test_cmd_install.py | 12 +++++++ .../components/multi_node/test_base.py | 12 +++++++ tests/tests_app/core/test_lightning_flow.py | 29 ++++++++++++++- .../utilities/packaging/test_cloud_compute.py | 18 ++++++++++ tests/tests_app/utilities/test_cli_helpers.py | 2 +- .../core/test_lightning_module.py | 21 +++++++++-- tests/tests_pytorch/trainer/test_trainer.py | 3 +- 31 files changed, 223 insertions(+), 69 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a1edacea7c104..42bda00d58a3e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -309,6 +309,20 @@ and the last true master commit is `ccc111` and your first commit is `mmm222`. git push -f ``` +#### How to run an app on the cloud with a local version of lightning + +The lightning cloud uses the latest release by default. However, you might want to run your app with some local changes you've made to the lightning framework. To use your local version of lightning on the cloud, set the following environment variable: + +```bash +git clone https://github.com/Lightning-AI/lightning.git +cd lightning +pip install -e . +export PACKAGE_LIGHTNING=1 # <- this is the magic to use your version (not mainstream PyPI)! +lightning run app app.py --cloud +``` + +By setting `PACKAGE_LIGHTNING=1`, lightning packages the lightning source code in your local directory in addition to your app source code and uploads them to the cloud. + ### Bonus Workflow Tip If you don't want to remember all the commands above every time you want to push some code/setup a Lightning Dev environment on a new VM, you can set up bash aliases for some common commands. You can add these to one of your `~/.bashrc`, `~/.zshrc`, or `~/.bash_aliases` files. 
diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 5e500670865de..1d9d0a2eeda25 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -12,6 +12,7 @@ defaults: shell: bash jobs: + init: runs-on: ubuntu-20.04 steps: @@ -23,6 +24,7 @@ jobs: name: dist-packages-${{ github.sha }} path: dist + build-packages: needs: init runs-on: ubuntu-20.04 @@ -40,22 +42,20 @@ jobs: - uses: actions/setup-python@v4 with: python-version: 3.9 - - name: Install dependencies run: pip install -U setuptools wheel - - name: Build packages env: PACKAGE_NAME: ${{ matrix.pkg-name }} run: | python setup.py sdist bdist_wheel ls -lh dist/ - - uses: actions/upload-artifact@v3 with: name: dist-packages-${{ github.sha }} path: dist + upload-packages: runs-on: ubuntu-20.04 needs: build-packages @@ -73,6 +73,7 @@ jobs: files: 'dist/*' repo-token: ${{ secrets.GITHUB_TOKEN }} + release-version: runs-on: ubuntu-20.04 outputs: @@ -87,6 +88,7 @@ jobs: id: lai-package run: python -c "import lightning as L; print(f'version={L.__version__}')" >> $GITHUB_OUTPUT + signaling: runs-on: ubuntu-20.04 needs: [release-version] @@ -100,12 +102,6 @@ jobs: with: repository: gridai/base-images token: ${{ secrets.PAT_GHOST }} - ref: main - - uses: fregante/setup-git-token@v1 - with: - token: ${{ secrets.PAT_GHOST }} - name: PL Ghost - email: pl-github@grid.ai - name: Update lightning version run: | import json, os @@ -115,20 +111,21 @@ jobs: with open("versions.json", "w") as fw: json.dump(vers, fw) shell: python - - name: GIT Commit + - run: cat versions.json + - name: GIT commit & push + env: + BRANCH_NAME: "trigger/lightning-${{ env.TAG }}" run: | + git config --global user.name "PL Ghost" + git config --global user.email pl-github@grid.ai + git checkout -b ${BRANCH_NAME} git add versions.json - git commit -m "bumping lightning version -> ${TAG}" - cat versions.json - - name: GIT Push - run: | git status - # force push is not very nice - # but 
so far the push is rejected even with exception for this user - git push -f + git commit -m "bumping lightning version -> ${TAG}" + git push -u origin ${BRANCH_NAME} -f + waiting: - # TODO: replace with back signal from build images/ loop checking for a specific branch? runs-on: ubuntu-20.04 needs: [release-version, signaling] env: @@ -152,6 +149,7 @@ jobs: time.sleep(60) shell: python + pre-publish-packages: runs-on: ubuntu-20.04 needs: build-packages @@ -181,6 +179,7 @@ jobs: pkg-pattern: "*" pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_LAI }} + publish-packages: runs-on: ubuntu-20.04 needs: [build-packages, waiting] @@ -210,6 +209,7 @@ jobs: pkg-pattern: "*" pypi-token: ${{ secrets.PYPI_TOKEN_LAI }} + legacy-checkpoints: needs: [build-packages] uses: ./.github/workflows/legacy-checkpoints.yml diff --git a/examples/app_server_with_auto_scaler/app.py b/examples/app_server_with_auto_scaler/app.py index b713bd6d1dcfc..70799827776a8 100644 --- a/examples/app_server_with_auto_scaler/app.py +++ b/examples/app_server_with_auto_scaler/app.py @@ -1,3 +1,4 @@ +# ! pip install torch torchvision from typing import Any, List import torch @@ -22,10 +23,10 @@ class BatchResponse(BaseModel): class PyTorchServer(L.app.components.PythonServer): def __init__(self, *args, **kwargs): super().__init__( - port=L.app.utilities.network.find_free_network_port(), input_type=BatchRequestModel, output_type=BatchResponse, - cloud_compute=L.CloudCompute("gpu"), + *args, + **kwargs, ) def setup(self): @@ -57,16 +58,14 @@ def scale(self, replicas: int, metrics: dict) -> int: """The default scaling logic that users can override.""" # scale out if the number of pending requests exceeds max batch size. 
max_requests_per_work = self.max_batch_size - pending_requests_per_running_or_pending_work = metrics["pending_requests"] / ( - replicas + metrics["pending_works"] - ) - if pending_requests_per_running_or_pending_work >= max_requests_per_work: + pending_requests_per_work = metrics["pending_requests"] / (replicas + metrics["pending_works"]) + if pending_requests_per_work >= max_requests_per_work: return replicas + 1 # scale in if the number of pending requests is below 25% of max_requests_per_work min_requests_per_work = max_requests_per_work * 0.25 - pending_requests_per_running_work = metrics["pending_requests"] / replicas - if pending_requests_per_running_work < min_requests_per_work: + pending_requests_per_work = metrics["pending_requests"] / replicas + if pending_requests_per_work < min_requests_per_work: return replicas - 1 return replicas @@ -74,13 +73,17 @@ def scale(self, replicas: int, metrics: dict) -> int: app = L.LightningApp( MyAutoScaler( + # work class and args PyTorchServer, - min_replicas=2, + cloud_compute=L.CloudCompute("gpu"), + # autoscaler specific args + min_replicas=1, max_replicas=4, autoscale_interval=10, endpoint="predict", input_type=RequestModel, output_type=Any, timeout_batching=1, + max_batch_size=8, ) ) diff --git a/requirements/app/base.txt b/requirements/app/base.txt index b3200940e4fe2..9590f2c1a2fbd 100644 --- a/requirements/app/base.txt +++ b/requirements/app/base.txt @@ -12,5 +12,3 @@ beautifulsoup4>=4.8.0, <4.11.2 inquirer>=2.10.0 psutil<5.9.4 click<=8.1.3 -s3fs>=2022.5.0, <2022.8.3 -aiohttp>=3.8.0, <=3.8.3 diff --git a/requirements/app/cloud.txt b/requirements/app/cloud.txt index 314676d5db5a7..512cacf130e1d 100644 --- a/requirements/app/cloud.txt +++ b/requirements/app/cloud.txt @@ -3,3 +3,5 @@ redis>=4.0.1, <=4.2.4 docker>=5.0.0, <=5.0.3 # setuptools==59.5.0 +s3fs>=2022.5.0, <2022.8.3 +aiohttp>=3.8.0, <=3.8.3 diff --git a/requirements/app/components.txt b/requirements/app/components.txt index 38180a480a59b..dd2cadfc1c17e 
100644 --- a/requirements/app/components.txt +++ b/requirements/app/components.txt @@ -1,2 +1,3 @@ # deps required by components in the lightning app repository (src/lightning_app/components) lightning_api_access>=0.0.3 +aiohttp>=3.8.0, <=3.8.3 diff --git a/src/lightning/__version__.py b/src/lightning/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning/__version__.py +++ b/src/lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 9bae487d2ad88..b61d45b79f3e2 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -13,7 +13,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added the CLI command `lightning delete app` to delete a lightning app on the cloud ([#15783](https://github.com/Lightning-AI/lightning/pull/15783)) - Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800)) - Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836)) -- Added `AutoScaler` component ([#15769](https://github.com/Lightning-AI/lightning/pull/15769)) +- Added `AutoScaler` component ( + [#15769](https://github.com/Lightning-AI/lightning/pull/15769), + [#15971](https://github.com/Lightning-AI/lightning/pull/15971), + [#15966](https://github.com/Lightning-AI/lightning/pull/15966) +) - Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921)) - Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923)) - Added a `configure_layout` method to the `LightningWork` which can be used to control how the work is handled in the layout 
of a parent flow ([#15926](https://github.com/Lightning-AI/lightning/pull/15926)) @@ -50,6 +54,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed multiprocessing breakpoint ([#15950](https://github.com/Lightning-AI/lightning/pull/15950)) - Fixed detection of a Lightning App running in debug mode ([#15951](https://github.com/Lightning-AI/lightning/pull/15951)) - Fixed `ImportError` on Multinode if package not present ([#15963](https://github.com/Lightning-AI/lightning/pull/15963)) +- Fixed MultiNode Component to use separate cloud computes ([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) +- Fixed Registration for CloudComputes of Works in `L.app.structures` ([#15964](https://github.com/Lightning-AI/lightning/pull/15964)) +- Fixed a bug where auto-upgrading to the latest lightning via the CLI could get stuck in a loop ([#15984](https://github.com/Lightning-AI/lightning/pull/15984)) + ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_app/__version__.py b/src/lightning_app/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning_app/__version__.py +++ b/src/lightning_app/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/lightning_app/cli/cmd_install.py b/src/lightning_app/cli/cmd_install.py index 579a921179b4c..56c3d07b3d37f 100644 --- a/src/lightning_app/cli/cmd_install.py +++ b/src/lightning_app/cli/cmd_install.py @@ -101,7 +101,7 @@ def gallery_apps_and_components( except Exception: return None - entry, kind = _resolve_entry(app_or_component, version_arg) + entry, kind = _resolve_entry(name, version_arg) if kind == "app": # give the user the chance to do a manual install @@ -111,16 +111,19 @@ def gallery_apps_and_components( # run installation if requested _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) - return os.path.join(os.getcwd(), folder_name, entry["appEntrypointFile"]) + return 
os.path.join(os.getcwd(), *entry["appEntrypointFile"].split("/")) elif kind == "component": # give the user the chance to do a manual install - git_url = _show_install_component_prompt(entry, app_or_component, org, yes_arg) - + source_url, git_url, folder_name, git_sha = _show_install_app_prompt( + entry, app_or_component, org, yes_arg, resource_type="component" + ) + if "@" in git_url: + git_url = git_url.split("git+")[1].split("@")[0] # run installation if requested - _install_component_from_source(git_url) + _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) - return os.path.join(os.getcwd(), entry["appEntrypointFile"]) + return os.path.join(os.getcwd(), *entry["entrypointFile"].split("/")) return None diff --git a/src/lightning_app/cli/lightning_cli.py b/src/lightning_app/cli/lightning_cli.py index 68027e7784f0b..4696745ada95f 100644 --- a/src/lightning_app/cli/lightning_cli.py +++ b/src/lightning_app/cli/lightning_cli.py @@ -48,8 +48,9 @@ def main() -> None: - # Check environment and versions if not in the cloud - if "LIGHTNING_APP_STATE_URL" not in os.environ: + # Check environment and versions if not in the cloud and not testing + is_testing = bool(int(os.getenv("LIGHTING_TESTING", "0"))) + if not is_testing and "LIGHTNING_APP_STATE_URL" not in os.environ: # Enforce running in PATH Python _check_environment_and_redirect() diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index 62e6180c49665..fc6a1a873769b 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -8,8 +8,6 @@ from itertools import cycle from typing import Any, Dict, List, Tuple, Type -import aiohttp -import aiohttp.client_exceptions import requests import uvicorn from fastapi import Depends, FastAPI, HTTPException, Request @@ -22,8 +20,13 @@ from lightning_app.core.flow import LightningFlow from lightning_app.core.work import 
LightningWork from lightning_app.utilities.app_helpers import Logger +from lightning_app.utilities.imports import _is_aiohttp_available, requires from lightning_app.utilities.packaging.cloud_compute import CloudCompute +if _is_aiohttp_available(): + import aiohttp + import aiohttp.client_exceptions + logger = Logger(__name__) @@ -114,6 +117,7 @@ class _LoadBalancer(LightningWork): \**kwargs: Arguments passed to :func:`LightningWork.init` like ``CloudCompute``, ``BuildConfig``, etc. """ + @requires(["aiohttp"]) def __init__( self, input_type: BaseModel, @@ -446,7 +450,8 @@ def workers(self) -> List[LightningWork]: def create_work(self) -> LightningWork: """Replicates a LightningWork instance with args and kwargs provided via ``__init__``.""" # TODO: Remove `start_with_flow=False` for faster initialization on the cloud - return self._work_cls(*self._work_args, **self._work_kwargs, start_with_flow=False) + self._work_kwargs.update(dict(start_with_flow=False)) + return self._work_cls(*self._work_args, **self._work_kwargs) def add_work(self, work) -> str: """Adds a new LightningWork instance. 
diff --git a/src/lightning_app/components/multi_node/base.py b/src/lightning_app/components/multi_node/base.py index ee4f2b3abd4fb..5662442b7375a 100644 --- a/src/lightning_app/components/multi_node/base.py +++ b/src/lightning_app/components/multi_node/base.py @@ -66,7 +66,7 @@ def run( *[ work_cls( *work_args, - cloud_compute=cloud_compute, + cloud_compute=cloud_compute.clone(), **work_kwargs, parallel=True, ) diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py index 1868b0b357fd3..c522a25eb3f3d 100644 --- a/src/lightning_app/components/serve/python_server.py +++ b/src/lightning_app/components/serve/python_server.py @@ -75,8 +75,6 @@ class PythonServer(LightningWork, abc.ABC): @requires(["torch", "lightning_api_access"]) def __init__( # type: ignore self, - host: str = "127.0.0.1", - port: int = 7777, input_type: type = _DefaultInputData, output_type: type = _DefaultOutputData, **kwargs, @@ -84,8 +82,6 @@ def __init__( # type: ignore """The PythonServer Class enables to easily get your machine learning server up and running. Arguments: - host: Address to be used for running the server. - port: Port to be used to running the server. input_type: Optional `input_type` to be provided. This needs to be a pydantic BaseModel class. The default data type is good enough for the basic usecases and it expects the data to be a json object that has one key called `payload` @@ -129,7 +125,7 @@ def predict(self, request): ... 
>>> app = LightningApp(SimpleServer()) """ - super().__init__(parallel=True, host=host, port=port, **kwargs) + super().__init__(parallel=True, **kwargs) if not issubclass(input_type, BaseModel): raise TypeError("input_type must be a pydantic BaseModel class") if not issubclass(output_type, BaseModel): diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index a79794bac3d20..5a82400066f05 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -173,12 +173,19 @@ def __setattr__(self, name: str, value: Any) -> None: elif isinstance(value, (Dict, List)): self._structures.add(name) _set_child_name(self, value, name) - if getattr(self, "_backend", None) is not None: - value._backend = self._backend - for flow in value.flows: - LightningFlow._attach_backend(flow, self._backend) - for work in value.works: - self._backend._wrap_run_method(_LightningAppRef().get_current(), work) + + _backend = getattr(self, "backend", None) + if _backend is not None: + value._backend = _backend + + for flow in value.flows: + if _backend is not None: + LightningFlow._attach_backend(flow, _backend) + + for work in value.works: + work._register_cloud_compute() + if _backend is not None: + _backend._wrap_run_method(_LightningAppRef().get_current(), work) elif isinstance(value, Path): # In the init context, the full name of the Flow and Work is not known, i.e., we can't serialize diff --git a/src/lightning_app/testing/testing.py b/src/lightning_app/testing/testing.py index 8d112d7fa4a7a..40b705458dd49 100644 --- a/src/lightning_app/testing/testing.py +++ b/src/lightning_app/testing/testing.py @@ -262,6 +262,7 @@ def run_app_in_cloud( with tempfile.TemporaryDirectory() as tmpdir: env_copy = os.environ.copy() env_copy["PACKAGE_LIGHTNING"] = "1" + env_copy["LIGHTING_TESTING"] = "1" if debug: env_copy["LIGHTNING_DEBUG"] = "1" shutil.copytree(app_folder, tmpdir, dirs_exist_ok=True) diff --git a/src/lightning_app/utilities/cli_helpers.py 
b/src/lightning_app/utilities/cli_helpers.py index 293944ca82c50..caa414e163ffc 100644 --- a/src/lightning_app/utilities/cli_helpers.py +++ b/src/lightning_app/utilities/cli_helpers.py @@ -254,7 +254,7 @@ def _get_newer_version() -> Optional[str]: return None if __version__ == latest_version else latest_version except Exception: # Return None if any exception occurs - return "err" + return None def _redirect_command(executable: str): @@ -277,7 +277,7 @@ def _check_version_and_upgrade(): prompt = f"A newer version of {__package_name__} is available ({new_version}). Would you like to upgrade?" if click.confirm(prompt, default=True): - command = f"pip install --upgrade {__package_name__}" + command = f"pip install '{__package_name__}=={new_version}'" logger.info(f"⚡ RUN: {command}") diff --git a/src/lightning_app/utilities/imports.py b/src/lightning_app/utilities/imports.py index b484110d3811e..19978fcf5d137 100644 --- a/src/lightning_app/utilities/imports.py +++ b/src/lightning_app/utilities/imports.py @@ -141,4 +141,8 @@ def _is_sqlmodel_available() -> bool: return module_available("sqlmodel") +def _is_aiohttp_available() -> bool: + return module_available("aiohttp") + + _CLOUD_TEST_RUN = bool(os.getenv("CLOUD", False)) diff --git a/src/lightning_app/utilities/packaging/cloud_compute.py b/src/lightning_app/utilities/packaging/cloud_compute.py index f3b162ed042c6..ca6c9705ae866 100644 --- a/src/lightning_app/utilities/packaging/cloud_compute.py +++ b/src/lightning_app/utilities/packaging/cloud_compute.py @@ -82,7 +82,7 @@ def __post_init__(self) -> None: # All `default` CloudCompute are identified in the same way. if self._internal_id is None: - self._internal_id = "default" if self.name == "default" else uuid4().hex[:7] + self._internal_id = self._generate_id() # Internal arguments for now. 
self.preemptible = False @@ -118,6 +118,14 @@ def id(self) -> Optional[str]: def is_default(self) -> bool: return self.name == "default" + def _generate_id(self): + return "default" if self.name == "default" else uuid4().hex[:7] + + def clone(self): + new_dict = self.to_dict() + new_dict["_internal_id"] = self._generate_id() + return self.from_dict(new_dict) + def _verify_mount_root_dirs_are_unique(mounts: Union[None, Mount, List[Mount], Tuple[Mount]]) -> None: if isinstance(mounts, (list, tuple, set)): diff --git a/src/lightning_lite/__version__.py b/src/lightning_lite/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning_lite/__version__.py +++ b/src/lightning_lite/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 3df4b4a7dcb83..a8d91c1ae4a55 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -8,7 +8,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- Direct support for compiled models ([#15922](https://github.com/Lightning-AI/lightning/pull/15922)) +- Direct support for compiled models ( + [#15922](https://github.com/Lightning-AI/lightning/pull/15922), + [#15957](https://github.com/Lightning-AI/lightning/pull/15957) +) ### Fixed @@ -16,6 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/lightning/pull/15940)) - Fixed `fit_loop.restarting` to be `False` for lr finder ([#15620](https://github.com/Lightning-AI/lightning/pull/15620)) - Fixed `torch.jit.script`-ing a LightningModule causing an unintended error message about deprecated `use_amp` property ([#15947](https://github.com/Lightning-AI/lightning/pull/15947)) +- Fixed the `XLAProfiler` not recording anything due to mismatching of action names ([#15885](https://github.com/Lightning-AI/lightning/pull/15885)) ## [1.8.3] - 2022-11-22 diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 48cce4ccbe971..c24235f47d113 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1976,9 +1976,17 @@ def from_compiled(cls, model: "torch._dynamo.OptimizedModule") -> "pl.LightningM "compiler": "dynamo", "dynamo_ctx": model.dynamo_ctx, "original_forward": orig_module.forward, + "original_training_step": orig_module.training_step, + "original_validation_step": orig_module.validation_step, + "original_test_step": orig_module.test_step, + "original_predict_step": orig_module.predict_step, } orig_module.forward = model.dynamo_ctx(orig_module.forward) # type: ignore[assignment] + orig_module.training_step = model.dynamo_ctx(orig_module.training_step) # type: ignore[assignment] + orig_module.validation_step = model.dynamo_ctx(orig_module.validation_step) # type: ignore[assignment] + orig_module.test_step = model.dynamo_ctx(orig_module.test_step) # type: ignore[assignment] + orig_module.predict_step = model.dynamo_ctx(orig_module.predict_step) # type: ignore[assignment] return orig_module 
@classmethod @@ -2007,6 +2015,10 @@ def to_uncompiled(cls, model: Union["pl.LightningModule", "torch._dynamo.Optimiz raise ValueError("`model` must either be an instance of torch._dynamo.OptimizedModule or LightningModule") model.forward = model._compiler_ctx["original_forward"] # type: ignore[assignment] + model.training_step = model._compiler_ctx["original_training_step"] # type: ignore[assignment] + model.validation_step = model._compiler_ctx["original_validation_step"] # type: ignore[assignment] + model.test_step = model._compiler_ctx["original_test_step"] # type: ignore[assignment] + model.predict_step = model._compiler_ctx["original_predict_step"] # type: ignore[assignment] model._compiler_ctx = None return model diff --git a/src/pytorch_lightning/profilers/xla.py b/src/pytorch_lightning/profilers/xla.py index ef103a9a45842..4bfefbc0bacbb 100644 --- a/src/pytorch_lightning/profilers/xla.py +++ b/src/pytorch_lightning/profilers/xla.py @@ -50,12 +50,14 @@ def __init__(self, port: int = 9012) -> None: def start(self, action_name: str) -> None: import torch_xla.debug.profiler as xp - if action_name in self.RECORD_FUNCTIONS: + # The action name is formatted as '[TYPE]{class name}.{hook name}' + # Example: [LightningModule]BoringModel.training_step + if action_name.split(".")[-1] in self.RECORD_FUNCTIONS: if not self._start_trace: self.server = xp.start_server(self.port) self._start_trace = True - if action_name in self.STEP_FUNCTIONS: + if action_name.split(".")[-1] in self.STEP_FUNCTIONS: step = self._get_step_num(action_name) recording = xp.StepTrace(action_name, step_num=step) else: diff --git a/tests/tests_app/cli/test_cmd_install.py b/tests/tests_app/cli/test_cmd_install.py index 2e2086348cb58..c11dd5fdd38c0 100644 --- a/tests/tests_app/cli/test_cmd_install.py +++ b/tests/tests_app/cli/test_cmd_install.py @@ -321,6 +321,18 @@ def test_install_app_shows_error(tmpdir): # os.chdir(cwd) +def test_app_and_component_gallery_app(monkeypatch): + 
monkeypatch.setattr(cmd_install, "_install_app_from_source", mock.MagicMock()) + path = cmd_install.gallery_apps_and_components("lightning/lightning-diffusion-component-api", True, "latest") + assert path == os.path.join(os.getcwd(), "diffusion2", "app.py") + + +def test_app_and_component_gallery_component(monkeypatch): + monkeypatch.setattr(cmd_install, "_install_app_from_source", mock.MagicMock()) + path = cmd_install.gallery_apps_and_components("lightning/lit-jupyter", True, "latest") + assert path == os.path.join(os.getcwd(), "app.py") + + @mock.patch.dict(os.environ, {"LIGHTNING_APP_REGISTRY": "https://TODO/other_non_PL_registry"}) def test_private_app_registry(): registry = cmd_install._resolve_app_registry() diff --git a/tests/tests_app/components/multi_node/test_base.py b/tests/tests_app/components/multi_node/test_base.py index e23535fbfe970..2c6aed1120c0a 100644 --- a/tests/tests_app/components/multi_node/test_base.py +++ b/tests/tests_app/components/multi_node/test_base.py @@ -1,4 +1,5 @@ from re import escape +from unittest import mock import pytest from tests_app.helpers.utils import no_warning_call @@ -17,3 +18,14 @@ def run(self): with no_warning_call(UserWarning, match=escape("You set MultiNode(num_nodes=1, ...)` but ")): MultiNode(Work, num_nodes=1, cloud_compute=CloudCompute("gpu")) + + +@mock.patch("lightning_app.components.multi_node.base.is_running_in_cloud", mock.Mock(return_value=True)) +def test_multi_node_separate_cloud_computes(): + class Work(LightningWork): + def run(self): + pass + + m = MultiNode(Work, num_nodes=2, cloud_compute=CloudCompute("gpu")) + + assert len({w.cloud_compute._internal_id for w in m.ws}) == len(m.ws) diff --git a/tests/tests_app/core/test_lightning_flow.py b/tests/tests_app/core/test_lightning_flow.py index dacccfb3873aa..c8e9921f29eec 100644 --- a/tests/tests_app/core/test_lightning_flow.py +++ b/tests/tests_app/core/test_lightning_flow.py @@ -10,7 +10,8 @@ import pytest from deepdiff import DeepDiff, Delta -from 
lightning_app import LightningApp +import lightning_app +from lightning_app import CloudCompute, LightningApp from lightning_app.core.flow import LightningFlow from lightning_app.core.work import LightningWork from lightning_app.runners import MultiProcessRuntime @@ -901,3 +902,29 @@ def run_patch(method): state = app.api_publish_state_queue.put._mock_call_args[0][0] call_hash = state["works"]["w"]["calls"]["latest_call_hash"] assert state["works"]["w"]["calls"][call_hash]["statuses"][0]["stage"] == "succeeded" + + +def test_structures_register_work_cloudcompute(): + class MyDummyWork(LightningWork): + def run(self): + return + + class MyDummyFlow(LightningFlow): + def __init__(self): + super().__init__() + self.w_list = LList(*[MyDummyWork(cloud_compute=CloudCompute("gpu")) for i in range(5)]) + self.w_dict = LDict(**{str(i): MyDummyWork(cloud_compute=CloudCompute("gpu")) for i in range(5)}) + + def run(self): + for w in self.w_list: + w.run() + + for w in self.w_dict.values(): + w.run() + + MyDummyFlow() + assert len(lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE) == 10 + for v in lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE.values(): + assert len(v.component_names) == 1 + assert v.component_names[0][:-1] in ("root.w_list.", "root.w_dict.") + assert v.component_names[0][-1].isdigit() diff --git a/tests/tests_app/utilities/packaging/test_cloud_compute.py b/tests/tests_app/utilities/packaging/test_cloud_compute.py index aa0395aa5451a..f2670723f132a 100644 --- a/tests/tests_app/utilities/packaging/test_cloud_compute.py +++ b/tests/tests_app/utilities/packaging/test_cloud_compute.py @@ -41,3 +41,21 @@ def test_cloud_compute_with_non_unique_mount_root_dirs(): with pytest.raises(ValueError, match="Every Mount attached to a work must have a unique"): CloudCompute("gpu", mounts=[mount_1, mount_2]) + + +def test_cloud_compute_clone(): + c1 = CloudCompute("gpu") + c2 = c1.clone() + + assert isinstance(c2, CloudCompute) + + 
c1_dict = c1.to_dict() + c2_dict = c2.to_dict() + + assert len(c1_dict) == len(c2_dict) + + for k in c1_dict.keys(): + if k == "_internal_id": + assert c1_dict[k] != c2_dict[k] + else: + assert c1_dict[k] == c2_dict[k] diff --git a/tests/tests_app/utilities/test_cli_helpers.py b/tests/tests_app/utilities/test_cli_helpers.py index 4ebb3ddc4f0ae..ecdd1705c2130 100644 --- a/tests/tests_app/utilities/test_cli_helpers.py +++ b/tests/tests_app/utilities/test_cli_helpers.py @@ -99,7 +99,7 @@ def test_arrow_time_callback(): "1.0.0dev", None, ), - ({"1.0.0": "this wil trigger an error"}, "1.0.0", "err"), + ({"1.0.0": "this wil trigger an error"}, "1.0.0", None), ({}, "1.0.0rc0", None), ], ) diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index ba8419a904b3a..1fc00f277a526 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -21,7 +21,7 @@ from torch.optim import Adam, SGD from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.demos.boring_classes import BoringModel, DemoModel +from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_13 @@ -457,15 +457,32 @@ def test_trainer_reference_recursively(): @RunIf(min_torch="1.14.0.dev20221202") def test_compile_uncompile(): - lit_model = DemoModel() + lit_model = BoringModel() model_compiled = torch.compile(lit_model) lit_model_compiled = LightningModule.from_compiled(model_compiled) + def has_dynamo(fn): + return any(el for el in dir(fn) if el.startswith("_torchdynamo")) + assert isinstance(lit_model_compiled, LightningModule) assert lit_model_compiled._compiler_ctx is not None + assert has_dynamo(lit_model_compiled.forward) + assert 
has_dynamo(lit_model_compiled.training_step) + assert has_dynamo(lit_model_compiled.validation_step) + assert has_dynamo(lit_model_compiled.test_step) + assert has_dynamo(lit_model_compiled.predict_step) lit_model_orig = LightningModule.to_uncompiled(lit_model) assert lit_model_orig._compiler_ctx is None assert lit_model_orig.forward == lit_model.forward + assert lit_model_orig.training_step == lit_model.training_step + assert lit_model_orig.validation_step == lit_model.validation_step + assert lit_model_orig.test_step == lit_model.test_step + assert lit_model_orig.predict_step == lit_model.predict_step + assert not has_dynamo(lit_model_orig.forward) + assert not has_dynamo(lit_model_orig.training_step) + assert not has_dynamo(lit_model_orig.validation_step) + assert not has_dynamo(lit_model_orig.test_step) + assert not has_dynamo(lit_model_orig.predict_step) diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py index 066172af11de3..74ea2ac11a701 100644 --- a/tests/tests_pytorch/trainer/test_trainer.py +++ b/tests/tests_pytorch/trainer/test_trainer.py @@ -45,7 +45,6 @@ from pytorch_lightning.demos.boring_classes import ( BoringDataModule, BoringModel, - DemoModel, RandomDataset, RandomIterableDataset, RandomIterableDatasetWithLen, @@ -2244,7 +2243,7 @@ def on_fit_start(self): # TODO: replace with 1.14 when it is released @RunIf(min_torch="1.14.0.dev20221202") def test_trainer_compiled_model(): - model = DemoModel() + model = BoringModel() model = torch.compile(model)