From e5d59012712bf0c9a895aabdf59b212b22757864 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 16 Dec 2022 01:03:33 +0900 Subject: [PATCH] releasing 1.8.5 (#16051) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update chlog * CI: Add remote fetch (#16001) Co-authored-by: thomas (cherry picked from commit 37fe3f6ce9fa26fb3c49f1b23b2f457c597dbf82) * Set the logger explicitly in tests (#15815) (cherry picked from commit 9ed43c64b6a6d45e2225f28b1537c0eaf1436921) * [App] Fix `AutoScaler` trying to replicate multiple works in a single machine (#15991) * dont try to replicate new works in the existing machine * update chglog * Update comment * Update src/lightning_app/components/auto_scaler.py * add test (cherry picked from commit c1d0156e1db09581c23414a904eace5c23253199) * Fix typo in PR titles generated by github-actions bot (#16003) (cherry picked from commit 2dcebc213c5e980f305c2c8032c993370bf76bd7) * Update docker requirement from <=5.0.3,>=5.0.0 to >=5.0.0,<6.0.2 in /requirements (#16007) Update docker requirement in /requirements Updates the requirements on [docker](https://github.com/docker/docker-py) to permit the latest version. - [Release notes](https://github.com/docker/docker-py/releases) - [Commits](https://github.com/docker/docker-py/compare/5.0.0...6.0.1) --- updated-dependencies: - dependency-name: docker dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 4083b202f47362aacb4fea72b03aff48158eee4f) * Update deepdiff requirement from <=5.8.1,>=5.7.0 to >=5.7.0,<6.2.3 in /requirements (#16006) Update deepdiff requirement in /requirements Updates the requirements on [deepdiff](https://github.com/seperman/deepdiff) to permit the latest version. 
- [Release notes](https://github.com/seperman/deepdiff/releases) - [Changelog](https://github.com/seperman/deepdiff/blob/master/docs/changelog.rst) - [Commits](https://github.com/seperman/deepdiff/compare/5.7.0...6.2.2) --- updated-dependencies: - dependency-name: deepdiff dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 5e705fa0df9ede1d362e96f8f2364d5cfbe82170) * app: update doctest_skip (#15997) simple Co-authored-by: hhsecond (cherry picked from commit 4fea6bf43e522e8a2b527aee82d046b688a6bc45) * CI: clean install & share pkg build (#15986) * abstract pkg build * share ci * syntax * Checkgroup * folders * whl 1st * doctest Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Carlos Mocholí (cherry picked from commit 18a463808cceb8346e7e411972425718dc7f9b83) * Adding hint to the logger's error messages (#16034) Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Carlos Mocholí Fixes https://github.com/Lightning-AI/lightning/issues/15143 (cherry picked from commit 7ce3825f6dc5fea57ccd7d2cffe1ccd0f870cd7f) * fix publish * Introduce `{Work,Flow}.lightningignore` (#15818) (cherry picked from commit edd2b4259a3074be0ab14ac5e808d796a1d5f3f9) * [App] Support running on multiple clusters (#16016) (cherry picked from commit d3a722608e49d7ff7a4ddda35c7430dfa2c4acce) * [App] Improve lightning connect experience (#16035) (cherry picked from commit e522a12d177fe5d3a733b67c44d2a558a18aa116) * Cleanup cluster waiting (#16054) (cherry picked from commit 6458a5abc49621e5e67b2621bc478bcd63ac37d5) * feature(cli): login flow fixes and improvements (#16052) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit ebe784881d6ffcef83a36933bbed6f474951163d) * Add guards to cluster 
deletion from cli (#16053) Adds guards to cluster deletion. - If cluster has running apps -> throw an error - If cluster has stopped apps -> confirm w/ user that apps and logs will be deleted (cherry picked from commit 64d0ebbd9b6251a50770bb9ca64bd366b45f8f31) * Load app before setting LIGHTNING_DISPATCHED (#16057) (cherry picked from commit 8d3339a0e99ed91871e0c4f7214aca1146e95a89) * [App] Hot fix: Resolve detection of python debugger (#16068) Co-authored-by: thomas Co-authored-by: Carlos Mocholí (cherry picked from commit eae56ee47b48fdae060232d60c549265aea90e6f) * fix(cloud): detect and ignore venv (#16056) Co-authored-by: Ethan Harris (cherry picked from commit 3b323c842d1692ef43a10c22b4ec6ebd690d070a) * version 1.8.5 * update chlog Co-authored-by: thomas chaton Co-authored-by: Carlos Mocholí Co-authored-by: Akihiro Nitta Co-authored-by: Adrian Wälchli Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Huy Đỗ <56794124+Al3xDo@users.noreply.github.com> Co-authored-by: Ethan Harris Co-authored-by: Luca Furst Co-authored-by: Yurij Mikhalevich Co-authored-by: Luca Antiga --- .github/actions/pkg-check/action.yml | 1 - .github/actions/pkg-install/action.yml | 21 +- .github/actions/pkg-publish/action.yml | 17 +- .github/checkgroup.yml | 3 + .github/workflows/_build-packages.yml | 65 ++++ .github/workflows/ci-app-examples.yml | 8 +- .github/workflows/ci-app-tests.yml | 9 +- .github/workflows/ci-lite-tests.yml | 4 +- .github/workflows/ci-pkg-install.yml | 59 +++- .github/workflows/ci-pytorch-tests.yml | 5 +- .github/workflows/docs-checks.yml | 4 +- .github/workflows/legacy-checkpoints.yml | 2 +- .github/workflows/release-pypi.yml | 73 ++--- .../run_app_on_cloud/cloud_files.rst | 13 +- examples/app_installation_commands/app.py | 13 +- requirements/app/base.txt | 2 +- requirements/app/cloud.txt | 4 +- requirements/pytorch/test.txt | 6 +- src/lightning/__version__.py | 2 +- src/lightning_app/CHANGELOG.md | 22 ++ 
src/lightning_app/__version__.py | 2 +- src/lightning_app/cli/cmd_clusters.py | 87 ++++- src/lightning_app/cli/commands/connection.py | 220 +++++++------ src/lightning_app/cli/lightning_cli.py | 4 +- src/lightning_app/components/auto_scaler.py | 11 +- .../components/database/server.py | 6 +- .../components/multi_node/trainer.py | 3 + src/lightning_app/components/python/popen.py | 3 +- src/lightning_app/components/python/tracer.py | 2 +- .../components/serve/python_server.py | 9 +- src/lightning_app/core/app.py | 2 +- src/lightning_app/core/flow.py | 26 +- src/lightning_app/core/work.py | 25 +- src/lightning_app/runners/cloud.py | 276 +++++++++------- src/lightning_app/runners/multiprocess.py | 2 +- src/lightning_app/runners/runtime.py | 5 +- src/lightning_app/source_code/copytree.py | 6 +- src/lightning_app/source_code/local.py | 9 +- src/lightning_app/structures/dict.py | 2 +- src/lightning_app/structures/list.py | 2 +- src/lightning_app/utilities/app_helpers.py | 27 +- src/lightning_app/utilities/cli_helpers.py | 4 + src/lightning_app/utilities/commands/base.py | 12 +- src/lightning_app/utilities/frontend.py | 1 + src/lightning_app/utilities/login.py | 45 ++- .../utilities/packaging/app_config.py | 5 +- .../utilities/packaging/build_config.py | 2 +- src/lightning_lite/CHANGELOG.md | 6 + src/lightning_lite/__version__.py | 2 +- src/pytorch_lightning/CHANGELOG.md | 6 + src/pytorch_lightning/__version__.py | 2 +- .../core/mixins/hparams_mixin.py | 4 + .../logger_connector/fx_validator.py | 5 +- src/pytorch_lightning/utilities/parsing.py | 2 +- tests/tests_app/cli/test_cloud_cli.py | 7 +- tests/tests_app/cli/test_cmd_clusters.py | 90 +++++- tests/tests_app/cli/test_connect.py | 58 +--- .../tests_app/components/test_auto_scaler.py | 10 +- tests/tests_app/runners/test_cloud.py | 304 ++++++++++++++---- .../public/test_commands_and_api.py | 2 +- .../progress/test_rich_progress_bar.py | 4 +- .../progress/test_tqdm_progress_bar.py | 2 + 
.../callbacks/test_device_stats_monitor.py | 4 +- .../callbacks/test_lr_monitor.py | 33 +- .../callbacks/test_stochastic_weight_avg.py | 3 +- .../checkpointing/test_model_checkpoint.py | 20 +- tests/tests_pytorch/loggers/test_logger.py | 10 +- .../tests_pytorch/loggers/test_tensorboard.py | 2 +- tests/tests_pytorch/models/test_grad_norm.py | 9 +- tests/tests_pytorch/models/test_hparams.py | 10 +- .../tests_pytorch/profilers/test_profiler.py | 21 +- .../strategies/test_deepspeed_strategy.py | 3 + .../trainer/flags/test_fast_dev_run.py | 3 +- .../logging_/test_eval_loop_logging.py | 3 +- .../logging_/test_train_loop_logging.py | 3 + .../tests_pytorch/trainer/test_dataloaders.py | 7 +- tests/tests_pytorch/trainer/test_trainer.py | 1 + 77 files changed, 1238 insertions(+), 529 deletions(-) create mode 100644 .github/workflows/_build-packages.yml diff --git a/.github/actions/pkg-check/action.yml b/.github/actions/pkg-check/action.yml index 6680f945d589d..941429d30c3da 100644 --- a/.github/actions/pkg-check/action.yml +++ b/.github/actions/pkg-check/action.yml @@ -5,7 +5,6 @@ inputs: pkg-name: description: package name inside lightning.* required: true - default: "" nb-dirs: description: nb of packages in the wrap/distribution required: false diff --git a/.github/actions/pkg-install/action.yml b/.github/actions/pkg-install/action.yml index 0e0751c217a95..3698dd82c11df 100644 --- a/.github/actions/pkg-install/action.yml +++ b/.github/actions/pkg-install/action.yml @@ -2,6 +2,9 @@ name: Install and validate the package description: Install and validate the package inputs: + pkg-folder: + description: Define folder with packages + required: true pkg-name: description: Package name to import required: true @@ -14,22 +17,26 @@ runs: using: "composite" steps: - name: Choose package import + working-directory: ${{ inputs.pkg-folder }} run: | - python -c "print('PKG_IMPORT=' + {'app': 'lightning_app', 'lite': 'lightning_lite', 'pytorch': 'pytorch_lightning', 'lightning': 
'lightning'}['${{matrix.pkg-name}}'])" >> $GITHUB_ENV + ls -l + python -c "print('PKG_IMPORT=' + {'app': 'lightning_app', 'lite': 'lightning_lite', 'pytorch': 'pytorch_lightning'}.get('${{matrix.pkg-name}}', 'lightning'))" >> $GITHUB_ENV + python -c "import glob ; ls = glob.glob('*.tar.gz') ; print('PKG_SOURCE=' + ls[0])" >> $GITHUB_ENV + python -c "import glob ; ls = glob.glob('*.whl') ; print('PKG_WHEEL=' + ls[0])" >> $GITHUB_ENV shell: bash - - name: Install package - archive - working-directory: ./dist + - name: Install package - wheel + working-directory: ${{ inputs.pkg-folder }} run: | - pip install *.tar.gz ${{ inputs.pip-flags }} + pip install ${PKG_WHEEL} ${{ inputs.pip-flags }} pip list | grep lightning python -c "import ${{ env.PKG_IMPORT }}; print(${{ env.PKG_IMPORT }}.__version__)" shell: bash - - name: Install package - wheel - working-directory: ./dist + - name: Install package - archive + working-directory: ${{ inputs.pkg-folder }} run: | - pip install *.whl ${{ inputs.pip-flags }} + pip install ${PKG_SOURCE} ${{ inputs.pip-flags }} pip list | grep lightning python -c "import ${{ env.PKG_IMPORT }}; print(${{ env.PKG_IMPORT }}.__version__)" shell: bash diff --git a/.github/actions/pkg-publish/action.yml b/.github/actions/pkg-publish/action.yml index beef2d34f7db9..5e02dc70388ec 100644 --- a/.github/actions/pkg-publish/action.yml +++ b/.github/actions/pkg-publish/action.yml @@ -2,8 +2,8 @@ name: Publish package description: publishing whl and src to PyPI inputs: - pkg-pattern: - description: what file pattern is searched in folder, so for example `*_app*` + pkg-folder: + description: define folder with packages required: true pypi-test-token: description: login token for PyPI @@ -18,10 +18,7 @@ runs: using: "composite" steps: - - name: filter packages - run: | - mv dist/${{ inputs.pkg-pattern }} pypi/ - ls -l pypi/ + - run: ls -lh ${{ inputs.pkg-folder }} shell: bash # We do this, since failures on test.pypi aren't that bad @@ -32,7 +29,7 @@ runs: 
user: __token__ password: ${{ inputs.pypi-test-token }} repository_url: https://test.pypi.org/legacy/ - packages_dir: pypi/ + packages_dir: ${{ inputs.pkg-folder }} verbose: true - name: Publish distribution 📦 to PyPI @@ -41,9 +38,5 @@ runs: with: user: __token__ password: ${{ inputs.pypi-token }} - packages_dir: pypi/ + packages_dir: ${{ inputs.pkg-folder }} verbose: true - - - name: filter packages - run: rm pypi/* - shell: bash diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 5ba130d45ac1e..eeb707267184e 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -312,6 +312,9 @@ subprojects: - id: "install" paths: - ".actions/**" + - ".github/actions/pkg-check/*" + - ".github/actions/pkg-install/*" + - ".github/workflows/_build-packages.yml" - ".github/workflows/ci-pkg-install.yml" - "setup.py" - "src/**" diff --git a/.github/workflows/_build-packages.yml b/.github/workflows/_build-packages.yml new file mode 100644 index 0000000000000..f2fe8239821fb --- /dev/null +++ b/.github/workflows/_build-packages.yml @@ -0,0 +1,65 @@ +name: Building packages + +on: + workflow_call: + inputs: + artifact-name: + description: 'Unique name for collecting artifacts' + required: true + type: string + pkg-names: + description: 'list package names to be build in json format' + required: false + type: string + default: | + ["lightning", "app", "lite", "pytorch"] + +defaults: + run: + shell: bash + +jobs: + + init: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - run: | + mkdir dist && touch dist/.placeholder + - uses: actions/upload-artifact@v3 + with: + name: ${{ inputs.artifact-name }} + path: dist + + + build-packages: + needs: init + runs-on: ubuntu-20.04 + strategy: + max-parallel: 1 # run sequential to prevent download/upload collisions + matrix: + pkg-name: ${{ fromJSON(inputs.pkg-names) }} + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: ${{ inputs.artifact-name }} + path: pypi + - 
uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - run: python -c "print('NB_DIRS=' + str(2 if '${{ matrix.pkg-name }}' == 'pytorch' else 1))" >> $GITHUB_ENV + - uses: ./.github/actions/pkg-check + with: + pkg-name: ${{ matrix.pkg-name }} + nb-dirs: ${{ env.NB_DIRS }} + + - run: | + mkdir pypi/${{ matrix.pkg-name }} + cp dist/* pypi/${{ matrix.pkg-name }}/ + + - uses: actions/upload-artifact@v3 + with: + name: ${{ inputs.artifact-name }} + path: pypi diff --git a/.github/workflows/ci-app-examples.yml b/.github/workflows/ci-app-examples.yml index b1a79ea50d9bc..2046cd940aafb 100644 --- a/.github/workflows/ci-app-examples.yml +++ b/.github/workflows/ci-app-examples.yml @@ -93,12 +93,16 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app" --target_import="lightning.app" + run: | + python .actions/assistant.py copy_replace_imports --source_dir="./tests" \ + --source_import="lightning_app" --target_import="lightning.app" - name: Adjust examples if: ${{ matrix.pkg-name != 'lightning' }} run: | - python .actions/assistant.py copy_replace_imports --source_dir="./examples" --source_import="lightning.app,lightning" --target_import="lightning_app,lightning_app" + python .actions/assistant.py copy_replace_imports --source_dir="./examples" \ + --source_import="lightning.app,lightning" \ + --target_import="lightning_app,lightning_app" - name: Switch coverage scope run: python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_app'))" >> $GITHUB_ENV diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index f53b3fa9803a3..1a082f69c0a1d 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -99,12 +99,17 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py 
copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.lite,lightning.pytorch" + run: | + python .actions/assistant.py copy_replace_imports --source_dir="./tests" \ + --source_import="lightning_app,lightning_lite,pytorch_lightning" \ + --target_import="lightning.app,lightning.lite,lightning.pytorch" - name: Adjust examples if: ${{ matrix.pkg-name != 'lightning' }} run: | - python .actions/assistant.py copy_replace_imports --source_dir="./examples" --source_import="lightning.app,lightning" --target_import="lightning_app,lightning_app" + python .actions/assistant.py copy_replace_imports --source_dir="./examples" \ + --source_import="lightning.app,lightning" \ + --target_import="lightning_app,lightning_app" - name: Switch coverage scope run: python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_app'))" >> $GITHUB_ENV diff --git a/.github/workflows/ci-lite-tests.yml b/.github/workflows/ci-lite-tests.yml index fb134b301b6e6..1db82fe8ba52c 100644 --- a/.github/workflows/ci-lite-tests.yml +++ b/.github/workflows/ci-lite-tests.yml @@ -114,7 +114,9 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_lite" --target_import="lightning.lite" + run: | + python .actions/assistant.py copy_replace_imports --source_dir="./tests" \ + --source_import="lightning_lite" --target_import="lightning.lite" - name: Testing Warnings # the stacklevel can only be set on >=3.7 diff --git a/.github/workflows/ci-pkg-install.yml b/.github/workflows/ci-pkg-install.yml index d9474edb98f8f..ca3a6d695a91e 100644 --- a/.github/workflows/ci-pkg-install.yml +++ b/.github/workflows/ci-pkg-install.yml @@ -9,6 +9,9 @@ on: types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - 
".actions/**" + - ".github/actions/pkg-check/*" + - ".github/actions/pkg-install/*" + - ".github/workflows/_build-packages.yml" - ".github/workflows/ci-pkg-install.yml" - "setup.py" - "src/**" @@ -28,7 +31,13 @@ defaults: jobs: + build-packages: + uses: ./.github/workflows/_build-packages.yml + with: + artifact-name: dist-packages-${{ github.sha }} + install-pkg: + needs: build-packages runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -36,35 +45,51 @@ jobs: os: [ubuntu-22.04, macOS-12, windows-2022] pkg-name: ["app", "lite", "pytorch", "lightning"] python-version: ["3.7" , "3.10"] + # TODO: add also install from source steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - - name: DocTests actions - working-directory: .actions/ - run: | - pip install -q pytest - python -m pytest setup_tools.py - - - run: python -c "print('NB_DIRS=' + str(2 if '${{ matrix.pkg-name }}' == 'pytorch' else 1))" >> $GITHUB_ENV - - - uses: ./.github/actions/pkg-check + - uses: actions/download-artifact@v3 with: - pkg-name: ${{ matrix.pkg-name }} - nb-dirs: ${{ env.NB_DIRS }} + name: dist-packages-${{ github.sha }} + path: dist + - run: | + python -c "print('PKG_DIR=' + {'notset': 'lightning'}.get('${{matrix.pkg-name}}', '${{matrix.pkg-name}}'))" >> $GITHUB_ENV - uses: ./.github/actions/pkg-install with: + pkg-folder: dist/${{ env.PKG_DIR }} pkg-name: ${{ matrix.pkg-name }} - - name: Run CLI - # todo: add testing for `lightning_app` - if: ${{ matrix.pkg-name == 'lightning' }} + - name: Run CLI (via python) + if: ${{ matrix.pkg-name == 'lightning' || matrix.pkg-name == 'notset' }} run: python -m lightning --version + - name: Run CLI (direct bash) + if: ${{ matrix.pkg-name == 'lightning' || matrix.pkg-name == 'app' }} + run: lightning --version + - name: Adjust code for Lit + if: ${{ matrix.pkg-name == 'lightning' || matrix.pkg-name == 'notset' }} + run: | + pip install -q -r .actions/requirements.txt + python 
.actions/assistant.py copy_replace_imports --source_dir="./src" \ + --source_import="pytorch_lightning,lightning_lite,lightning_app" \ + --target_import="lightning.pytorch,lightning.lite,lightning.app" + rm -rf src/lightning + - name: Rename src folders + working-directory: src/ + run: | + mv pytorch_lightning pl + mv lightning_lite lit_lite + mv lightning_app lit_app + + - name: DocTests actions + working-directory: .actions/ + run: | + pip install -q pytest + python -m pytest setup_tools.py - name: DocTest package env: LIGHTING_TESTING: 1 # path for require wrapper @@ -72,5 +97,5 @@ jobs: run: | pip install -q "pytest-doctestplus>=0.9.0" pip list - PKG_NAME=$(python -c "print({'app': 'lightning_app', 'lite': 'lightning_lite', 'pytorch': 'pytorch_lightning', 'lightning': 'lightning'}['${{matrix.pkg-name}}'])") + PKG_NAME=$(python -c "print({'app': 'lit_app', 'lite': 'lit_lite', 'pytorch': 'pl'}.get('${{matrix.pkg-name}}', ''))") python -m pytest src/${PKG_NAME} --ignore-glob="**/cli/*-template/**" diff --git a/.github/workflows/ci-pytorch-tests.yml b/.github/workflows/ci-pytorch-tests.yml index b4e51ccddf5a0..34ef2b0834949 100644 --- a/.github/workflows/ci-pytorch-tests.yml +++ b/.github/workflows/ci-pytorch-tests.yml @@ -169,7 +169,10 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="pytorch_lightning,lightning_lite" --target_import="lightning.pytorch,lightning.lite" + run: | + python .actions/assistant.py copy_replace_imports --source_dir="./tests" \ + --source_import="pytorch_lightning,lightning_lite" \ + --target_import="lightning.pytorch,lightning.lite" - name: Testing Warnings # the stacklevel can only be set on >=3.7 diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index e51b76991397e..302e9836df55e 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -83,7 +83,9 @@ 
jobs: if: ${{ matrix.pkg-name == 'app' }} run: | pip install -q -r .actions/requirements.txt - python .actions/assistant.py copy_replace_imports --source_dir="./docs" --source_import="pytorch_lightning,lightning_lite" --target_import="lightning.pytorch,lightning.lite" + python .actions/assistant.py copy_replace_imports --source_dir="./docs" \ + --source_import="pytorch_lightning,lightning_lite" \ + --target_import="lightning.pytorch,lightning.lite" - name: Install this package env: diff --git a/.github/workflows/legacy-checkpoints.yml b/.github/workflows/legacy-checkpoints.yml index b27ec472c8791..1a7b7396986d9 100644 --- a/.github/workflows/legacy-checkpoints.yml +++ b/.github/workflows/legacy-checkpoints.yml @@ -132,7 +132,7 @@ jobs: - name: Create Pull Request uses: peter-evans/create-pull-request@v4 with: - title: Adding test for legacy checkpiont created with ${{ needs.create-legacy-ckpts.outputs.pl-version }} + title: Adding test for legacy checkpoint created with ${{ needs.create-legacy-ckpts.outputs.pl-version }} delete-branch: true labels: | tests diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 090eae5553aa3..41b84d6a343a0 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -13,47 +13,10 @@ defaults: jobs: - init: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v3 - - run: | - mkdir dist && touch dist/.placeholder - - uses: actions/upload-artifact@v3 - with: - name: dist-packages-${{ github.sha }} - path: dist - - build-packages: - needs: init - runs-on: ubuntu-20.04 - strategy: - fail-fast: true - max-parallel: 1 # run sequential to prevent download/upload collisions - matrix: - pkg-name: ["lightning", "app", "lite", "pytorch"] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: dist-packages-${{ github.sha }} - path: dist - - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install dependencies - 
run: pip install -U setuptools wheel - - name: Build packages - env: - PACKAGE_NAME: ${{ matrix.pkg-name }} - run: | - python setup.py sdist bdist_wheel - ls -lh dist/ - - uses: actions/upload-artifact@v3 - with: - name: dist-packages-${{ github.sha }} - path: dist + uses: ./.github/workflows/_build-packages.yml + with: + artifact-name: dist-packages-${{ github.sha }} upload-packages: @@ -70,7 +33,7 @@ jobs: - name: Upload to release uses: AButler/upload-release-assets@v2.0 with: - files: 'dist/*' + files: 'dist/*/*' repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -148,6 +111,8 @@ jobs: if branch in remote_refs: break time.sleep(60) + for remote in repo.remotes: + remote.fetch() shell: python @@ -161,23 +126,24 @@ jobs: with: name: dist-packages-${{ github.sha }} path: dist - - run: ls -lh dist/ - - run: mkdir pypi/ + - run: | + sudo apt install -q -y tree + tree -L 2 -h dist/ - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*app*" + pkg-folder: dist/app pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_APP }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*lite*" + pkg-folder: dist/lite pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_LITE }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*pytorch*" + pkg-folder: dist/pytorch pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_PYTORCH }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*" + pkg-folder: dist/lightning pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_LAI }} @@ -191,23 +157,24 @@ jobs: with: name: dist-packages-${{ github.sha }} path: dist - - run: ls -lh dist/ - - run: mkdir pypi/ + - run: | + sudo apt install -q -y tree + tree -L 2 -h dist/ - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*app*" + pkg-folder: dist/app pypi-token: ${{ secrets.PYPI_TOKEN_APP }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*lite*" + pkg-folder: dist/lite pypi-token: ${{ secrets.PYPI_TOKEN_LITE }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*pytorch*" + 
pkg-folder: dist/pytorch pypi-token: ${{ secrets.PYPI_TOKEN_PYTORCH }} - uses: ./.github/actions/pkg-publish with: - pkg-pattern: "*" + pkg-folder: dist/lightning pypi-token: ${{ secrets.PYPI_TOKEN_LAI }} diff --git a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst index 3130cd0f336b3..dfef0dc1c13aa 100644 --- a/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst +++ b/docs/source-app/workflows/run_app_on_cloud/cloud_files.rst @@ -30,7 +30,6 @@ For example, the source code directory below with the ``.lightningignore`` file ├── requirements.txt └── model.pt - .. code:: bash ~/project/home ❯ cat .lightningignore @@ -39,6 +38,18 @@ For example, the source code directory below with the ``.lightningignore`` file A sample ``.lightningignore`` file can be found `here `_. +If you are a component author and your components creates local files that you want to ignore, you can do: + +.. code-block:: python + + class MyComponent(L.LightningWork): # or L.LightningFlow + def __init__(self): + super().__init__() + self.lightningignore = ("model.pt", "data_dir") + + +This has the benefit that the files will be ignored automatically for all the component users, making an easier +transition between running locally vs in the cloud. 
---- diff --git a/examples/app_installation_commands/app.py b/examples/app_installation_commands/app.py index 087d84b1335b2..f69df99ad9e82 100644 --- a/examples/app_installation_commands/app.py +++ b/examples/app_installation_commands/app.py @@ -13,9 +13,14 @@ def run(self): print("lmdb successfully installed") print("accessing a module in a Work or Flow body works!") - @property - def ready(self) -> bool: - return True + +class RootFlow(L.LightningFlow): + def __init__(self, work): + super().__init__() + self.work = work + + def run(self): + self.work.run() print(f"accessing an object in main code body works!: version={lmdb.version()}") @@ -24,4 +29,4 @@ def ready(self) -> bool: # run on a cloud machine compute = L.CloudCompute("cpu") worker = YourComponent(cloud_compute=compute) -app = L.LightningApp(worker) +app = L.LightningApp(RootFlow(worker)) diff --git a/requirements/app/base.txt b/requirements/app/base.txt index 37e91689bb54d..ac5f97dd6ba48 100644 --- a/requirements/app/base.txt +++ b/requirements/app/base.txt @@ -1,7 +1,7 @@ lightning-cloud>=0.5.12 packaging typing-extensions>=4.0.0, <=4.4.0 -deepdiff>=5.7.0, <=5.8.1 +deepdiff>=5.7.0, <6.2.3 starsessions>=1.2.1, <2.0 # strict fsspec>=2022.5.0, <=2022.7.1 croniter>=1.3.0, <1.4.0 # strict; TODO: for now until we find something more robust. 
diff --git a/requirements/app/cloud.txt b/requirements/app/cloud.txt index f2fd258c4185a..14b4f30d7db5f 100644 --- a/requirements/app/cloud.txt +++ b/requirements/app/cloud.txt @@ -1,6 +1,6 @@ # WARNING: this file is not used directly by the backend # any dependency here needs to be shipped with the base image redis>=4.0.1, <=4.2.4 -docker>=5.0.0, <=5.0.3 -# setuptools==59.5.0 +docker>=5.0.0, <6.0.2 s3fs>=2022.5.0, <2022.8.3 +# setuptools==59.5.0 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index acb3c4b970e6e..485e6e6fd0393 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -12,8 +12,8 @@ scikit-learn>0.22.1, <1.1.3 onnxruntime<1.14.0 psutil<5.9.4 # for `DeviceStatsMonitor` pandas>1.0, <1.5.2 # needed in benchmarks -fastapi<0.87.0 -uvicorn<0.19.1 +fastapi<0.87.0 # for `ServableModuleValidator` +uvicorn<0.19.1 # for `ServableModuleValidator` -tensorboard>=2.9.1, <2.12.0 +tensorboard>=2.9.1, <2.12.0 # for `TensorBoardLogger` protobuf<=3.20.1 # strict # an extra is updating protobuf, this pin prevents TensorBoard failure diff --git a/src/lightning/__version__.py b/src/lightning/__version__.py index 5e8e81fd482ee..cb9f51643e523 100644 --- a/src/lightning/__version__.py +++ b/src/lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4.post0" +version = "1.8.5" diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index b61d45b79f3e2..1b38b2d35546d 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -4,6 +4,28 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
+ +## [1.8.5] - 2022-12-15 + +### Added + +- Added `Lightning{Flow,Work}.lightningignores` attributes to programmatically ignore files before uploading to the cloud ([#15818](https://github.com/Lightning-AI/lightning/pull/15818)) +- Added a progress bar while connecting to an app through the CLI ([#16035](https://github.com/Lightning-AI/lightning/pull/16035)) +- Support running on multiple clusters ([#16016](https://github.com/Lightning-AI/lightning/pull/16016)) +- Added guards to cluster deletion from cli ([#16053](https://github.com/Lightning-AI/lightning/pull/16053)) + +### Changed + +- Cleanup cluster waiting ([#16054](https://github.com/Lightning-AI/lightning/pull/16054)) + +### Fixed + +- Fixed `DDPStrategy` import in app framework ([#16029](https://github.com/Lightning-AI/lightning/pull/16029)) +- Fixed `AutoScaler` raising an exception when non-default cloud compute is specified ([#15991](https://github.com/Lightning-AI/lightning/pull/15991)) +- Fixed and improved the login flow ([#16052](https://github.com/Lightning-AI/lightning/pull/16052)) +- Fixed the debugger detection mechanism for lightning App in VSCode ([#16068](https://github.com/Lightning-AI/lightning/pull/16068)) + + ## [1.8.4] - 2022-12-08 ### Added diff --git a/src/lightning_app/__version__.py b/src/lightning_app/__version__.py index 5e8e81fd482ee..cb9f51643e523 100644 --- a/src/lightning_app/__version__.py +++ b/src/lightning_app/__version__.py @@ -1 +1 @@ -version = "1.8.4.post0" +version = "1.8.5" diff --git a/src/lightning_app/cli/cmd_clusters.py b/src/lightning_app/cli/cmd_clusters.py index ac491609052f8..b7e62f52a1846 100644 --- a/src/lightning_app/cli/cmd_clusters.py +++ b/src/lightning_app/cli/cmd_clusters.py @@ -9,6 +9,7 @@ import lightning_cloud from lightning_cloud.openapi import ( Externalv1Cluster, + Externalv1LightningappInstance, V1AWSClusterDriverSpec, V1ClusterDriver, V1ClusterPerformanceProfile, @@ -18,6 +19,9 @@ V1CreateClusterRequest, V1GetClusterResponse, 
V1KubernetesClusterDriver, + V1LightningappInstanceState, + V1ListLightningappInstancesResponse, + V1Membership, ) from lightning_utilities.core.enums import StrEnum from rich.console import Console @@ -25,6 +29,7 @@ from rich.text import Text from lightning_app.cli.core import Formatable +from lightning_app.utilities.cloud import _get_project from lightning_app.utilities.network import LightningClient from lightning_app.utilities.openapi import create_openapi_object, string2dict @@ -198,6 +203,33 @@ def delete(self, cluster_id: str, force: bool = False, do_async: bool = False) - ) click.confirm("Do you want to continue?", abort=True) + else: + apps = _list_apps(self.api_client, cluster_id=cluster_id, phase_in=[V1LightningappInstanceState.RUNNING]) + if apps: + raise click.ClickException( + dedent( + """\ + To delete the cluster, you must first delete the apps running on it. + Use the following commands to delete the apps, then delete the cluster again: + + """ + ) + + "\n".join([f"\tlightning delete app {app.name} --cluster-id {cluster_id}" for app in apps]) + ) + + if _list_apps(self.api_client, cluster_id=cluster_id, phase_not_in=[V1LightningappInstanceState.DELETED]): + click.echo( + dedent( + """\ + This cluster has stopped apps. + Deleting this cluster will delete those apps and their logs. + + App artifacts aren't deleted and will still be available in the S3 bucket for the cluster. + """ + ) + ) + click.confirm("Are you sure you want to continue?", abort=True) + resp: V1GetClusterResponse = self.api_client.cluster_service_get_cluster(id=cluster_id) bucket_name = resp.spec.driver.kubernetes.aws.bucket_name @@ -226,12 +258,34 @@ def delete(self, cluster_id: str, force: bool = False, do_async: bool = False) - click.echo(background_message) +def _list_apps( + api_client: LightningClient, + **filters: Any, +) -> List[Externalv1LightningappInstance]: + """_list_apps is a thin wrapper around lightningapp_instance_service_list_lightningapp_instances. 
+ + Args: + api_client (LightningClient): Used for listing app instances + **filters: keyword arguments passed to the list method + + Returns: + List[Externalv1LightningappInstance]: List of apps matching the filters + """ + project: V1Membership = _get_project(api_client) + resp: V1ListLightningappInstancesResponse = api_client.lightningapp_instance_service_list_lightningapp_instances( + project.project_id, + **filters, + ) + + return resp.lightningapps + + def _wait_for_cluster_state( api_client: LightningClient, cluster_id: str, target_state: V1ClusterState, timeout_seconds: int = MAX_CLUSTER_WAIT_TIME, - poll_duration_seconds: int = 10, + poll_duration_seconds: int = 60, ) -> None: """_wait_for_cluster_state waits until the provided cluster has reached a desired state, or failed. @@ -307,21 +361,24 @@ def _cluster_status_long(cluster: V1GetClusterResponse, desired_state: V1Cluster duration = _format_elapsed_seconds(elapsed) if current_state == V1ClusterState.FAILED: - return dedent( - f"""\ - The requested cluster operation for cluster {cluster_id} has errors: - {current_reason} + if not _is_retryable_error(current_reason): + return dedent( + f"""\ + The requested cluster operation for cluster {cluster_id} has errors: - --- - We are automatically retrying, and an automated alert has been created + {current_reason} - WARNING: Any non-deleted cluster may be using resources. - To avoid incuring cost on your cloud provider, delete the cluster using the following command: - lightning delete cluster {cluster_id} + -------------------------------------------------------------- - Contact support@lightning.ai for additional help - """ - ) + We are automatically retrying, and an automated alert has been created + + WARNING: Any non-deleted cluster may be using resources. 
+ To avoid incurring cost on your cloud provider, delete the cluster using the following command: + lightning delete cluster {cluster_id} + + Contact support@lightning.ai for additional help + """ + ) if desired_state == current_state == V1ClusterState.RUNNING: return dedent( @@ -352,6 +409,10 @@ def _cluster_status_long(cluster: V1GetClusterResponse, desired_state: V1Cluster raise click.ClickException(f"Unknown cluster desired state {desired_state}") +def _is_retryable_error(error_message: str) -> bool: + return "resources failed to delete" in error_message + + def _format_elapsed_seconds(seconds: Union[float, int]) -> str: """Turns seconds into a duration string. diff --git a/src/lightning_app/cli/commands/connection.py b/src/lightning_app/cli/commands/connection.py index ee0bf7edc5d67..f5600947ab427 100644 --- a/src/lightning_app/cli/commands/connection.py +++ b/src/lightning_app/cli/commands/connection.py @@ -8,6 +8,7 @@ import click import psutil from lightning_utilities.core.imports import package_available +from rich.progress import Progress from lightning_app.utilities.cli_helpers import _LightningAppOpenAPIRetriever from lightning_app.utilities.cloud import _get_project @@ -16,15 +17,33 @@ from lightning_app.utilities.network import LightningClient _HOME = os.path.expanduser("~") -_PPID = str(psutil.Process(os.getpid()).ppid()) +_PPID = os.getenv("LIGHTNING_CONNECT_PPID", str(psutil.Process(os.getpid()).ppid())) _LIGHTNING_CONNECTION = os.path.join(_HOME, ".lightning", "lightning_connection") _LIGHTNING_CONNECTION_FOLDER = os.path.join(_LIGHTNING_CONNECTION, _PPID) @click.argument("app_name_or_id", required=True) -@click.option("-y", "--yes", required=False, is_flag=True, help="Whether to download the commands automatically.") -def connect(app_name_or_id: str, yes: bool = False): - """Connect to a Lightning App.""" +def connect(app_name_or_id: str): + """Connect your local terminal to a running lightning app.
+ + After connecting, the lightning CLI will respond to commands exposed by the app. + + Example: + + \b + # connect to an app named pizza-cooker-123 + lightning connect pizza-cooker-123 + \b + # this will now show the commands exposed by pizza-cooker-123 + lightning --help + \b + # while connected, you can run the cook-pizza command exposed + # by pizza-cooker-123.BTW, this should arguably generate an exception :-) + lightning cook-pizza --flavor pineapple + \b + # once done, disconnect and go back to the standard lightning CLI commands + lightning disconnect + """ from lightning_app.utilities.commands.base import _download_command _clean_lightning_connection() @@ -47,51 +66,64 @@ def connect(app_name_or_id: str, yes: bool = False): click.echo(f"You are already connected to the cloud Lightning App: {app_name_or_id}.") else: disconnect() - connect(app_name_or_id, yes) + connect(app_name_or_id) elif app_name_or_id.startswith("localhost"): - if app_name_or_id != "localhost": - raise Exception("You need to pass localhost to connect to the local Lightning App.") + with Progress() as progress_bar: + connecting = progress_bar.add_task("[magenta]Setting things up for you...", total=1.0) - retriever = _LightningAppOpenAPIRetriever(None) + if app_name_or_id != "localhost": + raise Exception("You need to pass localhost to connect to the local Lightning App.") - if retriever.api_commands is None: - raise Exception(f"The commands weren't found. Is your app {app_name_or_id} running ?") + retriever = _LightningAppOpenAPIRetriever(None) - commands_folder = os.path.join(_LIGHTNING_CONNECTION_FOLDER, "commands") - if not os.path.exists(commands_folder): - os.makedirs(commands_folder) + if retriever.api_commands is None: + raise Exception(f"Connection wasn't successful. 
Is your app {app_name_or_id} running?") - _write_commands_metadata(retriever.api_commands) + increment = 1 / (1 + len(retriever.api_commands)) - with open(os.path.join(commands_folder, "openapi.json"), "w") as f: - json.dump(retriever.openapi, f) + progress_bar.update(connecting, advance=increment) - _install_missing_requirements(retriever, yes) + commands_folder = os.path.join(_LIGHTNING_CONNECTION_FOLDER, "commands") + if not os.path.exists(commands_folder): + os.makedirs(commands_folder) - for command_name, metadata in retriever.api_commands.items(): - if "cls_path" in metadata: - target_file = os.path.join(commands_folder, f"{command_name.replace(' ','_')}.py") - _download_command( - command_name, - metadata["cls_path"], - metadata["cls_name"], - None, - target_file=target_file, - ) - repr_command_name = command_name.replace("_", " ") - click.echo(f"Storing `{repr_command_name}` at {target_file}") - else: - with open(os.path.join(commands_folder, f"{command_name}.txt"), "w") as f: - f.write(command_name) + _write_commands_metadata(retriever.api_commands) + + with open(os.path.join(commands_folder, "openapi.json"), "w") as f: + json.dump(retriever.openapi, f) - click.echo(f"You can review all the downloaded commands at {commands_folder}") + _install_missing_requirements(retriever) + + for command_name, metadata in retriever.api_commands.items(): + if "cls_path" in metadata: + target_file = os.path.join(commands_folder, f"{command_name.replace(' ','_')}.py") + _download_command( + command_name, + metadata["cls_path"], + metadata["cls_name"], + None, + target_file=target_file, + ) + else: + with open(os.path.join(commands_folder, f"{command_name}.txt"), "w") as f: + f.write(command_name) + + progress_bar.update(connecting, advance=increment) with open(connected_file, "w") as f: f.write(app_name_or_id + "\n") - click.echo("You are connected to the local Lightning App.") + click.echo("The lightning CLI now responds to app commands. 
Use 'lightning --help' to see them.") + click.echo(" ") + + Popen( + f"LIGHTNING_CONNECT_PPID={_PPID} {sys.executable} -m lightning --help", + shell=True, + stdout=sys.stdout, + stderr=sys.stderr, + ).wait() elif matched_connection_path: @@ -101,40 +133,39 @@ def connect(app_name_or_id: str, yes: bool = False): commands = os.path.join(_LIGHTNING_CONNECTION_FOLDER, "commands") shutil.copytree(matched_commands, commands) shutil.copy(matched_connected_file, connected_file) - copied_files = [el for el in os.listdir(commands) if os.path.splitext(el)[1] == ".py"] - click.echo("Found existing connection, reusing cached commands") - for target_file in copied_files: - pretty_command_name = os.path.splitext(target_file)[0].replace("_", " ") - click.echo(f"Storing `{pretty_command_name}` at {os.path.join(commands, target_file)}") - click.echo(f"You can review all the commands at {commands}") + click.echo("The lightning CLI now responds to app commands. Use 'lightning --help' to see them.") click.echo(" ") - click.echo(f"You are connected to the cloud Lightning App: {app_name_or_id}.") - else: + Popen( + f"LIGHTNING_CONNECT_PPID={_PPID} {sys.executable} -m lightning --help", + shell=True, + stdout=sys.stdout, + stderr=sys.stderr, + ).wait() - retriever = _LightningAppOpenAPIRetriever(app_name_or_id) + else: + with Progress() as progress_bar: + connecting = progress_bar.add_task("[magenta]Setting things up for you...", total=1.0) + + retriever = _LightningAppOpenAPIRetriever(app_name_or_id) + + if not retriever.api_commands: + client = LightningClient() + project = _get_project(client) + apps = client.lightningapp_instance_service_list_lightningapp_instances(project_id=project.project_id) + click.echo( + "We didn't find a matching App. Here are the available Apps that you can " + f"connect to {[app.name for app in apps.lightningapps]}."
+ ) + return - if not retriever.api_commands: - client = LightningClient() - project = _get_project(client) - apps = client.lightningapp_instance_service_list_lightningapp_instances(project_id=project.project_id) - click.echo( - "We didn't find a matching App. Here are the available Apps that could be " - f"connected to {[app.name for app in apps.lightningapps]}." - ) - return + increment = 1 / (1 + len(retriever.api_commands)) - _install_missing_requirements(retriever, yes) + progress_bar.update(connecting, advance=increment) - if not yes: - yes = click.confirm( - f"The Lightning App `{app_name_or_id}` provides a command-line (CLI). " - "Do you want to proceed and install its CLI ?" - ) - click.echo(" ") + _install_missing_requirements(retriever) - if yes: commands_folder = os.path.join(_LIGHTNING_CONNECTION_FOLDER, "commands") if not os.path.exists(commands_folder): os.makedirs(commands_folder) @@ -151,26 +182,25 @@ def connect(app_name_or_id: str, yes: bool = False): retriever.app_id, target_file=target_file, ) - pretty_command_name = command_name.replace("_", " ") - click.echo(f"Storing `{pretty_command_name}` at {target_file}") else: with open(os.path.join(commands_folder, f"{command_name}.txt"), "w") as f: f.write(command_name) - click.echo(f"You can review all the downloaded commands at {commands_folder}") - - click.echo(" ") - click.echo("The client interface has been successfully installed. ") - click.echo("You can now run the following commands:") - for command in retriever.api_commands: - pretty_command_name = command.replace("_", " ") - click.echo(f" lightning {pretty_command_name}") + progress_bar.update(connecting, advance=increment) with open(connected_file, "w") as f: f.write(retriever.app_name + "\n") f.write(retriever.app_id + "\n") + + click.echo("The lightning CLI now responds to app commands. 
Use 'lightning --help' to see them.") click.echo(" ") - click.echo(f"You are connected to the cloud Lightning App: {app_name_or_id}.") + + Popen( + f"LIGHTNING_CONNECT_PPID={_PPID} {sys.executable} -m lightning --help", + shell=True, + stdout=sys.stdout, + stderr=sys.stderr, + ).wait() def disconnect(logout: bool = False): @@ -244,22 +274,37 @@ def _list_app_commands(echo: bool = True) -> List[str]: click.echo("The current Lightning App doesn't have commands.") return [] + app_info = metadata[command_names[0]].get("app_info", None) + + title, description, on_connect_end = "Lightning", None, None + if app_info: + title = app_info.get("title") + description = app_info.get("description") + on_connect_end = app_info.get("on_connect_end") + if echo: - click.echo("Usage: lightning [OPTIONS] COMMAND [ARGS]...") - click.echo("") - click.echo(" --help Show this message and exit.") + click.echo(f"{title} App") + if description: + click.echo("") + click.echo("Description:") + if description.endswith("\n"): + description = description[:-2] + click.echo(f" {description}") click.echo("") - click.echo("Lightning App Commands") + click.echo("Commands:") max_length = max(len(n) for n in command_names) for command_name in command_names: padding = (max_length + 1 - len(command_name)) * " " click.echo(f" {command_name}{padding}{metadata[command_name].get('description', '')}") + if "LIGHTNING_CONNECT_PPID" in os.environ and on_connect_end: + if on_connect_end.endswith("\n"): + on_connect_end = on_connect_end[:-2] + click.echo(on_connect_end) return command_names def _install_missing_requirements( retriever: _LightningAppOpenAPIRetriever, - yes_global: bool = False, fail_if_missing: bool = False, ): requirements = set() @@ -281,20 +326,15 @@ def _install_missing_requirements( sys.exit(0) for req in missing_requirements: - if not yes_global: - yes = click.confirm( - f"The Lightning App CLI `{retriever.app_id}` requires `{req}`. Do you want to install it ?" 
- ) - else: - print(f"Installing missing `{req}` requirement.") - yes = yes_global - if yes: - std_out_out = get_logfile("output.log") - with open(std_out_out, "wb") as stdout: - Popen( - f"{sys.executable} -m pip install {req}", shell=True, stdout=stdout, stderr=sys.stderr - ).wait() - print() + std_out_out = get_logfile("output.log") + with open(std_out_out, "wb") as stdout: + Popen( + f"{sys.executable} -m pip install {req}", + shell=True, + stdout=stdout, + stderr=stdout, + ).wait() + os.remove(std_out_out) def _clean_lightning_connection(): diff --git a/src/lightning_app/cli/lightning_cli.py b/src/lightning_app/cli/lightning_cli.py index 4696745ada95f..98df731415920 100644 --- a/src/lightning_app/cli/lightning_cli.py +++ b/src/lightning_app/cli/lightning_cli.py @@ -76,8 +76,6 @@ def main() -> None: else: message = f"You are connected to the cloud Lightning App: {app_name}." - click.echo(" ") - if (len(sys.argv) > 1 and sys.argv[1] in ["-h", "--help"]) or len(sys.argv) == 1: _list_app_commands() else: @@ -206,7 +204,7 @@ def login() -> None: auth.clear() try: - auth._run_server() + auth.authenticate() except ConnectionError: click.echo(f"Unable to connect to {get_lightning_cloud_url()}. 
Please check your internet connection.") exit(1) diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index fc6a1a873769b..13948ba50af89 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -449,8 +449,15 @@ def workers(self) -> List[LightningWork]: def create_work(self) -> LightningWork: """Replicates a LightningWork instance with args and kwargs provided via ``__init__``.""" - # TODO: Remove `start_with_flow=False` for faster initialization on the cloud - self._work_kwargs.update(dict(start_with_flow=False)) + cloud_compute = self._work_kwargs.get("cloud_compute", None) + self._work_kwargs.update( + dict( + # TODO: Remove `start_with_flow=False` for faster initialization on the cloud + start_with_flow=False, + # don't try to create multiple works in a single machine + cloud_compute=cloud_compute.clone() if cloud_compute else None, + ) + ) return self._work_cls(*self._work_args, **self._work_kwargs) def add_work(self, work) -> str: diff --git a/src/lightning_app/components/database/server.py b/src/lightning_app/components/database/server.py index 6d187e4cda133..0a93bf94f985b 100644 --- a/src/lightning_app/components/database/server.py +++ b/src/lightning_app/components/database/server.py @@ -14,6 +14,7 @@ from lightning_app.components.database.utilities import _create_database, _Delete, _Insert, _SelectAll, _Update from lightning_app.core.work import LightningWork from lightning_app.storage import Drive +from lightning_app.utilities.app_helpers import Logger from lightning_app.utilities.imports import _is_sqlmodel_available from lightning_app.utilities.packaging.build_config import BuildConfig @@ -23,6 +24,9 @@ SQLModel = object +logger = Logger(__name__) + + # Required to avoid Uvicorn Server overriding Lightning App signal handlers. 
# Discussions: https://github.com/encode/uvicorn/discussions/1708 class _DatabaseUvicornServer(uvicorn.Server): @@ -167,7 +171,7 @@ def store_database(self): drive = Drive("lit://database", component_name=self.name, root_folder=tmpdir) drive.put(os.path.basename(tmp_db_filename)) - print("Stored the database to the Drive.") + logger.debug("Stored the database to the Drive.") except Exception: print(traceback.print_exc()) diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 76d744e24608c..e3f738abad329 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -114,3 +114,6 @@ def __init__( cloud_compute=cloud_compute, **work_kwargs, ) + + # the Trainer enables TensorBoard by default, so this is often an undesired directory to upload to the cloud + self.lightningignore += ("lightning_logs",) diff --git a/src/lightning_app/components/python/popen.py b/src/lightning_app/components/python/popen.py index 868ba45d26b7d..bb7980566cdc1 100644 --- a/src/lightning_app/components/python/popen.py +++ b/src/lightning_app/components/python/popen.py @@ -41,8 +41,7 @@ def __init__( Raises: FileNotFoundError: If the provided `script_path` doesn't exists. - .. doctest:: - + Example: >>> from lightning_app.components.python import PopenPythonScript >>> f = open("a.py", "w") diff --git a/src/lightning_app/components/python/tracer.py b/src/lightning_app/components/python/tracer.py index d10ca92252ed8..af1eb678d613e 100644 --- a/src/lightning_app/components/python/tracer.py +++ b/src/lightning_app/components/python/tracer.py @@ -70,7 +70,7 @@ def __init__( This method takes any python globals before executing the script, e.g., you can modify classes or function from the script. - .. 
doctest:: + Example: >>> from lightning_app.components.python import TracerPythonScript >>> f = open("a.py", "w") diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py index e447673993973..40b7e83a3bdca 100644 --- a/src/lightning_app/components/serve/python_server.py +++ b/src/lightning_app/components/serve/python_server.py @@ -7,7 +7,7 @@ import uvicorn from fastapi import FastAPI -from lightning_utilities.core.imports import compare_version +from lightning_utilities.core.imports import compare_version, module_available from pydantic import BaseModel from lightning_app.core.work import LightningWork @@ -16,12 +16,9 @@ logger = Logger(__name__) -__doctest_skip__ = ["PythonServer", "PythonServer.*"] - - # Skip doctests if requirements aren't available -if not _is_torch_available(): - __doctest_skip__ += ["PythonServer", "PythonServer.*"] +if not module_available("lightning_api_access") or not _is_torch_available(): + __doctest_skip__ = ["PythonServer", "PythonServer.*"] def _get_device(): diff --git a/src/lightning_app/core/app.py b/src/lightning_app/core/app.py index 47055c70f7f4b..9c3aeeb650de0 100644 --- a/src/lightning_app/core/app.py +++ b/src/lightning_app/core/app.py @@ -87,7 +87,7 @@ def __init__( You can learn more about proxy `here `_. - .. 
doctest:: + Example: >>> from lightning_app import LightningFlow, LightningApp >>> from lightning_app.runners import MultiProcessRuntime diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index 5a82400066f05..302ba344320d1 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -10,7 +10,13 @@ from lightning_app.frontend import Frontend from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive -from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden +from lightning_app.utilities.app_helpers import ( + _is_json_serializable, + _lightning_dispatched, + _LightningAppRef, + _set_child_name, + is_overridden, +) from lightning_app.utilities.component import _sanitize_state from lightning_app.utilities.exceptions import ExitAppException from lightning_app.utilities.introspection import _is_init_context, _is_run_context @@ -77,7 +83,7 @@ def __init__(self): can be distributed (each LightningWork will be run within its own process or different arrangements). - .. doctest:: + Example: >>> from lightning_app import LightningFlow >>> class RootFlow(LightningFlow): @@ -104,6 +110,8 @@ def __init__(self): self._layout: Union[List[Dict], Dict] = {} self._paths = {} self._backend: Optional[Backend] = None + # tuple instead of a list so that it cannot be modified without using the setter + self._lightningignore: Tuple[str, ...] 
= tuple() @property def name(self): @@ -310,6 +318,20 @@ def flows(self) -> Dict[str, "LightningFlow"]: flows.update(getattr(self, struct_name).flows) return flows + @property + def lightningignore(self) -> Tuple[str, ...]: + """Programmatic equivalent of the ``.lightningignore`` file.""" + return self._lightningignore + + @lightningignore.setter + def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: + if _lightning_dispatched(): + raise RuntimeError( + f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" + " effect" + ) + self._lightningignore = lightningignore + def works(self, recurse: bool = True) -> List[LightningWork]: """Return its :class:`~lightning_app.core.work.LightningWork`.""" works = [getattr(self, el) for el in sorted(self._works)] diff --git a/src/lightning_app/core/work.py b/src/lightning_app/core/work.py index 60d1ea62d8afb..43ffc0006d5ea 100644 --- a/src/lightning_app/core/work.py +++ b/src/lightning_app/core/work.py @@ -3,7 +3,7 @@ import warnings from copy import deepcopy from functools import partial, wraps -from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TYPE_CHECKING, Union from deepdiff import DeepHash, Delta @@ -11,7 +11,12 @@ from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive from lightning_app.storage.payload import Payload -from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, is_overridden +from lightning_app.utilities.app_helpers import ( + _is_json_serializable, + _lightning_dispatched, + _LightningAppRef, + is_overridden, +) from lightning_app.utilities.component import _is_flow_context, _sanitize_state from lightning_app.utilities.enum import ( CacheCallsKeys, @@ -154,6 +159,8 @@ def __init__( self._local_build_config = local_build_config or BuildConfig() 
self._cloud_build_config = cloud_build_config or BuildConfig() self._cloud_compute = cloud_compute or CloudCompute() + # tuple instead of a list so that it cannot be modified without using the setter + self._lightningignore: Tuple[str, ...] = tuple() self._backend: Optional[Backend] = None self._check_run_is_implemented() self._on_init_end() @@ -253,6 +260,20 @@ def cloud_compute(self, cloud_compute: CloudCompute) -> None: compute_store.remove(self.name) self._cloud_compute = cloud_compute + @property + def lightningignore(self) -> Tuple[str, ...]: + """Programmatic equivalent of the ``.lightningignore`` file.""" + return self._lightningignore + + @lightningignore.setter + def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: + if _lightning_dispatched(): + raise RuntimeError( + f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" + " effect" + ) + self._lightningignore = lightningignore + @property def status(self) -> WorkStatus: """Return the current status of the work. 
diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 6ef7770124aae..8a7170ee0244a 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -1,12 +1,13 @@ import fnmatch import json import random +import re import string import sys import time from dataclasses import dataclass +from functools import partial from pathlib import Path -from textwrap import dedent from typing import Any, List, Optional, Union import click @@ -19,6 +20,7 @@ Externalv1LightningappInstance, Gridv1ImageSpec, V1BuildSpec, + V1ClusterType, V1DependencyFileInfo, V1Drive, V1DriveSpec, @@ -62,6 +64,7 @@ from lightning_app.runners.backends.cloud import CloudBackend from lightning_app.runners.runtime import Runtime from lightning_app.source_code import LocalSourceCodeDir +from lightning_app.source_code.copytree import _filter_ignored, _parse_lightningignore from lightning_app.storage import Drive, Mount from lightning_app.utilities.app_helpers import _is_headless, Logger from lightning_app.utilities.cloud import _get_project @@ -210,14 +213,32 @@ def dispatch( # Determine the root of the project: Start at the entrypoint_file and look for nearby Lightning config files, # going up the directory structure. The root of the project is where the Lightning config file is located. 
- # TODO: verify lightning version - # _verify_lightning_version() config_file = _get_config_file(self.entrypoint_file) app_config = AppConfig.load_from_file(config_file) if config_file.exists() else AppConfig() root = Path(self.entrypoint_file).absolute().parent cleanup_handle = _prepare_lightning_wheels_and_requirements(root) self.app._update_index_file() - repo = LocalSourceCodeDir(path=root) + + # gather and merge all lightningignores + children = self.app.flows + self.app.works + lightningignores = [c.lightningignore for c in children] + if lightningignores: + merged = sum(lightningignores, tuple()) + logger.debug(f"Found the following lightningignores: {merged}") + patterns = _parse_lightningignore(merged) + ignore_functions = [partial(_filter_ignored, root, patterns)] + else: + ignore_functions = None + + # Create a default dotignore if it doesn't exist + if not (root / DOT_IGNORE_FILENAME).is_file(): + with open(root / DOT_IGNORE_FILENAME, "w") as f: + f.write("venv/\n") + if (root / "bin" / "activate").is_file() or (root / "pyvenv.cfg").is_file(): + # the user is developing inside venv + f.write("bin/\ninclude/\nlib/\npyvenv.cfg\n") + + repo = LocalSourceCodeDir(path=root, ignore_functions=ignore_functions) self._check_uploaded_folder(root, repo) requirements_file = root / "requirements.txt" # The entry point file needs to be relative to the root of the uploaded source file directory, @@ -228,10 +249,6 @@ def dispatch( # Override the name if provided by the CLI app_config.name = name - if cluster_id: - # Override the cluster ID if provided by the CLI - app_config.cluster_id = cluster_id - print(f"The name of the app is: {app_config.name}") v1_env_vars = [V1EnvVar(name=k, value=v) for k, v in self.env_vars.items()] @@ -293,17 +310,92 @@ def dispatch( project = _get_project(self.backend.client) try: - list_apps_resp = self.backend.client.lightningapp_v2_service_list_lightningapps_v2( - project_id=project.project_id, name=app_config.name + if cluster_id is 
not None: + # Verify that the cluster exists + list_clusters_resp = self.backend.client.cluster_service_list_clusters() + cluster_ids = [cluster.id for cluster in list_clusters_resp.clusters] + if cluster_id not in cluster_ids: + raise ValueError(f"You requested to run on cluster {cluster_id}, but that cluster doesn't exist.") + + self._ensure_cluster_project_binding(project.project_id, cluster_id) + + # Resolve the app name, instance, and cluster ID + existing_instance = None + app_name = app_config.name + + # List existing instances + # TODO: Add pagination, otherwise this could break if users have a lot of apps. + find_instances_resp = self.backend.client.lightningapp_instance_service_list_lightningapp_instances( + project_id=project.project_id ) - if list_apps_resp.lightningapps: - # There can be only one app with unique project_id<>name pair - lit_app = list_apps_resp.lightningapps[0] - else: - app_body = Body7(name=app_config.name, can_download_source_code=True) + + # Search for instances with the given name (possibly with some random characters appended) + pattern = re.escape(f"{app_name}-") + ".{4}" + instances = [ + lightningapp + for lightningapp in find_instances_resp.lightningapps + if lightningapp.name == app_name or (re.fullmatch(pattern, lightningapp.name) is not None) + ] + + # If instances exist and cluster is None, mimic cluster selection logic to choose a default + if cluster_id is None and len(instances) > 0: + # Determine the cluster ID + cluster_id = self._get_default_cluster(project.project_id) + + # If an instance exists on the cluster with the same base name - restart it + for instance in instances: + if instance.spec.cluster_id == cluster_id: + existing_instance = instance + break + + # If instances exist but not on the cluster - choose a randomised name + if len(instances) > 0 and existing_instance is None: + name_exists = True + while name_exists: + random_name = self._randomise_name(app_name) + name_exists = any([instance.name ==
random_name for instance in instances]) + + app_name = random_name + + # Create the app if it doesn't exist + if existing_instance is None: + app_body = Body7(name=app_name, can_download_source_code=True) lit_app = self.backend.client.lightningapp_v2_service_create_lightningapp_v2( project_id=project.project_id, body=app_body ) + app_id = lit_app.id + else: + app_id = existing_instance.spec.app_id + + # check if user has sufficient credits to run an app + # if so set the desired state to running otherwise, create the app in stopped state, + # and open the admin ui to add credits and running the app. + has_sufficient_credits = self._project_has_sufficient_credits(project, app=self.app) + app_release_desired_state = ( + V1LightningappInstanceState.RUNNING if has_sufficient_credits else V1LightningappInstanceState.STOPPED + ) + if not has_sufficient_credits: + logger.warn("You may need Lightning credits to run your apps on the cloud.") + + # Stop the instance if it isn't stopped yet + if existing_instance and existing_instance.status.phase != V1LightningappInstanceState.STOPPED: + # TODO(yurij): Implement release switching in the UI and remove this + # We can only switch release of the stopped instance + existing_instance = self.backend.client.lightningapp_instance_service_update_lightningapp_instance( + project_id=project.project_id, + id=existing_instance.id, + body=Body3(spec=V1LightningappInstanceSpec(desired_state=V1LightningappInstanceState.STOPPED)), + ) + # wait for the instance to stop for up to 150 seconds + for _ in range(150): + existing_instance = self.backend.client.lightningapp_instance_service_get_lightningapp_instance( + project_id=project.project_id, id=existing_instance.id + ) + if existing_instance.status.phase == V1LightningappInstanceState.STOPPED: + break + time.sleep(1) + if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: + raise RuntimeError("Failed to stop the existing instance.") network_configs: 
Optional[List[V1NetworkConfig]] = None if enable_multiple_works_in_default_container(): @@ -318,90 +410,18 @@ def dispatch( ) initial_port += 1 - # check if user has sufficient credits to run an app - # if so set the desired state to running otherwise, create the app in stopped state, - # and open the admin ui to add credits and running the app. - has_sufficient_credits = self._project_has_sufficient_credits(project, app=self.app) - app_release_desired_state = ( - V1LightningappInstanceState.RUNNING if has_sufficient_credits else V1LightningappInstanceState.STOPPED - ) - if not has_sufficient_credits: - logger.warn("You may need Lightning credits to run your apps on the cloud.") - - # right now we only allow a single instance of the app - find_instances_resp = self.backend.client.lightningapp_instance_service_list_lightningapp_instances( - project_id=project.project_id, app_id=lit_app.id - ) - queue_server_type = V1QueueServerType.UNSPECIFIED if CLOUD_QUEUE_TYPE == "http": queue_server_type = V1QueueServerType.HTTP elif CLOUD_QUEUE_TYPE == "redis": queue_server_type = V1QueueServerType.REDIS - existing_instance: Optional[Externalv1LightningappInstance] = None - if find_instances_resp.lightningapps: - existing_instance = find_instances_resp.lightningapps[0] - - if not app_config.cluster_id: - # Re-run the app on the same cluster - app_config.cluster_id = existing_instance.spec.cluster_id - - if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: - # TODO(yurij): Implement release switching in the UI and remove this - # We can only switch release of the stopped instance - existing_instance = self.backend.client.lightningapp_instance_service_update_lightningapp_instance( - project_id=project.project_id, - id=existing_instance.id, - body=Body3(spec=V1LightningappInstanceSpec(desired_state=V1LightningappInstanceState.STOPPED)), - ) - # wait for the instance to stop for up to 150 seconds - for _ in range(150): - existing_instance = 
self.backend.client.lightningapp_instance_service_get_lightningapp_instance( - project_id=project.project_id, id=existing_instance.id - ) - if existing_instance.status.phase == V1LightningappInstanceState.STOPPED: - break - time.sleep(1) - if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: - raise RuntimeError("Failed to stop the existing instance.") - - if app_config.cluster_id is not None: - # Verify that the cluster exists - list_clusters_resp = self.backend.client.cluster_service_list_clusters() - cluster_ids = [cluster.id for cluster in list_clusters_resp.clusters] - if app_config.cluster_id not in cluster_ids: - if cluster_id: - msg = f"You requested to run on cluster {cluster_id}, but that cluster doesn't exist." - else: - msg = ( - f"Your app last ran on cluster {app_config.cluster_id}, but that cluster " - "doesn't exist anymore." - ) - raise ValueError(msg) - if existing_instance and existing_instance.spec.cluster_id != app_config.cluster_id: - raise ValueError( - dedent( - f"""\ - An app names {app_config.name} is already running on cluster {existing_instance.spec.cluster_id}, and you requested it to run on cluster {app_config.cluster_id}. - - In order to proceed, please either: - a. rename the app to run on {app_config.cluster_id} with the --name option - lightning run app {app_entrypoint_file} --name (new name) --cloud --cluster-id {app_config.cluster_id} - b. delete the app running on {existing_instance.spec.cluster_id} in the UI before running this command. 
- """ # noqa: E501 - ) - ) - - if app_config.cluster_id is not None: - self._ensure_cluster_project_binding(project.project_id, app_config.cluster_id) - release_body = Body8( app_entrypoint_file=app_spec.app_entrypoint_file, enable_app_server=app_spec.enable_app_server, flow_servers=app_spec.flow_servers, image_spec=app_spec.image_spec, - cluster_id=app_config.cluster_id, + cluster_id=cluster_id, network_config=network_configs, works=works, local_source=True, @@ -412,14 +432,13 @@ def dispatch( # create / upload the new app release lightning_app_release = self.backend.client.lightningapp_v2_service_create_lightningapp_release( - project_id=project.project_id, app_id=lit_app.id, body=release_body + project_id=project.project_id, app_id=app_id, body=release_body ) if lightning_app_release.source_upload_url == "": raise RuntimeError("The source upload url is empty.") if getattr(lightning_app_release, "cluster_id", None): - app_config.cluster_id = lightning_app_release.cluster_id logger.info(f"Running app on {lightning_app_release.cluster_id}") # Save the config for re-runs @@ -428,7 +447,7 @@ def dispatch( repo.package() repo.upload(url=lightning_app_release.source_upload_url) - if find_instances_resp.lightningapps: + if existing_instance is not None: lightning_app_instance = ( self.backend.client.lightningapp_instance_service_update_lightningapp_instance_release( project_id=project.project_id, @@ -452,12 +471,12 @@ def dispatch( lightning_app_instance = ( self.backend.client.lightningapp_v2_service_create_lightningapp_release_instance( project_id=project.project_id, - app_id=lit_app.id, + app_id=app_id, id=lightning_app_release.id, body=Body9( - cluster_id=app_config.cluster_id, + cluster_id=cluster_id, desired_state=app_release_desired_state, - name=lit_app.name, + name=app_name, env=v1_env_vars, queue_server_type=queue_server_type, ), @@ -490,30 +509,65 @@ def _ensure_cluster_project_binding(self, project_id: str, cluster_id: str): 
body=V1ProjectClusterBinding(cluster_id=cluster_id, project_id=project_id), ) + def _get_default_cluster(self, project_id: str) -> str: + """This utility implements a minimal version of the cluster selection logic used in the cloud. + + TODO: This should be requested directly from the platform. + """ + cluster_bindings = self.backend.client.projects_service_list_project_cluster_bindings( + project_id=project_id + ).clusters + + if not cluster_bindings: + raise ValueError(f"No clusters are bound to the project {project_id}.") + + if len(cluster_bindings) == 1: + return cluster_bindings[0].cluster_id + + clusters = [ + self.backend.client.cluster_service_get_cluster(cluster_binding.cluster_id) + for cluster_binding in cluster_bindings + ] + + # Filter global clusters + clusters = [cluster for cluster in clusters if cluster.spec.cluster_type == V1ClusterType.GLOBAL] + + return random.choice(clusters).id + + @staticmethod + def _randomise_name(app_name: str) -> str: + letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + return app_name + "-" + "".join(random.sample(letters, 4)) + @staticmethod def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None: """This method is used to inform the users if their folder files are large and how to filter them.""" - lightning_tar = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz")) - app_folder_size = sum(Path(p).stat().st_size for p in repo.files if p not in lightning_tar) - app_folder_size_in_mb = round(app_folder_size / (1000 * 1000), 5) + excludes = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz")) + excludes.update(fnmatch.filter(repo.files, ".lightningignore")) + files = [Path(f) for f in repo.files if f not in excludes] + file_sizes = {f: f.stat().st_size for f in files} + mb = 1000_000 + app_folder_size_in_mb = sum(file_sizes.values()) / mb if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING: - path_sizes = [(p, Path(p).stat().st_size / (1000 * 1000)) for p in repo.files] - 
largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25] - largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths) + # filter out files under 0.01mb + relevant_files = {f: sz for f, sz in file_sizes.items() if sz > 0.01 * mb} + if relevant_files: + by_largest = dict(sorted(relevant_files.items(), key=lambda x: x[1], reverse=True)) + by_largest = dict(list(by_largest.items())[:25]) # trim + largest_paths_msg = "\n".join( + f"{round(sz / mb, 5)} MB: {p.relative_to(root)}" for p, sz in by_largest.items() + ) + largest_paths_msg = f"Here are the largest files:\n{largest_paths_msg}\n" + else: + largest_paths_msg = "" warning_msg = ( f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. " - f"The total size is {app_folder_size_in_mb} MB\n" - f"Here are the largest files: \n{largest_paths_msg}\n" - "Perhaps you should try running the app in an empty directory." + f"The total size is {round(app_folder_size_in_mb, 2)} MB. {len(files)} files were uploaded.\n" + + largest_paths_msg + + "Perhaps you should try running the app in an empty directory.\n" + + "You can ignore some files or folders by adding them to `.lightningignore`.\n" + + " You can also set the `self.lightningingore` attribute in a Flow or Work." ) - if not (root / DOT_IGNORE_FILENAME).is_file(): - warning_msg = ( - warning_msg - + "\nIn order to ignore some files or folder, " - + "create a `.lightningignore` file and add the paths to ignore." - ) - else: - warning_msg += "\nYou can ignore some files or folders by adding them to `.lightningignore`." 
logger.warn(warning_msg) diff --git a/src/lightning_app/runners/multiprocess.py b/src/lightning_app/runners/multiprocess.py index 673e8601043d7..e5d34fb76800f 100644 --- a/src/lightning_app/runners/multiprocess.py +++ b/src/lightning_app/runners/multiprocess.py @@ -82,7 +82,7 @@ def dispatch(self, *args: Any, open_ui: bool = True, **kwargs: Any): if is_overridden("configure_commands", self.app.root): commands = _prepare_commands(self.app) - apis += _commands_to_api(commands) + apis += _commands_to_api(commands, info=self.app.info) kwargs = dict( apis=apis, diff --git a/src/lightning_app/runners/runtime.py b/src/lightning_app/runners/runtime.py index ab552f136fde0..a30b78f9178a0 100644 --- a/src/lightning_app/runners/runtime.py +++ b/src/lightning_app/runners/runtime.py @@ -55,9 +55,6 @@ def dispatch( from lightning_app.runners.runtime_type import RuntimeType from lightning_app.utilities.component import _set_flow_context - # Used to indicate Lightning has been dispatched - os.environ["LIGHTNING_DISPATCHED"] = "1" - _set_flow_context() runtime_type = RuntimeType(runtime_type) @@ -80,6 +77,8 @@ def dispatch( secrets=secrets, run_app_comment_commands=run_app_comment_commands, ) + # Used to indicate Lightning has been dispatched + os.environ["LIGHTNING_DISPATCHED"] = "1" # a cloud dispatcher will return the result while local # dispatchers will be running the app in the main process return runtime.dispatch(open_ui=open_ui, name=name, no_cache=no_cache, cluster_id=cluster_id) diff --git a/src/lightning_app/source_code/copytree.py b/src/lightning_app/source_code/copytree.py index 554537849f4cf..7435c332b50f6 100644 --- a/src/lightning_app/source_code/copytree.py +++ b/src/lightning_app/source_code/copytree.py @@ -3,18 +3,20 @@ from functools import partial from pathlib import Path from shutil import copy2, copystat, Error -from typing import Callable, List, Set, Union +from typing import Callable, List, Optional, Set, Union from lightning_app.core.constants import 
DOT_IGNORE_FILENAME from lightning_app.utilities.app_helpers import Logger logger = Logger(__name__) +_IGNORE_FUNCTION = Callable[[Path, List[Path]], List[Path]] + def _copytree( src: Union[Path, str], dst: Union[Path, str], - ignore_functions: List[Callable] = None, + ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None, dirs_exist_ok=False, dry_run=False, ) -> List[str]: diff --git a/src/lightning_app/source_code/local.py b/src/lightning_app/source_code/local.py index b461d3814e9db..79d655cefbc06 100644 --- a/src/lightning_app/source_code/local.py +++ b/src/lightning_app/source_code/local.py @@ -4,7 +4,7 @@ from shutil import rmtree from typing import List, Optional -from lightning_app.source_code.copytree import _copytree +from lightning_app.source_code.copytree import _copytree, _IGNORE_FUNCTION from lightning_app.source_code.hashing import _get_hash from lightning_app.source_code.tar import _tar_path from lightning_app.source_code.uploader import FileUploader @@ -15,8 +15,9 @@ class LocalSourceCodeDir: cache_location: Path = Path.home() / ".lightning" / "cache" / "repositories" - def __init__(self, path: Path): + def __init__(self, path: Path, ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None) -> None: self.path = path + self.ignore_functions = ignore_functions # cache checksum version self._version: Optional[str] = None @@ -33,7 +34,7 @@ def __init__(self, path: Path): def files(self) -> List[str]: """Returns a set of files that are not ignored by .lightningignore.""" if self._non_ignored_files is None: - self._non_ignored_files = _copytree(self.path, "", dry_run=True) + self._non_ignored_files = _copytree(self.path, "", ignore_functions=self.ignore_functions, dry_run=True) return self._non_ignored_files @property @@ -59,7 +60,7 @@ def packaging_session(self) -> Path: session_path = self.cache_location / "packaging_sessions" / self.version try: rmtree(session_path, ignore_errors=True) - _copytree(self.path, session_path) + _copytree(self.path, 
session_path, ignore_functions=self.ignore_functions) yield session_path finally: rmtree(session_path, ignore_errors=True) diff --git a/src/lightning_app/structures/dict.py b/src/lightning_app/structures/dict.py index 7bf102e19f180..0489bb7d0e986 100644 --- a/src/lightning_app/structures/dict.py +++ b/src/lightning_app/structures/dict.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs: T): :class:`~lightning_app.core.work.LightningWork` or :class:`~lightning_app.core.flow.LightningFlow`. - .. doctest:: + Example: >>> from lightning_app import LightningFlow, LightningWork >>> from lightning_app.structures import Dict diff --git a/src/lightning_app/structures/list.py b/src/lightning_app/structures/list.py index 9f110c69b1388..940973d212cd9 100644 --- a/src/lightning_app/structures/list.py +++ b/src/lightning_app/structures/list.py @@ -19,7 +19,7 @@ def __init__(self, *items: T): :class:`~lightning_app.core.work.LightningWork` or :class:`~lightning_app.core.flow.LightningFlow`. - .. doctest:: + Example: >>> from lightning_app import LightningFlow, LightningWork >>> from lightning_app.structures import List diff --git a/src/lightning_app/utilities/app_helpers.py b/src/lightning_app/utilities/app_helpers.py index 83b78e1929aa5..bc3d092b280dd 100644 --- a/src/lightning_app/utilities/app_helpers.py +++ b/src/lightning_app/utilities/app_helpers.py @@ -511,12 +511,33 @@ def is_static_method(klass_or_instance, attr) -> bool: return isinstance(inspect.getattr_static(klass_or_instance, attr), staticmethod) +def _lightning_dispatched() -> bool: + return bool(int(os.getenv("LIGHTNING_DISPATCHED", 0))) + + +def _using_debugger() -> bool: + """This method is used to detect whether the app is run with a debugger attached.""" + if "LIGHTNING_DETECTED_DEBUGGER" in os.environ: + return True + + # Collect the information about the process. 
+ parent_process = os.popen(f"ps -ax | grep -i {os.getpid()} | grep -v grep").read() + + # Detect whether VSCode or PyCharm debugger are used + use_debugger = "debugpy" in parent_process or "pydev" in parent_process + + # Store the result to avoid multiple popen calls. + if use_debugger: + os.environ["LIGHTNING_DETECTED_DEBUGGER"] = "1" + return use_debugger + + def _should_dispatch_app() -> bool: return ( - __debug__ - and "_pytest.doctest" not in sys.modules - and not bool(int(os.getenv("LIGHTNING_DISPATCHED", "0"))) + not _lightning_dispatched() and "LIGHTNING_APP_STATE_URL" not in os.environ + # Keep last to avoid running it if already dispatched + and _using_debugger() ) diff --git a/src/lightning_app/utilities/cli_helpers.py b/src/lightning_app/utilities/cli_helpers.py index caa414e163ffc..0ec6eabd3022c 100644 --- a/src/lightning_app/utilities/cli_helpers.py +++ b/src/lightning_app/utilities/cli_helpers.py @@ -69,6 +69,7 @@ def _get_metadata_from_openapi(paths: Dict, path: str): cls_name = paths[path]["post"].get("cls_name", None) description = paths[path]["post"].get("description", None) requirements = paths[path]["post"].get("requirements", None) + app_info = paths[path]["post"].get("app_info", None) metadata = {"tag": tag, "parameters": {}} @@ -84,6 +85,9 @@ def _get_metadata_from_openapi(paths: Dict, path: str): if description: metadata["requirements"] = requirements + if app_info: + metadata["app_info"] = app_info + if not parameters: return metadata diff --git a/src/lightning_app/utilities/commands/base.py b/src/lightning_app/utilities/commands/base.py index 53aefe5725194..947456d8e4e1c 100644 --- a/src/lightning_app/utilities/commands/base.py +++ b/src/lightning_app/utilities/commands/base.py @@ -5,6 +5,7 @@ import shutil import sys import traceback +from dataclasses import asdict from getpass import getuser from importlib.util import module_from_spec, spec_from_file_location from tempfile import gettempdir @@ -16,6 +17,7 @@ from 
lightning_app.api.http_methods import Post from lightning_app.api.request_types import _APIRequest, _CommandRequest, _RequestResponse +from lightning_app.utilities import frontend from lightning_app.utilities.app_helpers import is_overridden, Logger from lightning_app.utilities.cloud import _get_project from lightning_app.utilities.network import LightningClient @@ -250,7 +252,7 @@ def _process_requests(app, requests: List[Union[_APIRequest, _CommandRequest]]) app.api_response_queue.put(responses) -def _collect_open_api_extras(command) -> Dict: +def _collect_open_api_extras(command, info) -> Dict: if not isinstance(command, ClientCommand): if command.__doc__ is not None: return {"description": command.__doc__} @@ -263,10 +265,14 @@ def _collect_open_api_extras(command) -> Dict: } if command.requirements: extras.update({"requirements": command.requirements}) + if info: + extras.update({"app_info": asdict(info)}) return extras -def _commands_to_api(commands: List[Dict[str, Union[Callable, ClientCommand]]]) -> List: +def _commands_to_api( + commands: List[Dict[str, Union[Callable, ClientCommand]]], info: Optional[frontend.AppInfo] = None +) -> List: """Convert user commands to API endpoint.""" api = [] for command in commands: @@ -278,7 +284,7 @@ def _commands_to_api(commands: List[Dict[str, Union[Callable, ClientCommand]]]) v.method if isinstance(v, ClientCommand) else v, method_name=k, tags=["app_client_command"] if isinstance(v, ClientCommand) else ["app_command"], - openapi_extra=_collect_open_api_extras(v), + openapi_extra=_collect_open_api_extras(v, info), ) ) return api diff --git a/src/lightning_app/utilities/frontend.py b/src/lightning_app/utilities/frontend.py index 315c119935b6f..470036436a63c 100644 --- a/src/lightning_app/utilities/frontend.py +++ b/src/lightning_app/utilities/frontend.py @@ -12,6 +12,7 @@ class AppInfo: image: Optional[str] = None # ensure the meta tags are correct or the UI might fail to load. 
meta_tags: Optional[List[str]] = None + on_connect_end: Optional[str] = None def update_index_file(ui_root: str, info: Optional[AppInfo] = None, root_path: str = "") -> None: diff --git a/src/lightning_app/utilities/login.py b/src/lightning_app/utilities/login.py index 4539ef805eafa..ff04e3b865d41 100644 --- a/src/lightning_app/utilities/login.py +++ b/src/lightning_app/utilities/login.py @@ -4,6 +4,7 @@ import pathlib from dataclasses import dataclass from enum import Enum +from time import sleep from typing import Optional from urllib.parse import urlencode @@ -44,9 +45,11 @@ def __post_init__(self): setattr(self, key.suffix, os.environ.get(key.value, None)) self._with_env_var = bool(self.user_id and self.api_key) # used by authenticate method - if self.api_key and not self.user_id: + if self._with_env_var: + self.save("", self.user_id, self.api_key, self.user_id) + logger.info("Credentials loaded from environment variables") + elif self.api_key or self.user_id: raise ValueError( - f"{Keys.USER_ID.value} is missing from env variables. " "To use env vars for authentication both " f"{Keys.USER_ID.value} and {Keys.API_KEY.value} should be set." ) @@ -135,7 +138,8 @@ def authenticate(self) -> Optional[str]: class AuthServer: - def get_auth_url(self, port: int) -> str: + @staticmethod + def get_auth_url(port: int) -> str: redirect_uri = f"http://localhost:{port}/login-complete" params = urlencode(dict(redirectTo=redirect_uri)) return f"{get_lightning_cloud_url()}/sign-in?{params}" @@ -144,6 +148,7 @@ def login_with_browser(self, auth: Auth) -> None: app = FastAPI() port = find_free_network_port() url = self.get_auth_url(port) + try: # check if server is reachable or catch any network errors requests.head(url) @@ -156,32 +161,42 @@ def login_with_browser(self, auth: Auth) -> None: f"An error occurred with the request. 
Please report this issue to Lightning Team \n{e}" # E501 ) - logger.info(f"login started for lightning.ai, opening {url}") + logger.info( + "\nAttempting to automatically open the login page in your default browser.\n" + 'If the browser does not open, navigate to the "Keys" tab on your Lightning AI profile page:\n\n' + f"{get_lightning_cloud_url()}/me/keys\n\n" + 'Copy the "Headless CLI Login" command, and execute it in your terminal.\n' + ) click.launch(url) @app.get("/login-complete") async def save_token(request: Request, token="", key="", user_id: str = Query("", alias="userID")): - if token: - auth.save(token=token, username=user_id, user_id=user_id, api_key=key) - logger.info("Authentication Successful") - else: + async def stop_server_once_request_is_done(): + while not await request.is_disconnected(): + sleep(0.25) + server.should_exit = True + + if not token: logger.warn( - "Authentication Failed. This is most likely because you're using an older version of the CLI. \n" # noqa E501 + "Login Failed. This is most likely because you're using an older version of the CLI. 
\n" # noqa E501 "Please try to update the CLI or open an issue with this information \n" # E501 f"expected token in {request.query_params.items()}" ) + return RedirectResponse( + url=f"{get_lightning_cloud_url()}/cli-login-failed", + background=BackgroundTask(stop_server_once_request_is_done), + ) + + auth.save(token=token, username=user_id, user_id=user_id, api_key=key) + logger.info("Login Successful") # Include the credentials in the redirect so that UI will also be logged in params = urlencode(dict(token=token, key=key, userID=user_id)) return RedirectResponse( - url=f"{get_lightning_cloud_url()}/me/apps?{params}", - # The response background task is being executed right after the server finished writing the response - background=BackgroundTask(stop_server), + url=f"{get_lightning_cloud_url()}/cli-login-successful?{params}", + background=BackgroundTask(stop_server_once_request_is_done), ) - def stop_server(): - server.should_exit = True - server = uvicorn.Server(config=uvicorn.Config(app, port=port, log_level="error")) server.run() diff --git a/src/lightning_app/utilities/packaging/app_config.py b/src/lightning_app/utilities/packaging/app_config.py index c3e44159ffb4e..202ad91aec19f 100644 --- a/src/lightning_app/utilities/packaging/app_config.py +++ b/src/lightning_app/utilities/packaging/app_config.py @@ -1,6 +1,6 @@ import pathlib from dataclasses import asdict, dataclass, field -from typing import Optional, Union +from typing import Union import yaml @@ -18,7 +18,6 @@ class AppConfig: """ name: str = field(default_factory=get_unique_name) - cluster_id: Optional[str] = field(default=None) def save_to_file(self, path: Union[str, pathlib.Path]) -> None: """Save the configuration to the given file in YAML format.""" @@ -35,6 +34,8 @@ def load_from_file(cls, path: Union[str, pathlib.Path]) -> "AppConfig": """Load the configuration from the given file.""" with open(path) as file: config = yaml.safe_load(file) + # Ignore `cluster_id` without error for backwards 
compatibility. + config.pop("cluster_id", None) return cls(**config) @classmethod diff --git a/src/lightning_app/utilities/packaging/build_config.py b/src/lightning_app/utilities/packaging/build_config.py index 7c20853bec157..51a610d213529 100644 --- a/src/lightning_app/utilities/packaging/build_config.py +++ b/src/lightning_app/utilities/packaging/build_config.py @@ -91,7 +91,7 @@ def build_commands(self) -> List[str]: .. note:: If you provide your own dockerfile, this would be ignored. - .. doctest:: + Example: from dataclasses import dataclass from lightning_app import BuildConfig diff --git a/src/lightning_lite/CHANGELOG.md b/src/lightning_lite/CHANGELOG.md index cceb9b1d97ed5..baae664a95cb2 100644 --- a/src/lightning_lite/CHANGELOG.md +++ b/src/lightning_lite/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). + +## [1.8.5] - 2022-12-15 + +- minor cleaning + + ## [1.8.4] - 2022-12-08 - Fixed `shuffle=False` having no effect when using DDP/DistributedSampler ([#15931](https://github.com/Lightning-AI/lightning/issues/15931)) diff --git a/src/lightning_lite/__version__.py b/src/lightning_lite/__version__.py index 5e8e81fd482ee..cb9f51643e523 100644 --- a/src/lightning_lite/__version__.py +++ b/src/lightning_lite/__version__.py @@ -1 +1 @@ -version = "1.8.4.post0" +version = "1.8.5" diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index a8d91c1ae4a55..7d9559ba97ad9 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
+ +## [1.8.5] - 2022-12-15 + +- minor cleaning + + ## [1.8.4] - 2022-12-08 ### Changed diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index 5e8e81fd482ee..cb9f51643e523 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4.post0" +version = "1.8.5" diff --git a/src/pytorch_lightning/core/mixins/hparams_mixin.py b/src/pytorch_lightning/core/mixins/hparams_mixin.py index b5b4a9b312312..abbca184e8c35 100644 --- a/src/pytorch_lightning/core/mixins/hparams_mixin.py +++ b/src/pytorch_lightning/core/mixins/hparams_mixin.py @@ -47,6 +47,7 @@ class ``__init__`` to be ignored logger: Whether to send the hyperparameters to the logger. Default: True Example:: + >>> from pytorch_lightning.core.mixins import HyperparametersMixin >>> class ManuallyArgsModel(HyperparametersMixin): ... def __init__(self, arg1, arg2, arg3): ... super().__init__() @@ -59,6 +60,7 @@ class ``__init__`` to be ignored "arg1": 1 "arg3": 3.14 + >>> from pytorch_lightning.core.mixins import HyperparametersMixin >>> class AutomaticArgsModel(HyperparametersMixin): ... def __init__(self, arg1, arg2, arg3): ... super().__init__() @@ -72,6 +74,7 @@ class ``__init__`` to be ignored "arg2": abc "arg3": 3.14 + >>> from pytorch_lightning.core.mixins import HyperparametersMixin >>> class SingleArgModel(HyperparametersMixin): ... def __init__(self, params): ... super().__init__() @@ -85,6 +88,7 @@ class ``__init__`` to be ignored "p2": abc "p3": 3.14 + >>> from pytorch_lightning.core.mixins import HyperparametersMixin >>> class ManuallyArgsModel(HyperparametersMixin): ... def __init__(self, arg1, arg2, arg3): ... 
super().__init__() diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py b/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py index f1478ecbf9cbe..686d3a364cbf4 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py @@ -174,7 +174,10 @@ def check_logging(cls, fx_name: str) -> None: ) if cls.functions[fx_name] is None: - raise MisconfigurationException(f"You can't `self.log()` inside `{fx_name}`.") + raise MisconfigurationException( + f"You can't `self.log()` inside `{fx_name}`. HINT: You can still log directly to the logger by using" + " `self.logger.experiment`." + ) @classmethod def get_default_logging_levels( diff --git a/src/pytorch_lightning/utilities/parsing.py b/src/pytorch_lightning/utilities/parsing.py index eed889716f88f..ae969041da62a 100644 --- a/src/pytorch_lightning/utilities/parsing.py +++ b/src/pytorch_lightning/utilities/parsing.py @@ -185,7 +185,7 @@ def collect_init_args( path_args.append(local_args) return collect_init_args(frame.f_back, path_args, inside=True, classes=classes) if not inside: - return collect_init_args(frame.f_back, path_args, inside, classes=classes) + return collect_init_args(frame.f_back, path_args, inside=False, classes=classes) return path_args diff --git a/tests/tests_app/cli/test_cloud_cli.py b/tests/tests_app/cli/test_cloud_cli.py index 169e82ab1f42f..598a90368da88 100644 --- a/tests/tests_app/cli/test_cloud_cli.py +++ b/tests/tests_app/cli/test_cloud_cli.py @@ -11,7 +11,6 @@ from lightning_cloud.openapi import ( V1LightningappV2, V1ListLightningappInstancesResponse, - V1ListLightningappsV2Response, V1ListMembershipsResponse, V1Membership, ) @@ -102,8 +101,8 @@ def __init__(self, *args, create_response, **kwargs): super().__init__() self.create_response = create_response - def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs): - return 
V1ListLightningappsV2Response(lightningapps=[V1LightningappV2(id="my_app", name="app")]) + def lightningapp_v2_service_create_lightningapp_v2(self, *args, **kwargs): + return V1LightningappV2(id="my_app", name="app") def lightningapp_v2_service_create_lightningapp_release(self, project_id, app_id, body): assert project_id == "test-project-id" @@ -183,7 +182,7 @@ def __init__(self, *args, message, **kwargs): super().__init__() self.message = message - def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs): + def lightningapp_instance_service_list_lightningapp_instances(self, *args, **kwargs): raise ApiException( http_resp=HttpHeaderDict( data=self.message, diff --git a/tests/tests_app/cli/test_cmd_clusters.py b/tests/tests_app/cli/test_cmd_clusters.py index 7ce7333ed43e3..aca861c946c5b 100644 --- a/tests/tests_app/cli/test_cmd_clusters.py +++ b/tests/tests_app/cli/test_cmd_clusters.py @@ -4,6 +4,7 @@ import click import pytest from lightning_cloud.openapi import ( + Externalv1LightningappInstance, V1AWSClusterDriverSpec, V1ClusterDriver, V1ClusterPerformanceProfile, @@ -14,6 +15,11 @@ V1CreateClusterRequest, V1GetClusterResponse, V1KubernetesClusterDriver, + V1LightningappInstanceState, + V1LightningappInstanceStatus, + V1ListLightningappInstancesResponse, + V1ListMembershipsResponse, + V1Membership, ) from lightning_app.cli import cmd_clusters @@ -95,10 +101,27 @@ def test_list_clusters(api: mock.MagicMock): api.assert_called_once_with(phase_not_in=[V1ClusterState.DELETED]) +@pytest.fixture() +def fixture_list_instances_empty(): + return V1ListLightningappInstancesResponse([]) + + @mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock()) +@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships") +@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances") 
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_delete_cluster") @mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster") -def test_delete_cluster_api(api_get: mock.MagicMock, api_delete: mock.MagicMock, async_or_interrupt, spec): +def test_delete_cluster_api( + api_get: mock.MagicMock, + api_delete: mock.MagicMock, + api_list_instances: mock.MagicMock, + api_list_memberships: mock.MagicMock, + async_or_interrupt, + spec, + fixture_list_instances_empty, +): + api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")]) + api_list_instances.return_value = fixture_list_instances_empty api_get.return_value = V1GetClusterResponse(spec=spec) cluster_manager = AWSClusterManager() cluster_manager.delete(cluster_id="test-7", do_async=async_or_interrupt) @@ -106,6 +129,71 @@ def test_delete_cluster_api(api_get: mock.MagicMock, api_delete: mock.MagicMock, api_delete.assert_called_once_with(id="test-7", force=False) +@mock.patch("click.confirm") +@mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock()) +@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships") +@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances") +@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_delete_cluster") +@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster") +def test_delete_cluster_with_stopped_apps( + api_get: mock.MagicMock, + api_delete: mock.MagicMock, + api_list_instances: mock.MagicMock, + api_list_memberships: mock.MagicMock, + click_mock: mock.MagicMock, + spec, +): + api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")]) + api_list_instances.side_effect = [ + # when querying for running apps + V1ListLightningappInstancesResponse([]), + # when querying for 
stopped apps + V1ListLightningappInstancesResponse( + lightningapps=[ + Externalv1LightningappInstance( + status=V1LightningappInstanceStatus( + phase=V1LightningappInstanceState.STOPPED, + ) + ) + ] + ), + ] + api_get.return_value = V1GetClusterResponse(spec=spec) + cluster_manager = AWSClusterManager() + + cluster_manager.delete(cluster_id="test-7", do_async=True) + api_delete.assert_called_once_with(id="test-7", force=False) + click_mock.assert_called_once_with("Are you sure you want to continue?", abort=True) + + +@mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock()) +@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships") +@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances") +@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster") +def test_delete_cluster_with_running_apps( + api_get: mock.MagicMock, + api_list_instances: mock.MagicMock, + api_list_memberships: mock.MagicMock, + spec, +): + api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")]) + api_list_instances.return_value = V1ListLightningappInstancesResponse( + lightningapps=[ + Externalv1LightningappInstance( + status=V1LightningappInstanceStatus( + phase=V1LightningappInstanceState.RUNNING, + ) + ) + ] + ) + api_get.return_value = V1GetClusterResponse(spec=spec) + cluster_manager = AWSClusterManager() + + with pytest.raises(click.ClickException) as exception: + cluster_manager.delete(cluster_id="test-7") + exception.match("apps running") + + class Test_check_cluster_id_is_valid: @pytest.mark.parametrize("name", ["test-7", "0wildgoat"]) def test_valid(self, name): diff --git a/tests/tests_app/cli/test_connect.py b/tests/tests_app/cli/test_connect.py index a8924ab375db2..adbd55385815e 100644 --- a/tests/tests_app/cli/test_connect.py +++ b/tests/tests_app/cli/test_connect.py @@ -1,6 +1,5 @@ import 
json import os -import sys from unittest.mock import MagicMock import click @@ -37,11 +36,8 @@ def monkeypatch_connection(monkeypatch, tmpdir, ppid): def test_connect_disconnect_local(tmpdir, monkeypatch): disconnect() - ppid = str(psutil.Process(os.getpid()).ppid()) - connection_path = monkeypatch_connection(monkeypatch, tmpdir, ppid=ppid) - - with pytest.raises(Exception, match="The commands weren't found. Is your app localhost running ?"): - connect("localhost", True) + with pytest.raises(Exception, match="Connection wasn't successful. Is your app localhost running ?"): + connect("localhost") with open(os.path.join(os.path.dirname(__file__), "jsons/connect_1.json")) as f: data = json.load(f) @@ -64,23 +60,14 @@ def fn(msg): response.status_code = 200 response.json.return_value = data monkeypatch.setattr(cli_helpers.requests, "get", MagicMock(return_value=response)) - connect("localhost", True) + connect("localhost") assert _retrieve_connection_to_an_app() == ("localhost", None) command_path = _resolve_command_path("nested_command") assert not os.path.exists(command_path) command_path = _resolve_command_path("command_with_client") assert os.path.exists(command_path) - s = "/" if sys.platform != "win32" else "\\" - command_folder_path = f"{connection_path}{s}commands" - expected = [ - f"Storing `command with client` at {command_folder_path}{s}command_with_client.py", - f"You can review all the downloaded commands at {command_folder_path}", - "You are connected to the local Lightning App.", - ] - assert messages == expected - messages = [] - connect("localhost", True) + connect("localhost") assert messages == ["You are connected to the local Lightning App."] messages = [] @@ -101,8 +88,6 @@ def test_connect_disconnect_cloud(tmpdir, monkeypatch): ppid_1 = str(psutil.Process(os.getpid()).ppid()) ppid_2 = "222" - connection_path = monkeypatch_connection(monkeypatch, tmpdir, ppid=ppid_1) - target_file = _resolve_command_path("command_with_client") if 
os.path.exists(target_file): @@ -153,7 +138,7 @@ def fn(msg): with open(data["paths"]["/command/command_with_client"]["post"]["cls_path"], "rb") as f: response.content = f.read() - connect("example", True) + connect("example") assert _retrieve_connection_to_an_app() == ("example", "1234") commands = _list_app_commands() assert commands == ["command with client", "command without client", "nested command"] @@ -161,50 +146,25 @@ def fn(msg): assert not os.path.exists(command_path) command_path = _resolve_command_path("command_with_client") assert os.path.exists(command_path) - s = "/" if sys.platform != "win32" else "\\" - command_folder_path = f"{connection_path}{s}commands" - expected = [ - f"Storing `command with client` at {command_folder_path}{s}command_with_client.py", - f"You can review all the downloaded commands at {command_folder_path}", - " ", - "The client interface has been successfully installed. ", - "You can now run the following commands:", - " lightning command without client", - " lightning command with client", - " lightning nested command", - " ", - "You are connected to the cloud Lightning App: example.", - "Usage: lightning [OPTIONS] COMMAND [ARGS]...", - "", - " --help Show this message and exit.", - "", - "Lightning App Commands", - " command with client A command with a client.", - " command without client A command without a client.", - " nested command A nested command.", - ] - assert messages == expected - messages = [] - connect("example", True) + connect("example") assert messages == ["You are already connected to the cloud Lightning App: example."] _ = monkeypatch_connection(monkeypatch, tmpdir, ppid=ppid_2) messages = [] - connect("example", True) - assert messages[0] == "Found existing connection, reusing cached commands" + connect("example") + assert "The lightning CLI now responds to app commands" in messages[0] messages = [] disconnect() - print(messages) assert messages == ["You are disconnected from the cloud Lightning App: 
example."] _ = monkeypatch_connection(monkeypatch, tmpdir, ppid=ppid_1) messages = [] disconnect() - assert messages == ["You are disconnected from the cloud Lightning App: example."] + assert "You aren't connected to any Lightning App" in messages[0] messages = [] disconnect() diff --git a/tests/tests_app/components/test_auto_scaler.py b/tests/tests_app/components/test_auto_scaler.py index 436c3517d01ca..672b05bbc9a15 100644 --- a/tests/tests_app/components/test_auto_scaler.py +++ b/tests/tests_app/components/test_auto_scaler.py @@ -3,7 +3,7 @@ import pytest -from lightning_app import LightningWork +from lightning_app import CloudCompute, LightningWork from lightning_app.components import AutoScaler @@ -90,3 +90,11 @@ def test_scale(replicas, metrics, expected_replicas): ) assert auto_scaler.scale(replicas, metrics) == expected_replicas + + +def test_create_work_cloud_compute_cloned(): + """Test CloudCompute is cloned to avoid creating multiple works in a single machine.""" + cloud_compute = CloudCompute("gpu") + auto_scaler = AutoScaler(EmptyWork, cloud_compute=cloud_compute) + _ = auto_scaler.create_work() + assert auto_scaler._work_kwargs["cloud_compute"] is not cloud_compute diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index e89e1e8aa468d..4d1ebda6ccdc5 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -2,7 +2,6 @@ import os import re import sys -from contextlib import nullcontext as does_not_raise from copy import copy from pathlib import Path from unittest import mock @@ -17,12 +16,15 @@ Gridv1ImageSpec, IdGetBody, V1BuildSpec, + V1ClusterSpec, + V1ClusterType, V1DependencyFileInfo, V1Drive, V1DriveSpec, V1DriveStatus, V1DriveType, V1EnvVar, + V1GetClusterResponse, V1LightningappInstanceState, V1LightningappRelease, V1LightningworkDrives, @@ -30,6 +32,7 @@ V1ListClustersResponse, V1ListLightningappInstancesResponse, V1ListMembershipsResponse, + 
V1ListProjectClusterBindingsResponse, V1Membership, V1Metadata, V1NetworkConfig, @@ -43,13 +46,15 @@ V1Work, ) -from lightning_app import BuildConfig, LightningApp, LightningWork +from lightning_app import BuildConfig, LightningApp, LightningFlow, LightningWork from lightning_app.runners import backends, cloud, CloudRuntime from lightning_app.runners.cloud import ( _generate_works_json_gallery, _generate_works_json_web, _validate_build_spec_and_compute, ) +from lightning_app.source_code.copytree import _copytree, _parse_lightningignore +from lightning_app.source_code.local import LocalSourceCodeDir from lightning_app.storage import Drive, Mount from lightning_app.testing.helpers import EmptyWork from lightning_app.utilities.cloud import _get_project @@ -156,27 +161,17 @@ def test_run_on_deleted_cluster(self, cloud_backend): with pytest.raises(ValueError, match="that cluster doesn't exist"): cloud_runtime.dispatch(name=app_name, cluster_id="unknown-cluster") - # TODO: remove this test once there is support for multiple instances @pytest.mark.parametrize( - "old_cluster,new_cluster,expected_raise", + "old_cluster,new_cluster", [ - ( - "test", - "other", - pytest.raises( - ValueError, - match="already running on cluster", - ), - ), - ("test", "test", does_not_raise()), - (None, None, does_not_raise()), - (None, "litng-ai-03", does_not_raise()), - ("litng-ai-03", None, does_not_raise()), + ("test", "other"), + ("test", "test"), + (None, None), + (None, "litng-ai-03"), + ("litng-ai-03", None), ], ) - def test_new_instance_on_different_cluster( - self, cloud_backend, project_id, old_cluster, new_cluster, expected_raise - ): + def test_new_instance_on_different_cluster(self, cloud_backend, project_id, old_cluster, new_cluster): app_name = "test-app" mock_client = mock.MagicMock() @@ -197,6 +192,18 @@ def test_new_instance_on_different_cluster( ] ) + mock_client.projects_service_list_project_cluster_bindings.return_value = V1ListProjectClusterBindingsResponse( + clusters=[ 
+ V1ProjectClusterBinding(cluster_id=old_cluster or DEFAULT_CLUSTER), + V1ProjectClusterBinding(cluster_id=new_cluster or DEFAULT_CLUSTER), + ] + ) + + # Mock all clusters as global clusters + mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) + ) + cloud_backend.client = mock_client app = mock.MagicMock() @@ -204,6 +211,7 @@ def test_new_instance_on_different_cluster( app.frontend = {} existing_instance = MagicMock() + existing_instance.name = app_name existing_instance.status.phase = V1LightningappInstanceState.STOPPED existing_instance.spec.cluster_id = old_cluster or DEFAULT_CLUSTER mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( @@ -216,8 +224,15 @@ def test_new_instance_on_different_cluster( # This is the main assertion: # we have an existing instance on `cluster-001` # but we want to run this app on `cluster-002` - with expected_raise: - cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster) + cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster) + + if new_cluster != old_cluster and None not in (old_cluster, new_cluster): + # If we switched cluster, check that a new name was used which starts with the old name + mock_client.lightningapp_v2_service_create_lightningapp_release_instance.assert_called_once() + args = mock_client.lightningapp_v2_service_create_lightningapp_release_instance.call_args + assert args[1]["body"].name != app_name + assert args[1]["body"].name.startswith(app_name) + assert args[1]["body"].cluster_id == new_cluster @pytest.mark.parametrize("flow_cloud_compute", [None, CloudCompute(name="t2.medium")]) @mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock()) @@ -326,9 +341,7 @@ def test_requirements_file(self, monkeypatch): mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( 
V1ListLightningappInstancesResponse(lightningapps=[]) ) - mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( - cluster_id="test" - ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease() mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse([Externalv1Cluster(id="test")]) cloud_backend = mock.MagicMock() cloud_backend.client = mock_client @@ -373,7 +386,6 @@ def test_requirements_file(self, monkeypatch): path="requirements.txt", ), ) - body.cluster_id = "test" cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_called_with( project_id="test-project-id", app_id=mock.ANY, body=body ) @@ -436,20 +448,28 @@ def test_no_cache(self, monkeypatch): def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, tmpdir): source_code_root_dir = Path(tmpdir / "src").absolute() source_code_root_dir.mkdir() - Path(source_code_root_dir / ".lightning").write_text("cluster_id: test\nname: myapp") + Path(source_code_root_dir / ".lightning").write_text("name: myapp") requirements_file = Path(source_code_root_dir / "requirements.txt") Path(requirements_file).touch() + (source_code_root_dir / "entrypoint.py").touch() mock_client = mock.MagicMock() if lightningapps: + lightningapps[0].name = "myapp" lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED lightningapps[0].spec.cluster_id = "test" mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) - mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( - cluster_id="test" + mock_client.projects_service_list_project_cluster_bindings.return_value = V1ListProjectClusterBindingsResponse( + clusters=[ + V1ProjectClusterBinding(cluster_id="test"), + ] ) + mock_client.cluster_service_get_cluster.side_effect = 
lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) + ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease() mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse([Externalv1Cluster(id="test")]) mock_client.lightningapp_v2_service_create_lightningapp_release_instance.return_value = MagicMock() existing_instance = MagicMock() @@ -551,9 +571,7 @@ def test_call_with_queue_server_type_specified(self, lightningapps, monkeypatch, mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=[]) ) - mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( - cluster_id="test" - ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease() mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse([Externalv1Cluster(id="test")]) cloud_backend = mock.MagicMock() cloud_backend.client = mock_client @@ -574,7 +592,6 @@ def test_call_with_queue_server_type_specified(self, lightningapps, monkeypatch, # calling with no env variable set body = IdGetBody( - cluster_id="test", desired_state=V1LightningappInstanceState.STOPPED, env=[], name=mock.ANY, @@ -590,7 +607,6 @@ def test_call_with_queue_server_type_specified(self, lightningapps, monkeypatch, cloud_runtime.backend.client.reset_mock() cloud_runtime.dispatch() body = IdGetBody( - cluster_id="test", desired_state=V1LightningappInstanceState.STOPPED, env=[], name=mock.ANY, @@ -606,20 +622,28 @@ def test_call_with_queue_server_type_specified(self, lightningapps, monkeypatch, def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch, tmpdir): source_code_root_dir = Path(tmpdir / "src").absolute() source_code_root_dir.mkdir() - Path(source_code_root_dir / ".lightning").write_text("cluster_id: 
test\nname: myapp") + Path(source_code_root_dir / ".lightning").write_text("name: myapp") requirements_file = Path(source_code_root_dir / "requirements.txt") Path(requirements_file).touch() + (source_code_root_dir / "entrypoint.py").touch() mock_client = mock.MagicMock() if lightningapps: + lightningapps[0].name = "myapp" lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED lightningapps[0].spec.cluster_id = "test" mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) - mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( - cluster_id="test" + mock_client.projects_service_list_project_cluster_bindings.return_value = V1ListProjectClusterBindingsResponse( + clusters=[ + V1ProjectClusterBinding(cluster_id="test"), + ] + ) + mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease() mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse([Externalv1Cluster(id="test")]) lightning_app_instance = MagicMock() mock_client.lightningapp_v2_service_create_lightningapp_release_instance = MagicMock( @@ -742,20 +766,30 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch def test_call_with_work_app_and_app_comment_command_execution_set(self, lightningapps, monkeypatch, tmpdir): source_code_root_dir = Path(tmpdir / "src").absolute() source_code_root_dir.mkdir() - Path(source_code_root_dir / ".lightning").write_text("cluster_id: test\nname: myapp") + Path(source_code_root_dir / ".lightning").write_text("name: myapp") requirements_file = Path(source_code_root_dir / "requirements.txt") Path(requirements_file).touch() + (source_code_root_dir / 
"entrypoint.py").touch() mock_client = mock.MagicMock() if lightningapps: + lightningapps[0].name = "myapp" lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED lightningapps[0].spec.cluster_id = "test" + mock_client.projects_service_list_project_cluster_bindings.return_value = ( + V1ListProjectClusterBindingsResponse( + clusters=[ + V1ProjectClusterBinding(cluster_id="test"), + ] + ) + ) + mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) + ) mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) - mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( - cluster_id="test" - ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease() mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse([Externalv1Cluster(id="test")]) lightning_app_instance = MagicMock() mock_client.lightningapp_v2_service_create_lightningapp_release_instance = MagicMock( @@ -846,7 +880,6 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin app_id=mock.ANY, id=mock.ANY, body=Body9( - cluster_id="test", desired_state=V1LightningappInstanceState.STOPPED, name=mock.ANY, env=[V1EnvVar(name="ENABLE_APP_COMMENT_COMMAND_EXECUTION", value="1")], @@ -859,14 +892,26 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, monkeypatch, tmpdir): source_code_root_dir = Path(tmpdir / "src").absolute() source_code_root_dir.mkdir() - Path(source_code_root_dir / ".lightning").write_text("cluster_id: test\nname: myapp") + Path(source_code_root_dir / ".lightning").write_text("name: myapp") requirements_file = Path(source_code_root_dir / 
"requirements.txt") Path(requirements_file).touch() + (source_code_root_dir / "entrypoint.py").touch() mock_client = mock.MagicMock() if lightningapps: + lightningapps[0].name = "myapp" lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED lightningapps[0].spec.cluster_id = "test" + mock_client.projects_service_list_project_cluster_bindings.return_value = ( + V1ListProjectClusterBindingsResponse( + clusters=[ + V1ProjectClusterBinding(cluster_id="test"), + ] + ) + ) + mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) + ) mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -1062,14 +1107,26 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, monkeypatch, tmpdir): source_code_root_dir = Path(tmpdir / "src").absolute() source_code_root_dir.mkdir() - Path(source_code_root_dir / ".lightning").write_text("cluster_id: test\nname: myapp") + Path(source_code_root_dir / ".lightning").write_text("name: myapp") requirements_file = Path(source_code_root_dir / "requirements.txt") Path(requirements_file).touch() + (source_code_root_dir / "entrypoint.py").touch() mock_client = mock.MagicMock() if lightningapps: + lightningapps[0].name = "myapp" lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED lightningapps[0].spec.cluster_id = "test" + mock_client.projects_service_list_project_cluster_bindings.return_value = ( + V1ListProjectClusterBindingsResponse( + clusters=[ + V1ProjectClusterBinding(cluster_id="test"), + ] + ) + ) + mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse( + id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL) + ) 
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -1247,31 +1304,38 @@ def test_get_project(monkeypatch): assert ret.project_id == "test-project-id1" +def write_file_of_size(path, size): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "wb") as f: + f.seek(size) + f.write(b"\0") + + @mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock()) @mock.patch("lightning_app.runners.backends.cloud.LightningClient", MagicMock()) def test_check_uploaded_folder(monkeypatch, tmpdir, caplog): - - monkeypatch.setattr(cloud, "logger", logging.getLogger()) - app = MagicMock() - repo = MagicMock() + root = Path(tmpdir) + repo = LocalSourceCodeDir(root) backend = cloud.CloudRuntime(app) with caplog.at_level(logging.WARN): - backend._check_uploaded_folder(Path(tmpdir), repo) + backend._check_uploaded_folder(root, repo) assert caplog.messages == [] - mock = MagicMock() - mock.st_mode = 33188 - mock.st_size = 5 * 1000 * 1000 - repo.files = [str(Path("./a.png"))] - monkeypatch.setattr(Path, "stat", MagicMock(return_value=mock)) + # write some files to assert the message below. + write_file_of_size(root / "a.png", 4 * 1000 * 1000) + write_file_of_size(root / "b.txt", 5 * 1000 * 1000) + write_file_of_size(root / "c.jpg", 6 * 1000 * 1000) - path = Path(".") + repo._non_ignored_files = None # force reset with caplog.at_level(logging.WARN): - backend._check_uploaded_folder(path, repo) - assert caplog.messages[0].startswith( - f"Your application folder '{path.absolute()}' is more than 2 MB. 
The total size is 5.0 MB" - ) + backend._check_uploaded_folder(root, repo) + assert f"Your application folder '{root.absolute()}' is more than 2 MB" in caplog.text + assert "The total size is 15.0 MB" in caplog.text + assert "3 files were uploaded" in caplog.text + assert "files:\n6.0 MB: c.jpg\n5.0 MB: b.txt\n4.0 MB: a.png\nPerhaps" in caplog.text # tests the order + assert "adding them to `.lightningignore`." in caplog.text + assert "lightningingore` attribute in a Flow or Work" in caplog.text @mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock()) @@ -1433,6 +1497,130 @@ def run(self): _validate_build_spec_and_compute(Work()) +def test_programmatic_lightningignore(monkeypatch, caplog, tmpdir): + monkeypatch.setenv("LIGHTNING_DISPATCHED", "0") # this is not cleaned up + + mock_client = mock.MagicMock() + mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( + memberships=[V1Membership(name="test-project", project_id="test-project-id")] + ) + mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( + V1ListLightningappInstancesResponse(lightningapps=[]) + ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( + cluster_id="test" + ) + cloud_backend = mock.MagicMock(client=mock_client) + monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend)) + + class MyWork(LightningWork): + def __init__(self): + super().__init__() + self.lightningignore += ("foo", "lightning_logs") + + def run(self): + with pytest.raises(RuntimeError, match="w.lightningignore` does not"): + self.lightningignore += ("foobar",) + + class MyFlow(LightningFlow): + def __init__(self): + super().__init__() + self.lightningignore = ("foo",) + self.w = MyWork() + + def run(self): + with pytest.raises(RuntimeError, match="root.lightningignore` does not"): + self.lightningignore = ("baz",) + self.w.run() + + flow = MyFlow() + app = 
LightningApp(flow) + + monkeypatch.setattr(app, "_update_index_file", mock.MagicMock()) + + path = Path(tmpdir) + cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=path / "entrypoint.py") + monkeypatch.setattr(LocalSourceCodeDir, "upload", mock.MagicMock()) + + # write some files + write_file_of_size(path / "a.txt", 5 * 1000 * 1000) + write_file_of_size(path / "foo.png", 4 * 1000 * 1000) + write_file_of_size(path / "lightning_logs" / "foo.ckpt", 6 * 1000 * 1000) + # also an actual .lightningignore file + (path / ".lightningignore").write_text("foo.png") + + with mock.patch( + "lightning_app.runners.cloud._parse_lightningignore", wraps=_parse_lightningignore + ) as parse_mock, mock.patch( + "lightning_app.source_code.local._copytree", wraps=_copytree + ) as copy_mock, caplog.at_level( + logging.WARN + ): + cloud_runtime.dispatch() + + parse_mock.assert_called_once_with(("foo", "foo", "lightning_logs")) + assert copy_mock.mock_calls[0].kwargs["ignore_functions"][0].args[1] == {"lightning_logs", "foo"} + + assert f"Your application folder '{path.absolute()}' is more than 2 MB" in caplog.text + assert "The total size is 5.0 MB" in caplog.text + assert "2 files were uploaded" # a.txt and .lightningignore + assert "files:\n5.0 MB: a.txt\nPerhaps" in caplog.text # only this file appears + + # replicate how the app would dispatch the app, and call `run` + monkeypatch.setenv("LIGHTNING_DISPATCHED", "1") + flow.run() + + +def test_default_lightningignore(monkeypatch, caplog, tmpdir): + mock_client = mock.MagicMock() + mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( + memberships=[V1Membership(name="test-project", project_id="test-project-id")] + ) + mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( + V1ListLightningappInstancesResponse(lightningapps=[]) + ) + mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease( + cluster_id="test" + ) + 
cloud_backend = mock.MagicMock(client=mock_client) + monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend)) + + class MyWork(LightningWork): + def run(self): + pass + + app = LightningApp(MyWork()) + + path = Path(tmpdir) + cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=path / "entrypoint.py") + monkeypatch.setattr(LocalSourceCodeDir, "upload", mock.MagicMock()) + + # write some files + write_file_of_size(path / "a.txt", 5 * 1000 * 1000) + write_file_of_size(path / "venv" / "foo.txt", 4 * 1000 * 1000) + + assert not (path / ".lightningignore").exists() + + with mock.patch( + "lightning_app.runners.cloud._parse_lightningignore", wraps=_parse_lightningignore + ) as parse_mock, mock.patch( + "lightning_app.source_code.local._copytree", wraps=_copytree + ) as copy_mock, caplog.at_level( + logging.WARN + ): + cloud_runtime.dispatch() + + parse_mock.assert_called_once_with(()) + assert copy_mock.mock_calls[0].kwargs["ignore_functions"][0].args[1] == set() + + assert (path / ".lightningignore").exists() + + assert f"Your application folder '{path.absolute()}' is more than 2 MB" in caplog.text + assert "The total size is 5.0 MB" in caplog.text + assert "2 files were uploaded" # a.txt and .lightningignore + assert "files:\n5.0 MB: a.txt\nPerhaps" in caplog.text # only this file appears + + @pytest.mark.parametrize( "lightning_app_instance, lightning_cloud_url, expected_url", [ diff --git a/tests/tests_examples_app/public/test_commands_and_api.py b/tests/tests_examples_app/public/test_commands_and_api.py index a6a015d02a84a..290cf0d49b4cb 100644 --- a/tests/tests_examples_app/public/test_commands_and_api.py +++ b/tests/tests_examples_app/public/test_commands_and_api.py @@ -25,7 +25,7 @@ def test_commands_and_api_example_cloud() -> None: # 2: Connect to the App and send the first & second command with the client # Requires to be run within the same process. 
- cmd_1 = f"python -m lightning connect {app_id} -y" + cmd_1 = f"python -m lightning connect {app_id}" cmd_2 = "python -m lightning command with client --name=this" cmd_3 = "python -m lightning command without client --name=is" cmd_4 = "lightning disconnect" diff --git a/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py b/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py index 5c9bd410d186e..b1a7082ef6448 100644 --- a/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py +++ b/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py @@ -23,6 +23,7 @@ from pytorch_lightning.callbacks import ProgressBarBase, RichProgressBar from pytorch_lightning.callbacks.progress.rich_progress import RichProgressBarTheme from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset, RandomIterableDataset +from pytorch_lightning.loggers import CSVLogger from tests_pytorch.helpers.runif import RunIf @@ -330,7 +331,7 @@ def training_step(self, *args, **kwargs): progress_bar = RichProgressBar() model = CustomModel() - trainer = Trainer(default_root_dir=tmpdir, callbacks=progress_bar, fast_dev_run=True) + trainer = Trainer(default_root_dir=tmpdir, callbacks=progress_bar, fast_dev_run=True, logger=CSVLogger(tmpdir)) trainer.fit(model) main_progress_bar_id = progress_bar.main_progress_bar_id @@ -384,6 +385,7 @@ def test_step(self, batch, batch_idx): enable_checkpointing=False, log_every_n_steps=1, callbacks=pbar, + logger=CSVLogger(tmpdir), ) trainer.fit(model) diff --git a/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py b/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py index 105f380be58f5..bdd1c2002f1cb 100644 --- a/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py +++ b/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py @@ -29,6 +29,7 @@ from pytorch_lightning.callbacks.progress.tqdm_progress import Tqdm from pytorch_lightning.core.module import LightningModule 
from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset +from pytorch_lightning.loggers import CSVLogger from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.runif import RunIf @@ -706,6 +707,7 @@ def test_step(self, batch, batch_idx): enable_checkpointing=False, log_every_n_steps=1, callbacks=pbar, + logger=CSVLogger(tmpdir), ) trainer.fit(model) diff --git a/tests/tests_pytorch/callbacks/test_device_stats_monitor.py b/tests/tests_pytorch/callbacks/test_device_stats_monitor.py index 36b30dc346d65..826fa0f088f28 100644 --- a/tests/tests_pytorch/callbacks/test_device_stats_monitor.py +++ b/tests/tests_pytorch/callbacks/test_device_stats_monitor.py @@ -155,13 +155,13 @@ def test_prefix_metric_keys(): assert converted_metrics == {"foo.1": 1.0, "foo.2": 2.0, "foo.3": 3.0} -def test_device_stats_monitor_warning_when_psutil_not_available(monkeypatch): +def test_device_stats_monitor_warning_when_psutil_not_available(monkeypatch, tmp_path): """Test that warning is raised when psutil is not available.""" import pytorch_lightning.callbacks.device_stats_monitor as imports monkeypatch.setattr(imports, "_PSUTIL_AVAILABLE", False) monitor = DeviceStatsMonitor() - trainer = Trainer() + trainer = Trainer(logger=CSVLogger(tmp_path)) assert trainer.strategy.root_device == torch.device("cpu") # TODO: raise an exception from v1.9 with pytest.warns(UserWarning, match="psutil` is not installed"): diff --git a/tests/tests_pytorch/callbacks/test_lr_monitor.py b/tests/tests_pytorch/callbacks/test_lr_monitor.py index 90e2c0fa26909..6fa62fa91697d 100644 --- a/tests/tests_pytorch/callbacks/test_lr_monitor.py +++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py @@ -20,6 +20,7 @@ from pytorch_lightning.callbacks.callback import Callback from pytorch_lightning.callbacks.finetuning import BackboneFinetuning from pytorch_lightning.demos.boring_classes import BoringModel +from pytorch_lightning.loggers import CSVLogger from 
pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.datamodules import ClassifDataModule from tests_pytorch.helpers.runif import RunIf @@ -32,7 +33,12 @@ def test_lr_monitor_single_lr(tmpdir): lr_monitor = LearningRateMonitor() trainer = Trainer( - default_root_dir=tmpdir, max_epochs=2, limit_val_batches=0.1, limit_train_batches=0.5, callbacks=[lr_monitor] + default_root_dir=tmpdir, + max_epochs=2, + limit_val_batches=0.1, + limit_train_batches=0.5, + callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -70,6 +76,7 @@ def configure_optimizers(self): limit_train_batches=5, log_every_n_steps=1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -96,6 +103,7 @@ def configure_optimizers(self): limit_train_batches=5, log_every_n_steps=1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) with pytest.warns(RuntimeWarning, match="optimizers do not have momentum."): trainer.fit(model) @@ -117,7 +125,12 @@ def configure_optimizers(self): lr_monitor = LearningRateMonitor() trainer = Trainer( - default_root_dir=tmpdir, max_epochs=2, limit_val_batches=0.1, limit_train_batches=0.5, callbacks=[lr_monitor] + default_root_dir=tmpdir, + max_epochs=2, + limit_val_batches=0.1, + limit_train_batches=0.5, + callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -154,6 +167,7 @@ def configure_optimizers(self): limit_train_batches=5, log_every_n_steps=1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -179,6 +193,7 @@ def configure_optimizers(self): limit_train_batches=5, log_every_n_steps=1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) with pytest.warns(RuntimeWarning, match="optimizers do not have momentum."): trainer.fit(model) @@ -226,6 +241,7 @@ def configure_optimizers(self): limit_train_batches=7, limit_val_batches=0.1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -269,6 +285,7 @@ def 
configure_optimizers(self): limit_train_batches=7, limit_val_batches=0.1, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model) @@ -305,7 +322,12 @@ def configure_optimizers(self): lr_monitor = LearningRateMonitor() trainer = Trainer( - default_root_dir=tmpdir, max_epochs=2, limit_val_batches=0.1, limit_train_batches=0.5, callbacks=[lr_monitor] + default_root_dir=tmpdir, + max_epochs=2, + limit_val_batches=0.1, + limit_train_batches=0.5, + callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), ) trainer.fit(model, datamodule=dm) @@ -330,6 +352,7 @@ def configure_optimizers(self): callbacks=[lr_monitor], enable_progress_bar=False, enable_model_summary=False, + logger=CSVLogger(tmpdir), ) trainer.fit(TestModel()) assert list(lr_monitor.lrs) == ["my_logging_name"] @@ -349,6 +372,7 @@ def configure_optimizers(self): limit_val_batches=2, limit_train_batches=2, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, ) @@ -384,6 +408,7 @@ def configure_optimizers(self): limit_val_batches=2, limit_train_batches=2, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, ) @@ -475,6 +500,7 @@ def finetune_function(self, pl_module, epoch: int, optimizer, opt_idx: int): limit_val_batches=0, limit_train_batches=2, callbacks=[TestFinetuning(), lr_monitor, Check()], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, enable_checkpointing=False, @@ -533,6 +559,7 @@ def configure_optimizers(self): limit_val_batches=2, limit_train_batches=2, callbacks=[lr_monitor], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, ) diff --git a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py index e3f8a979f4353..ac2b5cb51274e 100644 --- a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py +++ 
b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py @@ -303,13 +303,14 @@ def _swa_resume_training_from_checkpoint(tmpdir, model, resume_model, ddp=False) "limit_val_batches": 0, "accumulate_grad_batches": 2, "enable_progress_bar": False, + "logger": False, } trainer = Trainer(callbacks=SwaTestCallback(swa_epoch_start=swa_start, swa_lrs=0.1), **trainer_kwargs) with _backward_patch(trainer), pytest.raises(Exception, match="SWA crash test"): trainer.fit(model) - checkpoint_dir = Path(tmpdir) / "lightning_logs" / "version_0" / "checkpoints" + checkpoint_dir = Path(tmpdir) / "checkpoints" checkpoint_files = os.listdir(checkpoint_dir) assert len(checkpoint_files) == 1 ckpt_path = str(checkpoint_dir / checkpoint_files[0]) diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py index 54ad7c80ee692..a1e2d5c071876 100644 --- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py +++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py @@ -35,7 +35,7 @@ from pytorch_lightning import seed_everything, Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _OMEGACONF_AVAILABLE from tests_pytorch.helpers.runif import RunIf @@ -301,9 +301,11 @@ def test_model_checkpoint_with_non_string_input(tmpdir, save_top_k: int): checkpoint = ModelCheckpoint(monitor="early_stop_on", dirpath=None, filename="{epoch}", save_top_k=save_top_k) max_epochs = 2 - trainer = Trainer(default_root_dir=tmpdir, callbacks=[checkpoint], overfit_batches=0.20, max_epochs=max_epochs) + trainer = Trainer( + default_root_dir=tmpdir, callbacks=[checkpoint], overfit_batches=0.20, 
max_epochs=max_epochs, logger=False + ) trainer.fit(model) - assert checkpoint.dirpath == tmpdir / trainer.logger.name / "version_0" / "checkpoints" + assert checkpoint.dirpath == tmpdir / "checkpoints" if save_top_k == -1: ckpt_files = os.listdir(checkpoint.dirpath) @@ -753,7 +755,12 @@ def test_default_checkpoint_behavior(tmpdir): model = LogInTwoMethods() trainer = Trainer( - default_root_dir=tmpdir, max_epochs=3, enable_progress_bar=False, limit_train_batches=5, limit_val_batches=5 + default_root_dir=tmpdir, + max_epochs=3, + enable_progress_bar=False, + limit_train_batches=5, + limit_val_batches=5, + logger=False, ) with patch.object(trainer, "save_checkpoint", wraps=trainer.save_checkpoint) as save_mock: @@ -761,7 +768,7 @@ def test_default_checkpoint_behavior(tmpdir): results = trainer.test() assert len(results) == 1 - save_dir = tmpdir / "lightning_logs" / "version_0" / "checkpoints" + save_dir = tmpdir / "checkpoints" save_weights_only = trainer.checkpoint_callback.save_weights_only save_mock.assert_has_calls( [ @@ -867,6 +874,7 @@ def validation_step(self, batch, batch_idx): "enable_model_summary": False, "log_every_n_steps": 1, "default_root_dir": tmpdir, + "logger": CSVLogger(tmpdir), } trainer = Trainer(**trainer_kwargs, callbacks=[checkpoint_callback]) trainer.fit(model) @@ -931,6 +939,7 @@ def assert_checkpoint_log_dir(idx): limit_val_batches=3, limit_test_batches=4, callbacks=[checkpoint_cb], + logger=TensorBoardLogger(tmpdir), ) trainer = Trainer(**trainer_config) assert_trainer_init(trainer) @@ -953,6 +962,7 @@ def assert_checkpoint_log_dir(idx): assert_checkpoint_content(ckpt_dir) # load from checkpoint + trainer_config["logger"] = TensorBoardLogger(tmpdir) trainer = pl.Trainer(**trainer_config) assert_trainer_init(trainer) diff --git a/tests/tests_pytorch/loggers/test_logger.py b/tests/tests_pytorch/loggers/test_logger.py index 5903fce2a6348..a36375ac981c4 100644 --- a/tests/tests_pytorch/loggers/test_logger.py +++ 
b/tests/tests_pytorch/loggers/test_logger.py @@ -239,7 +239,12 @@ def __init__(self, param_one, param_two): model = TestModel("pytorch", "lightning") trainer = Trainer( - default_root_dir=tmpdir, max_epochs=1, limit_train_batches=0.1, limit_val_batches=0.1, num_sanity_val_steps=0 + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=0.1, + limit_val_batches=0.1, + num_sanity_val_steps=0, + logger=TensorBoardLogger(tmpdir), ) trainer.fit(model) @@ -270,6 +275,7 @@ class _Test: trainer = Trainer( default_root_dir=tmpdir, + logger=TensorBoardLogger(tmpdir), max_epochs=1, limit_train_batches=0.1, limit_val_batches=0.1, @@ -294,6 +300,7 @@ class _Test: dm = TestDataModule(diff_params) trainer = Trainer( default_root_dir=tmpdir, + logger=TensorBoardLogger(tmpdir), max_epochs=1, limit_train_batches=0.1, limit_val_batches=0.1, @@ -311,6 +318,7 @@ class _Test: dm = TestDataModule(tensor_params) trainer = Trainer( default_root_dir=tmpdir, + logger=TensorBoardLogger(tmpdir), max_epochs=1, limit_train_batches=0.1, limit_val_batches=0.1, diff --git a/tests/tests_pytorch/loggers/test_tensorboard.py b/tests/tests_pytorch/loggers/test_tensorboard.py index ddab738269904..4f91d7a1661ba 100644 --- a/tests/tests_pytorch/loggers/test_tensorboard.py +++ b/tests/tests_pytorch/loggers/test_tensorboard.py @@ -38,7 +38,7 @@ def __init__(self, b1=0.5, b2=0.999): super().__init__() self.save_hyperparameters() - trainer = Trainer(max_steps=1, default_root_dir=tmpdir) + trainer = Trainer(max_steps=1, default_root_dir=tmpdir, logger=TensorBoardLogger(tmpdir)) model = CustomModel() assert trainer.log_dir == trainer.logger.log_dir trainer.fit(model) diff --git a/tests/tests_pytorch/models/test_grad_norm.py b/tests/tests_pytorch/models/test_grad_norm.py index 4d31187caf1a6..2b7d979937309 100644 --- a/tests/tests_pytorch/models/test_grad_norm.py +++ b/tests/tests_pytorch/models/test_grad_norm.py @@ -18,6 +18,7 @@ from pytorch_lightning import Trainer from 
pytorch_lightning.demos.boring_classes import BoringModel +from pytorch_lightning.loggers import CSVLogger class ModelWithManualGradTracker(BoringModel): @@ -86,7 +87,13 @@ def on_train_batch_end(self, *_) -> None: @pytest.mark.parametrize("log_every_n_steps", [1, 2, 3]) def test_grad_tracking_interval(tmpdir, log_every_n_steps): """Test that gradient norms get tracked in the right interval and that everytime the same keys get logged.""" - trainer = Trainer(default_root_dir=tmpdir, track_grad_norm=2, log_every_n_steps=log_every_n_steps, max_steps=10) + trainer = Trainer( + default_root_dir=tmpdir, + track_grad_norm=2, + log_every_n_steps=log_every_n_steps, + max_steps=10, + logger=CSVLogger(tmpdir), + ) with patch.object(trainer.logger, "log_metrics") as mocked: model = BoringModel() diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py index 80ef49e87fcf2..fe85e9abade56 100644 --- a/tests/tests_pytorch/models/test_hparams.py +++ b/tests/tests_pytorch/models/test_hparams.py @@ -33,6 +33,7 @@ from pytorch_lightning.core.mixins import HyperparametersMixin from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset +from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, AttributeDict, is_picklable from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.runif import RunIf @@ -642,7 +643,12 @@ def test_init_arg_with_runtime_change(tmpdir, cls): assert model.hparams.running_arg == -1 trainer = Trainer( - default_root_dir=tmpdir, limit_train_batches=2, limit_val_batches=2, limit_test_batches=2, max_epochs=1 + default_root_dir=tmpdir, + limit_train_batches=2, + limit_val_batches=2, + limit_test_batches=2, + max_epochs=1, + logger=TensorBoardLogger(tmpdir), ) trainer.fit(model) @@ -875,7 
+881,7 @@ def test_colliding_hparams(tmpdir): model = SaveHparamsModel({"data_dir": "abc", "arg2": "abc"}) data = DataModuleWithHparams({"data_dir": "foo"}) - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1) + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, logger=CSVLogger(tmpdir)) with pytest.raises(MisconfigurationException, match=r"Error while merging hparams:"): trainer.fit(model, datamodule=data) diff --git a/tests/tests_pytorch/profilers/test_profiler.py b/tests/tests_pytorch/profilers/test_profiler.py index 1ed1212840234..b2387f12a63f2 100644 --- a/tests/tests_pytorch/profilers/test_profiler.py +++ b/tests/tests_pytorch/profilers/test_profiler.py @@ -103,13 +103,12 @@ def test_simple_profiler_dirpath(tmpdir): assert profiler.dirpath is None model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, profiler=profiler) + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, profiler=profiler, logger=False) trainer.fit(model) - expected = tmpdir / "lightning_logs" / "version_0" - assert trainer.log_dir == expected + assert trainer.log_dir == tmpdir assert profiler.dirpath == trainer.log_dir - assert expected.join("fit-profiler.txt").exists() + assert tmpdir.join("fit-profiler.txt").exists() def test_simple_profiler_with_nonexisting_log_dir(tmpdir): @@ -121,15 +120,19 @@ def test_simple_profiler_with_nonexisting_log_dir(tmpdir): model = BoringModel() trainer = Trainer( - default_root_dir=nonexisting_tmpdir, max_epochs=1, limit_train_batches=1, limit_val_batches=1, profiler=profiler + default_root_dir=nonexisting_tmpdir, + max_epochs=1, + limit_train_batches=1, + limit_val_batches=1, + profiler=profiler, + logger=False, ) trainer.fit(model) - expected = nonexisting_tmpdir / "lightning_logs" / "version_0" - assert expected.exists() - assert trainer.log_dir == expected + assert nonexisting_tmpdir.exists() + assert trainer.log_dir == nonexisting_tmpdir assert profiler.dirpath == trainer.log_dir - assert 
expected.join("fit-profiler.txt").exists() + assert nonexisting_tmpdir.join("fit-profiler.txt").exists() def test_simple_profiler_with_nonexisting_dirpath(tmpdir): diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index 786cfd1ab1504..d2d8479e5bfa0 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -29,6 +29,7 @@ from pytorch_lightning import LightningDataModule, LightningModule, Trainer from pytorch_lightning.callbacks import Callback, LearningRateMonitor, ModelCheckpoint from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset, RandomIterableDataset +from pytorch_lightning.loggers import CSVLogger from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin from pytorch_lightning.strategies import DeepSpeedStrategy from pytorch_lightning.strategies.deepspeed import _DEEPSPEED_AVAILABLE, LightningDeepSpeedModule @@ -298,6 +299,7 @@ def configure_optimizers(self): fast_dev_run=True, precision=16, callbacks=[TestCB(), lr_monitor], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, ) @@ -337,6 +339,7 @@ def on_train_start(self, trainer, pl_module) -> None: max_epochs=2, precision=16, callbacks=[TestCB(), lr_monitor], + logger=CSVLogger(tmpdir), enable_progress_bar=False, enable_model_summary=False, ) diff --git a/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py b/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py index 27e194eab1b32..0e54f2993e708 100644 --- a/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py +++ b/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py @@ -7,6 +7,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.demos.boring_classes import BoringModel +from pytorch_lightning.loggers import TensorBoardLogger from 
pytorch_lightning.loggers.logger import DummyLogger @@ -72,7 +73,7 @@ def test_step(self, batch, batch_idx): default_root_dir=tmpdir, fast_dev_run=fast_dev_run, val_check_interval=2, - logger=True, + logger=TensorBoardLogger(tmpdir), log_every_n_steps=1, callbacks=[checkpoint_callback, early_stopping_callback], ) diff --git a/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py index 20c5323907027..1f234166be5aa 100644 --- a/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py @@ -765,7 +765,7 @@ def test_dataloader(self): return [super().test_dataloader(), super().test_dataloader()] model = CustomBoringModel() - trainer = Trainer(default_root_dir=tmpdir, limit_test_batches=1) + trainer = Trainer(default_root_dir=tmpdir, limit_test_batches=1, logger=TensorBoardLogger(tmpdir)) results = trainer.test(model) # what's logged in `test_epoch_end` gets included in the results of each dataloader @@ -997,6 +997,7 @@ def test_dataloader(self): limit_train_batches=1, limit_val_batches=limit_batches, limit_test_batches=limit_batches, + logger=TensorBoardLogger(tmpdir), ) model = CustomBoringModel() diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index 8a44b7e131644..c2fd64271ede5 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -29,6 +29,7 @@ from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, TQDMProgressBar from pytorch_lightning.core.module import LightningModule from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset, RandomDictDataset +from pytorch_lightning.loggers.tensorboard import TensorBoardLogger from pytorch_lightning.trainer.states import RunningStage from 
pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.runif import RunIf @@ -798,6 +799,7 @@ def training_step(self, batch, batch_idx): enable_model_summary=False, enable_checkpointing=False, enable_progress_bar=False, + logger=TensorBoardLogger(tmpdir), ) trainer.fit(model) @@ -831,6 +833,7 @@ def on_train_start(self): enable_model_summary=False, enable_checkpointing=False, enable_progress_bar=False, + logger=TensorBoardLogger(tmpdir), ) trainer.fit(model) diff --git a/tests/tests_pytorch/trainer/test_dataloaders.py b/tests/tests_pytorch/trainer/test_dataloaders.py index 08e81e5915351..673ba94b67b9c 100644 --- a/tests/tests_pytorch/trainer/test_dataloaders.py +++ b/tests/tests_pytorch/trainer/test_dataloaders.py @@ -32,6 +32,7 @@ RandomIterableDataset, RandomIterableDatasetWithLen, ) +from pytorch_lightning.loggers import CSVLogger from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.data import has_len_all_ranks from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -689,11 +690,13 @@ def test_warning_with_small_dataloader_and_logging_interval(tmpdir): model.train_dataloader = lambda: dataloader with pytest.warns(UserWarning, match=r"The number of training batches \(10\) is smaller than the logging interval"): - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, log_every_n_steps=11) + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, log_every_n_steps=11, logger=CSVLogger(tmpdir)) trainer.fit(model) with pytest.warns(UserWarning, match=r"The number of training batches \(1\) is smaller than the logging interval"): - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, log_every_n_steps=2, limit_train_batches=1) + trainer = Trainer( + default_root_dir=tmpdir, max_epochs=1, log_every_n_steps=2, limit_train_batches=1, logger=CSVLogger(".") + ) trainer.fit(model) diff --git a/tests/tests_pytorch/trainer/test_trainer.py 
b/tests/tests_pytorch/trainer/test_trainer.py index 74ea2ac11a701..8b71da9815255 100644 --- a/tests/tests_pytorch/trainer/test_trainer.py +++ b/tests/tests_pytorch/trainer/test_trainer.py @@ -1342,6 +1342,7 @@ def training_step(self, *args, **kwargs): limit_train_batches=train_batches, limit_val_batches=0, max_steps=max_steps, + logger=TensorBoardLogger(tmpdir), ) trainer.fit(model) expected_calls = [call(metrics=ANY, step=s) for s in range(log_interval - 1, max_steps, log_interval)]