Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Releasing 1.8.2 [rebase & merge] #15698

Merged
merged 41 commits into from Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
80a6ca6
chlog update
ethanwharris Nov 11, 2022
42db30c
mypy: ignore mypy serve (#15631)
Borda Nov 10, 2022
ffc4f36
Enable Probot CheckGroup v3 (#15622)
carmocca Nov 11, 2022
da0a402
[App] Enable state broadcast with MultiNode (#15607)
tchaton Nov 11, 2022
1f0e08a
[App] Resolve race condition to move ui files (#15398)
tchaton Nov 11, 2022
942c860
Make sure save_dir can be empty str (#15638)
tshu-w Nov 11, 2022
d68daf3
[App] Resolve bi-directional queue bug (#15642)
tchaton Nov 11, 2022
75418b2
Refactor checkgroup to avoid duplicated checks (#15633)
carmocca Nov 11, 2022
8d86e5f
Delete unused TPU CI files (#15611)
carmocca Nov 11, 2022
fe302b2
Update run_ptl_script.py
williamFalcon Nov 11, 2022
27afcd3
[App] Accelerate Multi Node Startup Time (#15650)
tchaton Nov 11, 2022
733ec55
[App] Change app root / config path to be the `app.py` parent directo…
ethanwharris Nov 11, 2022
067bb4e
Add LightningLite to top level imports (#15502)
awaelchli Nov 12, 2022
91d7a38
Upgrade GPU CI to PyTorch 1.13 (#15583)
awaelchli Nov 12, 2022
2d2c902
Prevent artifactual "running from outside your current environment" e…
lantiga Nov 12, 2022
f3bb85c
Fix ddp_spawn -> ddp fallback logic when on LSF cluster (#15657)
Atharva-Phatak Nov 12, 2022
677a004
Include images with the mirror package (#15659)
Borda Nov 12, 2022
622d509
[App] Rename failed -> error in tables (#15608)
luca3rd Nov 12, 2022
da6b0ee
Improves the PanelFrontend docs (#14493)
MarcSkovMadsen Nov 12, 2022
031b044
add title and description to ServeGradio (#15639)
aniketmaurya Nov 12, 2022
0336574
Upgrade CI to PyTorch 1.13 (#15403)
Borda Nov 12, 2022
7e1d83d
Fixed Import in Docs For Multinode Trainer Name Which does Not Exist …
rlizzo Nov 13, 2022
a77e4e8
Validate the combination of CloudCompute and BuildConfig (#14929)
awaelchli Nov 13, 2022
f78dc66
add contributing guide to readme
edenlightning Nov 15, 2022
24f9f10
Add Python 3.10 badge (#15681)
function2-llx Nov 15, 2022
482fd58
fix(docs/app): broken links in the intermediate/web-ui section (#15691)
yurijmikhalevich Nov 15, 2022
27b7481
Bump google-github-actions/setup-gcloud from 0 to 1 (#15671)
dependabot[bot] Nov 15, 2022
c09bad7
Update onnxruntime requirement from <1.13.0 to <1.14.0 in /requiremen…
dependabot[bot] Nov 15, 2022
988a7d3
Bump google-github-actions/auth from 0 to 1 (#15675)
dependabot[bot] Nov 15, 2022
5fc7685
Docs: Fix import for scikit in XGBoost template (#15693)
narJH27 Nov 16, 2022
68be06e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2022
37a01c5
Enable Probot CheckGroup v4 (#15649)
carmocca Nov 16, 2022
5d8d164
docs 5/n (#15669)
williamFalcon Nov 17, 2022
dc8b5a0
fix(docs/app/lit_tabs): remove unused app_id, enable run instead (#15…
yurijmikhalevich Nov 17, 2022
35f38e0
[App] Mock missing package imports when launching in the cloud (#15711)
ethanwharris Nov 17, 2022
1b71e8b
Fix catimage import (#15712)
rlizzo Nov 17, 2022
8e4446f
Parse all lines in app file looking for shebangs to run commands. (#1…
rlizzo Nov 17, 2022
2d8812c
Bump coverage from 6.4.2 to 6.5.0 in /requirements (#15674)
dependabot[bot] Nov 17, 2022
685e391
remove unused random_split import from tutorial (#15716)
yiftachbeer Nov 17, 2022
7b54d04
Fix typo 'wether' (#15710)
dymil Nov 17, 2022
b5d0a41
releasing 1.8.2
Borda Nov 17, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 26 additions & 16 deletions .actions/assistant.py
@@ -1,8 +1,9 @@
import os
import re
import shutil
from itertools import chain
from os.path import dirname, isfile
from pathlib import Path
from pprint import pprint
from typing import Dict, List, Optional, Sequence, Tuple

import pkg_resources
Expand Down Expand Up @@ -65,6 +66,7 @@ def _replace_imports(lines: List[str], mapping: List[Tuple[str, str]]) -> List[s
def copy_replace_imports(
source_dir: str, source_imports: List[str], target_imports: List[str], target_dir: Optional[str] = None
) -> None:
"""Copy package content with import adjustments."""
print(f"Replacing imports: {locals()}")
assert len(source_imports) == len(target_imports), (
"source and target imports must have the same length, "
Expand All @@ -75,19 +77,27 @@ def copy_replace_imports(

ls = _retrieve_files(source_dir)
for fp in ls:
if fp.endswith(".py") or not fp.endswith(".pyc"):
with open(fp, encoding="utf-8") as fo:
try:
lines = fo.readlines()
except UnicodeDecodeError:
# a binary file, skip
print(f"Skipped replacing imports for {fp}")
continue
lines = _replace_imports(lines, list(zip(source_imports, target_imports)))
fp_new = fp.replace(source_dir, target_dir)
os.makedirs(os.path.dirname(fp_new), exist_ok=True)
with open(fp_new, "w", encoding="utf-8") as fo:
fo.writelines(lines)
fp_new = fp.replace(source_dir, target_dir)
_, ext = os.path.splitext(fp)
if ext in (".png", ".jpg", ".ico"):
os.makedirs(dirname(fp_new), exist_ok=True)
if not isfile(fp_new):
shutil.copy(fp, fp_new)
continue
elif ext in (".pyc",):
continue
# Try to parse everything else
with open(fp, encoding="utf-8") as fo:
try:
lines = fo.readlines()
except UnicodeDecodeError:
# a binary file, skip
print(f"Skipped replacing imports for {fp}")
continue
lines = _replace_imports(lines, list(zip(source_imports, target_imports)))
os.makedirs(os.path.dirname(fp_new), exist_ok=True)
with open(fp_new, "w", encoding="utf-8") as fo:
fo.writelines(lines)


def create_mirror_package(source_dir: str, package_mapping: Dict[str, str]) -> None:
Expand Down Expand Up @@ -129,7 +139,7 @@ def _prune_packages(req_file: str, packages: Sequence[str]) -> None:
req = list(pkg_resources.parse_requirements(ln_))[0]
if req.name not in packages:
final.append(line)
pprint(final)
print(final)
path.write_text("\n".join(final))

@staticmethod
Expand All @@ -147,7 +157,7 @@ def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL
def copy_replace_imports(
source_dir: str, source_import: str, target_import: str, target_dir: Optional[str] = None
) -> None:
"""Recursively replace imports in given folder."""
"""Copy package content with import adjustments."""
source_imports = source_import.strip().split(",")
target_imports = target_import.strip().split(",")
copy_replace_imports(source_dir, source_imports, target_imports, target_dir=target_dir)
Expand Down
17 changes: 11 additions & 6 deletions .azure/app-cloud-e2e.yml
Expand Up @@ -27,12 +27,17 @@ pr:
- "release/*"
paths:
include:
- ".azure/app-cloud-e2e.yml"
- "requirements/app/**"
- "src/lightning_app/**"
- "examples/app_*"
- "tests/tests_app_examples/**"
- ".actions/**"
- ".azure/app-cloud-e2e.yml"
- "requirements/app/**"
- "src/lightning_app/**"
- "tests/tests_app/**"
- "examples/app_*/**" # some tests_app tests call examples files
- "tests/tests_app_examples/**"
- "setup.py"
- ".actions/**"
- "!requirements/app/docs.txt"
- "!*.md"
- "!**/*.md"

# variables are automatically exported as environment variables so this will override pip's default cache dir
variables:
Expand Down
38 changes: 33 additions & 5 deletions .azure/gpu-benchmark.yml
Expand Up @@ -21,6 +21,11 @@ pr:
paths:
include:
- ".azure/gpu-benchmark.yml"
- "tests/tests_pytorch/benchmarks/**"
- "requirements/pytorch/**"
- "!requirements/pytorch/docs.txt"
- "!*.md"
- "!**/*.md"

schedules:
- cron: "0 0 * * *" # At the end of every day
Expand All @@ -37,7 +42,7 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
container:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.6.1"
options: "--gpus=all --shm-size=32g"
workspace:
clean: all
Expand All @@ -47,18 +52,41 @@ jobs:
- bash: |
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
cuda_ver=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
echo "##vso[task.setvariable variable=CUDA_VERSION_MM]$cuda_ver"
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${cuda_ver}/torch_stable.html"
displayName: 'set env. vars'

- bash: |
pip install -e .[strategies] --find-links ${TORCH_URL}
echo $CUDA_VISIBLE_DEVICES
echo $TORCH_URL
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
which python && which pip
python --version
pip --version
pip list
displayName: 'Image info & NVIDIA'

- bash: |
python .actions/assistant.py requirements_prune_pkgs --packages [horovod,bagua,colossalai] --req_files [requirements/pytorch/strategies.txt]

PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/base.txt ${PYTORCH_VERSION}
displayName: 'Adjust dependencies'

- bash: pip install -e .[dev,strategies,examples] --find-links ${TORCH_URL}
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
PACKAGE_NAME: "pytorch"
FREEZE_REQUIREMENTS: "1"
displayName: 'Install package'

- bash: |
set -e
pip list
python requirements/collect_env_details.py
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
displayName: 'Env details'

- bash: python -m pytest benchmarks -v --durations=0
env:
PL_RUNNING_BENCHMARKS: "1"
Expand Down
33 changes: 23 additions & 10 deletions .azure/gpu-tests-lite.yml
Expand Up @@ -21,12 +21,18 @@ pr:
paths:
include:
- ".azure/gpu-tests-lite.yml"
- "examples/lite/**"
- "examples/run_lite_examples.sh"
- "tests/tests_lite/run_standalone_*.sh"
- "tests/tests_pytorch/run_standalone_tests.sh" # used by Lite through a symlink
- "requirements/lite/**"
- "src/lightning_lite/**"
- "tests/tests_lite/**"
- "tests/tests_pytorch/run_standalone_tests.sh"
- "tests/tests_lite/run_standalone_tests.sh" # a symlink to the one above
- "setup.cfg" # includes pytest config
- ".actions/**"
- "!requirements/lite/docs.txt"
- "!*.md"
- "!**/*.md"

jobs:
- job: testing
Expand All @@ -38,7 +44,7 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
container:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.6.1"
# default shm size is 64m. Increase it to avoid:
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
options: "--gpus=all --shm-size=2gb"
Expand All @@ -48,6 +54,14 @@ jobs:

steps:
- bash: |
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
cuda_ver=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${cuda_ver}/torch_stable.html"
displayName: 'set env. vars'
- bash: |
echo $CUDA_VISIBLE_DEVICES
echo $TORCH_URL
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
Expand All @@ -58,22 +72,21 @@ jobs:
displayName: 'Image info & NVIDIA'
- bash: |
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
displayName: 'set visible devices'
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION}
python ./requirements/pytorch/adjust-versions.py requirements/lite/examples.txt ${PYTORCH_VERSION}
displayName: 'Adjust dependencies'
- bash: |
set -e
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install -e .[dev,strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
pip list
pip install -e .[dev,strategies,examples] --find-links ${TORCH_URL}
env:
PACKAGE_NAME: "lite"
FREEZE_REQUIREMENTS: "1"
displayName: 'Install package & dependencies'
- bash: |
set -e
echo $CUDA_VISIBLE_DEVICES
pip list
python requirements/collect_env_details.py
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
displayName: 'Env details'
Expand Down
23 changes: 16 additions & 7 deletions .azure/gpu-tests-pytorch.yml
Expand Up @@ -37,14 +37,20 @@ pr:
- "requirements/lite/**"
- "src/lightning_lite/**"
- ".actions/**"
- "!requirements/**/docs.txt"
- "!*.md"
- "!**/*.md"

jobs:
- job: testing
strategy:
matrix:
# TODO: package parametrization
'PyTorch - stable':
'PyTorch & strategies': # this uses torch 1.12 as not all strategies support 1.13 yet
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1"
scope: "strategies"
'PyTorch - latest':
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.6.1"
scope: ""
# how long to run the job before automatically cancelling
timeoutInMinutes: "80"
# how much time to give 'run always even if cancelled tasks' before stopping them
Expand Down Expand Up @@ -93,11 +99,11 @@ jobs:
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt ${PYTORCH_VERSION}
displayName: 'Adjust dependencies'

- bash: pip install -e .[strategies] -r requirements/pytorch/devel.txt -r requirements/pytorch/examples.txt --find-links ${TORCH_URL}
- bash: pip install -e .[dev,examples] --find-links ${TORCH_URL}
env:
PACKAGE_NAME: "pytorch"
FREEZE_REQUIREMENTS: "1"
displayName: 'Install package'
displayName: 'Install package & extras'

- bash: |
set -e
Expand All @@ -109,14 +115,17 @@ jobs:
CUDA_VERSION_COLOSSALAI=$(python -c "print([ver for ver in [11.3, 11.1] if $CUDA_VERSION_MM_COLOSSALAI >= ver][0])")
pip install "colossalai==0.1.10+torch${PYTORCH_VERSION_COLOSSALAI}cu${CUDA_VERSION_COLOSSALAI}" --find-links https://release.colossalai.org

pip list
displayName: 'Install dependencies'
pip install -r requirements/pytorch/strategies.txt --find-links ${TORCH_URL}

python requirements/pytorch/check-avail-strategies.py
condition: eq(variables['scope'], 'strategies')
displayName: 'Install strategies'

- bash: |
set -e
pip list
python requirements/collect_env_details.py
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
python requirements/pytorch/check-avail-strategies.py
python requirements/pytorch/check-avail-extras.py
displayName: 'Env details'

Expand Down
3 changes: 3 additions & 0 deletions .azure/hpu-tests.yml
Expand Up @@ -26,6 +26,9 @@ pr:
- "tests/tests_pytorch/**"
- "setup.cfg" # includes pytest config
- ".actions/**"
- "!requirements/**/docs.txt"
- "!*.md"
- "!**/*.md"

jobs:
- job: testing
Expand Down
3 changes: 3 additions & 0 deletions .azure/ipu-tests.yml
Expand Up @@ -23,6 +23,9 @@ pr:
- "tests/tests_pytorch/**"
- "setup.cfg" # includes pytest config
- ".actions/**"
- "!requirements/**/docs.txt"
- "!*.md"
- "!**/*.md"

variables:
- name: poplar_sdk
Expand Down