Skip to content

Commit

Permalink
Build macOS wheels and integration tests (#197)
Browse files Browse the repository at this point in the history
* Build macOS wheels and integration tests

This is pretty simple, mostly just adding macos-13 (x86) and macos-14
(arm64) runners to the matrix. We also need to explicitly disable MPS
for PyTorch to run correctly on the memory-limited GHA runners.

Signed-off-by: James Bornholt <bornholt@amazon.com>

* Update Rust setup actions

* Update python setup action and temporarily disable macos-14

* Enable macos-14 arm64

---------

Signed-off-by: James Bornholt <bornholt@amazon.com>
Co-authored-by: dnnanuti <dnnanuti@amazon.com>
  • Loading branch information
jamesbornholt and dnanuti committed Apr 11, 2024
1 parent 640a154 commit 23806df
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-integration-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Integration tests (Main)

on:
push:
branches: [ "main", "feature/*" ]
branches: [ "main", "feature/*", "workflow/*" ]
merge_group:
types: [ "checks_requested" ]

Expand Down
12 changes: 2 additions & 10 deletions .github/workflows/python-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down Expand Up @@ -75,16 +71,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/python-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ env:
jobs:
integration-test:
name: Integration tests
runs-on: ubuntu-22.04
runs-on: ${{ matrix.runner }}
environment: ${{ inputs.environment }}
strategy:
fail-fast: false
matrix:
runner: [ubuntu-22.04, macos-13]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
test-run:
- name: "S3"
Expand Down Expand Up @@ -53,17 +54,13 @@ jobs:
aws-region: ${{ vars.S3_REGION }}

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable

- name: Restore Cargo cache
id: restore-cargo-cache
uses: actions/cache/restore@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down Expand Up @@ -114,7 +111,6 @@ jobs:
if: inputs.environment != 'integration-tests'
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down
24 changes: 7 additions & 17 deletions .github/workflows/rust-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
submodules: true
uses: dtolnay/rust-toolchain@stable

- name: Run cargo deny
uses: EmbarkStudios/cargo-deny-action@v1
Expand All @@ -48,17 +44,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
components: clippy
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand All @@ -69,18 +60,17 @@ jobs:
run: cargo clippy --all-targets --all-features --manifest-path s3torchconnectorclient/Cargo.toml

tests:
runs-on: ubuntu-22.04
runs-on: ${{ matrix.runner }}
name: Rust tests
strategy:
matrix:
runner: [ubuntu-22.04, macos-13]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
submodules: true
uses: dtolnay/rust-toolchain@stable

- name: Build Rust tests
run: cargo test --no-default-features --no-run --manifest-path s3torchconnectorclient/Cargo.toml
Expand Down
36 changes: 29 additions & 7 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,33 @@ jobs:
uses: ./.github/workflows/generate_third_party_licenses.yml

build_wheels:
name: Build wheels for ${{ matrix.build_prefix }} - ${{ matrix.builder.arch }}
name: Wheels for ${{ matrix.python }} - ${{ matrix.builder.kind }} - ${{ matrix.builder.arch }}
runs-on: ${{ matrix.builder.runner }}
needs: generate_third_party_licenses
strategy:
matrix:
build_prefix:
python:
- cp38
- cp39
- cp310
- cp311
- cp312
builder:
- runner: codebuild-${{ vars.CODEBUILD_PROJECT_NAME }}-${{ github.run_id }}-${{ github.run_attempt }}-ubuntu-7.0-large
kind: manylinux
arch: x86_64
- runner: codebuild-${{ vars.CODEBUILD_PROJECT_NAME }}-${{ github.run_id }}-${{ github.run_attempt }}-arm-3.0-large
kind: manylinux
arch: aarch64
# - runner: ubuntu-20.04
# kind: manylinux
# arch: x86_64
- runner: macos-13
kind: macosx
arch: x86_64
- runner: macos-14
kind: macosx
arch: arm64
permissions:
id-token: write
contents: read
Expand All @@ -62,16 +71,29 @@ jobs:
run: |
mv NOTICE_DEFAULT THIRD-PARTY-LICENSES
# actions/setup-python requires /Users/runner/hostedtoolcache to exist to work properly
# with macosx due to fixed shared library path
# https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#macos
- name: Create hostedtoolcache dir for macosx
if: ${{ matrix.builder.kind == 'macosx' }}
run: |
mkdir -p /Users/runner/hostedtoolcache
# actions/setup-python doesn't yet support ARM
# https://github.com/actions/setup-python/issues/678
- if: ${{ matrix.builder.arch != 'aarch64' }}
uses: actions/setup-python@v4
- name: Setup Python
if: ${{ matrix.builder.arch != 'aarch64' }}
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"

- name: Install pipx
run: |
python -m pip install --upgrade pipx
which python
which pip
python -m pip install --upgrade pip
python -m pip install --upgrade pipx
python -m pipx ensurepath
# Run cibuildwheel manually, as the current runner uses setup-python
# https://github.com/pypa/cibuildwheel/issues/1623
Expand All @@ -80,7 +102,7 @@ jobs:
cibuildwheel
"s3torchconnectorclient"
--output-dir "wheelhouse"
--only "${{ matrix.build_prefix }}-manylinux_${{ matrix.builder.arch }}"
--only "${{ matrix.python }}-${{ matrix.builder.kind }}_${{ matrix.builder.arch }}"
2>&1
shell: bash
Expand Down
9 changes: 8 additions & 1 deletion s3torchconnector/tst/e2e/test_e2e_s3_lightning_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from models.lightning_transformer import LightningTransformer, L


LIGHTNING_ACCELERATOR = "cpu"


def test_save_and_load_checkpoint(checkpoint_directory):
tensor = torch.rand(3, 10, 10)
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
Expand Down Expand Up @@ -77,7 +80,7 @@ def test_load_trained_checkpoint(checkpoint_directory):
dataset = WikiText2(data_dir=Path(f"/tmp/data/{nonce}"))
dataloader = DataLoader(dataset, num_workers=3)
model = LightningTransformer(vocab_size=dataset.vocab_size)
trainer = L.Trainer(fast_dev_run=2)
trainer = L.Trainer(accelerator=LIGHTNING_ACCELERATOR, fast_dev_run=2)
trainer.fit(model=model, train_dataloaders=dataloader)
checkpoint_name = "lightning_module_training_checkpoint.pt"
s3_uri = f"{checkpoint_directory.s3_uri}{checkpoint_name}"
Expand All @@ -96,6 +99,7 @@ def test_compatibility_with_trainer_plugins(checkpoint_directory):
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
_verify_user_agent(s3_lightning_checkpoint)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[s3_lightning_checkpoint],
max_epochs=1,
Expand Down Expand Up @@ -130,6 +134,7 @@ def test_compatibility_with_checkpoint_callback(checkpoint_directory):
enable_version_counter=True,
)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
plugins=[s3_lightning_checkpoint],
callbacks=[checkpoint_callback],
min_epochs=4,
Expand Down Expand Up @@ -163,6 +168,7 @@ def test_compatibility_with_async_checkpoint_io(checkpoint_directory):
async_s3_lightning_checkpoint = AsyncCheckpointIO(s3_lightning_checkpoint)

trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[async_s3_lightning_checkpoint],
min_epochs=4,
Expand All @@ -189,6 +195,7 @@ def test_compatibility_with_lightning_checkpoint_load(checkpoint_directory):
model = LightningTransformer(vocab_size=dataset.vocab_size)
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[s3_lightning_checkpoint],
max_epochs=1,
Expand Down

0 comments on commit 23806df

Please sign in to comment.