Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build macOS wheels and integration tests #197

Merged
merged 4 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-integration-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Integration tests (Main)

on:
push:
branches: [ "main", "feature/*" ]
branches: [ "main", "feature/*", "workflow/*" ]
merge_group:
types: [ "checks_requested" ]

Expand Down
12 changes: 2 additions & 10 deletions .github/workflows/python-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down Expand Up @@ -75,16 +71,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/python-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ env:
jobs:
integration-test:
name: Integration tests
runs-on: ubuntu-22.04
runs-on: ${{ matrix.runner }}
environment: ${{ inputs.environment }}
strategy:
fail-fast: false
matrix:
runner: [ubuntu-22.04, macos-13]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
test-run:
- name: "S3"
Expand Down Expand Up @@ -53,17 +54,13 @@ jobs:
aws-region: ${{ vars.S3_REGION }}

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: dtolnay/rust-toolchain@stable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


- name: Restore Cargo cache
id: restore-cargo-cache
uses: actions/cache/restore@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down Expand Up @@ -114,7 +111,6 @@ jobs:
if: inputs.environment != 'integration-tests'
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand Down
24 changes: 7 additions & 17 deletions .github/workflows/rust-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
submodules: true
uses: dtolnay/rust-toolchain@stable

- name: Run cargo deny
uses: EmbarkStudios/cargo-deny-action@v1
Expand All @@ -48,17 +44,12 @@ jobs:
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
components: clippy
override: true
uses: dtolnay/rust-toolchain@stable

- name: Cargo cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
Expand All @@ -69,18 +60,17 @@ jobs:
run: cargo clippy --all-targets --all-features --manifest-path s3torchconnectorclient/Cargo.toml

tests:
runs-on: ubuntu-22.04
runs-on: ${{ matrix.runner }}
name: Rust tests
strategy:
matrix:
runner: [ubuntu-22.04, macos-13]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up stable Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
submodules: true
uses: dtolnay/rust-toolchain@stable

- name: Build Rust tests
run: cargo test --no-default-features --no-run --manifest-path s3torchconnectorclient/Cargo.toml
Expand Down
36 changes: 29 additions & 7 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,33 @@ jobs:
uses: ./.github/workflows/generate_third_party_licenses.yml

build_wheels:
name: Build wheels for ${{ matrix.build_prefix }} - ${{ matrix.builder.arch }}
name: Wheels for ${{ matrix.python }} - ${{ matrix.builder.kind }} - ${{ matrix.builder.arch }}
runs-on: ${{ matrix.builder.runner }}
needs: generate_third_party_licenses
strategy:
matrix:
build_prefix:
python:
- cp38
- cp39
- cp310
- cp311
- cp312
builder:
- runner: codebuild-${{ vars.CODEBUILD_PROJECT_NAME }}-${{ github.run_id }}-${{ github.run_attempt }}-ubuntu-7.0-large
kind: manylinux
arch: x86_64
- runner: codebuild-${{ vars.CODEBUILD_PROJECT_NAME }}-${{ github.run_id }}-${{ github.run_attempt }}-arm-3.0-large
kind: manylinux
arch: aarch64
# - runner: ubuntu-20.04
# kind: manylinux
# arch: x86_64
- runner: macos-13
kind: macosx
arch: x86_64
- runner: macos-14
kind: macosx
arch: arm64
permissions:
id-token: write
contents: read
Expand All @@ -62,16 +71,29 @@ jobs:
run: |
mv NOTICE_DEFAULT THIRD-PARTY-LICENSES

# actions/setup-python requires /Users/runner/hostedtoolcache to exist to work properly
# with macosx due to fixed shared library path
# https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#macos
- name: Create hostedtoolcache dir for macosx
if: ${{ matrix.builder.kind == 'macosx' }}
run: |
mkdir -p /Users/runner/hostedtoolcache

# actions/setup-python doesn't yet support ARM
# https://github.com/actions/setup-python/issues/678
- if: ${{ matrix.builder.arch != 'aarch64' }}
uses: actions/setup-python@v4
- name: Setup Python
if: ${{ matrix.builder.arch != 'aarch64' }}
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"

- name: Install pipx
run: |
python -m pip install --upgrade pipx
which python
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these which lines still wanted?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used them for debugging, but left them on purpose, as they helped to understand the problem, and I am expecting changes for pip setup in the upcoming runner updates.

which pip
python -m pip install --upgrade pip
python -m pip install --upgrade pipx
python -m pipx ensurepath

# Run cibuildwheel manually, as the current runner uses setup-python
# https://github.com/pypa/cibuildwheel/issues/1623
Expand All @@ -80,7 +102,7 @@ jobs:
cibuildwheel
"s3torchconnectorclient"
--output-dir "wheelhouse"
--only "${{ matrix.build_prefix }}-manylinux_${{ matrix.builder.arch }}"
--only "${{ matrix.python }}-${{ matrix.builder.kind }}_${{ matrix.builder.arch }}"
2>&1
shell: bash

Expand Down
9 changes: 8 additions & 1 deletion s3torchconnector/tst/e2e/test_e2e_s3_lightning_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from models.lightning_transformer import LightningTransformer, L


LIGHTNING_ACCELERATOR = "cpu"


def test_save_and_load_checkpoint(checkpoint_directory):
tensor = torch.rand(3, 10, 10)
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
Expand Down Expand Up @@ -77,7 +80,7 @@ def test_load_trained_checkpoint(checkpoint_directory):
dataset = WikiText2(data_dir=Path(f"/tmp/data/{nonce}"))
dataloader = DataLoader(dataset, num_workers=3)
model = LightningTransformer(vocab_size=dataset.vocab_size)
trainer = L.Trainer(fast_dev_run=2)
trainer = L.Trainer(accelerator=LIGHTNING_ACCELERATOR, fast_dev_run=2)
trainer.fit(model=model, train_dataloaders=dataloader)
checkpoint_name = "lightning_module_training_checkpoint.pt"
s3_uri = f"{checkpoint_directory.s3_uri}{checkpoint_name}"
Expand All @@ -96,6 +99,7 @@ def test_compatibility_with_trainer_plugins(checkpoint_directory):
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
_verify_user_agent(s3_lightning_checkpoint)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[s3_lightning_checkpoint],
max_epochs=1,
Expand Down Expand Up @@ -130,6 +134,7 @@ def test_compatibility_with_checkpoint_callback(checkpoint_directory):
enable_version_counter=True,
)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
plugins=[s3_lightning_checkpoint],
callbacks=[checkpoint_callback],
min_epochs=4,
Expand Down Expand Up @@ -163,6 +168,7 @@ def test_compatibility_with_async_checkpoint_io(checkpoint_directory):
async_s3_lightning_checkpoint = AsyncCheckpointIO(s3_lightning_checkpoint)

trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[async_s3_lightning_checkpoint],
min_epochs=4,
Expand All @@ -189,6 +195,7 @@ def test_compatibility_with_lightning_checkpoint_load(checkpoint_directory):
model = LightningTransformer(vocab_size=dataset.vocab_size)
s3_lightning_checkpoint = S3LightningCheckpoint(region=checkpoint_directory.region)
trainer = L.Trainer(
accelerator=LIGHTNING_ACCELERATOR,
default_root_dir=checkpoint_directory.s3_uri,
plugins=[s3_lightning_checkpoint],
max_epochs=1,
Expand Down