Skip to content

Commit

Permalink
Separate arm64 and amd64 docker builds (#125617)
Browse files Browse the repository at this point in the history
Fixes #125094

Please note: Docker CUDa 12.4 failure is existing issue, related to docker image not being available on gitlab:
```
docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: not found
```
 https://github.com/pytorch/pytorch/actions/runs/8974959068/job/24648540236?pr=125617

Here is the reference issue: https://gitlab.com/nvidia/container-images/cuda/-/issues/225

Tracked on our side: pytorch/builder#1811
Pull Request resolved: #125617
Approved by: https://github.com/huydhn, https://github.com/malfet
  • Loading branch information
atalman authored and pytorchmergebot committed May 7, 2024
1 parent 5dee462 commit b29d77b
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 6 deletions.
14 changes: 13 additions & 1 deletion .github/scripts/generate_docker_release_matrix.py
Expand Up @@ -21,6 +21,8 @@

def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
ret: List[Dict[str, str]] = []
# CUDA amd64 Docker images are available as both runtime and devel while
# CPU arm64 image is only available as runtime.
for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
for image in DOCKER_IMAGE_TYPES:
ret.append(
Expand All @@ -31,9 +33,19 @@ def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
cuda
],
"image_type": image,
"platform": "linux/arm64,linux/amd64",
"platform": "linux/amd64",
}
)
ret.append(
{
"cuda": "cpu",
"cuda_full_version": "",
"cudnn_version": "",
"image_type": "runtime",
"platform": "linux/arm64",
}
)

return {"include": ret}


Expand Down
21 changes: 16 additions & 5 deletions .github/workflows/docker-release.yml
Expand Up @@ -7,6 +7,7 @@ on:
- Dockerfile
- docker.Makefile
- .github/workflows/docker-release.yml
- .github/scripts/generate_docker_release_matrix.py
push:
branches:
- nightly
Expand Down Expand Up @@ -129,17 +130,27 @@ jobs:
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
run: |
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
CUDA_SUFFIX="-cu${CUDA_VERSION}"
if [[ ${CUDA_VERSION_SHORT} == "cpu" ]]; then
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
CUDA_SUFFIX=""
fi
PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
python -c 'import torch; print(torch.version.git_version[:7],end="")')
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
# Please note, here we ned to pin specific verison of CUDA as with latest label
if [[ ${CUDA_VERSION_SHORT} == "12.1" ]]; then
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}" \
ghcr.io/pytorch/pytorch-nightly:latest
docker push ghcr.io/pytorch/pytorch-nightly:latest
fi
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
ghcr.io/pytorch/pytorch-nightly:latest
docker push ghcr.io/pytorch/pytorch-nightly:latest
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
18 changes: 18 additions & 0 deletions docker.Makefile
Expand Up @@ -83,6 +83,22 @@ devel-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CU
devel-push:
$(DOCKER_PUSH)

ifeq ("$(CUDA_VERSION_SHORT)","cpu")

.PHONY: runtime-image
runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
runtime-image:
$(DOCKER_BUILD)

.PHONY: runtime-push
runtime-push: BASE_IMAGE := $(BASE_RUNTIME)
runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
runtime-push:
$(DOCKER_PUSH)

else

.PHONY: runtime-image
runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime
Expand All @@ -95,6 +111,8 @@ runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(
runtime-push:
$(DOCKER_PUSH)

endif

.PHONY: clean
clean:
-docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME))

0 comments on commit b29d77b

Please sign in to comment.