diff --git a/.circleci/scripts/binary_checkout.sh b/.circleci/scripts/binary_checkout.sh index f06eb8a808..b634f5c9a2 100755 --- a/.circleci/scripts/binary_checkout.sh +++ b/.circleci/scripts/binary_checkout.sh @@ -41,8 +41,8 @@ echo "export BUILDER_ROOT=${BUILDER_ROOT}" >> ${BASH_ENV} retry git clone --depth 1 https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT" # Removed checking out pytorch/pytorch using CIRCLE_PR_NUMBER and CIRCLE_SHA1 as # those environment variables are tied to the host repo where the build is being -# triggered. -retry git submodule update --init --recursive --jobs 0 +# triggered. +retry git submodule update --init --recursive pushd "$PYTORCH_ROOT" echo "Using Pytorch from " git --no-pager log --max-count 1 diff --git a/.github/actions/validate-binary/action.yml b/.github/actions/validate-binary/action.yml deleted file mode 100644 index 76531010ad..0000000000 --- a/.github/actions/validate-binary/action.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: 'validate-binary' -description: 'Binary Conda or Wheel Validation for Linux and MacOS' -inputs: - gpu_arch_type: - description: 'GPU arch type' - required: true - default: 'cpu' - gpu_arch_ver: - description: 'GPU arch version' - required: true - default: 'cpu' - installation: - description: 'Installation instructions' - required: true - default: '' - python_version: - description: 'Python version' - required: true - default: '3.9' - target_os: - description: 'Target OS linux or macos' - required: false - default: 'linux' -runs: - using: "composite" - steps: - - name: Checkout PyTorch builder - uses: actions/checkout@v2 - - name: Check nvidia smi - if: ${{ inputs.gpu_arch_type == 'cuda' }} - shell: bash - run: | - nvidia-smi - - name: Install Conda Linux - if: ${{ inputs.target_os == 'linux' }} - uses: conda-incubator/setup-miniconda@v2 - with: - python-version: ${{ inputs.python_version }} - auto-update-conda: true - miniconda-version: "latest" - activate-environment: testenv - - name: Install 
Conda MacOS - if: ${{ inputs.target_os == 'macos' }} - uses: pytorch/test-infra/.github/actions/setup-miniconda@main - - name: Install PyTorch and run tests - shell: bash - env: - GPU_ARCH_VER: ${{ inputs.gpu_arch_ver }} - GPU_ARCH_TYPE: ${{ inputs.gpu_arch_type }} - INSTALLATION: ${{ inputs.installation }} - ENV_NAME: conda-env-${{ github.run_id }} - run: | - set -ex - conda create -yp ${ENV_NAME} python=${{ inputs.python_version }} numpy - conda run -p ${ENV_NAME} $INSTALLATION - conda run -p ${ENV_NAME} python3 ./test/smoke_test/smoke_test.py - conda env remove -p ${ENV_NAME} diff --git a/.github/actions/validate-windows-binary/action.yml b/.github/actions/validate-windows-binary/action.yml deleted file mode 100644 index 7214a813bd..0000000000 --- a/.github/actions/validate-windows-binary/action.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: 'validate-windows-binary' -description: 'Windows Binary Conda or Wheel Validation' -inputs: - gpu_arch_type: - description: 'GPU arch type' - required: true - default: 'cpu' - gpu_arch_ver: - description: 'GPU arch version' - required: true - default: 'cpu' - installation: - description: 'Installation instructions' - required: true - default: '' - python_version: - description: 'Python version' - required: true - default: '3.9' -runs: - using: "composite" - steps: - - name: Check nvidia smi - if: ${{ inputs.gpu_arch_type == 'cuda' }} - shell: powershell - run: | - nvidia-smi - - name: Install conda - if: ${{ inputs.gpu_arch_type == 'cpu' }} - uses: conda-incubator/setup-miniconda@v2 - with: - python-version: ${{ inputs.python_version }} - auto-update-conda: true - miniconda-version: "latest" - activate-environment: conda-env-${{ github.run_id }} - - name: Conda Install pytorch and smoke test - shell: powershell - env: - GPU_ARCH_VER: ${{ inputs.gpu_arch_ver }} - GPU_ARCH_TYPE: ${{ inputs.gpu_arch_type }} - CUDA_VER: ${{ inputs.desired_cuda }} - run: | - conda install numpy pillow python=${{ inputs.python_version }} - $install = 
'${{ inputs.installation }}' - Invoke-Expression $install - python ./test/smoke_test/smoke_test.py diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh new file mode 100755 index 0000000000..6ce3dd70b3 --- /dev/null +++ b/.github/scripts/validate_binaries.sh @@ -0,0 +1,52 @@ +# Validates a PyTorch binary: libtorch archives are only downloaded and unzipped; every other package type is installed into a temporary conda env, smoke-tested, and the env is removed. +# Reads ENV_NAME, TARGET_OS and the MATRIX_* variables; none of them are set here, the caller must export them. +if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then + curl "${MATRIX_INSTALLATION}" -o libtorch.zip + unzip libtorch.zip +else + # Special case for Python 3.11 + if [[ ${MATRIX_PYTHON_VERSION} == '3.11' ]]; then + conda create -y -n "${ENV_NAME}" "python=${MATRIX_PYTHON_VERSION}" + conda activate "${ENV_NAME}" + + INSTALLATION=${MATRIX_INSTALLATION/"-c pytorch"/"-c malfet -c pytorch"} + INSTALLATION=${INSTALLATION/"pytorch-cuda"/"pytorch-${MATRIX_CHANNEL}::pytorch-cuda"} + INSTALLATION=${INSTALLATION/"conda install"/"conda install -y"} + + eval "$INSTALLATION" + python ./test/smoke_test/smoke_test.py + conda deactivate + conda env remove -n "${ENV_NAME}" + else + + + + # Special case Pypi installation package, only applicable to linux non-nightly CUDA 11.7 builds, wheel package + if [[ ${TARGET_OS} == 'linux' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' && ${MATRIX_CHANNEL} != 'nightly' ]]; then + conda create -yp "${ENV_NAME}_pypi" "python=${MATRIX_PYTHON_VERSION}" numpy ffmpeg + INSTALLATION_PYPI=${MATRIX_INSTALLATION/"cu117"/"cu117_pypi_cudnn"} + INSTALLATION_PYPI=${INSTALLATION_PYPI/"torchvision torchaudio"/""} + INSTALLATION_PYPI=${INSTALLATION_PYPI/"index-url"/"extra-index-url"} + conda run -p "${ENV_NAME}_pypi" ${INSTALLATION_PYPI} # INSTALLATION_PYPI is left unquoted on purpose so it splits into command words + conda run -p "${ENV_NAME}_pypi" python ./test/smoke_test/smoke_test.py --package torchonly + conda deactivate # NOTE(review): no env is activated in this branch (conda run -p is used); confirm this call is intended + conda env remove -p "${ENV_NAME}_pypi" + fi + + # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 + conda create -y -n "${ENV_NAME}" "python=${MATRIX_PYTHON_VERSION}" numpy ffmpeg + conda activate "${ENV_NAME}" + INSTALLATION=${MATRIX_INSTALLATION/"conda 
install"/"conda install -y"} + eval "$INSTALLATION" + + if [[ ${TARGET_OS} == 'linux' ]]; then + export CONDA_LIBRARY_PATH="$(dirname "$(which python)")/../lib" + export LD_LIBRARY_PATH=$CONDA_LIBRARY_PATH:$LD_LIBRARY_PATH + "${PWD}/check_binary.sh" + fi + + python ./test/smoke_test/smoke_test.py + conda deactivate + conda env remove -n "${ENV_NAME}" + fi +fi diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml index 92567d7bd3..43626533e6 100644 --- a/.github/workflows/build-conda-images.yml +++ b/.github/workflows/build-conda-images.yml @@ -19,19 +19,19 @@ env: DOCKER_BUILDKIT: 1 DOCKER_ID: ${{ secrets.DOCKER_ID }} DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} - WITH_PUSH: ${{ github.event_name == 'push' }} + WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} jobs: build-docker: - runs-on: linux.2xlarge + runs-on: ubuntu-22.04 strategy: matrix: - cuda_version: ["10.2", "11.3", "11.5", "11.6", "11.7", "cpu"] + cuda_version: ["11.6", "11.7", "11.8", "cpu"] env: CUDA_VERSION: ${{ matrix.cuda_version }} steps: - name: Checkout PyTorch builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index 9526434e4e..49069557a9 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -21,20 +21,20 @@ env: DOCKER_BUILDKIT: 1 DOCKER_ID: ${{ secrets.DOCKER_ID }} DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} - WITH_PUSH: ${{ github.event_name == 'push' }} + WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} jobs: build-docker-cuda: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 strategy: matrix: - cuda_version: ["11.7", "11.6", "11.5", "11.3", "10.2"] + cuda_version: ["11.8", "11.7", "11.6"] env: GPU_ARCH_TYPE: cuda GPU_ARCH_VERSION: ${{ 
matrix.cuda_version }} steps: - name: Checkout PyTorch builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then @@ -44,16 +44,16 @@ jobs: run: | libtorch/build_docker.sh build-docker-rocm: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 strategy: matrix: - rocm_version: ["5.1.1", "5.2"] + rocm_version: ["5.3", "5.4.2"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} steps: - name: Checkout PyTorch - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then @@ -63,10 +63,10 @@ jobs: run: | libtorch/build_docker.sh build-docker-cpu: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 steps: - name: Checkout PyTorch - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then diff --git a/.github/workflows/build-llvm-images.yml b/.github/workflows/build-llvm-images.yml index 5b24abf5fa..a89230891b 100644 --- a/.github/workflows/build-llvm-images.yml +++ b/.github/workflows/build-llvm-images.yml @@ -17,7 +17,7 @@ env: DOCKER_BUILDKIT: 1 DOCKER_ID: ${{ secrets.DOCKER_ID }} DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} - WITH_PUSH: ${{ github.event_name == 'push' }} + WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} FORCE_PUSH: yes jobs: @@ -25,7 +25,7 @@ jobs: runs-on: linux.2xlarge steps: - name: Checkout PyTorch builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then diff --git a/.github/workflows/build-magma-linux.yml b/.github/workflows/build-magma-linux.yml index 655d02c6ee..eb1d67a70a 100644 --- a/.github/workflows/build-magma-linux.yml +++ b/.github/workflows/build-magma-linux.yml @@ -30,10 +30,10 @@ jobs: runs-on: linux.2xlarge strategy: matrix: - cuda_version: ["117", "116", "115"] + cuda_version: 
["118", "117", "116"] steps: - name: Checkout PyTorch builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build Magma Cuda working-directory: magma run: | @@ -54,7 +54,7 @@ jobs: run: | conda install -y conda-build anaconda-client - name: Push MAGMA to anaconda - if: ${{ github.event_name == 'push' }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} run: | anaconda --token $ANACONDA_TOKEN upload -u pytorch --force magma/output/linux-64/magma-cuda*.bz2 env: diff --git a/.github/workflows/build-magma-windows.yml b/.github/workflows/build-magma-windows.yml index 87fdb22c5a..5ad6ba29a6 100644 --- a/.github/workflows/build-magma-windows.yml +++ b/.github/workflows/build-magma-windows.yml @@ -17,14 +17,14 @@ jobs: runs-on: windows-2019 strategy: matrix: - cuda_version: ["117", "116"] + cuda_version: ["118", "117", "116"] config: ["Release", "Debug"] env: CUDA_VERSION: ${{ matrix.cuda_version }} CONFIG: ${{ matrix.config }} steps: - name: Checkout pytorch/builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Enable MSVC dev commands to enable cl.exe # FYI incompatible with shell: bash uses: ilammy/msvc-dev-cmd@dd5e2fa0a7de1e7929605d9ecc020e749d9856a3 - name: Install CUDA Toolkit @@ -36,9 +36,9 @@ jobs: with: path: magma_*_cuda*_*.7z push-windows-magma: - if: ${{ github.event_name == 'push' }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} environment: magma - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 needs: build-windows-magma steps: - name: Download all artifacts diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index b62507bbd0..153f501bc7 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -7,12 +7,14 @@ on: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_cxx11-abi - manywheel/build_docker.sh - 'common/*' 
pull_request: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_cxx11-abi - 'common/*' - manywheel/build_docker.sh @@ -21,20 +23,20 @@ env: DOCKER_BUILDKIT: 1 DOCKER_ID: ${{ secrets.DOCKER_ID }} DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} - WITH_PUSH: ${{ github.event_name == 'push' }} + WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} jobs: build-docker-cuda: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 strategy: matrix: - cuda_version: ["11.7", "11.6", "11.5", "11.3", "10.2"] + cuda_version: ["11.8", "11.7", "11.6"] env: GPU_ARCH_TYPE: cuda GPU_ARCH_VERSION: ${{ matrix.cuda_version }} steps: - name: Checkout PyTorch builder - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then @@ -44,16 +46,16 @@ jobs: run: | manywheel/build_docker.sh build-docker-rocm: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 strategy: matrix: - rocm_version: ["5.1.1", "5.2"] + rocm_version: ["5.3", "5.4.2"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} steps: - name: Checkout PyTorch - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then @@ -63,10 +65,25 @@ jobs: run: | manywheel/build_docker.sh build-docker-cpu: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 steps: - name: Checkout PyTorch - uses: actions/checkout@v2 + uses: actions/checkout@v3 + - name: Authenticate if WITH_PUSH + run: | + if [[ "${WITH_PUSH}" == true ]]; then + echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin + fi + - name: Build Docker Image + run: | + manywheel/build_docker.sh + build-docker-cpu-cxx11-abi: + runs-on: ubuntu-22.04 + env: + GPU_ARCH_TYPE: cpu-cxx11-abi + steps: + - name: Checkout PyTorch + uses: actions/checkout@v3 - name: Authenticate if WITH_PUSH run: | if [[ "${WITH_PUSH}" == true ]]; then diff --git 
a/.github/workflows/test-validate-domain-library.yml b/.github/workflows/test-validate-domain-library.yml new file mode 100644 index 0000000000..6c651e709a --- /dev/null +++ b/.github/workflows/test-validate-domain-library.yml @@ -0,0 +1,19 @@ +name: Test validate domain library + +on: + pull_request: + paths: + - .github/workflows/validate-domain-library.yml + - .github/workflows/test-validate-domain-library.yml + workflow_dispatch: + +jobs: + test-validate-domain-library: + uses: ./.github/workflows/validate-domain-library.yml + with: + package_type: "conda,wheel" + os: "all" + channel: "release" + repository: "pytorch/builder" + ref: main + smoke_test: "echo test" diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml new file mode 100644 index 0000000000..4ae2605386 --- /dev/null +++ b/.github/workflows/validate-binaries.yml @@ -0,0 +1,90 @@ +name: Validate binaries + +# A reusable workflow that triggers a set of jobs that perform a smoke test / validation of pytorch binaries. +# Optionally restricts validation to the specified OS and channel. 
+# For the details about parameter values, see: +# pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main +# For an example of the `workflow_call` usage see: +# https://github.com/pytorch/builder/pull/1144 +on: + workflow_call: + inputs: + os: + description: "Operating system to generate for (linux, windows, macos, macos-arm64)" + required: true + type: string + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + limit-win-builds: + description: "Limit windows builds to single python/cuda config" + default: "disable" + type: string + workflow_dispatch: + inputs: + os: + description: "Operating system to generate for (linux, windows, macos, macos-arm64)" + required: true + type: choice + default: all + options: + - windows + - linux + - macos + - all + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + default: all + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + limit-win-builds: + description: "Limit windows builds to single python/cuda config" + default: "disable" + required: false + type: string + +jobs: + win: + if: inputs.os == 'windows' || inputs.os == 'all' + uses: ./.github/workflows/validate-windows-binaries.yml + with: + channel: ${{ inputs.channel }} + ref: ${{ inputs.ref || github.ref }} + limit-win-builds: ${{ inputs.limit-win-builds }} + + linux: + if: inputs.os == 'linux' || inputs.os == 'all' + uses: ./.github/workflows/validate-linux-binaries.yml + with: + channel: ${{ inputs.channel }} + ref: ${{ inputs.ref || github.ref }} + + mac: + if: inputs.os == 'macos' || inputs.os == 'all' + uses: ./.github/workflows/validate-macos-binaries.yml + with: + channel: ${{ inputs.channel }} + ref: ${{ inputs.ref 
|| github.ref }} + + mac-arm64: + if: inputs.os == 'macos' || inputs.os == 'all' + uses: ./.github/workflows/validate-macos-arm64-binaries.yml + with: + channel: ${{ inputs.channel }} + ref: ${{ inputs.ref || github.ref }} diff --git a/.github/workflows/validate-domain-library.yml b/.github/workflows/validate-domain-library.yml new file mode 100644 index 0000000000..149b8335b4 --- /dev/null +++ b/.github/workflows/validate-domain-library.yml @@ -0,0 +1,154 @@ +name: Validate domain library + +# A reusable workflow that triggers a set of jobs that perform a smoke test / validation of pytorch binaries. +# Optionally restricts validation to the specified OS and channel. +# For the details about parameter values, see: +# pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main +on: + workflow_call: + inputs: + os: + description: "Operating system to generate for (linux, windows, macos, macos-arm64)" + required: false + type: string + default: "all" + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + package_type: + description: "Package type (conda, wheel, all)" + required: false + type: string + default: "all" + repository: + description: "Path to repository to checkout" + required: true + type: string + smoke_test: + description: "Path to a smoke test script" + required: true + type: string + with_cuda: + description: "With cuda enable/disable" + required: false + type: string + default: disable + +jobs: + generate-linux-matrix: + if: (inputs.os == 'linux' || inputs.os == 'all') + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: ${{ inputs.package_type }} + os: linux + channel: ${{ inputs.channel }} + with-cuda: ${{ inputs.with_cuda }} + generate-windows-matrix: + if: (inputs.os == 'windows' || inputs.os == 'all') + uses: 
pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: ${{ inputs.package_type }} + os: windows + channel: ${{ inputs.channel }} + with-cuda: ${{ inputs.with_cuda }} + generate-macos-matrix: + if: (inputs.os == 'macos' || inputs.os == 'all') + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: ${{ inputs.package_type }} + os: macos + channel: ${{ inputs.channel }} + with-cuda: ${{ inputs.with_cuda }} + generate-macos-arm64-matrix: + if: (inputs.os == 'macos-arm64' || inputs.os == 'all') + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: ${{ inputs.package_type }} + os: macos-arm64 + channel: ${{ inputs.channel }} + with-cuda: ${{ inputs.with_cuda }} + validate-linux: + if: (inputs.os == 'linux' || inputs.os == 'all') + needs: generate-linux-matrix + strategy: + matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + name: "linux-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + with: + runner: ${{ matrix.validation_runner }} + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref || github.ref }} + job-name: "linux-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export SMOKE_TEST="${{ inputs.smoke_test }}" + eval $SMOKE_TEST + validate-windows: + if: (inputs.os == 'windows' || inputs.os == 'all') + needs: generate-windows-matrix + strategy: + matrix: ${{ fromJson(needs.generate-windows-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/windows_job.yml@main + name: "windows-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + with: + runner: ${{ 
matrix.validation_runner }} + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref || github.ref }} + job-name: "windows-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export SMOKE_TEST="${{ inputs.smoke_test }}" + export TARGET_OS="windows" + eval $SMOKE_TEST + validate-macos: + if: (inputs.os == 'macos' || inputs.os == 'all') + needs: generate-macos-matrix + strategy: + matrix: ${{ fromJson(needs.generate-macos-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + name: "macos-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + with: + runner: ${{ matrix.validation_runner }} + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref || github.ref }} + job-name: "macos-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="macos" + export SMOKE_TEST="${{ inputs.smoke_test }}" + eval $SMOKE_TEST + validate-macos-arm64: + if: (inputs.os == 'macos-arm64' || inputs.os == 'all') + needs: generate-macos-arm64-matrix + strategy: + matrix: ${{ fromJson(needs.generate-macos-arm64-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + name: "macos-arm64-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + with: + runner: ${{ matrix.validation_runner }} + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref || github.ref }} + job-name: "macos-arm64-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}" + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="macos-arm64" + export SMOKE_TEST="${{ inputs.smoke_test }}" + eval 
$SMOKE_TEST diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 43c1a484d5..438062f91a 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -1,79 +1,58 @@ name: Validate linux binaries on: - push: - branches: - main - paths: - - .github/workflows/validate-linux-binaries.yml - pull_request: - paths: - - .github/workflows/validate-linux-binaries.yml + workflow_call: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + workflow_dispatch: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + jobs: - generate-conda-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: conda - os: linux - channel: nightly - generate-wheel-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - channel: nightly - generate-libtorch-matrix: + generate-linux-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main with: - package-type: libtorch + package-type: all os: linux - channel: nightly - validate-linux-binaries-conda: - needs: generate-conda-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-conda-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary conda - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - 
installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - validate-linux-binaries-wheels: - needs: generate-wheel-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-wheel-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary wheel - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - validate-linux-libtorch-binaries: - needs: generate-libtorch-matrix + channel: ${{ inputs.channel }} + + linux: + needs: generate-linux-matrix strategy: - matrix: - ${{ fromJson(needs.generate-libtorch-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }} fail-fast: false - runs-on: "ubuntu-20.04" - env: - PYTHON_VERSION: ${{ matrix.python_version }} - steps: - - name: Install pytorch and smoke test - env: - INSTALLATION: ${{ matrix.installation }} - ENV_NAME: conda-env-${{ github.run_id }} - run: | - sudo apt-get install unzip -y - set -ex - curl ${INSTALLATION} -o libtorch.zip - unzip libtorch.zip + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + name: ${{ matrix.build_name }} + with: + runner: ${{ matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="linux" + eval "$(conda shell.bash hook)" + source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-macos-arm64-binaries.yml b/.github/workflows/validate-macos-arm64-binaries.yml new file mode 100644 index 0000000000..f321022d42 --- /dev/null +++ b/.github/workflows/validate-macos-arm64-binaries.yml @@ -0,0 +1,56 @@ +name: Validate MacOS 
ARM64 Binaries + +on: + workflow_call: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + workflow_dispatch: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + +jobs: + generate-macos-arm64-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: all + os: macos-arm64 + channel: ${{ inputs.channel }} + macos-arm64: + needs: generate-macos-arm64-matrix + strategy: + matrix: ${{ fromJson(needs.generate-macos-arm64-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + name: ${{ matrix.build_name }} + with: + runner: ${{ matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="macos-arm64" + source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-macos-binaries.yml b/.github/workflows/validate-macos-binaries.yml index 3bc3ea0cdc..0e3f38ff86 100644 --- a/.github/workflows/validate-macos-binaries.yml +++ b/.github/workflows/validate-macos-binaries.yml @@ -1,96 +1,56 @@ name: Validate MacOS Binaries on: - pull_request: - paths: - - .github/workflows/validate-macos-binaries.yml + workflow_call: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + 
workflow_dispatch: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + jobs: - generate-arm64-conda-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: conda - os: macos-arm64 - channel: all - generate-arm64-wheel-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: macos-arm64 - channel: all - generate-x86_64-conda-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: conda - os: macos-x86_64 - channel: all - generate-x86_64-wheel-matrix: + generate-macos-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main with: - package-type: wheel - os: macos-x86_64 - channel: all - - validate-macos-arm64-binaries-conda: - needs: generate-arm64-conda-matrix + package-type: all + os: macos + channel: ${{ inputs.channel }} + macos: + needs: generate-macos-matrix strategy: - matrix: - ${{ fromJson(needs.generate-arm64-conda-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-macos-matrix.outputs.matrix) }} fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary conda - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - target_os: macos - validate-macos-arm64-binaries-wheel: - needs: generate-arm64-wheel-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-arm64-wheel-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: 
Validate binary wheel - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - target_os: macos - validate-macos-x86_64-binaries-conda: - needs: generate-x86_64-conda-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-x86_64-conda-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary conda - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - target_os: macos - validate-macos-x86_64-binaries-wheel: - needs: generate-x86_64-wheel-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-x86_64-wheel-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary wheel - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - target_os: macos + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + name: ${{ matrix.build_name }} + with: + runner: ${{ matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="macos" + source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-nightly-binaries.yml b/.github/workflows/validate-nightly-binaries.yml new file mode 100644 index 0000000000..c252e0433b --- /dev/null +++ 
b/.github/workflows/validate-nightly-binaries.yml @@ -0,0 +1,35 @@ +# Scheduled validation of the nightly binaries +name: cron + +on: + schedule: + # At 2:30 pm UTC (7:30 am PDT) + - cron: "30 14 * * *" + # Have the ability to trigger this job manually through the API + workflow_dispatch: + push: + branches: + - main + paths: + - .github/workflows/validate-nightly-binaries.yml + - .github/workflows/validate-linux-binaries.yml + - .github/workflows/validate-windows-binaries.yml + - .github/workflows/validate-macos-binaries.yml + - .github/workflows/validate-macos-arm64-binaries.yml + - test/smoke_test/* + pull_request: + paths: + - .github/workflows/validate-nightly-binaries.yml + - .github/workflows/validate-linux-binaries.yml + - .github/workflows/validate-windows-binaries.yml + - .github/workflows/validate-macos-binaries.yml + - .github/workflows/validate-macos-arm64-binaries.yml + - .github/scripts/validate_binaries.sh + - test/smoke_test/* +jobs: + nightly: + uses: ./.github/workflows/validate-binaries.yml + with: + channel: nightly + os: all + limit-win-builds: enable diff --git a/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml new file mode 100644 index 0000000000..a995ec817a --- /dev/null +++ b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml @@ -0,0 +1,26 @@ +name: Validate Nightly PyPI Wheel Binary Size +on: + pull_request: + paths: + - .github/workflows/validate-nightly-pypi-wheel-binary-size.yml + workflow_dispatch: + schedule: + # At 2:30 pm UTC (7:30 am PDT) + - cron: "30 14 * * *" + +jobs: + nightly-pypi-binary-size-validation: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + repository: pytorch/test-infra + - name: Install requirements + run: | + pip3 install -r tools/binary_size_validation/requirements.txt + - name: Run validation + run: | + python tools/binary_size_validation/binary_size_validation.py \ + --url 
https://download.pytorch.org/whl/nightly/torch/ \ + --include "pypi" --only-latest-version --threshold 750 \ No newline at end of file diff --git a/.github/workflows/validate-release-binaries.yml b/.github/workflows/validate-release-binaries.yml new file mode 100644 index 0000000000..9549e1e33e --- /dev/null +++ b/.github/workflows/validate-release-binaries.yml @@ -0,0 +1,27 @@ +# Scheduled validation of the release binaries +name: cron + +on: + schedule: + # At 3 am and 2 pm UTC (7 am and 8 pm PDT) + - cron: "0 3,14 * * *" + # Have the ability to trigger this job manually through the API + workflow_dispatch: + push: + branches: + - main + paths: + - .github/workflows/validate-release-binaries.yml + - .github/workflows/validate-linux-binaries.yml + - .github/workflows/validate-windows-binaries.yml + - .github/workflows/validate-macos-binaries.yml + - .github/workflows/validate-macos-arm64-binaries.yml + - test/smoke_test/* + +jobs: + release: + uses: ./.github/workflows/validate-binaries.yml + with: + channel: release + os: all + limit-win-builds: enable diff --git a/.github/workflows/validate-repackaged-binary-sizes.yml b/.github/workflows/validate-repackaged-binary-sizes.yml new file mode 100644 index 0000000000..695c68d3aa --- /dev/null +++ b/.github/workflows/validate-repackaged-binary-sizes.yml @@ -0,0 +1,88 @@ +name: Validate manywheel binaries + +# This workflow validates the size of the manywheel binaries after repackaging for PyPi +# Specify the direct URLs to the binaries (from https://download.pytorch.org/whl/test/torch/) in the matrix +# along with the python version. 
+# +# The workflow will: +# * download the binaries, +# * run release/pypi/prep_binary_for_pypi.sh +# * run smoke tests on the repackaged binaries +# * display the size before and after repackaging as the workflow annotation +# * optionally upload the repackaged binaries as artifacts (for debug or promotion) + +on: + pull_request: + paths: + - .github/workflows/validate-repackaged-binary-sizes.yml + - release/pypi/prep_binary_for_pypi.sh + +jobs: + validate-binary-size: + strategy: + fail-fast: false + matrix: + whl: + - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp310-cp310-linux_x86_64.whl + python: "3.10" # python version to use for smoke tests + upload_artifact: false # upload the repackaged binary as an artifact + - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp37-cp37m-linux_x86_64.whl + python: "3.7" + artifact: false + - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp38-cp38-linux_x86_64.whl + python: "3.8" + artifact: false + - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp39-cp39-linux_x86_64.whl + python: "3.9" + artifact: false + # - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp311-cp311-linux_x86_64.whl + # python: "3.11" + # artifact: false + + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.4xlarge.nvidia.gpu + job-name: "Validate binary size" + upload-artifact: ${{ matrix.whl.upload_artifact == 'true' && 'repackaged-binary' || '' }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export GPU_ARCH_VER="11.7" + export GPU_ARCH_TYPE="cuda" + export CUDA_VER="11.7" + export DESIRED_PYTHON="${{ matrix.whl.python }}" + export DESIRED_CUDA="cu117" + export PACKAGE_TYPE="wheel" + export TARGET_OS="linux" + export INSTALLATION="" + + 
# install zip + sudo yum install zip -y + + # install patchelf + chmod a+x common/install_patchelf.sh + sudo common/install_patchelf.sh + + # download torch whl + wget ${{ matrix.whl.url }} + FILENAME=$(ls -1 *.whl | head -n 1) + SIZE_BEFORE=$(du -h $FILENAME | cut -f1) + + # repackage into manywheel + release/pypi/prep_binary_for_pypi.sh $FILENAME + + NEW_FILENAME=$(ls -1 *.whl | head -n 1) + echo "::notice:: $FILENAME before: $SIZE_BEFORE after: $(du -h $NEW_FILENAME | cut -f1)" + + # cp to ${RUNNER_ARTIFACT_DIR} + cp $NEW_FILENAME ${RUNNER_ARTIFACT_DIR}/ + + # create conda env + conda create -y -n $ENV_NAME python=$DESIRED_PYTHON + conda activate $ENV_NAME + + # install torch + pip install numpy pillow $NEW_FILENAME + + # run smoke test + python ./test/smoke_test/smoke_test.py --package=torchonly \ No newline at end of file diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml index 1dad91db06..6833e55b20 100644 --- a/.github/workflows/validate-windows-binaries.yml +++ b/.github/workflows/validate-windows-binaries.yml @@ -1,80 +1,69 @@ -name: Validate binary images +name: Validate Windows binary images on: - push: - branches: - main - paths: - - .github/workflows/validate-windows-binaries.yml - pull_request: - paths: - - .github/workflows/validate-windows-binaries.yml + workflow_call: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + limit-win-builds: + description: "Limit windows builds to single python/cuda config" + default: "disable" + type: string + workflow_dispatch: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: 
false + type: string + limit-win-builds: + description: "Limit windows builds to single python/cuda config" + default: "disable" + required: false + type: string + jobs: - generate-conda-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: conda - os: windows - channel: nightly - generate-wheel-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - channel: nightly - generate-libtorch-matrix: + generate-windows-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main with: - package-type: libtorch + package-type: all os: windows - channel: nightly - validate-windows-binaries-conda: - needs: generate-conda-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-conda-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Checkout PyTorch builder - uses: actions/checkout@v2 - - name: Validate binary conda - uses: ./.github/actions/validate-windows-binary - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - validate-windows-binaries-wheel: - needs: generate-wheel-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-wheel-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Checkout PyTorch builder - uses: actions/checkout@v2 - - name: Validate binary wheel - uses: ./.github/actions/validate-windows-binary - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - validate-linux-libtorch-binaries: - needs: generate-libtorch-matrix + channel: ${{ inputs.channel }} + limit-win-builds: ${{ inputs.limit-win-builds }} + + 
win: + needs: generate-windows-matrix strategy: - matrix: - ${{ fromJson(needs.generate-libtorch-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-windows-matrix.outputs.matrix) }} fail-fast: false - runs-on: "windows-2019" - env: - PYTHON_VERSION: ${{ matrix.python_version }} - steps: - - name: Install pytorch and smoke test - shell: powershell - run: | - $install = '${{ matrix.installation }}' - Invoke-WebRequest -Uri $install -OutFile 'libtorch.zip' - Expand-Archive -Force libtorch.zip . + uses: pytorch/test-infra/.github/workflows/windows_job.yml@main + name: ${{ matrix.build_name }} + with: + runner: ${{ matrix.package_type == 'libtorch' && 'windows.4xlarge' || matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + binary-matrix: ${{ toJSON(matrix) }} + timeout: 60 + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="windows" + source /c/Jenkins/Miniconda3/etc/profile.d/conda.sh + source ./.github/scripts/validate_binaries.sh diff --git a/CUDA_UPGRADE_GUIDE.MD b/CUDA_UPGRADE_GUIDE.MD index 4a725c0f06..ae3f158d31 100644 --- a/CUDA_UPGRADE_GUIDE.MD +++ b/CUDA_UPGRADE_GUIDE.MD @@ -9,9 +9,8 @@ Here is the supported matrix for CUDA and CUDNN | CUDA | CUDNN | additional details | | --- | --- | --- | -| 10.2 | 7.6.5.32 | Needed for publishing CUDA enabled binaries to PyPi since CUDA 11.x binaries don’t meet the space requirements (<750MB) | -| 11.3 | 8.3.2.44 | Stable CUDA Release | -| 11.6 | 8.3.2.44 | Latest CUDA Release | +| 11.6 | 8.3.2.44 | Stable CUDA Release | +| 11.7 | 8.5.0.96 | Latest CUDA Release | ### B. Check the package availability @@ -72,12 +71,13 @@ Add setup for our Docker `libtorch` and `manywheel`: 1. Follow this [PR 999](https://github.com/pytorch/builder/pull/999) for all steps in this section 2. 
To get the CUDA install link, just like with Linux, go [here](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exe_local) and upload that `.exe` file to our S3 bucket [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects). -3. To get the cuDNN install link, you could ask NVIDIA, but you could also just sign up for an NVIDIA account and access the needed `.zip` file at this [link](https://developer.nvidia.com/rdp/cudnn-download). First click on `cuDNN Library for Windows (x86)` and then upload that zip file to our S3 bucket. -4. NOTE: When you upload files to S3, make sure to make these objects publicly readable so that our CI can access them! -5. Most times, you have to upgrade the driver install for newer versions, which would look like [updating the `windows/internal/driver_update.bat` file](https://github.com/pytorch/builder/commit/9b997037e16eb3bc635e28d101c3297d7e4ead29) +3. Review "Table 3. Possible Subpackage Names" of CUDA installation guide for windows [link](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) to make sure the Subpackage Names have not changed. These are specified in [cuda_install.bat file](https://github.com/pytorch/builder/pull/999/files#diff-92a9c40963159c9d8f88fa2987057a65a2370737bd4ecc233498ebdfa02021e6) +4. To get the cuDNN install link, you could ask NVIDIA, but you could also just sign up for an NVIDIA account and access the needed `.zip` file at this [link](https://developer.nvidia.com/rdp/cudnn-download). First click on `cuDNN Library for Windows (x86)` and then upload that zip file to our S3 bucket. +5. NOTE: When you upload files to S3, make sure to make these objects publicly readable so that our CI can access them! +6. 
Most times, you have to upgrade the driver install for newer versions, which would look like [updating the `windows/internal/driver_update.bat` file](https://github.com/pytorch/builder/commit/9b997037e16eb3bc635e28d101c3297d7e4ead29) 1. Please check the CUDA Toolkit and Minimum Required Driver Version for CUDA minor version compatibility table in [the release notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) to see if a driver update is necessary. -6. Compile MAGMA with the new CUDA version. Update `.github/workflows/build-magma-windows.yml` to include new version. -7. Validate Magma builds by going to S3 [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects). And querying for ```magma_``` +7. Compile MAGMA with the new CUDA version. Update `.github/workflows/build-magma-windows.yml` to include new version. +8. Validate Magma builds by going to S3 [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects). And querying for ```magma_``` ## 6. Generate new Windows AMI, test and deploy to canary and prod. diff --git a/README.md b/README.md index 01e18dcbea..70d902ac32 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,7 @@ Folders: - **windows** : scripts to build Windows wheels - **cron** : scripts to drive all of the above scripts across multiple configurations together - **analytics** : scripts to pull wheel download count from our AWS s3 logs + +## Testing + +In order to test build triggered by PyTorch repo's GitHub actions see [these instructions](https://github.com/pytorch/pytorch/blob/master/.github/scripts/README.md#testing-pytorchbuilder-changes) diff --git a/aarch64_linux/README.md b/aarch64_linux/README.md new file mode 100644 index 0000000000..583ed4af99 --- /dev/null +++ b/aarch64_linux/README.md @@ -0,0 +1,19 @@ +# Aarch64 (ARM/Graviton) Support Scripts +Scripts for building aarch64 PyTorch PIP Wheels. 
These scripts build the following wheels: +* torch +* torchvision +* torchaudio +* torchtext +* torchdata +## Aarch64_ci_build.sh +This script is designed to support CD operations within the PyPi manylinux aarch64 container, and to be executed in the container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script assumes the PyTorch repo is located at ```/pytorch``` and will put the wheels into ```/artifacts```. +### Usage +```DESIRED_PYTHON=<python-version> aarch64_ci_build.sh``` + +__NOTE:__ CI build is currently __EXPERIMENTAL__ + +## Build_aarch64_wheel.py +This app allows a person to build using AWS EC2 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. It can be used in a codebuild CD or from a local system. + +### Usage +```build_aarch64_wheel.py --key-name <key-name> --use-docker --python 3.8 --branch <branch-or-tag>``` diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh new file mode 100644 index 0000000000..c72698389c --- /dev/null +++ b/aarch64_linux/aarch64_ci_build.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -eux -o pipefail + +# This script is used to prepare the Docker container for the aarch64_wheel_ci_build.py Python script +# as we need to install conda and set up the Python version for the build.
+ +CONDA_PYTHON_EXE=/opt/conda/bin/python +CONDA_EXE=/opt/conda/bin/conda +PATH=/opt/conda/bin:$PATH + +############################################################################### +# Install OS dependent packages +############################################################################### +yum -y install epel-release +yum -y install less zstd + +############################################################################### +# Install conda +# disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" +# when using a Python version older than conda's latest +############################################################################### +echo 'Installing conda-forge' +curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh +chmod +x /mambaforge.sh +/mambaforge.sh -b -p /opt/conda +rm /mambaforge.sh +/opt/conda/bin/conda config --set ssl_verify False +/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf +python --version +conda --version + +############################################################################### +# Exec libgfortran.a hack +# +# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. +# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build.
To solve, get +# ubuntu's libgfortran.a which is compiled with -fPIC +############################################################################### +cd ~/ +curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.4.0-6ubuntu1_arm64.deb +ar x ~/libgfortran-10-dev.deb +tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ +cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ + +############################################################################### +# Run aarch64 builder python +############################################################################### +cd / +# adding safe directory for git as the permissions will be +# on the mounted pytorch repo +git config --global --add safe.directory /pytorch +python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py new file mode 100755 index 0000000000..c76f6d6474 --- /dev/null +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 + +import os +import subprocess +from typing import Dict, List, Optional, Tuple + + +'''' +Helper for getting paths for Python +''' +def list_dir(path: str) -> List[str]: + return subprocess.check_output(["ls", "-1", path]).decode().split("\n") + + +''' +Helper to get repo branches for specific versions +''' +def checkout_repo(branch: str = "main", + url: str = "", + git_clone_flags: str = "", + mapping: Dict[str, Tuple[str, str]] = []) -> Optional[str]: + for prefix in mapping: + if not branch.startswith(prefix): + continue + tag = f"v{mapping[prefix][0]}-{mapping[prefix][1]}" + os.system(f"git clone {url} -b {tag} {git_clone_flags}") + return mapping[prefix][0] + + os.system(f"git clone {url} {git_clone_flags}") + return None + + +''' +Using OpenBLAS with PyTorch +''' +def build_OpenBLAS(git_clone_flags: str = "") 
-> None: + print('Building OpenBLAS') + os.system(f"cd /; git clone https://github.com/xianyi/OpenBLAS -b v0.3.21 {git_clone_flags}") + make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8 " + os.system(f"cd OpenBLAS; make {make_flags} -j8; make {make_flags} install; cd /; rm -rf OpenBLAS") + + +''' +Using ArmComputeLibrary for aarch64 PyTorch +''' +def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: + print('Building Arm Compute Library') + os.system("cd / && mkdir /acl") + os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v22.11 {git_clone_flags}") + os.system(f"cd ComputeLibrary; export acl_install_dir=/acl; " \ + f"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " \ + f"cp -r arm_compute $acl_install_dir; " \ + f"cp -r include $acl_install_dir; " \ + f"cp -r utils $acl_install_dir; " \ + f"cp -r support $acl_install_dir; " \ + f"cp -r src $acl_install_dir; cd /") + + +''' +Script to embed libgomp to the wheels +''' +def embed_libgomp(wheel_name) -> None: + print('Embedding libgomp into wheel') + os.system(f"python3 /builder/aarch64_linux/embed_library.py {wheel_name} --update-tag") + + +''' +Build TorchVision wheel +''' +def build_torchvision(branch: str = "main", + git_clone_flags: str = "") -> str: + print('Checking out TorchVision repo') + build_version = checkout_repo(branch=branch, + url="https://github.com/pytorch/vision", + git_clone_flags=git_clone_flags, + mapping={ + "v1.7.1": ("0.8.2", "rc2"), + "v1.8.0": ("0.9.0", "rc3"), + "v1.8.1": ("0.9.1", "rc1"), + "v1.9.0": ("0.10.0", "rc1"), + "v1.10.0": ("0.11.1", "rc1"), + "v1.10.1": ("0.11.2", "rc1"), + "v1.10.2": ("0.11.3", "rc1"), + "v1.11.0": ("0.12.0", "rc1"), + "v1.12.0": ("0.13.0", "rc4"), + "v1.12.1": ("0.13.1", "rc6"), + "v1.13.0": ("0.14.0", "rc4"), + "v1.13.1": ("0.14.1", "rc2"), + "v2.0.0": ("0.15.0", "rc2"), + }) + print('Building 
TorchVision wheel') + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + if branch == 'nightly': + version = '' + if os.path.exists('/vision/version.txt'): + version = subprocess.check_output(['cat', '/vision/version.txt']).decode().strip() + if len(version) == 0: + # In older revisions, version was embedded in setup.py + version = subprocess.check_output(['grep', 'version', 'setup.py']).decode().strip().split('\'')[1][:-2] + build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/vision').decode().replace('-','') + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += f"BUILD_VERSION={build_version}" + + os.system(f"cd /vision; {build_vars} python3 setup.py bdist_wheel") + wheel_name = list_dir("/vision/dist")[0] + embed_libgomp(f"/vision/dist/{wheel_name}") + + print('Move TorchVision wheel to artfacts') + os.system(f"mv /vision/dist/{wheel_name} /artifacts/") + return wheel_name + + +''' +Build TorchAudio wheel +''' +def build_torchaudio(branch: str = "main", + git_clone_flags: str = "") -> str: + print('Checking out TorchAudio repo') + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo(branch=branch, + url="https://github.com/pytorch/audio", + git_clone_flags=git_clone_flags, + mapping={ + "v1.9.0": ("0.9.0", "rc2"), + "v1.10.0": ("0.10.0", "rc5"), + "v1.10.1": ("0.10.1", "rc1"), + "v1.10.2": ("0.10.2", "rc1"), + "v1.11.0": ("0.11.0", "rc1"), + "v1.12.0": ("0.12.0", "rc3"), + "v1.12.1": ("0.12.1", "rc5"), + "v1.13.0": ("0.13.0", "rc4"), + "v1.13.1": ("0.13.1", "rc2"), + "v2.0.0": ("2.0.0", "rc2"), + }) + print('Building TorchAudio wheel') + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + if branch == 'nightly': + version = '' + if os.path.exists('/audio/version.txt'): + version = subprocess.check_output(['cat', '/audio/version.txt']).decode().strip() + build_date = 
subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/audio').decode().replace('-','') + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += f"BUILD_VERSION={build_version}" + + os.system(f"cd /audio; {build_vars} python3 setup.py bdist_wheel") + wheel_name = list_dir("/audio/dist")[0] + embed_libgomp(f"/audio/dist/{wheel_name}") + + print('Move TorchAudio wheel to artfacts') + os.system(f"mv /audio/dist/{wheel_name} /artifacts/") + return wheel_name + + +''' +Build TorchText wheel +''' +def build_torchtext(branch: str = "main", + git_clone_flags: str = "") -> str: + print('Checking out TorchText repo') + os.system(f"cd /") + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo(branch=branch, + url="https://github.com/pytorch/text", + git_clone_flags=git_clone_flags, + mapping={ + "v1.9.0": ("0.10.0", "rc1"), + "v1.10.0": ("0.11.0", "rc2"), + "v1.10.1": ("0.11.1", "rc1"), + "v1.10.2": ("0.11.2", "rc1"), + "v1.11.0": ("0.12.0", "rc1"), + "v1.12.0": ("0.13.0", "rc2"), + "v1.12.1": ("0.13.1", "rc5"), + "v1.13.0": ("0.14.0", "rc3"), + "v1.13.1": ("0.14.1", "rc1"), + "v2.0.0": ("0.15.0", "rc2"), + }) + print('Building TorchText wheel') + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + if branch == 'nightly': + version = '' + if os.path.exists('/text/version.txt'): + version = subprocess.check_output(['cat', '/text/version.txt']).decode().strip() + build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/text').decode().replace('-','') + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += f"BUILD_VERSION={build_version}" + + os.system(f"cd text; {build_vars} python3 setup.py bdist_wheel") + wheel_name = list_dir("/text/dist")[0] + embed_libgomp(f"/text/dist/{wheel_name}") + + print('Move TorchText wheel to artfacts') + os.system(f"mv /text/dist/{wheel_name} /artifacts/") 
+ return wheel_name + + +''' +Build TorchData wheel +''' +def build_torchdata(branch: str = "main", + git_clone_flags: str = "") -> str: + print('Checking out TorchData repo') + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo(branch=branch, + url="https://github.com/pytorch/data", + git_clone_flags=git_clone_flags, + mapping={ + "v1.11.0": ("0.3.0", "rc1"), + "v1.12.0": ("0.4.0", "rc3"), + "v1.12.1": ("0.4.1", "rc5"), + "v1.13.1": ("0.5.1", "rc2"), + "v2.0.0": ("0.6.0", "rc2"), + }) + print('Building TorchData wheel') + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + if branch == 'nightly': + version = '' + if os.path.exists('/data/version.txt'): + version = subprocess.check_output(['cat', '/data/version.txt']).decode().strip() + build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/data').decode().replace('-','') + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += f"BUILD_VERSION={build_version}" + + os.system(f"cd /data; {build_vars} python3 setup.py bdist_wheel") + wheel_name = list_dir("/data/dist")[0] + embed_libgomp(f"/data/dist/{wheel_name}") + + print('Move TorchAudio wheel to artfacts') + os.system(f"mv /data/dist/{wheel_name} /artifacts/") + return wheel_name + + +def parse_arguments(): + from argparse import ArgumentParser + parser = ArgumentParser("AARCH64 wheels python CD") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--build-only", action="store_true") + parser.add_argument("--test-only", type=str) + parser.add_argument("--enable-mkldnn", action="store_true") + return parser.parse_args() + + +''' +Entry Point +''' +if __name__ == '__main__': + + args = parse_arguments() + enable_mkldnn = args.enable_mkldnn + os.system("cd /pytorch") + branch = subprocess.check_output("git rev-parse --abbrev-ref HEAD") + + git_clone_flags = " --depth 1 --shallow-submodules" + os.system(f"conda 
install -y ninja scons") + + print("Build and Install OpenBLAS") + build_OpenBLAS(git_clone_flags) + + print('Building PyTorch wheel') + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + os.system(f"cd /pytorch; pip install -r requirements.txt") + os.system(f"pip install auditwheel") + os.system(f"python setup.py clean") + + if branch == 'nightly' or branch == 'master': + build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/pytorch').decode().replace('-','') + version = subprocess.check_output(['cat','version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1" + if branch.startswith("v1.") or branch.startswith("v2."): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" + if enable_mkldnn: + build_ArmComputeLibrary(git_clone_flags) + print("build pytorch with mkldnn+acl backend") + os.system(f"export ACL_ROOT_DIR=/acl; export LD_LIBRARY_PATH=/acl/build; export ACL_LIBRARY=/acl/build") + build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") + print('Repair the wheel') + pytorch_wheel_name = list_dir("pytorch/dist")[0] + os.system(f"export LD_LIBRARY_PATH=/pytorch/build/lib:$LD_LIBRARY_PATH; auditwheel repair /pytorch/dist/{pytorch_wheel_name}") + print('replace the original wheel with the repaired one') + pytorch_repaired_wheel_name = list_dir("wheelhouse")[0] + os.system(f"cp /wheelhouse/{pytorch_repaired_wheel_name} /pytorch/dist/{pytorch_wheel_name}") + else: + print("build pytorch without mkldnn backend") + os.system(f"cd pytorch ; {build_vars} python3 setup.py bdist_wheel") + + print("Deleting build folder") + os.system("cd /pytorch; rm -rf build") + pytorch_wheel_name = list_dir("/pytorch/dist")[0] + embed_libgomp(f"/pytorch/dist/{pytorch_wheel_name}") + print('Move PyTorch wheel to artfacts') + 
os.system(f"mv /pytorch/dist/{pytorch_wheel_name} /artifacts/") + print("Installing Pytorch wheel") + os.system(f"pip install /artifacts/{pytorch_wheel_name}") + + vision_wheel_name = build_torchvision(branch=branch, git_clone_flags=git_clone_flags) + audio_wheel_name = build_torchaudio(branch=branch, git_clone_flags=git_clone_flags) + text_wheel_name = build_torchtext(branch=branch, git_clone_flags=git_clone_flags) + data_wheel_name = build_torchdata(branch=branch, git_clone_flags=git_clone_flags) + + print(f"Wheels Created:\n" \ + f"{pytorch_wheel_name}\n" \ + f"{vision_wheel_name}\n" \ + f"{audio_wheel_name}\n" \ + f"{text_wheel_name}\n" \ + f"{data_wheel_name}\n") diff --git a/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py similarity index 74% rename from build_aarch64_wheel.py rename to aarch64_linux/build_aarch64_wheel.py index ee3fa54d10..f7b70208a2 100755 --- a/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -4,7 +4,7 @@ # To generate binaries for the release follow these steps: # 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: "v1.11.0": ("0.11.0", "rc1"), # 2. 
Run script with following arguments for each of the supported python versions and specify required RC tag for example: v1.11.0-rc3: -# build_aarch64_wheel.py --key-name --use-docker --python 3.7 --branch +# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch import boto3 @@ -15,11 +15,11 @@ from typing import Dict, List, Optional, Tuple, Union - # AMI images for us-east-1, change the following based on your ~/.aws/config os_amis = { - 'ubuntu18_04': "ami-0f2b111fdc1647918", # login_name: ubuntu - 'ubuntu20_04': "ami-0ea142bd244023692", # login_name: ubuntu + 'ubuntu18_04': "ami-078eece1d8119409f", # login_name: ubuntu + 'ubuntu20_04': "ami-052eac90edaa9d08f", # login_name: ubuntu + 'ubuntu22_04': "ami-0c6c29c5125214c77", # login_name: ubuntu 'redhat8': "ami-0698b90665a2ddcf1", # login_name: ec2-user } ubuntu18_04_ami = os_amis['ubuntu18_04'] @@ -128,7 +128,7 @@ def run_cmd(self, args: Union[str, List[str]]) -> None: assert self.container_id is not None docker_cmd = self._gen_ssh_prefix() + ['docker', 'exec', '-i', self.container_id, 'bash'] p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE) - p.communicate(input=" ".join(["source .bashrc;"] + self._split_cmd(args)).encode("utf-8")) + p.communicate(input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode("utf-8")) rc = p.wait() if rc != 0: raise subprocess.CalledProcessError(rc, docker_cmd) @@ -139,7 +139,7 @@ def check_output(self, args: Union[str, List[str]]) -> str: assert self.container_id is not None docker_cmd = self._gen_ssh_prefix() + ['docker', 'exec', '-i', self.container_id, 'bash'] p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) - (out, err) = p.communicate(input=" ".join(["source .bashrc;"] + self._split_cmd(args)).encode("utf-8")) + (out, err) = p.communicate(input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode("utf-8")) rc = p.wait() if rc != 0: raise subprocess.CalledProcessError(rc, docker_cmd, output=out, 
stderr=err) @@ -211,8 +211,12 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: # Python-3.6 EOLed and not compatible with conda-4.11 install_condaforge(host, suffix="download/4.10.3-10/Miniforge3-4.10.3-10-Linux-aarch64.sh") host.run_cmd(f"conda install -y python={python_version} numpy pyyaml") + elif python_version == "3.11": + install_condaforge(host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh") + # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer + host.run_cmd(f"conda install -y python={python_version} numpy pyyaml setuptools=59.8.0 -c malfet") else: - install_condaforge(host) + install_condaforge(host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh") # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer host.run_cmd(f"conda install -y python={python_version} numpy pyyaml setuptools=59.5.0") @@ -221,16 +225,16 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building OpenBLAS') host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.19 {git_clone_flags}") make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" - host.run_cmd(f"pushd OpenBLAS; make {make_flags} -j8; sudo make {make_flags} install; popd; rm -rf OpenBLAS") + host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") -def build_FFTW(host: RemoteHost, git_clone_flags: str = "") -> None: - print("Building FFTW3") - host.run_cmd("sudo apt-get install -y ocaml ocamlbuild autoconf automake indent libtool fig2dev texinfo") - # TODO: fix a version to build - # TODO: consider adding flags --host=arm-linux-gnueabi --enable-single --enable-neon CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp - host.run_cmd(f"git clone https://github.com/FFTW/fftw3 {git_clone_flags}") - host.run_cmd("pushd fftw3; sh bootstrap.sh; make -j8; sudo make install; popd") +def 
build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: + print('Building Arm Compute Library') + acl_install_dir="${HOME}/acl" + acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native" + host.run_cmd(f"mkdir {acl_install_dir}") + host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v22.11 {git_clone_flags}") + host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags} build_dir={acl_install_dir}/build") def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: @@ -250,7 +254,7 @@ def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: def checkout_repo(host: RemoteHost, *, - branch: str = "master", + branch: str = "main", url: str, git_clone_flags: str, mapping: Dict[str, Tuple[str, str]]) -> Optional[str]: @@ -261,14 +265,19 @@ def checkout_repo(host: RemoteHost, *, host.run_cmd(f"git clone {url} -b {tag} {git_clone_flags}") return mapping[prefix][0] - host.run_cmd(f"git clone {url} {git_clone_flags}") + # Map master to main + if branch == "master" and url.rsplit("/")[-1] in ['vision', 'text', 'audio', 'data']: + branch = "main" + + host.run_cmd(f"git clone {url} -b {branch} {git_clone_flags}") return None def build_torchvision(host: RemoteHost, *, - branch: str = "master", + branch: str = "main", use_conda: bool = True, - git_clone_flags: str) -> str: + git_clone_flags: str, + run_smoke_tests: bool = True) -> str: print('Checking out TorchVision repo') build_version = checkout_repo(host, branch=branch, @@ -284,33 +293,84 @@ def build_torchvision(host: RemoteHost, *, "v1.10.2": ("0.11.3", "rc1"), "v1.11.0": ("0.12.0", "rc1"), "v1.12.0": ("0.13.0", "rc4"), + "v1.12.1": ("0.13.1", "rc6"), + "v1.13.0": ("0.14.0", "rc4"), + "v1.13.1": ("0.14.1", "rc2"), + "v2.0.0": ("0.15.1", "rc2"), }) - print('Building TorchVision wheel') + print("Building TorchVision wheel") + + # Please note libnpg and jpeg are required to build image.so 
extension + if use_conda: + host.run_cmd("conda install -y libpng jpeg") + # Remove .so files to force static linking + host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so") + # And patch setup.py to include libz dependency for libpng + host.run_cmd(['sed -i -e \'s/image_link_flags\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) + build_vars = "" - if branch == 'nightly': + if branch == "nightly": version = host.check_output(["if [ -f vision/version.txt ]; then cat vision/version.txt; fi"]).strip() if len(version) == 0: # In older revisions, version was embedded in setup.py version = host.check_output(["grep", "\"version = '\"", "vision/setup.py"]).strip().split("'")[1][:-2] - build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd vision && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") build_vars += f"BUILD_VERSION={version}.dev{build_date}" elif build_version is not None: - build_vars += f"BUILD_VERSION={build_version}" + build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" - host.run_cmd(f"cd vision; {build_vars} python3 setup.py bdist_wheel") + host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel") vision_wheel_name = host.list_dir("vision/dist")[0] embed_libgomp(host, use_conda, os.path.join('vision', 'dist', vision_wheel_name)) print('Copying TorchVision wheel') host.download_wheel(os.path.join('vision', 'dist', vision_wheel_name)) + if run_smoke_tests: + host.run_cmd(f"pip3 install {os.path.join('vision', 'dist', vision_wheel_name)}") + host.run_cmd("python3 vision/test/smoke_test.py") print("Delete vision checkout") host.run_cmd("rm -rf vision") return vision_wheel_name +def build_torchdata(host: RemoteHost, *, + branch: str = 
"master", + use_conda: bool = True, + git_clone_flags: str = "") -> str: + print('Checking out TorchData repo') + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo(host, + branch=branch, + url="https://github.com/pytorch/data", + git_clone_flags=git_clone_flags, + mapping={ + "v1.13.1": ("0.5.1", ""), + "v2.0.0": ("0.6.0", "rc5"), + }) + print('Building TorchData wheel') + build_vars = "" + if branch == 'nightly': + version = host.check_output(["if [ -f data/version.txt ]; then cat data/version.txt; fi"]).strip() + build_date = host.check_output("cd data && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + + host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel") + wheel_name = host.list_dir("data/dist")[0] + embed_libgomp(host, use_conda, os.path.join('data', 'dist', wheel_name)) + + print('Copying TorchData wheel') + host.download_wheel(os.path.join('data', 'dist', wheel_name)) + + return wheel_name + + def build_torchtext(host: RemoteHost, *, branch: str = "master", use_conda: bool = True, @@ -328,19 +388,23 @@ def build_torchtext(host: RemoteHost, *, "v1.10.2": ("0.11.2", "rc1"), "v1.11.0": ("0.12.0", "rc1"), "v1.12.0": ("0.13.0", "rc2"), + "v1.12.1": ("0.13.1", "rc5"), + "v1.13.0": ("0.14.0", "rc3"), + "v1.13.1": ("0.14.1", "rc1"), + "v2.0.0": ("0.15.1", "rc2"), }) print('Building TorchText wheel') build_vars = "" if branch == 'nightly': version = host.check_output(["if [ -f text/version.txt ]; then cat text/version.txt; fi"]).strip() - build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd text && git log --pretty=format:%s 
-1").strip().split()[0].replace("-", "") build_vars += f"BUILD_VERSION={version}.dev{build_date}" elif build_version is not None: - build_vars += f"BUILD_VERSION={build_version}" + build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" - host.run_cmd(f"cd text; {build_vars} python3 setup.py bdist_wheel") + host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel") wheel_name = host.list_dir("text/dist")[0] embed_libgomp(host, use_conda, os.path.join('text', 'dist', wheel_name)) @@ -367,19 +431,23 @@ def build_torchaudio(host: RemoteHost, *, "v1.10.2": ("0.10.2", "rc1"), "v1.11.0": ("0.11.0", "rc1"), "v1.12.0": ("0.12.0", "rc3"), + "v1.12.1": ("0.12.1", "rc5"), + "v1.13.0": ("0.13.0", "rc4"), + "v1.13.1": ("0.13.1", "rc2"), + "v2.0.0": ("2.0.1", "rc3"), }) print('Building TorchAudio wheel') build_vars = "" if branch == 'nightly': version = host.check_output(["grep", "\"version = '\"", "audio/setup.py"]).strip().split("'")[1][:-2] - build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd audio && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") build_vars += f"BUILD_VERSION={version}.dev{build_date}" elif build_version is not None: - build_vars += f"BUILD_VERSION={build_version}" + build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" - host.run_cmd(f"cd audio; {build_vars} python3 setup.py bdist_wheel") + host.run_cmd(f"cd audio && {build_vars} python3 setup.py bdist_wheel") wheel_name = host.list_dir("audio/dist")[0] embed_libgomp(host, use_conda, os.path.join('audio', 'dist', wheel_name)) @@ -390,9 +458,9 @@ def build_torchaudio(host: RemoteHost, *, def configure_system(host: 
RemoteHost, *, - compiler="gcc-8", - use_conda=True, - python_version="3.8") -> None: + compiler: str = "gcc-8", + use_conda: bool = True, + python_version: str = "3.8") -> None: if use_conda: install_condaforge_python(host, python_version) @@ -402,7 +470,7 @@ def configure_system(host: RemoteHost, *, host.run_cmd("sudo apt-get install -y ninja-build g++ git cmake gfortran unzip") else: host.run_cmd("yum install -y sudo") - host.run_cmd("conda install -y ninja") + host.run_cmd("conda install -y ninja scons") if not use_conda: host.run_cmd("sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip") @@ -419,23 +487,39 @@ def configure_system(host: RemoteHost, *, host.run_cmd("sudo pip3 install numpy") +def build_domains(host: RemoteHost, *, + branch: str = "master", + use_conda: bool = True, + git_clone_flags: str = "") -> Tuple[str, str, str, str]: + vision_wheel_name = build_torchvision(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) + audio_wheel_name = build_torchaudio(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) + data_wheel_name = build_torchdata(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) + text_wheel_name = build_torchtext(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) + return (vision_wheel_name, audio_wheel_name, data_wheel_name, text_wheel_name) + + def start_build(host: RemoteHost, *, - branch="master", - compiler="gcc-8", - use_conda=True, - python_version="3.8", - shallow_clone=True) -> Tuple[str, str]: + branch: str = "master", + compiler: str = "gcc-8", + use_conda: bool = True, + python_version: str = "3.8", + pytorch_only: bool = False, + pytorch_build_number: Optional[str] = None, + shallow_clone: bool = True, + enable_mkldnn: bool = False) -> Tuple[str, str, str, str, str]: git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else "" if host.using_docker() and not use_conda: 
print("Auto-selecting conda option for docker images") use_conda = True + if not host.using_docker(): + print("Disable mkldnn for host builds") + enable_mkldnn = False configure_system(host, compiler=compiler, use_conda=use_conda, python_version=python_version) build_OpenBLAS(host, git_clone_flags) - # build_FFTW(host, git_clone_flags) if host.using_docker(): print("Move libgfortant.a into a standard location") @@ -452,19 +536,36 @@ def start_build(host: RemoteHost, *, host.run_cmd(f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}") print('Building PyTorch wheel') + build_opts = "" + if pytorch_build_number is not None: + build_opts += f" --build-number {pytorch_build_number}" # Breakpad build fails on aarch64 build_vars = "USE_BREAKPAD=0 " if branch == 'nightly': - build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") version = host.check_output("cat pytorch/version.txt").strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1" - if branch.startswith("v1."): + if branch.startswith("v1.") or branch.startswith("v2."): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" - host.run_cmd(f"cd pytorch ; {build_vars} python3 setup.py bdist_wheel") + if enable_mkldnn: + build_ArmComputeLibrary(host, git_clone_flags) + print("build pytorch with mkldnn+acl backend") + build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + host.run_cmd(f"cd pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary:$HOME/acl && {build_vars} python3 setup.py bdist_wheel{build_opts}") + print('Repair the wheel') + pytorch_wheel_name = host.list_dir("pytorch/dist")[0] + 
host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") + print('replace the original wheel with the repaired one') + pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0] + host.run_cmd(f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}") + else: + print("build pytorch without mkldnn backend") + host.run_cmd(f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}") + print("Deleting build folder") - host.run_cmd("cd pytorch; rm -rf build") + host.run_cmd("cd pytorch && rm -rf build") pytorch_wheel_name = host.list_dir("pytorch/dist")[0] embed_libgomp(host, use_conda, os.path.join('pytorch', 'dist', pytorch_wheel_name)) print('Copying the wheel') @@ -473,11 +574,11 @@ def start_build(host: RemoteHost, *, print('Installing PyTorch wheel') host.run_cmd(f"pip3 install pytorch/dist/{pytorch_wheel_name}") - vision_wheel_name = build_torchvision(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) - build_torchaudio(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) - build_torchtext(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) + if pytorch_only: + return (pytorch_wheel_name, None, None, None, None) + domain_wheels = build_domains(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags) - return pytorch_wheel_name, vision_wheel_name + return (pytorch_wheel_name, *domain_wheels) embed_library_script = """ @@ -602,10 +703,11 @@ def parse_arguments(): parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") parser.add_argument("--test-only", type=str) - parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu18_04') - parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10'], default=None) + parser.add_argument("--os", 
type=str, choices=list(os_amis.keys()), default='ubuntu20_04') + parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], default=None) parser.add_argument("--alloc-instance", action="store_true") parser.add_argument("--list-instances", action="store_true") + parser.add_argument("--pytorch-only", action="store_true") parser.add_argument("--keep-running", action="store_true") parser.add_argument("--terminate-instances", action="store_true") parser.add_argument("--instance-type", type=str, default="t4g.2xlarge") @@ -613,6 +715,8 @@ def parse_arguments(): parser.add_argument("--use-docker", action="store_true") parser.add_argument("--compiler", type=str, choices=['gcc-7', 'gcc-8', 'gcc-9', 'clang'], default="gcc-8") parser.add_argument("--use-torch-from-pypi", action="store_true") + parser.add_argument("--pytorch-build-number", type=str, default=None) + parser.add_argument("--disable-mkldnn", action="store_true") return parser.parse_args() @@ -639,7 +743,7 @@ def parse_arguments(): check `~/.ssh/` folder or manually set SSH_KEY_PATH environment variable.""") # Starting the instance - inst = start_instance(key_name, ami=ami) + inst = start_instance(key_name, ami=ami, instance_type=args.instance_type) instance_name = f'{args.key_name}-{args.os}' if args.python_version is not None: instance_name += f'-py{args.python_version}' @@ -673,14 +777,17 @@ def parse_arguments(): python_version=python_version) print("Installing PyTorch wheel") host.run_cmd("pip3 install torch") - build_torchvision(host, - branch=args.branch, - git_clone_flags=" --depth 1 --shallow-submodules") + build_domains(host, + branch=args.branch, + git_clone_flags=" --depth 1 --shallow-submodules") else: start_build(host, branch=args.branch, compiler=args.compiler, - python_version=python_version) + python_version=python_version, + pytorch_only=args.pytorch_only, + pytorch_build_number=args.pytorch_build_number, + enable_mkldnn=not args.disable_mkldnn) if not 
args.keep_running: print(f'Waiting for instance {inst.id} to terminate') inst.terminate() diff --git a/aarch64_linux/embed_library.py b/aarch64_linux/embed_library.py new file mode 100644 index 0000000000..978970d45f --- /dev/null +++ b/aarch64_linux/embed_library.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +from auditwheel.patcher import Patchelf +from auditwheel.wheeltools import InWheelCtx +from auditwheel.elfutils import elf_file_filter +from auditwheel.repair import copylib +from auditwheel.lddtree import lddtree +from subprocess import check_call +import os +import shutil +import sys +from tempfile import TemporaryDirectory + + +def replace_tag(filename): + with open(filename, 'r') as f: + lines = f.read().split("\\n") + for i,line in enumerate(lines): + if not line.startswith("Tag: "): + continue + lines[i] = line.replace("-linux_", "-manylinux2014_") + print(f'Updated tag from {line} to {lines[i]}') + + with open(filename, 'w') as f: + f.write("\\n".join(lines)) + + +class AlignedPatchelf(Patchelf): + def set_soname(self, file_name: str, new_soname: str) -> None: + check_call(['patchelf', '--page-size', '65536', '--set-soname', new_soname, file_name]) + + def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None: + check_call(['patchelf', '--page-size', '65536', '--replace-needed', soname, new_soname, file_name]) + + +def embed_library(whl_path, lib_soname, update_tag=False): + patcher = AlignedPatchelf() + out_dir = TemporaryDirectory() + whl_name = os.path.basename(whl_path) + tmp_whl_name = os.path.join(out_dir.name, whl_name) + with InWheelCtx(whl_path) as ctx: + torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib') + ctx.out_wheel=tmp_whl_name + new_lib_path, new_lib_soname = None, None + for filename, elf in elf_file_filter(ctx.iter_files()): + if not filename.startswith('torch/lib'): + continue + libtree = lddtree(filename) + if lib_soname not in libtree['needed']: + continue + lib_path = 
libtree['libs'][lib_soname]['path'] + if lib_path is None: + print(f"Can't embed {lib_soname} as it could not be found") + break + if lib_path.startswith(torchlib_path): + continue + + if new_lib_path is None: + new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher) + patcher.replace_needed(filename, lib_soname, new_lib_soname) + print(f'Replacing {lib_soname} with {new_lib_soname} for {filename}') + if update_tag: + # Add manylinux2014 tag + for filename in ctx.iter_files(): + if os.path.basename(filename) != 'WHEEL': + continue + replace_tag(filename) + shutil.move(tmp_whl_name, whl_path) + + +if __name__ == '__main__': + embed_library(sys.argv[1], 'libgomp.so.1', len(sys.argv) > 2 and sys.argv[2] == '--update-tag') diff --git a/analytics/github_analyze.py b/analytics/github_analyze.py index c255c6c8f1..47330208c2 100755 --- a/analytics/github_analyze.py +++ b/analytics/github_analyze.py @@ -161,9 +161,12 @@ def __init__(self, path, remote='upstream'): self.repo_dir = path self.remote = remote + def _run_git_cmd(self, *args) -> str: + return _check_output(['git', '-C', self.repo_dir] + list(args)) + def _run_git_log(self, revision_range) -> List[GitCommit]: - log = _check_output(['git', '-C', self.repo_dir, 'log', - '--format=fuller', '--date=unix', revision_range, '--', '.']).split("\n") + log = self._run_git_cmd('log', '--format=fuller', + '--date=unix', revision_range, '--', '.').split("\n") rc: List[GitCommit] = [] cur_msg: List[str] = [] for line in log: @@ -179,6 +182,18 @@ def _run_git_log(self, revision_range) -> List[GitCommit]: def get_commit_list(self, from_ref, to_ref) -> List[GitCommit]: return self._run_git_log(f"{self.remote}/{from_ref}..{self.remote}/{to_ref}") + def get_ghstack_orig_branches(self) -> List[str]: + return [x.strip() for x in self._run_git_cmd("branch", "--remotes", "--list", self.remote + "/gh/*/orig").strip().split("\n")] + + def show_ref(self, ref) -> str: + return self._run_git_cmd("show-ref", ref).split(" 
")[0] + + def merge_base(self, ref1, ref2) -> str: + return self._run_git_cmd("merge-base", ref1, ref2).strip() + + def rev_list(self, ref): + return self._run_git_cmd("rev-list", f"{self.remote}/master..{ref}").strip().split() + def build_commit_dict(commits: List[GitCommit]) -> Dict[str, GitCommit]: rc = {} @@ -358,6 +373,22 @@ def get_commits_dict(x, y): print(f'{html_url};{issue["title"]};{state}') +def analyze_stacks(repo: GitRepo) -> None: + from tqdm.contrib.concurrent import thread_map + branches = repo.get_ghstack_orig_branches() + stacks_by_author: Dict[str, List[int]] = {} + for branch,rv_commits in thread_map(lambda x: (x, repo.rev_list(x)), branches, max_workers=10): + author = branch.split("/")[2] + if author not in stacks_by_author: + stacks_by_author[author]=[] + stacks_by_author[author].append(len(rv_commits)) + for author, slen in sorted(stacks_by_author.items(), key=lambda x:len(x[1]), reverse=True): + if len(slen) == 1: + print(f"{author} has 1 stack of depth {slen[0]}") + continue + print(f"{author} has {len(slen)} stacks max depth is {max(slen)} avg depth is {sum(slen)/len(slen):.2f} mean is {slen[len(slen)//2]}") + + def parse_arguments(): from argparse import ArgumentParser parser = ArgumentParser(description="Print GitHub repo stats") @@ -375,6 +406,7 @@ def parse_arguments(): parser.add_argument("--print-reverts", action="store_true") parser.add_argument("--contributor-stats", action="store_true") parser.add_argument("--missing-in-branch", action="store_true") + parser.add_argument("--analyze-stacks", action="store_true") return parser.parse_args() @@ -392,6 +424,10 @@ def main(): repo = GitRepo(args.repo_path, remote) + if args.analyze_stacks: + analyze_stacks(repo) + return + if args.missing_in_branch: # Use milestone idx or search it along milestone titles try: diff --git a/analytics/validate_binaries.py b/analytics/validate_binaries.py index c3fd4ff2bf..65965c59ad 100644 --- a/analytics/validate_binaries.py +++ 
b/analytics/validate_binaries.py @@ -6,10 +6,10 @@ PLATFORMS = ["osx-64", "linux-64", "win-64"] PYTHON_VERSIONS = ["3.10", "3.9", "3.8", "3.7"] CUDA_CUDNN_VERSION = [ - ("11.5", "8.3.2"), ("11.3", "8.2.0"), ("11.1", "8.0.5"), ("10.2", "7.6.5"), ("cpu", None) + ("11.7", "8.5.0"), ("cpu", None) ] CHANNEL = "pytorch-test" -VERSION = "1.11.*" +VERSION = "1.13.*" def generate_expected_builds(platform: str) -> set: @@ -22,9 +22,6 @@ def generate_expected_builds(platform: str) -> set: for cuda_version, cudnn_version in CUDA_CUDNN_VERSION: if platform == "win-64": - if cuda_version == "10.2": - # win does not support cuda 10.2 - continue cudnn_version = "8" if cuda_version == "cpu": diff --git a/check_binary.sh b/check_binary.sh index 153fca7451..80dd1e5cac 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -22,6 +22,19 @@ set -eux -o pipefail # libtorch package. +if [[ -z ${DESIRED_PYTHON:-} ]]; then + export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-} +fi +if [[ -z ${DESIRED_CUDA:-} ]]; then + export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-} +fi +if [[ -z ${DESIRED_DEVTOOLSET:-} ]]; then + export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-} +fi +if [[ -z ${PACKAGE_TYPE:-} ]]; then + export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-} +fi + # The install root depends on both the package type and the os # All MacOS packages use conda, even for the wheel packages. 
if [[ "$PACKAGE_TYPE" == libtorch ]]; then @@ -38,7 +51,7 @@ else install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/" fi -if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then +if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* ]]; then # cu90, cu92, cu100, cu101 if [[ ${#DESIRED_CUDA} -eq 4 ]]; then CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}" @@ -328,7 +341,7 @@ fi if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then echo "Checking that MKL is available" build_and_run_example_cpp check-torch-mkl -else +elif [[ "$(uname -m)" != "arm64" ]]; then if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then echo "Checking that MKL is available" pushd /tmp @@ -366,7 +379,7 @@ if [[ "$OSTYPE" == "msys" ]]; then fi # Test that CUDA builds are setup correctly -if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then +if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* ]]; then if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then build_and_run_example_cpp check-torch-cuda else @@ -392,6 +405,9 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then echo "Checking that basic CNN works" python ${TEST_CODE_DIR}/cnn_smoke.py + echo "Test that linalg works" + python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))" + popd fi # if libtorch fi # if cuda @@ -418,8 +434,8 @@ fi ############################################################################### # Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries ############################################################################### -if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel') ]]; then +if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel')]]; then pushd /tmp - python -c "import torch; exit(0 if 
torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1)" + python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))" popd fi diff --git a/common/install_conda.sh b/common/install_conda.sh index 43dd193972..bd06075257 100644 --- a/common/install_conda.sh +++ b/common/install_conda.sh @@ -5,8 +5,11 @@ set -ex # Anaconda wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh chmod +x Miniconda3-latest-Linux-x86_64.sh -bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda +# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431 +bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda rm Miniconda3-latest-Linux-x86_64.sh export PATH=/opt/conda/bin:$PATH -conda install -y conda-build anaconda-client git ninja +# The cmake version here needs to match with the minimum version of cmake +# supported by PyTorch (3.18). There is only 3.18.2 on anaconda +conda install -y conda-build anaconda-client git ninja cmake=3.18.2 conda remove -y --force patchelf diff --git a/common/install_cpython.sh b/common/install_cpython.sh index f393de2025..b06fe27c16 100755 --- a/common/install_cpython.sh +++ b/common/install_cpython.sh @@ -14,37 +14,35 @@ function check_var { fi } -function lex_pyver { - # Echoes Python version string padded with zeros - # Thus: - # 3.2.1 -> 003002001 - # 3 -> 003000000 - echo $1 | awk -F "." 
'{printf "%03d%03d%03d", $1, $2, $3}' -} - function do_cpython_build { local py_ver=$1 check_var $py_ver - local ucs_setting=$2 - check_var $ucs_setting tar -xzf Python-$py_ver.tgz pushd Python-$py_ver - if [ "$ucs_setting" = "none" ]; then - unicode_flags="" - dir_suffix="" + + local prefix="/opt/_internal/cpython-${py_ver}" + mkdir -p ${prefix}/lib + if [[ -n $(which patchelf) ]]; then + local shared_flags="--enable-shared" else - local unicode_flags="--enable-unicode=$ucs_setting" - local dir_suffix="-$ucs_setting" + local shared_flags="--disable-shared" + fi + if [[ -z "${WITH_OPENSSL+x}" ]]; then + local openssl_flags="" + else + local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto" fi - local prefix="/opt/_internal/cpython-${py_ver}${dir_suffix}" - mkdir -p ${prefix}/lib # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6 - CFLAGS="-Wformat" ./configure --prefix=${prefix} --disable-shared $unicode_flags > /dev/null + CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null make -j40 > /dev/null make install > /dev/null + if [[ "${shared_flags}" == "--enable-shared" ]]; then + patchelf --set-rpath '$ORIGIN/../lib' ${prefix}/bin/python3 + fi + popd rm -rf Python-$py_ver # Some python's install as bin/python3. 
Make them available as @@ -61,27 +59,16 @@ function do_cpython_build { ln -s ${prefix} /opt/python/${abi_tag} } - function build_cpython { local py_ver=$1 check_var $py_ver check_var $PYTHON_DOWNLOAD_URL local py_ver_folder=$py_ver - # Only beta version of 3.11 is available right now - if [ "$py_ver" = "3.11.0" ]; then - py_ver=$py_ver"b1" - fi wget -q $PYTHON_DOWNLOAD_URL/$py_ver_folder/Python-$py_ver.tgz - if [ $(lex_pyver $py_ver) -lt $(lex_pyver 3.3) ]; then - do_cpython_build $py_ver ucs2 - do_cpython_build $py_ver ucs4 - else - do_cpython_build $py_ver none - fi + do_cpython_build $py_ver none rm -f Python-$py_ver.tgz } - function build_cpythons { check_var $GET_PIP_URL curl -sLO $GET_PIP_URL @@ -91,7 +78,6 @@ function build_cpythons { rm -f get-pip.py } - mkdir -p /opt/python mkdir -p /opt/_internal build_cpythons $CPYTHON_VERSIONS diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 77d1900113..359df5b3bb 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -2,80 +2,6 @@ set -ex -function install_102 { - echo "Installing CUDA 10.2 and CuDNN" - rm -rf /usr/local/cuda-10.2 /usr/local/cuda - # # install CUDA 10.2 in the same container - wget -q http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run - chmod +x cuda_10.2.89_440.33.01_linux.run - ./cuda_10.2.89_440.33.01_linux.run --extract=/tmp/cuda - rm -f cuda_10.2.89_440.33.01_linux.run - mv /tmp/cuda/cuda-toolkit /usr/local/cuda-10.2 - rm -rf /tmp/cuda - rm -f /usr/local/cuda && ln -s /usr/local/cuda-10.2 /usr/local/cuda - - # install CUDA 10.2 CuDNN - # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement - mkdir tmp_cudnn && cd tmp_cudnn - wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.6.5.32-1+cuda10.2_amd64.deb -O cudnn-dev.deb - wget -q 
http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.6.5.32-1+cuda10.2_amd64.deb -O cudnn.deb - ar -x cudnn-dev.deb && tar -xvf data.tar.xz - ar -x cudnn.deb && tar -xvf data.tar.xz - mkdir -p cuda/include && mkdir -p cuda/lib64 - cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h - cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64 - mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a - ln -s libcudnn.so.7 cuda/lib64/libcudnn.so - chmod +x cuda/lib64/*.so - cp -a cuda/include/* /usr/local/cuda/include/ - cp -a cuda/lib64/* /usr/local/cuda/lib64/ - cd .. - rm -rf tmp_cudnn - ldconfig -} - -function install_113 { - echo "Installing CUDA 11.3 and CuDNN 8.3" - rm -rf /usr/local/cuda-11.3 /usr/local/cuda - # install CUDA 11.3.1 in the same container - wget -q https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run - chmod +x cuda_11.3.1_465.19.01_linux.run - ./cuda_11.3.1_465.19.01_linux.run --toolkit --silent - rm -f cuda_11.3.1_465.19.01_linux.run - rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.3 /usr/local/cuda - - # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement - mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz -O cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz - tar xf cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz - cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/lib/* /usr/local/cuda/lib64/ - cd .. 
- rm -rf tmp_cudnn - ldconfig -} - -function install_115 { - echo "Installing CUDA 11.5 and CuDNN 8.3" - rm -rf /usr/local/cuda-11.5 /usr/local/cuda - # install CUDA 11.5.0 in the same container - wget -q https://developer.download.nvidia.com/compute/cuda/11.5.0/local_installers/cuda_11.5.0_495.29.05_linux.run - chmod +x cuda_11.5.0_495.29.05_linux.run - ./cuda_11.5.0_495.29.05_linux.run --toolkit --silent - rm -f cuda_11.5.0_495.29.05_linux.run - rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.5 /usr/local/cuda - - # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement - mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz -O cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz - tar xf cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz - cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/lib/* /usr/local/cuda/lib64/ - cd .. - rm -rf tmp_cudnn - ldconfig -} - function install_116 { echo "Installing CUDA 11.6 and CuDNN 8.3" rm -rf /usr/local/cuda-11.6 /usr/local/cuda @@ -98,7 +24,7 @@ function install_116 { } function install_117 { - echo "Installing CUDA 11.7 and CuDNN 8.3" + echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14" rm -rf /usr/local/cuda-11.7 /usr/local/cuda # install CUDA 11.7.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run @@ -116,79 +42,56 @@ function install_117 { cd .. 
rm -rf tmp_cudnn ldconfig -} - -function prune_102 { - echo "Pruning CUDA 10.2 and CuDNN" - ##################################################################################### - # CUDA 10.2 prune static libs - ##################################################################################### - export NVPRUNE="/usr/local/cuda-10.2/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-10.2/lib64" - - export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75" - export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75" - - if [[ -n "$OVERRIDE_GENCODE" ]]; then - export GENCODE=$OVERRIDE_GENCODE - fi - - # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included) - ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ - | xargs -I {} bash -c \ - "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" - - # prune CuDNN and CuBLAS - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a - - ##################################################################################### - # CUDA 10.2 prune visual tools - ##################################################################################### - export CUDA_BASE="/usr/local/cuda-10.2/" - rm -rf $CUDA_BASE/libnsight $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2019.5.0 $CUDA_BASE/nsight-systems-2019.5.2 + # NCCL 
license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz + tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz + cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_nccl + ldconfig } -function prune_113 { - echo "Pruning CUDA 11.3 and CuDNN" - ##################################################################################### - # CUDA 11.3 prune static libs - ##################################################################################### - export NVPRUNE="/usr/local/cuda-11.3/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-11.3/lib64" - - export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" - export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" - - if [[ -n "$OVERRIDE_GENCODE" ]]; then - export GENCODE=$OVERRIDE_GENCODE - fi - - # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included) - ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ - | xargs -I {} bash -c \ - "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" +function install_118 { + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15" + rm -rf /usr/local/cuda-11.8 /usr/local/cuda + # install CUDA 11.8.0 in the same container + wget -q 
https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run + chmod +x cuda_11.8.0_520.61.05_linux.run + ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent + rm -f cuda_11.8.0_520.61.05_linux.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda - # prune CuDNN and CuBLAS - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz + tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz + cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + ldconfig - ##################################################################################### - # CUDA 11.3 prune visual tools - ##################################################################################### - export CUDA_BASE="/usr/local/cuda-11.3/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2021.1.0 $CUDA_BASE/nsight-systems-2021.1.3 + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz + tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz + cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. 
+ rm -rf tmp_nccl + ldconfig } -function prune_115 { - echo "Pruning CUDA 11.5 and CuDNN" +function prune_116 { + echo "Pruning CUDA 11.6 and CuDNN" ##################################################################################### - # CUDA 11.3 prune static libs + # CUDA 11.6 prune static libs ##################################################################################### - export NVPRUNE="/usr/local/cuda-11.5/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-11.5/lib64" + export NVPRUNE="/usr/local/cuda-11.6/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-11.6/lib64" export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" @@ -200,26 +103,26 @@ function prune_115 { # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included) ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ | xargs -I {} bash -c \ - "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" # prune CuDNN and CuBLAS $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a ##################################################################################### - # CUDA 11.5 prune visual tools + # CUDA 11.6 prune visual tools 
##################################################################################### - export CUDA_BASE="/usr/local/cuda-11.5/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2021.3.0 $CUDA_BASE/nsight-systems-2021.3.3 + export CUDA_BASE="/usr/local/cuda-11.6/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.1.1 $CUDA_BASE/nsight-systems-2021.5.2 } -function prune_116 { - echo "Pruning CUDA 11.6 and CuDNN" +function prune_117 { + echo "Pruning CUDA 11.7 and CuDNN" ##################################################################################### - # CUDA 11.6 prune static libs + # CUDA 11.7 prune static libs ##################################################################################### - export NVPRUNE="/usr/local/cuda-11.6/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-11.6/lib64" + export NVPRUNE="/usr/local/cuda-11.7/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-11.7/lib64" export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" @@ -240,20 +143,20 @@ function prune_116 { ##################################################################################### # CUDA 11.6 prune visual tools ##################################################################################### - export CUDA_BASE="/usr/local/cuda-11.6/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.1.1 
$CUDA_BASE/nsight-systems-2021.5.2 + export CUDA_BASE="/usr/local/cuda-11.7/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.2.0 $CUDA_BASE/nsight-systems-2022.1.3 } -function prune_117 { - echo "Pruning CUDA 11.7 and CuDNN" +function prune_118 { + echo "Pruning CUDA 11.8 and cuDNN" ##################################################################################### - # CUDA 11.7 prune static libs + # CUDA 11.8 prune static libs ##################################################################################### - export NVPRUNE="/usr/local/cuda-11.7/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-11.7/lib64" + export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64" - export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" - export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" + export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode 
arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" if [[ -n "$OVERRIDE_GENCODE" ]]; then export GENCODE=$OVERRIDE_GENCODE @@ -269,26 +172,22 @@ function prune_117 { $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a ##################################################################################### - # CUDA 11.6 prune visual tools + # CUDA 11.8 prune visual tools ##################################################################################### - export CUDA_BASE="/usr/local/cuda-11.7/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.2.0 $CUDA_BASE/nsight-systems-2022.1.3 + export CUDA_BASE="/usr/local/cuda-11.8/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/ } # idiomatic parameter and option handling in sh while test $# -gt 0 do case "$1" in - 10.2) install_102; prune_102 - ;; - 11.3) install_113; prune_113 - ;; - 11.5) install_115; prune_115 - ;; 11.6) install_116; prune_116 ;; 11.7) install_117; prune_117 ;; + 11.8) install_118; prune_118 + ;; *) echo "bad argument $1"; exit 1 ;; esac diff --git a/common/install_magma.sh b/common/install_magma.sh index 5d14dbfe2f..b524c920e9 100644 --- a/common/install_magma.sh +++ b/common/install_magma.sh @@ -7,17 +7,10 @@ MAGMA_VERSION="2.5.2" function do_install() { cuda_version=$1 cuda_version_nodot=${1/./} - - if [[ ${cuda_version_nodot} == 116 ]]; then - MAGMA_VERSION="2.6.1" - magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-0.tar.bz2" - elif [[ ${cuda_version_nodot} == 117 ]]; then - MAGMA_VERSION="2.6.1" - magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-0.tar.bz2" - else - magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" - fi - + + MAGMA_VERSION="2.6.1" + magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" + 
cuda_dir="/usr/local/cuda-${cuda_version}" ( set -x diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 27521c429d..a5166c0974 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -33,8 +33,9 @@ if [[ $ROCM_INT -lt 40001 ]]; then exit 0 fi +# CHANGED: Do not uninstall. To avoid out of disk space issues, we will copy lib over existing. # Uninstall existing package, to avoid errors during later yum install indicating packages did not change. -yum remove -y miopen-hip +#yum remove -y miopen-hip # Function to retry functions that sometimes timeout or have flaky failures retry () { @@ -91,8 +92,25 @@ fi git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen +# remove .git to save disk space ince CI runner was running out +rm -rf .git +# Don't build MLIR to save docker build time +# since we are disabling MLIR backend for MIOpen anyway +if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then + sed -i '/rocMLIR/d' requirements.txt +elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then + sed -i '/llvm-project-mlir/d' requirements.txt +fi ## MIOpen minimum requirements cmake -P install_deps.cmake --minimum + +# clean up since CI runner was running out of disk space +rm -rf /tmp/* +yum clean all +rm -rf /var/cache/yum +rm -rf /var/lib/yum/yumdb +rm -rf /var/lib/yum/history + ## Build MIOpen mkdir -p build cd build @@ -101,13 +119,19 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang ${MIOPEN_CMAKE_DB_FLAGS} \ -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}" make MIOpen -j $(nproc) -make -j $(nproc) package -yum install -y miopen-*.rpm + +# CHANGED: Do not build package. +# Build MIOpen package +#make -j $(nproc) package + +# clean up since CI runner was running out of disk space +rm -rf /usr/local/cget + +# CHANGED: Do not install package, just copy lib over existing. 
+#yum install -y miopen-*.rpm +dest=$(ls ${ROCM_INSTALL_PATH}/lib/libMIOpen.so.1.0.*) +rm -f ${dest} +cp lib/libMIOpen.so.1.0 ${dest} + popd rm -rf MIOpen - -# Cleanup -yum clean all -rm -rf /var/cache/yum -rm -rf /var/lib/yum/yumdb -rm -rf /var/lib/yum/history diff --git a/common/install_mkl.sh b/common/install_mkl.sh index 5ebdd94b1b..5889dc1f0e 100644 --- a/common/install_mkl.sh +++ b/common/install_mkl.sh @@ -3,8 +3,8 @@ set -ex # MKL -MKL_VERSION=2020.0 -MKL_BUILD=166 +MKL_VERSION=2022.2.1 +MKL_BUILD=16993 mkdir -p /opt/intel/lib pushd /tmp curl -fsSL https://anaconda.org/intel/mkl-static/${MKL_VERSION}/download/linux-64/mkl-static-${MKL_VERSION}-intel_${MKL_BUILD}.tar.bz2 | tar xjv diff --git a/common/install_patchelf.sh b/common/install_patchelf.sh index 032e3cc27a..37b69415e8 100644 --- a/common/install_patchelf.sh +++ b/common/install_patchelf.sh @@ -2,7 +2,9 @@ set -ex -git clone https://github.com/NixOS/patchelf +# Pin the version to latest release 0.17.2, building newer commit starts +# to fail on the current image +git clone -b 0.17.2 --single-branch https://github.com/NixOS/patchelf cd patchelf sed -i 's/serial/parallel/g' configure.ac ./bootstrap.sh diff --git a/common/install_rocm.sh b/common/install_rocm.sh index d4352c21c7..4323cebd29 100644 --- a/common/install_rocm.sh +++ b/common/install_rocm.sh @@ -47,6 +47,10 @@ install_ubuntu() { ROCM_REPO="xenial" fi + if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then + ROCM_REPO="${UBUNTU_VERSION_NAME}" + fi + # Add rocm repository wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}" diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index c651a6e4e2..00540fbecd 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -37,5 +37,8 @@ make -f make.gen.hipMAGMA -j $(nproc) LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}" make testing/testing_dgemm -j $(nproc) 
MKLROOT="${MKLROOT}" popd -mv magma /opt/rocm +mkdir -p /opt/rocm/magma +mv magma/include /opt/rocm/magma +mv magma/lib /opt/rocm/magma +rm -rf magma diff --git a/conda/Dockerfile b/conda/Dockerfile index f4f4c834a6..c65e1ad99e 100644 --- a/conda/Dockerfile +++ b/conda/Dockerfile @@ -41,20 +41,12 @@ RUN bash ./install_conda.sh && rm install_conda.sh # Install CUDA FROM base as cuda +ARG CUDA_VERSION=10.2 RUN rm -rf /usr/local/cuda-* ADD ./common/install_cuda.sh install_cuda.sh - -FROM cuda as cuda10.2 -RUN bash ./install_cuda.sh 10.2 -ENV DESIRED_CUDA=10.2 - -FROM cuda as cuda11.3 -RUN bash ./install_cuda.sh 11.3 -ENV DESIRED_CUDA=11.3 - -FROM cuda as cuda11.5 -RUN bash ./install_cuda.sh 11.5 -ENV DESIRED_CUDA=11.5 +ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION} +# Make things in our path by default +ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH FROM cuda as cuda11.6 RUN bash ./install_cuda.sh 11.6 @@ -64,17 +56,19 @@ FROM cuda as cuda11.7 RUN bash ./install_cuda.sh 11.7 ENV DESIRED_CUDA=11.7 +FROM cuda as cuda11.8 +RUN bash ./install_cuda.sh 11.8 +ENV DESIRED_CUDA=11.8 + # Install MNIST test data FROM base as mnist ADD ./common/install_mnist.sh install_mnist.sh RUN bash ./install_mnist.sh FROM base as all_cuda -COPY --from=cuda10.2 /usr/local/cuda-10.2 /usr/local/cuda-10.2 -COPY --from=cuda11.3 /usr/local/cuda-11.3 /usr/local/cuda-11.3 -COPY --from=cuda11.5 /usr/local/cuda-11.5 /usr/local/cuda-11.5 COPY --from=cuda11.6 /usr/local/cuda-11.6 /usr/local/cuda-11.6 COPY --from=cuda11.7 /usr/local/cuda-11.7 /usr/local/cuda-11.7 +COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8 FROM ${BASE_TARGET} as final # Install LLVM diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh index bc43976750..1dc5ffe4f9 100755 --- a/conda/build_all_docker.sh +++ b/conda/build_all_docker.sh @@ -4,6 +4,6 @@ set -eou pipefail TOPDIR=$(git rev-parse --show-toplevel) -for CUDA_VERSION in 11.7 11.6 11.5 11.3 10.2 cpu; do +for CUDA_VERSION in 11.8 11.7 11.6 cpu; do 
CUDA_VERSION="${CUDA_VERSION}" conda/build_docker.sh done diff --git a/conda/build_docker.sh b/conda/build_docker.sh index db7c5be62c..a3385ff38c 100755 --- a/conda/build_docker.sh +++ b/conda/build_docker.sh @@ -52,6 +52,11 @@ if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then set -x docker tag ${DOCKER_IMAGE} "pytorch/conda-builder:cuda${CUDA_VERSION/./}" ) + # Test that we're using the right CUDA compiler + ( + set -x + docker run --rm "${DOCKER_IMAGE}" nvcc --version | grep "cuda_${CUDA_VERSION}" + ) fi if [[ -n ${GITHUB_REF} ]]; then diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index e430538c3f..30986b4088 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -31,7 +31,7 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } -# Parse arguments and determmine version +# Parse arguments and determine version ########################################################### if [[ -n "$DESIRED_CUDA" && -n "$PYTORCH_BUILD_VERSION" && -n "$PYTORCH_BUILD_NUMBER" ]]; then desired_cuda="$DESIRED_CUDA" @@ -106,7 +106,7 @@ if [[ -z "$DESIRED_PYTHON" ]]; then fi fi if [[ "$OSTYPE" == "darwin"* ]]; then - DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer + DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer fi if [[ "$desired_cuda" == 'cpu' ]]; then cpu_only=1 @@ -190,7 +190,7 @@ if [[ ! 
-d "$pytorch_rootdir" ]]; then popd fi pushd "$pytorch_rootdir" -git submodule update --init --recursive --jobs 0 +git submodule update --init --recursive echo "Using Pytorch from " git --no-pager log --max-count 1 popd @@ -207,8 +207,6 @@ if [[ "$(uname)" == 'Darwin' ]]; then rm "$miniconda_sh" export PATH="$tmp_conda/bin:$PATH" retry conda install -yq conda-build - # Install py-lief=0.12.0 containing https://github.com/lief-project/LIEF/pull/579 to speed up the builds - retry conda install -yq py-lief==0.12.0 -c malfet elif [[ "$OSTYPE" == "msys" ]]; then export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" @@ -245,12 +243,13 @@ fi meta_yaml="$build_folder/meta.yaml" echo "Using conda-build folder $build_folder" -# Switch between CPU or CUDA configerations +# Switch between CPU or CUDA configurations ########################################################### build_string_suffix="$PYTORCH_BUILD_NUMBER" if [[ -n "$cpu_only" ]]; then export USE_CUDA=0 export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CONDA_TRITON_CONSTRAINT="" export MAGMA_PACKAGE="" export CUDA_VERSION="0.0" export CUDNN_VERSION="0.0" @@ -266,22 +265,24 @@ else . ./switch_cuda_version.sh "$desired_cuda" # TODO, simplify after anaconda fixes their cudatoolkit versioning inconsistency. 
# see: https://github.com/conda-forge/conda-forge.github.io/issues/687#issuecomment-460086164 - if [[ "$desired_cuda" == "11.7" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.7,<11.8 # [not osx]" - export MAGMA_PACKAGE=" - magma-cuda117 # [not osx and not win]" + if [[ "$desired_cuda" == "11.8" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.8,<11.9 # [not osx]" + export MAGMA_PACKAGE=" - magma-cuda118 # [not osx and not win]" + elif [[ "$desired_cuda" == "11.7" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.7,<11.8 # [not osx]" + export MAGMA_PACKAGE=" - magma-cuda117 # [not osx and not win]" elif [[ "$desired_cuda" == "11.6" ]]; then export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.6,<11.7 # [not osx]" export MAGMA_PACKAGE=" - magma-cuda116 # [not osx and not win]" - elif [[ "$desired_cuda" == "11.3" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - cudatoolkit >=11.3,<11.4 # [not osx]" - export MAGMA_PACKAGE=" - magma-cuda113 # [not osx and not win]" - elif [[ "$desired_cuda" == "10.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - cudatoolkit >=10.2,<10.3 # [not osx]" - export MAGMA_PACKAGE=" - magma-cuda102 # [not osx and not win]" else echo "unhandled desired_cuda: $desired_cuda" exit 1 fi + if [[ "$OSTYPE" != "msys" ]]; then + # TODO: Remove me when Triton has a proper release channel + TRITON_SHORTHASH=$(cut -c1-10 $pytorch_rootdir/.github/ci_commit_pins/triton.txt) + export CONDA_TRITON_CONSTRAINT=" - torchtriton==2.1.0+${TRITON_SHORTHASH}" + fi build_string_suffix="cuda${CUDA_VERSION}_cudnn${CUDNN_VERSION}_${build_string_suffix}" fi @@ -298,6 +299,12 @@ else export CONDA_BUILD_EXTRA_ARGS="" fi +if [[ "$DESIRED_PYTHON" == "3.11" ]]; then + # TODO: Remove me when numpy is available in default channel + # or copy numpy to pytorch channel + export CONDA_BUILD_EXTRA_ARGS="-c malfet ${CONDA_BUILD_EXTRA_ARGS}" +fi + # Build PyTorch with Gloo's TCP_TLS transport if [[ "$(uname)" == 
'Linux' ]]; then export USE_GLOO_WITH_OPENSSL=1 @@ -339,13 +346,14 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # Build the package echo "Build $build_folder for Python version $py_ver" conda config --set anaconda_upload no - conda install -y conda-package-handling - # NS: To be removed after conda docker images are updated - conda update -y conda-build + conda install -y conda-package-handling conda==22.9.0 if [[ "$OSTYPE" == "msys" ]]; then # Don't run tests on windows (they were ignored mostly anyways) NO_TEST="--no-test" + else + # NS: To be removed after conda docker images are updated + conda update -y conda-build fi echo "Calling conda-build at $(date)" @@ -386,7 +394,18 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # Install the built package and run tests, unless it's for mac cross compiled arm64 if [[ -z "$CROSS_COMPILE_ARM64" ]]; then - conda install -y "$built_package" + # Install the package as if from local repo instead of tar.bz2 directly in order + # to trigger runtime dependency installation. See https://github.com/conda/conda/issues/1884 + # Notes: + # - pytorch-nightly is included to install torchtriton + # - nvidia is included for cuda builds, there's no harm in listing the channel for cpu builds + if [[ "$OSTYPE" == "msys" ]]; then + # note the extra slash: `pwd -W` returns `c:/path/to/dir`, we need to add an extra slash for the URI + local_channel="/$(pwd -W)/$output_folder" + else + local_channel="$(pwd)/$output_folder" + fi + conda install -y -c "file://$local_channel" pytorch==$PYTORCH_BUILD_VERSION -c pytorch -c numba/label/dev -c pytorch-nightly -c nvidia echo "$(date) :: Running tests" pushd "$pytorch_rootdir" diff --git a/conda/build_vision.sh b/conda/build_vision.sh deleted file mode 100755 index 3061e4740b..0000000000 --- a/conda/build_vision.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env bash -if [[ -x "/remote/anaconda_token" ]]; then - . 
/remote/anaconda_token || true -fi - -set -ex - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters. Pass cuda version" - echo "CUDA version should be M.m with no dot, e.g. '8.0' or 'cpu'" - exit 1 -fi -desired_cuda="$1" - -export TORCHVISION_BUILD_VERSION="0.3.0" -export TORCHVISION_BUILD_NUMBER=1 - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -if [[ "$OSTYPE" == "msys" ]]; then - mkdir -p "$WIN_PACKAGE_WORK_DIR" || true - vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src" - git config --system core.longpaths true -else - vision_rootdir="$(pwd)/torchvision-src" -fi - -if [[ ! -d "$vision_rootdir" ]]; then - rm -rf "$vision_rootdir" - git clone "https://github.com/pytorch/vision" "$vision_rootdir" - pushd "$vision_rootdir" - git checkout v$TORCHVISION_BUILD_VERSION - popd -fi - -cd "$SOURCE_DIR" - -if [[ "$OSTYPE" == "msys" ]]; then - export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" - export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" - rm -rf "$tmp_conda" - rm -f "$miniconda_exe" - curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" - "$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" - pushd $tmp_conda - export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" - popd - # We have to skip 3.17 because of the following bug. 
- # https://github.com/conda/conda-build/issues/3285 - retry conda install -yq conda-build -fi - -ANACONDA_USER=pytorch -conda config --set anaconda_upload no - - -export TORCHVISION_PACKAGE_SUFFIX="" -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CUDA_VERSION="None" - if [[ "$OSTYPE" != "darwin"* ]]; then - export TORCHVISION_PACKAGE_SUFFIX="-cpu" - fi -else - . ./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - cudatoolkit >=9.0,<9.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ "$OSTYPE" == "msys" ]]; then - time conda build -c $ANACONDA_USER --no-anaconda-upload vs2017 -else - time conda build -c $ANACONDA_USER --no-anaconda-upload --python 2.7 torchvision -fi -time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.5 torchvision -time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.6 torchvision -time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.7 torchvision - -set +e diff --git a/conda/debugging_pytorch.sh b/conda/debugging_pytorch.sh index e79567acb5..4cce4f225e 100644 --- a/conda/debugging_pytorch.sh +++ b/conda/debugging_pytorch.sh @@ -14,7 +14,7 @@ export USE_CUDA_STATIC_LINK=1 . 
./switch_cuda_version.sh 9.0 -conda install -y cmake numpy=1.17 setuptools pyyaml cffi mkl=2018 mkl-include typing_extension ninja magma-cuda80 -c pytorch +conda install -y cmake numpy=1.17 setuptools pyyaml mkl=2018 mkl-include typing_extension ninja magma-cuda80 -c pytorch export CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" git clone https://github.com/pytorch/pytorch -b nightly2 --recursive diff --git a/conda/pytorch-cuda/conda_build_config.yaml b/conda/pytorch-cuda/conda_build_config.yaml index 802638b8eb..67d14f2b17 100644 --- a/conda/pytorch-cuda/conda_build_config.yaml +++ b/conda/pytorch-cuda/conda_build_config.yaml @@ -1,3 +1,7 @@ version: - 11.6 - 11.7 + - 11.8 +target_platform: + - win-64 + - linux-64 diff --git a/conda/pytorch-cuda/meta.yaml b/conda/pytorch-cuda/meta.yaml index 92c970654f..ecb438ca86 100644 --- a/conda/pytorch-cuda/meta.yaml +++ b/conda/pytorch-cuda/meta.yaml @@ -1,7 +1,40 @@ -{% set build = 0 %} +# Package to manage cuda version in PyTorch. +# +# Windows anaconda packages are packaged differently, +# All dlls are kept within *-dev packages hence we need +# include the dev packages for Windows see: +# https://github.com/pytorch/vision/issues/7185#issuecomment-1420002413 +# +# Please note: Build number should be advanced with +# every deployment. 
After the deployment to production +# use following links to validate the correctness of +# deployment: +# https://conda.anaconda.org/pytorch/noarch/ +# https://conda.anaconda.org/pytorch/noarch/repodata.json +{% set build = 3 %} {% set cuda_constraints=">=11.6,<11.7" %} +{% set libcufft_constraints=">=10.7.0.55,<10.7.2.50" %} +{% set libcublas_constraints=">=11.8.1.74,<11.10.1.25" %} +{% set libcusolver_constraints=">=11.3.2.55,<11.3.5.50" %} +{% set libcusparse_constraints=">=11.7.1.55,<11.7.3.50" %} +{% set libnpp_constraints=">=11.6.0.55,<11.7.3.21" %} +{% set libnvjpeg_constraints=">=11.6.0.55,<11.7.2.34" %} {% if version == '11.7' %} {% set cuda_constraints=">=11.7,<11.8" %} +{% set libcufft_constraints=">=10.7.2.50,<10.9.0.58" %} +{% set libcublas_constraints=">=11.10.1.25,<11.11.3.6" %} +{% set libcusolver_constraints=">=11.3.5.50,<11.4.1.48" %} +{% set libcusparse_constraints=">=11.7.3.50,<11.7.5.86" %} +{% set libnpp_constraints=">=11.7.3.21,<11.8.0.86" %} +{% set libnvjpeg_constraints=">=11.7.2.34,<11.9.0.86" %} +{% elif version == '11.8' %} +{% set cuda_constraints=">=11.8,<12.0" %} +{% set libcufft_constraints=">=10.9.0.58,<11.0.0.21" %} +{% set libcublas_constraints=">=11.11.3.6,<12.0.1.189" %} +{% set libcusolver_constraints=">=11.4.1.48,<11.4.2.57" %} +{% set libcusparse_constraints=">=11.7.5.86,<12.0.0.76" %} +{% set libnpp_constraints=">=11.8.0.86,<12.0.0.30" %} +{% set libnvjpeg_constraints=">=11.9.0.86,<12.0.0.28" %} {% endif %} package: @@ -9,33 +42,29 @@ package: version: {{ version }} build: number: {{ build }} - noarch: generic requirements: run: - - cuda={{ version }} - run_constrained: - - cuda-cccl {{ cuda_constraints }} - - cuda-command-line-tools {{ cuda_constraints }} - - cuda-compiler {{ cuda_constraints }} + - cuda-libraries {{ cuda_constraints }} + - cuda-nvtx {{ cuda_constraints }} + - libnvjpeg {{ libnvjpeg_constraints }} - cuda-cudart {{ cuda_constraints }} - - cuda-cudart-dev {{ cuda_constraints }} - - cuda-cuobjdump {{ 
cuda_constraints }} - cuda-cupti {{ cuda_constraints }} - - cuda-cuxxfilt {{ cuda_constraints }} - - cuda-driver-dev {{ cuda_constraints }} - - cuda-libraries {{ cuda_constraints }} - - cuda-libraries-dev {{ cuda_constraints }} - - cuda-cudaart-dev {{ cuda_constraints }} - - cuda-nvcc {{ cuda_constraints }} - - cuda-nvml-dev {{ cuda_constraints }} - - cuda-nvprune {{ cuda_constraints }} - cuda-nvrtc {{ cuda_constraints }} - - cuda-nvrtc-dev {{ cuda_constraints }} - - cuda-nvtx {{ cuda_constraints }} - cuda-runtime {{ cuda_constraints }} - - cuda-toolkit {{ cuda_constraints }} - - cuda-tools {{ cuda_constraints }} - # None, pytorch should depend on pytorch-cuda + - libcufft {{ libcufft_constraints }} + - libcublas {{ libcublas_constraints }} + - libcusolver {{ libcusolver_constraints }} + - libcusparse {{ libcusparse_constraints }} + - libnpp {{ libnpp_constraints }} + - cuda-libraries-dev {{ cuda_constraints }} # [win64] + - libnvjpeg-dev {{ libnvjpeg_constraints }} # [win64] + - cuda-cudart-dev {{ cuda_constraints }} # [win64] + - cuda-nvrtc-dev {{ cuda_constraints }} # [win64] + - libcufft-dev {{ libcufft_constraints }} # [win64] + - libcublas-dev {{ libcublas_constraints }} # [win64] + - libcusolver-dev {{ libcusolver_constraints }} # [win64] + - libcusparse-dev {{ libcusparse_constraints }} # [win64] + - libnpp-dev {{ libnpp_constraints }} # [win64] test: commands: - echo "pytorch-cuda metapackage is created." 
diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat index 09dc1a8167..18850f758a 100644 --- a/conda/pytorch-nightly/bld.bat +++ b/conda/pytorch-nightly/bld.bat @@ -20,22 +20,18 @@ if "%build_with_cuda%" == "" goto cuda_flags_end set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% set CUDA_BIN_PATH=%CUDA_PATH%\bin set TORCH_NVCC_FLAGS=-Xfatbin -compress-all -set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0 -if "%desired_cuda%" == "10.2" set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5 -if "%desired_cuda%" == "11.3" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 -) +set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6 if "%desired_cuda%" == "11.5" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) if "%desired_cuda%" == "11.6" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) if "%desired_cuda%" == "11.7" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 +) +if "%desired_cuda%" == "11.8" ( + set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;9.0 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) @@ -112,6 +108,7 @@ IF "%USE_SCCACHE%" == "1" ( if NOT "%build_with_cuda%" == "" ( copy "%CUDA_BIN_PATH%\cudnn*64_*.dll*" %SP_DIR%\torch\lib + copy "%NVTOOLSEXT_PATH%\bin\x64\nvToolsExt64_*.dll*" %SP_DIR%\torch\lib :: cupti library file name changes aggressively, bundle it to avoid :: potential file name mismatch. 
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" %SP_DIR%\torch\lib diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index 05a496fc69..ad1871ac4c 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -8,6 +8,7 @@ export PYTORCH_BUILD_VERSION=$PKG_VERSION export PYTORCH_BUILD_NUMBER=$PKG_BUILDNUM export USE_LLVM="/opt/llvm_no_cxx11_abi" export LLVM_DIR="$USE_LLVM/lib/cmake/llvm" +export PACKAGE_TYPE="conda" # set OPENSSL_ROOT_DIR=/opt/openssl if it exists if [[ -e /opt/openssl ]]; then @@ -51,41 +52,35 @@ if [[ -z "$USE_CUDA" || "$USE_CUDA" == 1 ]]; then fi if [[ -n "$build_with_cuda" ]]; then export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" - export TORCH_CUDA_ARCH_LIST="3.7+PTX;5.0" + TORCH_CUDA_ARCH_LIST="3.7+PTX;5.0" export USE_STATIC_CUDNN=1 # links cudnn statically (driven by tools/setup_helpers/cudnn.py) - if [[ $CUDA_VERSION == 10* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5" - DEPS_LIST=(/usr/local/cuda-10.2/extras/CUPTI/lib64/libcupti.so.10.2) - elif [[ $CUDA_VERSION == 11.3* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" - #for cuda 11.3 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html - #which does not have single static libcudnn_static.a deliverable to link with - export USE_STATIC_CUDNN=0 - #for cuda 11.3 include all dynamic loading libraries - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.3/extras/CUPTI/lib64/libcupti.so.11.3) - elif [[ $CUDA_VERSION == 11.5* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" - #for cuda 11.5 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html - #which does not have single static libcudnn_static.a deliverable to link with - export USE_STATIC_CUDNN=0 - #for cuda 11.5 include all dynamic loading libraries - 
DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.5/extras/CUPTI/lib64/libcupti.so.11.5) - elif [[ $CUDA_VERSION == 11.6* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" + if [[ $CUDA_VERSION == 11.6* ]]; then + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" #for cuda 11.5 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html #which does not have single static libcudnn_static.a deliverable to link with export USE_STATIC_CUDNN=0 #for cuda 11.5 include all dynamic loading libraries DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.6/extras/CUPTI/lib64/libcupti.so.11.6) elif [[ $CUDA_VERSION == 11.7* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" #for cuda 11.7 we use cudnn 8.5 #which does not have single static libcudnn_static.a deliverable to link with export USE_STATIC_CUDNN=0 #for cuda 11.7 include all dynamic loading libraries DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.7/extras/CUPTI/lib64/libcupti.so.11.7) + elif [[ $CUDA_VERSION == 11.8* ]]; then + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0" + #for cuda 11.8 we use cudnn 8.7 + #which does not have single static libcudnn_static.a deliverable to link with + export USE_STATIC_CUDNN=0 + #for cuda 11.8 include all dynamic loading libraries + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8) + fi + if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then + TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST" fi + export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" export NCCL_ROOT_DIR=/usr/local/cuda export USE_STATIC_NCCL=1 # links nccl statically (driven by tools/setup_helpers/nccl.py, some of the NCCL cmake files such as FindNCCL.cmake and gloo/FindNCCL.cmake) diff --git 
a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 36e92d5e38..9416a1ed84 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -21,15 +21,17 @@ requirements: - pyyaml {% if cross_compile_arm64 == 0 %} - mkl-include # [x86_64] - - mkl=2020.2 # [x86_64 and (not win or py <= 39)] - - mkl=2021.4 # [x86_64 and win and py >= 310] + - mkl=2020.2 # [x86_64 and not win] + - mkl=2021.4 # [x86_64 and win] {% endif %} - typing_extensions - ninja - libuv # [win] - numpy=1.19 # [py <= 39] - - numpy=1.21.5 # [py >= 310] - - openssl=1.1.1l # [py >= 310 and linux] + - numpy=1.21.5 # [py == 310] + - numpy=1.23.5 # [py >= 311] + - openssl=1.1.1l # [py == 310 and linux] + - openssl=1.1.1s # [py >= 311 and linux] {{ environ.get('PYTORCH_LLVM_PACKAGE', ' - llvmdev=9') }} {{ environ.get('MAGMA_PACKAGE', '') }} @@ -41,11 +43,16 @@ requirements: - libuv # [win] - intel-openmp # [win] - typing_extensions + - sympy + - filelock + - networkx + - jinja2 # [py <= 310] {% if cross_compile_arm64 == 0 %} - blas * mkl {% endif %} - pytorch-mutex 1.0 {{ build_variant }} # [not osx ] {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }} +{{ environ.get('CONDA_TRITON_CONSTRAINT', '') }} {% if build_variant == 'cpu' %} run_constrained: @@ -64,6 +71,7 @@ build: - CUDA_VERSION - CUDNN_VERSION - CONDA_CUDATOOLKIT_CONSTRAINT + - CONDA_TRITON_CONSTRAINT - USE_CUDA - CMAKE_ARGS - EXTRA_CAFFE2_CMAKE_FLAGS @@ -81,6 +89,7 @@ build: - USE_PYTORCH_METAL_EXPORT # [osx] - USE_COREML_DELEGATE # [osx] - _GLIBCXX_USE_CXX11_ABI # [unix] + - OVERRIDE_TORCH_CUDA_ARCH_LIST test: imports: diff --git a/conda/torchvision/bld.bat b/conda/torchvision/bld.bat deleted file mode 100644 index 14f6935fba..0000000000 --- a/conda/torchvision/bld.bat +++ /dev/null @@ -1,24 +0,0 @@ -@echo on - -set TORCHVISION_BUILD_VERSION=%PKG_VERSION% -set TORCHVISION_BUILD_NUMBER=%PKG_BUILDNUM% - -if not "%CUDA_VERSION%" == "None" ( - set build_with_cuda=1 - set 
desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% -) else ( - set build_with_cuda= -) - -if "%build_with_cuda%" == "" goto cuda_flags_end - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - -:cuda_flags_end - -python setup.py install --single-version-externally-managed --record=record.txt -if errorlevel 1 exit /b 1 diff --git a/conda/torchvision/meta.yaml b/conda/torchvision/meta.yaml deleted file mode 100644 index caa439c7d2..0000000000 --- a/conda/torchvision/meta.yaml +++ /dev/null @@ -1,53 +0,0 @@ -package: - name: torchvision{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }} - version: "{{ environ.get('TORCHVISION_BUILD_VERSION') }}" - -source: - git_rev: v{{ environ.get('TORCHVISION_BUILD_VERSION') }} - git_url: https://github.com/pytorch/vision.git - - -requirements: - build: - - {{ compiler('c') }} # [win] - - host: - - python - - setuptools - - pytorch{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }} >=1.1.0 -{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - - run: - - python - - pillow >=4.1.1 - - numpy >=1.11 - - pytorch{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }} >=1.1.0 - - six -{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - -build: - number: {{ environ.get('TORCHVISION_BUILD_NUMBER') }} - string: py{{py}}_cu{{ environ['CUDA_VERSION'] }}_{{environ.get('TORCHVISION_BUILD_NUMBER')}} - script: 
python setup.py install --single-version-externally-managed --record=record.txt # [not win] - script_env: - - CUDA_VERSION - -test: - imports: - - torchvision - - torchvision.datasets - - torchvision.transforms - source_files: - - test - requires: - - pytest - - scipy - commands: - pytest . - - -about: - home: https://github.com/pytorch/vision - license: BSD - license_file: LICENSE - summary: 'image and video datasets and models for torch deep learning' diff --git a/conda/vs2017/conda_build_config.yaml b/conda/vs2017/conda_build_config.yaml deleted file mode 100755 index 5188bb0ebe..0000000000 --- a/conda/vs2017/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. 
-cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/conda/vs2017/activate.bat b/conda/vs2022/activate.bat old mode 100755 new mode 100644 similarity index 57% rename from conda/vs2017/activate.bat rename to conda/vs2022/activate.bat index ccecfc2544..fe18f77230 --- a/conda/vs2017/activate.bat +++ b/conda/vs2022/activate.bat @@ -1,19 +1,26 @@ :: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 +set DISTUTILS_USE_SDK=1 +set MSSdk=1 -SET "VS_VERSION=15.0" -SET "VS_MAJOR=15" -SET "VS_YEAR=2017" +set "VS_VERSION=17.4" +set "VS_MAJOR=17" +set "VC_YEAR=2022" +set "VC_VERSION_LOWER=17" +set "VC_VERSION_UPPER=18" set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" set "MSYS2_ENV_CONV_EXCL=CL" :: For Python 3.5+, ensure that we link with the dynamic runtime. See :: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" +set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime143.dll" -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( +if not "%VS15INSTALLDIR%" == "" if exist "%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VSINSTALLDIR=%VS15INSTALLDIR%\" + goto :vswhere +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( set "VSINSTALLDIR=%%i\" goto :vswhere @@ -23,15 +30,15 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio :vswhere :: Shorten PATH to avoid the `input line too long` error. 
-SET MyPath=%PATH% +set MyPath=%PATH% setlocal EnableDelayedExpansion -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" +set TempPath="%MyPath:;=";"%" +set var= +for %%a in (%TempPath%) do ( + if exist %%~sa ( + set "var=!var!;%%~sa" ) ) @@ -39,6 +46,6 @@ set "TempPath=!var:~1!" endlocal & set "PATH=%TempPath%" :: Shorten current directory too -FOR %%A IN (.) DO CD "%%~sA" +for %%A in (.) do cd "%%~sA" :: other things added by install_activate.bat at package build time diff --git a/conda/torchvision/conda_build_config.yaml b/conda/vs2022/conda_build_config.yaml similarity index 86% rename from conda/torchvision/conda_build_config.yaml rename to conda/vs2022/conda_build_config.yaml index 5188bb0ebe..e2a4de3c2e 100644 --- a/conda/torchvision/conda_build_config.yaml +++ b/conda/vs2022/conda_build_config.yaml @@ -1,12 +1,13 @@ blas_impl: - mkl # [x86_64] c_compiler: - - vs2017 # [win] + - vs2022 # [win] cxx_compiler: - - vs2017 # [win] + - vs2022 # [win] python: - - 3.5 - - 3.6 + - 3.8 + - 3.9 + - 3.10 # This differs from target_platform in that it determines what subdir the compiler # will target, not what subdir the compiler package will be itself. 
# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 diff --git a/conda/vs2017/install_activate.bat b/conda/vs2022/install_activate.bat old mode 100755 new mode 100644 similarity index 98% rename from conda/vs2017/install_activate.bat rename to conda/vs2022/install_activate.bat index 2ca223ebc8..eb85767d67 --- a/conda/vs2017/install_activate.bat +++ b/conda/vs2022/install_activate.bat @@ -1,5 +1,5 @@ -set YEAR=2017 -set VER=15 +set YEAR=2022 +set VER=17 mkdir "%PREFIX%\etc\conda\activate.d" copy "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" diff --git a/conda/vs2017/install_runtime.bat b/conda/vs2022/install_runtime.bat old mode 100755 new mode 100644 similarity index 92% rename from conda/vs2017/install_runtime.bat rename to conda/vs2022/install_runtime.bat index 5163c16cf2..bac684dae6 --- a/conda/vs2017/install_runtime.bat +++ b/conda/vs2022/install_runtime.bat @@ -3,7 +3,7 @@ if "%ARCH%"=="64" ( set VC_PATH=x64 ) -set MSC_VER=2017 +set MSC_VER=2022 rem :: This should always be present for VC installed with VS. 
Not sure about VC installed with Visual C++ Build Tools 2015 rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( @@ -23,10 +23,10 @@ robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "% robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E if %ERRORLEVEL% GEQ 8 exit 1 -REM ========== This one comes from visual studio 2017 -set "VC_VER=141" +REM ========== This one comes from visual studio 2022 +set "VC_VER=143" -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [17^,18^) -property installationPath`) do ( if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" goto :eof diff --git a/conda/vs2017/meta.yaml b/conda/vs2022/meta.yaml old mode 100755 new mode 100644 similarity index 66% rename from conda/vs2017/meta.yaml rename to conda/vs2022/meta.yaml index 1f569525ee..184c4c32df --- a/conda/vs2017/meta.yaml +++ b/conda/vs2022/meta.yaml @@ -1,7 +1,7 @@ -{% set vcver="14.1" %} -{% set vcfeature="14" %} -{% set vsyear="2017" %} -{% set fullver="15.4.27004.2010" %} +{% set vcver="17.4" %} +{% set vcfeature="17" %} +{% set vsyear="2022" %} +{% set fullver="17.4.33110.190" %} package: name: vs{{ vsyear }} @@ -16,7 +16,7 @@ outputs: - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} script: install_activate.bat track_features: - # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". + # VS 2022 is binary-compatible with VS 2019/vc 14.2, VS 2017/vc 14.1 and 2015/vc14. Tools are "v143". 
strong: - vc{{ vcfeature }} about: diff --git a/cron/nightly_defaults.sh b/cron/nightly_defaults.sh index 0f6532adc1..d8b6f5ee04 100755 --- a/cron/nightly_defaults.sh +++ b/cron/nightly_defaults.sh @@ -120,7 +120,7 @@ if [[ ! -d "$NIGHTLIES_PYTORCH_ROOT" ]]; then export PYTORCH_BRANCH="$last_commit" fi git checkout "$PYTORCH_BRANCH" - git submodule update --jobs 0 + git submodule update popd fi @@ -229,7 +229,7 @@ if [[ "$DAYS_TO_KEEP" < '1' ]]; then fi # PYTORCH_NIGHTLIES_TIMEOUT -# Timeout in seconds. +# Timeout in seconds. # When full testing is enabled, condas builds often take up to 2 hours 20 # minutes, so the default is set to (2 * 60 + 20 + 40 [buffer]) * 60 == 10800 # seconds. diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index f38aca0632..c5eb904ce6 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -42,19 +42,7 @@ ENV CUDA_HOME /usr/local/cuda FROM base as conda ADD ./common/install_conda.sh install_conda.sh RUN bash ./install_conda.sh && rm install_conda.sh -RUN /opt/conda/bin/conda install -y cmake=3.14 - -FROM cuda as cuda10.2 -RUN bash ./install_cuda.sh 10.2 -RUN bash ./install_magma.sh 10.2 - -FROM cuda as cuda11.3 -RUN bash ./install_cuda.sh 11.3 -RUN bash ./install_magma.sh 11.3 - -FROM cuda as cuda11.5 -RUN bash ./install_cuda.sh 11.5 -RUN bash ./install_magma.sh 11.5 +RUN /opt/conda/bin/conda install -y cmake=3.18 FROM cuda as cuda11.6 RUN bash ./install_cuda.sh 11.6 @@ -64,6 +52,10 @@ FROM cuda as cuda11.7 RUN bash ./install_cuda.sh 11.7 RUN bash ./install_magma.sh 11.7 +FROM cuda as cuda11.8 +RUN bash ./install_cuda.sh 11.8 +RUN bash ./install_magma.sh 11.8 + FROM cpu as rocm ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} @@ -77,23 +69,13 @@ RUN apt-get update -y && \ apt-get install python -y && \ apt-get clean -FROM rocm as rocm5.2 -RUN ROCM_VERSION=5.2 bash ./install_rocm.sh && rm install_rocm.sh -RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh -RUN bash ./install_rocm_magma.sh && rm 
install_rocm_magma.sh - -FROM rocm as rocm5.1.3 -RUN ROCM_VERSION=5.1.3 bash ./install_rocm.sh && rm install_rocm.sh -RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh -RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh - -FROM rocm as rocm5.1.1 -RUN ROCM_VERSION=5.1.1 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.4.2 +RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh -FROM rocm as rocm5.1 -RUN ROCM_VERSION=5.1 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.3 +RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh diff --git a/libtorch/build.sh b/libtorch/build.sh index b2551a6be3..88c8c6f9a8 100644 --- a/libtorch/build.sh +++ b/libtorch/build.sh @@ -7,4 +7,4 @@ set -ex SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.7" ${SCRIPTPATH}/../manywheel/build.sh +BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.8" ${SCRIPTPATH}/../manywheel/build.sh diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index 5703ca41cc..8d25da9bcd 100755 --- a/libtorch/build_all_docker.sh +++ b/libtorch/build_all_docker.sh @@ -4,10 +4,10 @@ set -eou pipefail TOPDIR=$(git rev-parse --show-toplevel) -for cuda_version in 11.7 11.6 11.5 11.3 10.2; do +for cuda_version in 11.8 11.7 11.6; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.1.1 5.2; do +for rocm_version in 5.3 5.4.2; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index fe441bb9a6..bbf42b1d02 100755 --- a/libtorch/build_docker.sh +++ 
b/libtorch/build_docker.sh @@ -27,7 +27,7 @@ case ${GPU_ARCH_TYPE} in rocm) BASE_TARGET=rocm${GPU_ARCH_VERSION} DOCKER_TAG=rocm${GPU_ARCH_VERSION} - GPU_IMAGE=rocm/dev-ubuntu-18.04:${GPU_ARCH_VERSION} + GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION} PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then diff --git a/magma/Makefile b/magma/Makefile index 2d690c14c8..4a90a43e2b 100644 --- a/magma/Makefile +++ b/magma/Makefile @@ -1,8 +1,8 @@ SHELL=/usr/bin/env bash -DESIRED_CUDA ?= 11.3 -PACKAGE_NAME ?= magma-cuda113 -CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 +DESIRED_CUDA ?= 11.6 +PACKAGE_NAME ?= magma-cuda116 +CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 DOCKER_RUN = set -eou pipefail; docker run --rm -i \ -v $(shell git rev-parse --show-toplevel):/builder \ @@ -14,47 +14,30 @@ DOCKER_RUN = set -eou pipefail; docker run --rm -i \ magma/build_magma.sh .PHONY: all +all: magma-cuda118 all: magma-cuda117 all: magma-cuda116 -all: magma-cuda115 -all: magma-cuda113 -all: magma-cuda102 .PHONY: clean: $(RM) -r magma-* $(RM) -r output +.PHONY: magma-cuda118 +magma-cuda118: DESIRED_CUDA := 11.8 +magma-cuda118: PACKAGE_NAME := magma-cuda118 +magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_90,code=sm_90 +magma-cuda118: + $(DOCKER_RUN) + .PHONY: magma-cuda117 magma-cuda117: DESIRED_CUDA := 11.7 magma-cuda117: PACKAGE_NAME := magma-cuda117 -magma-cuda117: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 magma-cuda117: $(DOCKER_RUN) .PHONY: magma-cuda116 magma-cuda116: DESIRED_CUDA := 11.6 magma-cuda116: PACKAGE_NAME := magma-cuda116 
-magma-cuda116: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 magma-cuda116: $(DOCKER_RUN) - -.PHONY: magma-cuda115 -magma-cuda115: DESIRED_CUDA := 11.5 -magma-cuda115: PACKAGE_NAME := magma-cuda115 -magma-cuda115: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -magma-cuda115: - $(DOCKER_RUN) - -.PHONY: magma-cuda113 -magma-cuda113: DESIRED_CUDA := 11.3 -magma-cuda113: PACKAGE_NAME := magma-cuda113 -magma-cuda113: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -magma-cuda113: - $(DOCKER_RUN) - -.PHONY: magma-cuda102 -magma-cuda102: DESIRED_CUDA := 10.2 -magma-cuda102: PACKAGE_NAME := magma-cuda102 -magma-cuda102: - $(DOCKER_RUN) diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile index 3140f98638..43a7d0568b 100644 --- a/manywheel/Dockerfile +++ b/manywheel/Dockerfile @@ -21,9 +21,10 @@ RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm & rpm -ivh epel-release-latest-7.noarch.rpm && \ rm -f epel-release-latest-7.noarch.rpm -# cmake -RUN yum install -y cmake3 && \ - ln -s /usr/bin/cmake3 /usr/bin/cmake +# cmake-3.18.4 from pip +RUN yum install -y python3-pip && \ + python3 -mpip install cmake==3.18.4 && \ + ln -s /usr/local/bin/cmake /usr/bin/cmake RUN yum install -y autoconf aclocal automake make @@ -35,18 +36,19 @@ FROM base as openssl ADD ./common/install_openssl.sh install_openssl.sh RUN bash ./install_openssl.sh && rm install_openssl.sh -FROM base as python +# EPEL for cmake +FROM base as patchelf +# Install patchelf +ADD ./common/install_patchelf.sh install_patchelf.sh +RUN bash ./install_patchelf.sh && rm install_patchelf.sh +RUN cp $(which patchelf) /patchelf + +FROM patchelf as python # build python COPY manywheel/build_scripts /build_scripts ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh RUN bash build_scripts/build.sh && rm -r build_scripts -# remove unncessary python versions 
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 -RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 -RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 -RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 - FROM base as cuda ARG BASE_CUDA_VERSION=10.2 # Install CUDA @@ -58,13 +60,6 @@ FROM base as intel ADD ./common/install_mkl.sh install_mkl.sh RUN bash ./install_mkl.sh && rm install_mkl.sh -# EPEL for cmake -FROM base as patchelf -# Install patchelf -ADD ./common/install_patchelf.sh install_patchelf.sh -RUN bash ./install_patchelf.sh && rm install_patchelf.sh -RUN cp $(which patchelf) /patchelf - FROM base as magma ARG BASE_CUDA_VERSION=10.2 # Install magma @@ -142,9 +137,12 @@ RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_ ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH -# cmake -RUN yum install -y cmake3 && \ - ln -s /usr/bin/cmake3 /usr/bin/cmake +# cmake is already installed inside the rocm base image, so remove if present +RUN rpm -e cmake || true +# cmake-3.18.4 from pip +RUN yum install -y python3-pip && \ + python3 -mpip install cmake==3.18.4 && \ + ln -s /usr/local/bin/cmake /usr/bin/cmake # ninja RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-5.el7.noarch.rpm @@ -155,7 +153,7 @@ RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} -FROM common as rocm_final +FROM cpu_final as rocm_final ARG ROCM_VERSION=3.7 ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} @@ -166,3 +164,5 @@ ADD ./common/install_rocm_drm.sh install_rocm_drm.sh RUN bash ./install_rocm_drm.sh && rm 
install_rocm_drm.sh ADD ./common/install_rocm_magma.sh install_rocm_magma.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh +# cmake3 is needed for the MIOpen build +RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3 diff --git a/manywheel/Dockerfile_cxx11-abi b/manywheel/Dockerfile_cxx11-abi index 966d570869..a5b0673e97 100644 --- a/manywheel/Dockerfile_cxx11-abi +++ b/manywheel/Dockerfile_cxx11-abi @@ -13,7 +13,6 @@ RUN yum -y update RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel RUN yum install -y autoconf automake make cmake gdb gcc gcc-c++ - FROM base as openssl ADD ./common/install_openssl.sh install_openssl.sh RUN bash ./install_openssl.sh && rm install_openssl.sh diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index d50eea49d7..395f71be36 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -9,12 +9,12 @@ MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh" GPU_ARCH_TYPE=cpu-cxx11-abi "${TOPDIR}/manywheel/build_docker.sh" -for cuda_version in 11.5 11.3 10.2; do +for cuda_version in 11.7 11.6; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.1.1 5.2; do +for rocm_version in 5.3 5.4.2; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index 878d81628b..cc56e695a4 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -371,15 +371,15 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib find $PREFIX -maxdepth 1 -type f 
-name "*.so*" | while read sofile; do - echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib' - $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile + echo "Setting rpath of $sofile to ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'}" + $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile $PATCHELF_BIN --print-rpath $sofile done # set RPATH of lib/ files to $ORIGIN find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do - echo "Setting rpath of $sofile to " '$ORIGIN' - $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile + echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}" + $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile $PATCHELF_BIN --print-rpath $sofile done @@ -387,10 +387,10 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g') if [[ -e $record_file ]]; then echo "Generating new record file $record_file" - rm -f $record_file + : > "$record_file" # generate records for folders in wheel find * -type f | while read fname; do - echo $(make_wheel_record $fname) >>$record_file + make_wheel_record "$fname" >>"$record_file" done fi diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index efea1ae93d..6b5cd91117 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -58,12 +58,12 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0" case ${CUDA_VERSION} in - 11.[3567]) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6" + 11.8) + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; - 10.*) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" + 11.[67]) + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; *) @@ -108,96 +108,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then 
LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" fi -if [[ $CUDA_VERSION == "10.2" ]]; then -DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.10.2" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.10.2" - "/usr/local/cuda/lib64/libnvrtc-builtins.so" - "/usr/local/cuda/lib64/libcublas.so.10" - "/usr/local/cuda/lib64/libcublasLt.so.10" - "$LIBGOMP_PATH" -) - -DEPS_SONAME=( - "libcudart.so.10.2" - "libnvToolsExt.so.1" - "libnvrtc.so.10.2" - "libnvrtc-builtins.so" - "libcublas.so.10" - "libcublasLt.so.10" - "libgomp.so.1" -) -elif [[ $CUDA_VERSION == "11.3" ]]; then -export USE_STATIC_CUDNN=0 -DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.3, it links to 11.3.58 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.3" - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" - "/usr/local/cuda/lib64/libcublas.so.11" - "/usr/local/cuda/lib64/libcublasLt.so.11" - "$LIBGOMP_PATH" -) - -DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.3" - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" - "libcublas.so.11" - "libcublasLt.so.11" - "libgomp.so.1" -) -elif [[ $CUDA_VERSION == "11.5" ]]; then -export USE_STATIC_CUDNN=0 -DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.5, it links to 11.5.50 - 
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.5" - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" - "/usr/local/cuda/lib64/libcublas.so.11" - "/usr/local/cuda/lib64/libcublasLt.so.11" - "$LIBGOMP_PATH" -) -DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.5" - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" - "libcublas.so.11" - "libcublasLt.so.11" - "libgomp.so.1" -) -elif [[ $CUDA_VERSION == "11.6" ]]; then +if [[ $CUDA_VERSION == "11.6" ]]; then export USE_STATIC_CUDNN=0 DEPS_LIST=( "/usr/local/cuda/lib64/libcudart.so.11.0" @@ -231,48 +142,101 @@ DEPS_SONAME=( "libcublasLt.so.11" "libgomp.so.1" ) -elif [[ $CUDA_VERSION == "11.7" ]]; then -export USE_STATIC_CUDNN=0 -DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" - "/usr/local/cuda/lib64/libcublas.so.11" - "/usr/local/cuda/lib64/libcublasLt.so.11" - "$LIBGOMP_PATH" -) -DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.7" - "libcudnn_adv_infer.so.8" - 
"libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" - "libcublas.so.11" - "libcublasLt.so.11" - "libgomp.so.1" -) +elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then + export USE_STATIC_CUDNN=0 + # Try parallelizing nvcc as well + export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" + DEPS_LIST=( + "$LIBGOMP_PATH" + ) + DEPS_SONAME=( + "libgomp.so.1" + ) -# Try parallelizing nvcc as well -export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" + if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + echo "Bundling with cudnn and cublas." + DEPS_LIST+=( + "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" + "/usr/local/cuda/lib64/libcudnn.so.8" + "/usr/local/cuda/lib64/libcublas.so.11" + "/usr/local/cuda/lib64/libcublasLt.so.11" + "/usr/local/cuda/lib64/libcudart.so.11.0" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" + "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake, it links to more specific cuda version + ) + DEPS_SONAME+=( + "libcudnn_adv_infer.so.8" + "libcudnn_adv_train.so.8" + "libcudnn_cnn_infer.so.8" + "libcudnn_cnn_train.so.8" + "libcudnn_ops_infer.so.8" + "libcudnn_ops_train.so.8" + "libcudnn.so.8" + "libcublas.so.11" + "libcublasLt.so.11" + "libcudart.so.11.0" + "libnvToolsExt.so.1" + "libnvrtc.so.11.2" + ) + if [[ $CUDA_VERSION == "11.7" ]]; then + DEPS_LIST+=( + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" + ) + DEPS_SONAME+=( + "libnvrtc-builtins.so.11.7" + ) + fi + if [[ $CUDA_VERSION == "11.8" ]]; then + DEPS_LIST+=( + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" + ) + DEPS_SONAME+=( + "libnvrtc-builtins.so.11.8" + ) + fi + else + echo "Using 
nvidia libs from pypi." + CUDA_RPATHS=( + '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cuda_cupti/lib' + '$ORIGIN/../../nvidia/cuda_nvrtc/lib' + '$ORIGIN/../../nvidia/cuda_runtime/lib' + '$ORIGIN/../../nvidia/cudnn/lib' + '$ORIGIN/../../nvidia/cufft/lib' + '$ORIGIN/../../nvidia/curand/lib' + '$ORIGIN/../../nvidia/cusolver/lib' + '$ORIGIN/../../nvidia/cusparse/lib' + '$ORIGIN/../../nvidia/nccl/lib' + '$ORIGIN/../../nvidia/nvtx/lib' + ) + CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") + export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' + export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' + export FORCE_RPATH="--force-rpath" + export USE_STATIC_NCCL=0 + export USE_SYSTEM_NCCL=1 + export ATEN_STATIC_CUDA=0 + export USE_CUDA_STATIC_LINK=0 + export USE_CUPTI_SO=1 + export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" + export NCCL_LIB_DIR="/usr/local/cuda/lib64/" + fi else echo "Unknown cuda version $CUDA_VERSION" exit 1 fi +# TODO: Remove me when Triton has a proper release channel +if [[ $(uname) == "Linux" && -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton==2.1.0+${TRITON_SHORTHASH}" +fi + # builder/test.sh requires DESIRED_CUDA to know what tests to exclude export DESIRED_CUDA="$cuda_version_nodot" diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index 1b8b04e706..9b0480210f 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -56,7 +56,7 @@ case ${GPU_ARCH_TYPE} in if [[ $ROCM_VERSION_INT -ge 40300 ]]; then PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030" fi - DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" + DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9" ;; *) echo "ERROR: Unrecognized GPU_ARCH_TYPE: 
${GPU_ARCH_TYPE}" diff --git a/manywheel/build_libtorch.sh b/manywheel/build_libtorch.sh index 855d4bcc83..32e0f7a1a1 100644 --- a/manywheel/build_libtorch.sh +++ b/manywheel/build_libtorch.sh @@ -65,13 +65,11 @@ fi # ever pass one python version, so we assume that DESIRED_PYTHON is not a list # in this case if [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then - if [[ "$DESIRED_PYTHON" == '2.7mu' ]]; then - DESIRED_PYTHON='cp27-cp27mu' - elif [[ "$DESIRED_PYTHON" == '3.8m' ]]; then - DESIRED_PYTHON='cp38-cp38' + if [[ "$DESIRED_PYTHON" == '3.7' ]]; then + DESIRED_PYTHON='cp37-cp37m' else python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)" - DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}m" + DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}" fi fi pydir="/opt/python/$DESIRED_PYTHON" diff --git a/release/promote.sh b/release/promote.sh index 984788e42b..1147dc0c98 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -6,10 +6,11 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source "${DIR}/release_versions.sh" # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-1.12.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.13.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-0.12.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.13.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0} DRY_RUN=${DRY_RUN:-enabled} @@ -70,16 +71,48 @@ promote_pypi() { echo } +# Promote s3 dependencies +# promote_s3 "certifi" whl "2022.12.7" +# promote_s3 "charset_normalizer" whl "2.1.1" +# promote_s3 "cmake" whl "3.25" +# promote_s3 "colorama" whl "0.4.6" +# promote_s3 "triton" whl "2.0.0" +# promote_s3 "pytorch_triton_rocm" whl "2.0.1" +# promote_s3 "tqdm" whl "4.64.1" +# promote_s3 "Pillow" whl "9.3.0" +# for python 
3.8-3.11 +# promote_s3 "numpy" whl "1.24.1" +# for python 3.7 older pytorch versions +# promote_s3 "numpy" whl "1.21.6" +# promote_s3 "urllib3" whl "1.26.13" +# promote_s3 "lit" whl "15.0.7" +# promote_s3 "sympy" whl "1.11.1" +# promote_s3 "typing_extensions" whl "4.4.0" +# promote_s3 "filelock" whl "3.9.0" +# promote_s3 "mpmath" whl "1.2.1" +# promote_s3 "MarkupSafe" whl "2.1.2" +# promote_s3 "Jinja2" whl "3.1.2" +# promote_s3 "idna" whl "3.4" +# promote_s3 "networkx" whl "3.0" +# promote_s3 "packaging" whl "22.0" +# promote_s3 "requests" whl "2.28.1" + # promote_s3 torch whl "${PYTORCH_VERSION}" # promote_s3 torchvision whl "${TORCHVISION_VERSION}" # promote_s3 torchaudio whl "${TORCHAUDIO_VERSION}" # promote_s3 torchtext whl "${TORCHTEXT_VERSION}" +# promote_s3 torchdata whl "${TORCHDATA_VERSION}" # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}" +# promote_conda torchtriton conda "2.0.0" +# promote_conda pytorch-cuda conda "11.7" +# promote_conda pytorch-cuda conda "11.8" + # promote_conda pytorch conda "${PYTORCH_VERSION}" # promote_conda torchvision conda "${TORCHVISION_VERSION}" # promote_conda torchaudio conda "${TORCHAUDIO_VERSION}" # promote_conda torchtext conda "${TORCHTEXT_VERSION}" +# promote_conda torchdata conda "${TORCHDATA_VERSION}" # Uncomment these to promote to pypi LINUX_VERSION_SUFFIX="%2Bcu102" diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh old mode 100644 new mode 100755 index 201e4b9ac5..fdd9bf4a0e --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -12,6 +12,19 @@ set -eou pipefail shopt -s globstar +# Function copied from manywheel/build_common.sh +make_wheel_record() { + FPATH=$1 + if echo $FPATH | grep RECORD >/dev/null 2>&1; then + # if the RECORD file, then + echo "$FPATH,," + else + HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g') + FSIZE=$(ls -nl $FPATH | awk '{print $5}') + echo 
"$FPATH,sha256=$HASH,$FSIZE" + fi +} + OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)} tmp_dir="$(mktemp -d)" @@ -27,8 +40,9 @@ for whl_file in "$@"; do set -x unzip -q "${whl_file}" -d "${whl_dir}" ) - version_with_suffix=$(grep '^Version:' "${whl_dir}"/*/METADATA | cut -d' ' -f2) + version_with_suffix=$(grep '^Version:' "${whl_dir}"/*/METADATA | cut -d' ' -f2 | tr -d "[:space:]") version_with_suffix_escaped=${version_with_suffix/+/%2B} + # Remove all suffixed +bleh versions version_no_suffix=${version_with_suffix/+*/} new_whl_file=${OUTPUT_DIR}/$(basename "${whl_file/${version_with_suffix_escaped}/${version_no_suffix}}") @@ -37,11 +51,37 @@ for whl_file in "$@"; do dirname_dist_info_folder=$(dirname "${dist_info_folder}") ( set -x + + # Special build with pypi cudnn remove it from version + if [[ $whl_file == *"with.pypi.cudnn"* ]]; then + rm -rf "${whl_dir}/caffe2" + rm -rf "${whl_dir}"/torch/lib/libnvrtc* + + sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" + fi + find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \; # Moves distinfo from one with a version suffix to one without # Example: torch-1.8.0+cpu.dist-info => torch-1.8.0.dist-info mv "${dist_info_folder}" "${dirname_dist_info_folder}/${basename_dist_info_folder/${version_with_suffix}/${version_no_suffix}}" cd "${whl_dir}" - zip -qr "${new_whl_file}" . + + ( + set +x + # copied from manywheel/build_common.sh + # regenerate the RECORD file with new hashes + record_file="${dirname_dist_info_folder}/${basename_dist_info_folder/${version_with_suffix}/${version_no_suffix}}/RECORD" + if [[ -e $record_file ]]; then + echo "Generating new record file $record_file" + : > "$record_file" + # generate records for folders in wheel + find * -type f | while read fname; do + make_wheel_record "$fname" >>"$record_file" + done + fi + ) + + rm -rf "${new_whl_file}" + zip -qr9 "${new_whl_file}" . 
) done diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index 02ebe4833c..74f139680e 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -21,16 +21,17 @@ upload_pypi_to_staging() { } # Uncomment these to promote to pypi -LINUX_VERSION_SUFFIX="%2Bcu102" +PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu117.with.pypi.cudnn" +LINUX_VERSION_SUFFIX="%2Bcu117" WIN_VERSION_SUFFIX="%2Bcpu" MACOS_X86_64="macosx_.*_x86_64" MACOS_ARM64="macosx_.*_arm64" -PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" -PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" -PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac -PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac +PLATFORM="linux_x86_64" VERSION_SUFFIX="${PYTORCH_LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" +PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" +PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac +PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" diff --git a/release/release_versions.sh b/release/release_versions.sh index 95ebfa363b..f0db2a0895 100644 --- a/release/release_versions.sh +++ 
b/release/release_versions.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-1.12.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.13.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-0.12.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.13.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0} diff --git a/run_tests.sh b/run_tests.sh index 18b00f00b3..fd66835e23 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -72,21 +72,6 @@ fi # Environment initialization if [[ "$package_type" == conda || "$(uname)" == Darwin ]]; then - # Why are there two different ways to install dependencies after installing an offline package? - # The "cpu" conda package for pytorch doesn't actually depend on "cpuonly" which means that - # when we attempt to update dependencies using "conda update --all" it will attempt to install - # whatever "cudatoolkit" your current computer relies on (which is sometimes none). When conda - # tries to install this cudatoolkit that correlates with your current hardware it will also - # overwrite the currently installed "local" pytorch package meaning you aren't actually testing - # the right package. 
- # TODO (maybe): Make the "cpu" package of pytorch depend on "cpuonly" - if [[ "$cuda_ver" = 'cpu' ]]; then - # Installing cpuonly will also install dependencies as well - retry conda install -y -c pytorch cpuonly - else - # Install dependencies from installing the pytorch conda package offline - retry conda update -yq --all -c defaults -c pytorch -c numba/label/dev - fi # Install the testing dependencies retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml else @@ -140,15 +125,21 @@ python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else # Test that CUDA builds are setup correctly if [[ "$cuda_ver" != 'cpu' ]]; then - # Test CUDA archs - echo "Checking that CUDA archs are setup correctly" - timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()' - - # These have to run after CUDA is initialized - echo "Checking that magma is available" - python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)' - echo "Checking that CuDNN is available" - python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)' + cuda_installed=1 + nvidia-smi || cuda_installed=0 + if [[ "$cuda_installed" == 0 ]]; then + echo "Skip CUDA tests for machines without a Nvidia GPU card" + else + # Test CUDA archs + echo "Checking that CUDA archs are setup correctly" + timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()' + + # These have to run after CUDA is initialized + echo "Checking that magma is available" + python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)' + echo "Checking that CuDNN is available" + python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)' + fi fi # Check that OpenBlas is not linked to on Macs diff --git a/s3_management/backup_conda.py b/s3_management/backup_conda.py index a75c23407d..06926589d3 100644 --- a/s3_management/backup_conda.py +++ 
b/s3_management/backup_conda.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 -# Downloads domain library packages from channel +# Downloads domain pytorch and library packages from channel # And backs them up to S3 # Do not use unless you know what you are doing +# Usage: python backup_conda.py --version 1.6.0 import conda.api import boto3 @@ -9,6 +10,7 @@ import urllib import os import hashlib +import argparse S3 = boto3.resource('s3') BUCKET = S3.Bucket('pytorch-backup') @@ -23,11 +25,13 @@ def compute_md5(path:str) -> str: def download_conda_package(package:str, version:Optional[str] = None, depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]: packages = conda.api.SubdirData.query_all(package, channels = [channel] if channel is not None else None, subdirs = _known_subdirs) rc = [] + for pkg in packages: if version is not None and pkg.version != version: continue if depends is not None and depends not in pkg.depends: continue + print(f"Downloading {pkg.url}...") os.makedirs(pkg.subdir, exist_ok = True) fname = f"{pkg.subdir}/{pkg.fn}" @@ -50,6 +54,18 @@ def upload_to_s3(prefix: str, fnames: List[str]) -> None: if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--version", + help="PyTorch Version to backup", + type=str, + required = True + ) + options = parser.parse_args() + rc = download_conda_package("pytorch", channel = "pytorch", version = options.version) + upload_to_s3(f"v{options.version}/conda", rc) + for libname in ["torchvision", "torchaudio", "torchtext"]: - rc = download_conda_package(libname, channel = "pytorch", depends = "pytorch 1.9.0") - upload_to_s3("v1.9.0-rc4/conda", rc) + print(f"processing {libname}") + rc = download_conda_package(libname, channel = "pytorch", depends = f"pytorch {options.version}") + upload_to_s3(f"v{options.version}/conda", rc) diff --git a/s3_management/manage.py b/s3_management/manage.py index 6b6d0c6faa..15b37cf3da 100644 --- a/s3_management/manage.py +++ 
b/s3_management/manage.py @@ -1,16 +1,15 @@ #!/usr/bin/env python import argparse -import tempfile import time from os import path, makedirs +from datetime import datetime from collections import defaultdict from typing import Iterator, List, Type, Dict, Set, TypeVar, Optional -from re import sub, match +from re import sub, match, search from packaging.version import parse -import botocore import boto3 @@ -18,7 +17,7 @@ CLIENT = boto3.client('s3') BUCKET = S3.Bucket('pytorch') -ACCEPTED_FILE_EXTENSIONS = ("whl", "zip") +ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") ACCEPTED_SUBDIR_PATTERNS = [ r"cu[0-9]+", # for cuda r"rocm[0-9]+\.[0-9]+", # for rocm @@ -31,11 +30,71 @@ "whl/test": "torch_test.html", } +# NOTE: This refers to the name on the wheels themselves and not the name of +# package as specified by setuptools, for packages with "-" (hyphens) in their +# names you need to convert them to "_" (underscores) in order for them to be +# allowed here since the name of the wheels is compared here +PACKAGE_ALLOW_LIST = { + "Pillow", + "certifi", + "charset_normalizer", + "cmake", + "colorama", + "filelock", + "idna", + "Jinja2", + "lit", + "MarkupSafe", + "mpmath", + "nestedtensor", + "networkx", + "numpy", + "packaging", + "portalocker", + "pytorch_triton", + "pytorch_triton_rocm", + "requests", + "sympy", + "torch", + "torcharrow", + "torchaudio", + "torchcsprng", + "torchdata", + "torchdistx", + "torchrec", + "torchtext", + "torchvision", + "triton", + "tqdm", + "typing_extensions", + "urllib3", +} + +# Should match torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl as: +# Group 1: torch-2.0.0.dev +# Group 2: 20221221 +PACKAGE_DATE_REGEX = r"([a-zA-z]*-[0-9.]*.dev)([0-9]*)" + # How many packages should we keep of a specific package? 
KEEP_THRESHOLD = 60 S3IndexType = TypeVar('S3IndexType', bound='S3Index') +def extract_package_build_time(full_package_name: str) -> datetime: + result = search(PACKAGE_DATE_REGEX, full_package_name) + if result is not None: + try: + return datetime.strptime(result.group(2), "%Y%m%d") + except ValueError: + # Ignore any value errors since they probably shouldn't be hidden anyways + pass + return datetime.now() + +def between_bad_dates(package_build_time: datetime): + start_bad = datetime(year=2022, month=8, day=17) + end_bad = datetime(year=2022, month=12, day=30) + return start_bad <= package_build_time <= end_bad + class S3Index: def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None: @@ -70,9 +129,17 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]: packages: Dict[str, int] = defaultdict(int) to_hide: Set[str] = set() for obj in all_sorted_packages: - package_name = path.basename(obj).split('-')[0] + full_package_name = path.basename(obj) + package_name = full_package_name.split('-')[0] + package_build_time = extract_package_build_time(full_package_name) + # Hard pass on packages that are included in our allow list + if package_name not in PACKAGE_ALLOW_LIST: + to_hide.add(obj) + continue if packages[package_name] >= KEEP_THRESHOLD: to_hide.add(obj) + elif between_bad_dates(package_build_time): + to_hide.add(obj) else: packages[package_name] += 1 return set(self.objects).difference({ @@ -162,7 +229,7 @@ def to_simple_package_html( out.append('') out.append('') out.append(' ') - out.append('

Links for {}

'.format(package_name)) + out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer @@ -183,7 +250,7 @@ def to_simple_packages_html( out.append('') out.append(' ') for pkg_name in sorted(self.get_package_names(subdir)): - out.append(f' {pkg_name}
') + out.append(f' {pkg_name.replace("_","-")}
') # Adding html footer out.append(' ') out.append('') @@ -214,9 +281,10 @@ def upload_pep503_htmls(self) -> None: Body=self.to_simple_packages_html(subdir=subdir) ) for pkg_name in self.get_package_names(subdir=subdir): - print(f"INFO Uploading {subdir}/{pkg_name}/index.html") + compat_pkg_name = pkg_name.lower().replace("_", "-") + print(f"INFO Uploading {subdir}/{compat_pkg_name}/index.html") BUCKET.Object( - key=f"{subdir}/{pkg_name}/index.html" + key=f"{subdir}/{compat_pkg_name}/index.html" ).put( ACL='public-read', CacheControl='no-cache,no-store,must-revalidate', diff --git a/s3_management/requirements.txt b/s3_management/requirements.txt index 86199dbc6e..d9fe7f1f00 100644 --- a/s3_management/requirements.txt +++ b/s3_management/requirements.txt @@ -1,2 +1,2 @@ -boto3 -packaging +boto3==1.12.7 +packaging==21.3 diff --git a/smoke_test.sh b/smoke_test.sh deleted file mode 100755 index e2459b49d6..0000000000 --- a/smoke_test.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/bin/bash -set -eux -o pipefail -SOURCE_DIR=$(cd $(dirname $0) && pwd) - -# This is meant to be run in either a docker image or in a Mac. This assumes an -# environment that will be teared down after execution is finishes, so it will -# probably mess up what environment it runs in. - -# This is now only meant to be run in CircleCI, after calling the -# .circleci/scripts/binary_populate_env.sh . You can call this manually if you -# make sure all the needed variables are still populated. - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -if ! 
[ -x "$(command -v curl)" ]; then - if [ -f /etc/lsb-release ]; then - # TODO: Remove this once nvidia package repos are back online - # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968 - # shellcheck disable=SC2046 - sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list") - - apt-get update - apt-get install -y curl - fi -fi - -# Use today's date if none is given -if [[ -z "${DATE:-}" || "${DATE:-}" == 'today' ]]; then - DATE="$(date +%Y%m%d)" -fi - -# DESIRED_PYTHON is in format 2.7m?u? -# DESIRED_CUDA is in format cu80 (or 'cpu') - -if [[ "$DESIRED_CUDA" == cpu ]]; then - export USE_CUDA=0 -else - export USE_CUDA=1 -fi - -# Generate M.m formats for CUDA and Python versions -if [[ "$DESIRED_CUDA" != cpu ]]; then - cuda_dot="$(echo $DESIRED_CUDA | tr -d 'cpu')" - if [[ "${#cuda_dot}" == 2 ]]; then - cuda_dot="${cuda_dot:0:1}.${cuda_dot:1}" - else - cuda_dot="${cuda_dot:0:2}.${cuda_dot:2}" - fi -fi -py_dot="${DESIRED_PYTHON:0:3}" - -# Generate "long" python versions cp27-cp27mu -py_long="cp${DESIRED_PYTHON:0:1}${DESIRED_PYTHON:2:1}-cp${DESIRED_PYTHON:0:1}${DESIRED_PYTHON:2}" -# TODO: I know this is the wrong way to do this translation, we should probably fix it upstream, but this is the quickest way -if [[ "${py_long}" = "cp38-cp38m" ]]; then - py_long="cp38-cp38" -fi - -# Determine package name -if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then - if [[ "$(uname)" == Darwin ]]; then - libtorch_variant='macos' - elif [[ -z "${LIBTORCH_VARIANT:-}" ]]; then - echo "No libtorch variant given. This smoke test does not know which zip" - echo "to download." 
- exit 1 - else - libtorch_variant="$LIBTORCH_VARIANT" - fi - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - LIBTORCH_ABI="cxx11-abi-" - else - LIBTORCH_ABI= - fi - if [[ "$DESIRED_CUDA" == 'cu102' || "$libtorch_variant" == 'macos' ]]; then - package_name="libtorch-$LIBTORCH_ABI$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}.zip" - else - package_name="libtorch-$LIBTORCH_ABI$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}%2B${DESIRED_CUDA}.zip" - fi - -elif [[ "$PACKAGE_TYPE" == *wheel ]]; then - package_name='torch' -else - package_name='pytorch' -fi -if [[ "$(uname)" == 'Darwin' ]] || [[ "$DESIRED_CUDA" == "cu102" ]] || [[ "$PACKAGE_TYPE" == 'conda' ]]; then - package_name_and_version="${package_name}==${NIGHTLIES_DATE_PREAMBLE}${DATE}" -else - # Linux binaries have the cuda version appended to them. This is only on - # linux, since all macos builds are cpu. (NB: We also omit - # DESIRED_CUDA if it's the default) - package_name_and_version="${package_name}==${NIGHTLIES_DATE_PREAMBLE}${DATE}+${DESIRED_CUDA}" -fi - -# Switch to the desired python -if [[ "$PACKAGE_TYPE" == 'conda' || "$(uname)" == 'Darwin' ]]; then - # Create a new conda env in conda, or on MacOS - conda create -yn test python="$py_dot" && source activate test - python_version=$(python --version 2>&1) - dependencies="numpy protobuf six requests" - case ${python_version} in - *3.6.*) - dependencies="${dependencies} future dataclasses" - ;; - esac - conda install -yq ${dependencies} -else - export PATH=/opt/python/${py_long}/bin:$PATH - if [[ "$(python --version 2>&1)" == *3.6.* ]]; then - retry pip install -q future numpy protobuf six requests dataclasses - else - retry pip install -q future numpy protobuf six requests - fi -fi - -# Switch to the desired CUDA if using the conda-cuda Docker image -if [[ "$PACKAGE_TYPE" == 'conda' ]]; then - rm -rf /usr/local/cuda || true - if [[ "$DESIRED_CUDA" != 'cpu' ]]; then - ln -s "/usr/local/cuda-${cuda_dot}" /usr/local/cuda - export 
CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130 - export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0 - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - fi -fi - -# Print some debugging info -python --version -pip --version -which python -# If you are debugging packages not found then run these commands. -#if [[ "$PACKAGE_TYPE" == 'conda' ]]; then -# conda search -c pytorch "$package_name" -#elif [[ "$PACKAGE_TYPE" == *wheel ]]; then -# retry curl "https://download.pytorch.org/whl/nightly/$DESIRED_CUDA/torch_nightly.html" -v -#fi - -# Install the package for the requested date -if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then - mkdir tmp_libtorch - pushd tmp_libtorch - libtorch_url="https://download.pytorch.org/libtorch/nightly/$DESIRED_CUDA/$package_name" - retry curl -o libtorch_zip "${libtorch_url}" - unzip -q libtorch_zip - cd libtorch -elif [[ "$PACKAGE_TYPE" == 'conda' ]]; then - if [[ "$DESIRED_CUDA" == 'cpu' ]]; then - if [[ "$(uname)" == 'Darwin' ]]; then - retry conda install -yq -c pytorch-nightly "$package_name_and_version" - else - retry conda install -yq -c pytorch-nightly "$package_name_and_version" cpuonly - fi - else - retry conda install -yq -c pytorch-nightly "cudatoolkit=$CUDA_VERSION_SHORT" "$package_name_and_version" - fi -else - # We need to upgrade pip now that we have '+cuver' in the package name, as - # old pips do not correctly change the '+' to '%2B' in the url and fail to - # find the package. 
- pip install --upgrade pip -q - pip_url="https://download.pytorch.org/whl/nightly/$DESIRED_CUDA/torch_nightly.html" - retry pip install "$package_name_and_version" \ - -f "$pip_url" \ - --no-cache-dir \ - --no-index \ - -q -fi - -# Check that all conda features are working -if [[ "$PACKAGE_TYPE" == 'conda' ]]; then - # Check that conda didn't change the Python version out from under us. Conda - # will do this if it didn't find the requested package for the current Python - # version and if nothing else has been installed in the current env. - if [[ -z "$(python --version 2>&1 | grep -o $py_dot)" ]]; then - echo "The Python version has changed to $(python --version)" - echo "Probably the package for the version we want does not exist" - echo '(conda will change the Python version even if it was explicitly declared)' - exit 1 - fi - - # Check that the CUDA feature is working - if [[ "$DESIRED_CUDA" == 'cpu' ]]; then - if [[ -n "$(conda list torch | grep -o cuda)" ]]; then - echo "The installed package is built for CUDA:: $(conda list torch)" - exit 1 - fi - elif [[ -z "$(conda list torch | grep -o cuda$cuda_dot)" ]]; then - echo "The installed package doesn't seem to be built for CUDA $cuda_dot" - echo "The full package is $(conda list torch)" - exit 1 - fi -fi - -"${SOURCE_DIR}/check_binary.sh" diff --git a/test/smoke_test/assets/dog2.jpg b/test/smoke_test/assets/dog2.jpg new file mode 100644 index 0000000000..528dfec720 Binary files /dev/null and b/test/smoke_test/assets/dog2.jpg differ diff --git a/test/smoke_test/assets/rgb_pytorch.jpg b/test/smoke_test/assets/rgb_pytorch.jpg new file mode 100644 index 0000000000..d49e658b94 Binary files /dev/null and b/test/smoke_test/assets/rgb_pytorch.jpg differ diff --git a/test/smoke_test/assets/rgb_pytorch.png b/test/smoke_test/assets/rgb_pytorch.png new file mode 100644 index 0000000000..c9d08e6c7d Binary files /dev/null and b/test/smoke_test/assets/rgb_pytorch.png differ diff --git a/test/smoke_test/smoke_test.py 
b/test/smoke_test/smoke_test.py index bae7a5d29d..1a55cfed72 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -1,49 +1,243 @@ import os +import re import sys +from pathlib import Path +import argparse import torch -import torchvision -import torchaudio - -def smoke_test_cuda() -> None: - gpu_arch_ver = os.getenv('GPU_ARCH_VER') - gpu_arch_type = os.getenv('GPU_ARCH_TYPE') - is_cuda_system = gpu_arch_type == "cuda" - - if(not torch.cuda.is_available() and is_cuda_system): - print(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.") - sys.exit(1) - if(torch.cuda.is_available()): - if(torch.version.cuda != gpu_arch_ver): - print(f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}") - sys.exit(1) - y=torch.randn([3,5]).cuda() +import platform +import importlib +import subprocess +import torch._dynamo +import torch.nn as nn +import torch.nn.functional as F + +gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION") +gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE") +channel = os.getenv("MATRIX_CHANNEL") +stable_version = os.getenv("MATRIX_STABLE_VERSION") +package_type = os.getenv("MATRIX_PACKAGE_TYPE") + +is_cuda_system = gpu_arch_type == "cuda" +SCRIPT_DIR = Path(__file__).parent +NIGHTLY_ALLOWED_DELTA = 3 + +MODULES = [ + { + "name": "torchvision", + "repo": "https://github.com/pytorch/vision.git", + "smoke_test": "python ./vision/test/smoke_test.py", + "extension": "extension", + }, + { + "name": "torchaudio", + "repo": "https://github.com/pytorch/audio.git", + "smoke_test": "python ./audio/test/smoke_test/smoke_test.py --no-ffmpeg", + "extension": "_extension", + }, +] + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.fc1 = nn.Linear(9216, 1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = F.max_pool2d(x, 2) + x = torch.flatten(x, 1) + output = self.fc1(x) + return output 
+ +def check_version(package: str) -> None: + # only makes sense to check nightly package where dates are known + if channel == "nightly": + check_nightly_binaries_date(package) + else: + if not torch.__version__.startswith(stable_version): + raise RuntimeError( + f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}" + ) + +def check_nightly_binaries_date(package: str) -> None: + from datetime import datetime, timedelta + format_dt = '%Y%m%d' + + torch_str = torch.__version__ + date_t_str = re.findall("dev\d+", torch.__version__) + date_t_delta = datetime.now() - datetime.strptime(date_t_str[0][3:], format_dt) + if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA: + raise RuntimeError( + f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!" + ) + + if(package == "all"): + for module in MODULES: + imported_module = importlib.import_module(module["name"]) + module_version = imported_module.__version__ + date_m_str = re.findall("dev\d+", module_version) + date_m_delta = datetime.now() - datetime.strptime(date_m_str[0][3:], format_dt) + print(f"Nightly date check for {module['name']} version {module_version}") + if date_m_delta.days > NIGHTLY_ALLOWED_DELTA: + raise RuntimeError( + f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. 
But its {date_m_delta}" + ) + +def test_cuda_runtime_errors_captured() -> None: + cuda_exception_missed=True + try: + print("Testing test_cuda_runtime_errors_captured") + torch._assert_async(torch.tensor(0, device="cuda")) + torch._assert_async(torch.tensor(0 + 0j, device="cuda")) + except RuntimeError as e: + if re.search("CUDA", f"{e}"): + print(f"Caught CUDA exception with success: {e}") + cuda_exception_missed = False + else: + raise e + if(cuda_exception_missed): + raise RuntimeError( f"Expected CUDA RuntimeError but have not received!") + +def smoke_test_cuda(package: str) -> None: + if not torch.cuda.is_available() and is_cuda_system: + raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.") + + if(package == 'all' and is_cuda_system): + for module in MODULES: + imported_module = importlib.import_module(module["name"]) + # TBD for vision move extension module to private so it will + # be _extention. + version = "N/A" + if module["extension"] == "extension": + version = imported_module.extension._check_cuda_version() + else: + version = imported_module._extension._check_cuda_version() + print(f"{module['name']} CUDA: {version}") + + if torch.cuda.is_available(): + if torch.version.cuda != gpu_arch_ver: + raise RuntimeError( + f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}" + ) print(f"torch cuda: {torch.version.cuda}") - #todo add cudnn version validation + # todo add cudnn version validation print(f"torch cudnn: {torch.backends.cudnn.version()}") + print(f"cuDNN enabled? 
{torch.backends.cudnn.enabled}") + + # torch.compile is available only on Linux and python 3.8-3.10 + if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0): + smoke_test_compile() + + test_cuda_runtime_errors_captured() + + +def smoke_test_conv2d() -> None: + import torch.nn as nn + + print("Testing smoke_test_conv2d") + # With square kernels and equal stride + m = nn.Conv2d(16, 33, 3, stride=2) + # non-square kernels and unequal stride and with padding + m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + # non-square kernels and unequal stride and with padding and dilation + basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)) + input = torch.randn(20, 16, 50, 100) + output = basic_conv(input) + + if is_cuda_system: + print("Testing smoke_test_conv2d with cuda") + conv = nn.Conv2d(3, 3, 3).cuda() + x = torch.randn(1, 3, 24, 24).cuda() + with torch.cuda.amp.autocast(): + out = conv(x) + + supported_dtypes = [torch.float16, torch.float32, torch.float64] + for dtype in supported_dtypes: + print(f"Testing smoke_test_conv2d with cuda for {dtype}") + conv = basic_conv.to(dtype).cuda() + input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) + output = conv(input) + +def smoke_test_linalg() -> None: + print("Testing smoke_test_linalg") + A = torch.randn(5, 3) + U, S, Vh = torch.linalg.svd(A, full_matrices=False) + U.shape, S.shape, Vh.shape + torch.dist(A, U @ torch.diag(S) @ Vh) + + U, S, Vh = torch.linalg.svd(A) + U.shape, S.shape, Vh.shape + torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh) + + A = torch.randn(7, 5, 3) + U, S, Vh = torch.linalg.svd(A, full_matrices=False) + torch.dist(A, U @ torch.diag_embed(S) @ Vh) + + if is_cuda_system: + supported_dtypes = [torch.float32, torch.float64] + for dtype in supported_dtypes: + print(f"Testing smoke_test_linalg with cuda for {dtype}") + A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) + torch.linalg.svd(A) + +def 
smoke_test_compile() -> None: + supported_dtypes = [torch.float16, torch.float32, torch.float64] + def foo(x: torch.Tensor) -> torch.Tensor: + return torch.sin(x) + torch.cos(x) + for dtype in supported_dtypes: + print(f"Testing smoke_test_compile for {dtype}") + x = torch.rand(3, 3, device="cuda").type(dtype) + x_eager = foo(x) + x_pt2 = torch.compile(foo)(x) + print(torch.allclose(x_eager, x_pt2)) + + # Reset torch dynamo since we are changing mode + torch._dynamo.reset() + dtype = torch.float32 + torch.set_float32_matmul_precision('high') + print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}") + x = torch.rand(64, 1, 28, 28, device="cuda").type(torch.float32) + model = Net().to(device="cuda") + x_pt2 = torch.compile(model, mode="max-autotune")(x) + +def smoke_test_modules(): + for module in MODULES: + if module["repo"]: + subprocess.check_output(f"git clone --depth 1 {module['repo']}", stderr=subprocess.STDOUT, shell=True) + try: + output = subprocess.check_output( + module["smoke_test"], stderr=subprocess.STDOUT, shell=True, + universal_newlines=True) + except subprocess.CalledProcessError as exc: + raise RuntimeError( + f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}" + ) + else: + print("Output: \n{}\n".format(output)) -def smoke_test_torchvision() -> None: - import torchvision.datasets as dset - import torchvision.transforms - print('Is torchvision useable?', all(x is not None for x in [torch.ops.image.decode_png, torch.ops.torchvision.roi_align])) - -def smoke_test_torchaudio() -> None: - import torchaudio.compliance.kaldi # noqa: F401 - import torchaudio.datasets # noqa: F401 - import torchaudio.functional # noqa: F401 - import torchaudio.models # noqa: F401 - import torchaudio.pipelines # noqa: F401 - import torchaudio.sox_effects # noqa: F401 - import torchaudio.transforms # noqa: F401 - import torchaudio.utils # noqa: F401 def main() -> None: - #todo add torch, torchvision and torchaudio tests + parser = 
argparse.ArgumentParser() + parser.add_argument( + "--package", + help="Package to include in smoke testing", + type=str, + choices=["all", "torchonly"], + default="all", + ) + options = parser.parse_args() print(f"torch: {torch.__version__}") - print(f"torchvision: {torchvision.__version__}") - print(f"torchaudio: {torchaudio.__version__}") - smoke_test_cuda() - smoke_test_torchvision() - smoke_test_torchaudio() + check_version(options.package) + smoke_test_conv2d() + smoke_test_linalg() + + if options.package == "all": + smoke_test_modules() + + smoke_test_cuda(options.package) + if __name__ == "__main__": main() diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 08b47335af..26df3d71d6 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -97,16 +97,7 @@ fi whl_tmp_dir="${MAC_PACKAGE_WORK_DIR}/dist" mkdir -p "$whl_tmp_dir" -# Python 3.5 build against macOS 10.6, others build against 10.9 -# NB: Sometimes Anaconda revs the version, in which case you'll have to -# update this! -# An example of this happened on Aug 13, 2019, when osx-64/python-2.7.16-h97142e2_2.tar.bz2 -# was uploaded to https://anaconda.org/anaconda/python/files -if [[ "$desired_python" == 3.5 ]]; then - mac_version='macosx_10_6_x86_64' -elif [[ "$desired_python" == 2.7 ]]; then - mac_version='macosx_10_7_x86_64' -elif [[ -n "$CROSS_COMPILE_ARM64" ]]; then +if [[ -n "$CROSS_COMPILE_ARM64" || $(uname -m) == "arm64" ]]; then mac_version='macosx_11_0_arm64' else mac_version='macosx_10_9_x86_64' @@ -128,7 +119,7 @@ if [[ ! 
-d "$pytorch_rootdir" ]]; then popd fi pushd "$pytorch_rootdir" -git submodule update --init --recursive --jobs 0 +git submodule update --init --recursive popd ########################## @@ -144,6 +135,11 @@ SETUPTOOLS_PINNED_VERSION="=46.0.0" PYYAML_PINNED_VERSION="=5.3" EXTRA_CONDA_INSTALL_FLAGS="" case ${desired_python} in + 3.11) + SETUPTOOLS_PINNED_VERSION=">=46.0.0" + PYYAML_PINNED_VERSION=">=5.3" + NUMPY_PINNED_VERSION="==1.23.5" + ;; 3.10) SETUPTOOLS_PINNED_VERSION=">=46.0.0" PYYAML_PINNED_VERSION=">=5.3" @@ -167,8 +163,12 @@ tmp_env_name="wheel_py$python_nodot" conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" source activate "$tmp_env_name" -retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" cffi typing_extensions ninja requests -retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq mkl-include==2020.1 mkl-static==2020.1 -c intel +if [[ "$desired_python" == "3.11" ]]; then + retry pip install -q "numpy${NUMPY_PINNED_VERSION}" "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests +else + retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests +fi +retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja mkl-include==2022.2.1 mkl-static==2022.2.1 -c intel retry pip install -qr "${pytorch_rootdir}/requirements.txt" || true # For USE_DISTRIBUTED=1 on macOS, need libuv and pkg-config to find libuv. 
diff --git a/windows/build_all.bat b/windows/build_all.bat index 0c1edcf655..f60da8c763 100755 --- a/windows/build_all.bat +++ b/windows/build_all.bat @@ -30,8 +30,8 @@ set "ORIG_PATH=%PATH%" conda remove -n py36 --all -y || rmdir %CONDA_HOME%\envs\py36 /s conda remove -n py37 --all -y || rmdir %CONDA_HOME%\envs\py37 /s -conda create -n py36 -y -q numpy=1.11 mkl=2018 cffi pyyaml boto3 cmake ninja typing_extensions python=3.6 -conda create -n py37 -y -q numpy=1.11 mkl=2018 cffi pyyaml boto3 cmake ninja typing_extensions python=3.7 +conda create -n py36 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.6 +conda create -n py37 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.7 REM Install MKL rmdir /s /q mkl diff --git a/windows/condaenv.bat b/windows/condaenv.bat index 470575340f..6d945badd1 100644 --- a/windows/condaenv.bat +++ b/windows/condaenv.bat @@ -9,10 +9,11 @@ FOR %%v IN (%DESIRED_PYTHON%) DO ( set PYTHON_VERSION_STR=%%v set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s - if "%%v" == "3.7" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" cffi pyyaml boto3 cmake ninja typing_extensions python=%%v + if "%%v" == "3.7" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.8" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y -q "numpy>=1.11" "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge "numpy>=1.21.2" "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v + if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! 
-y -q -c=conda-forge "numpy>=1.21.2" "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v if "%%v" == "3" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v ) endlocal diff --git a/windows/cuda102.bat b/windows/cuda102.bat deleted file mode 100644 index 1d90c86b81..0000000000 --- a/windows/cuda102.bat +++ /dev/null @@ -1,58 +0,0 @@ -@echo off - -set MODULE_NAME=pytorch - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. -) ELSE ( - call internal\clean.bat -) -IF ERRORLEVEL 1 goto :eof - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto :eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" ( - set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt - ) ELSE ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - ) -) - -IF "%CUDA_PATH_V10_2%"=="" ( - IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" ( - set "CUDA_PATH_V10_2=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" - ) ELSE ( - echo CUDA 10.2 not found, failing - exit /b 1 - ) -) - -IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all -) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -) - -set "CUDA_PATH=%CUDA_PATH_V10_2%" -set "PATH=%CUDA_PATH_V10_2%\bin;%PATH%" - -:optcheck - -call internal\check_opts.bat -IF ERRORLEVEL 1 goto :eof - -call internal\copy.bat -IF ERRORLEVEL 1 goto :eof - -call 
internal\setup.bat -IF ERRORLEVEL 1 goto :eof diff --git a/windows/cuda115.bat b/windows/cuda115.bat deleted file mode 100644 index bf037b22cc..0000000000 --- a/windows/cuda115.bat +++ /dev/null @@ -1,58 +0,0 @@ -@echo off - -set MODULE_NAME=pytorch - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. -) ELSE ( - call internal\clean.bat -) -IF ERRORLEVEL 1 goto :eof - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto :eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" ( - set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt - ) ELSE ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - ) -) - -IF "%CUDA_PATH_V115%"=="" ( - IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin\nvcc.exe" ( - set "CUDA_PATH_V115=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5" - ) ELSE ( - echo CUDA 11.5 not found, failing - exit /b 1 - ) -) - -IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all -) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -) - -set "CUDA_PATH=%CUDA_PATH_V115%" -set "PATH=%CUDA_PATH_V115%\bin;%PATH%" - -:optcheck - -call internal\check_opts.bat -IF ERRORLEVEL 1 goto :eof - -call internal\copy.bat -IF ERRORLEVEL 1 goto :eof - -call internal\setup.bat -IF ERRORLEVEL 1 goto :eof diff --git a/windows/cuda113.bat b/windows/cuda118.bat similarity index 81% rename from windows/cuda113.bat rename to 
windows/cuda118.bat index 568f1e754d..02d91adc38 100644 --- a/windows/cuda113.bat +++ b/windows/cuda118.bat @@ -27,24 +27,24 @@ IF "%NVTOOLSEXT_PATH%"=="" ( ) ) -IF "%CUDA_PATH_V113%"=="" ( - IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin\nvcc.exe" ( - set "CUDA_PATH_V113=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3" +IF "%CUDA_PATH_V118%"=="" ( + IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin\nvcc.exe" ( + set "CUDA_PATH_V118=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" ) ELSE ( - echo CUDA 11.3 not found, failing + echo CUDA 11.8 not found, failing exit /b 1 ) ) IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6 + set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 ) -set "CUDA_PATH=%CUDA_PATH_V113%" -set "PATH=%CUDA_PATH_V113%\bin;%PATH%" +set "CUDA_PATH=%CUDA_PATH_V118%" +set "PATH=%CUDA_PATH_V118%\bin;%PATH%" :optcheck diff --git a/windows/internal/check_deps.bat b/windows/internal/check_deps.bat index 25c4c4a51d..5e1f58e35e 100755 --- a/windows/internal/check_deps.bat +++ b/windows/internal/check_deps.bat @@ -16,18 +16,16 @@ IF "%BUILD_VISION%" == "" ( ) ) -IF NOT EXIST 
"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows +if not exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( + echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows exit /b 1 ) -IF "%VC_YEAR%" == "" set VC_YEAR=2019 - -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 -IF "%VC_YEAR%" == "2017" ( - set VC_VERSION_LOWER=15 - set VC_VERSION_UPPER=16 +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 ) if NOT "%VS15INSTALLDIR%" == "" if exist "%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat" ( diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat index a79571014b..b4f11a58a4 100644 --- a/windows/internal/cuda_install.bat +++ b/windows/internal/cuda_install.bat @@ -17,69 +17,23 @@ set CUDNN_FOLDER="cuda" set CUDNN_LIB_FOLDER="lib\x64" :: Skip all of this if we already have cuda installed -if exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" goto set_cuda_env_vars +if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" goto set_cuda_env_vars -if %CUDA_VER% EQU 102 goto cuda102 -if %CUDA_VER% EQU 113 goto cuda113 -if %CUDA_VER% EQU 115 goto cuda115 if %CUDA_VER% EQU 116 goto cuda116 if %CUDA_VER% EQU 117 goto cuda117 +if %CUDA_VER% EQU 118 goto cuda118 echo CUDA %CUDA_VERSION_STR% is not supported exit /b 1 -:cuda102 - -if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 
cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" -) - -goto cuda_common - -:cuda113 - -set CUDA_INSTALL_EXE=cuda_11.3.0_465.89_win10.exe -if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( - curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3" -) - -set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive -set CUDNN_LIB_FOLDER="lib" -set CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" -if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( - curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" -) - -@REM Cuda 8.3+ required zlib to be installed on the path -echo Installing ZLIB dlls -curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip" -7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib" -xcopy /Y 
"%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" - -goto cuda_common - -:cuda115 +:cuda116 -set CUDA_INSTALL_EXE=cuda_11.5.0_496.13_win10.exe +set CUDA_INSTALL_EXE=cuda_11.6.0_511.23_windows.exe if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=thrust_11.5 nvcc_11.5 cuobjdump_11.5 nvprune_11.5 nvprof_11.5 cupti_11.5 cublas_11.5 cublas_dev_11.5 cudart_11.5 cufft_11.5 cufft_dev_11.5 curand_11.5 curand_dev_11.5 cusolver_11.5 cusolver_dev_11.5 cusparse_11.5 cusparse_dev_11.5 npp_11.5 npp_dev_11.5 nvrtc_11.5 nvrtc_dev_11.5 nvml_dev_11.5" + set "ARGS=thrust_11.6 nvcc_11.6 cuobjdump_11.6 nvprune_11.6 nvprof_11.6 cupti_11.6 cublas_11.6 cublas_dev_11.6 cudart_11.6 cufft_11.6 cufft_dev_11.6 curand_11.6 curand_dev_11.6 cusolver_11.6 cusolver_dev_11.6 cusparse_11.6 cusparse_dev_11.6 npp_11.6 npp_dev_11.6 nvrtc_11.6 nvrtc_dev_11.6 nvml_dev_11.6" ) set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive @@ -99,17 +53,17 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" goto cuda_common -:cuda116 +:cuda117 -set CUDA_INSTALL_EXE=cuda_11.6.0_511.23_windows.exe +set CUDA_INSTALL_EXE=cuda_11.7.0_516.01_windows.exe if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=thrust_11.6 nvcc_11.6 cuobjdump_11.6 nvprune_11.6 nvprof_11.6 cupti_11.6 cublas_11.6 cublas_dev_11.6 cudart_11.6 cufft_11.6 cufft_dev_11.6 curand_11.6 curand_dev_11.6 cusolver_11.6 cusolver_dev_11.6 cusparse_11.6 cusparse_dev_11.6 npp_11.6 npp_dev_11.6 nvrtc_11.6 nvrtc_dev_11.6 nvml_dev_11.6" + set "ARGS=thrust_11.7 nvcc_11.7 
cuobjdump_11.7 nvprune_11.7 nvprof_11.7 cupti_11.7 cublas_11.7 cublas_dev_11.7 cudart_11.7 cufft_11.7 cufft_dev_11.7 curand_11.7 curand_dev_11.7 cusolver_11.7 cusolver_dev_11.7 cusparse_11.7 cusparse_dev_11.7 npp_11.7 npp_dev_11.7 nvrtc_11.7 nvrtc_dev_11.7 nvml_dev_11.7" ) -set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive +set CUDNN_FOLDER=cudnn-windows-x86_64-8.5.0.96_cuda11-archive set CUDNN_LIB_FOLDER="lib" set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( @@ -126,17 +80,17 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" goto cuda_common -:cuda117 +:cuda118 -set CUDA_INSTALL_EXE=cuda_11.7.0_516.01_windows.exe +set CUDA_INSTALL_EXE=cuda_11.8.0_522.06_windows.exe if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=thrust_11.7 nvcc_11.7 cuobjdump_11.7 nvprune_11.7 nvprof_11.7 cupti_11.7 cublas_11.7 cublas_dev_11.7 cudart_11.7 cufft_11.7 cufft_dev_11.7 curand_11.7 curand_dev_11.7 cusolver_11.7 cusolver_dev_11.7 cusparse_11.7 cusparse_dev_11.7 npp_11.7 npp_dev_11.7 nvrtc_11.7 nvrtc_dev_11.7 nvml_dev_11.7" + set "ARGS=cuda_profiler_api_11.8 thrust_11.8 nvcc_11.8 cuobjdump_11.8 nvprune_11.8 nvprof_11.8 cupti_11.8 cublas_11.8 cublas_dev_11.8 cudart_11.8 cufft_11.8 cufft_dev_11.8 curand_11.8 curand_dev_11.8 cusolver_11.8 cusolver_dev_11.8 cusparse_11.8 cusparse_dev_11.8 npp_11.8 npp_dev_11.8 nvrtc_11.8 nvrtc_dev_11.8 nvml_dev_11.8" ) -set CUDNN_FOLDER=cudnn-windows-x86_64-8.5.0.96_cuda11-archive +set CUDNN_FOLDER=cudnn-windows-x86_64-8.7.0.84_cuda11-archive set CUDNN_LIB_FOLDER="lib" set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( @@ -158,7 +112,7 @@ goto cuda_common :: With GHA runners these should be 
pre-installed as part of our AMI process :: If you cannot find the CUDA version you want to build for here then please :: add it @ https://github.com/pytorch/test-infra/tree/main/aws/ami/windows -if not exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( +if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "%SRC_DIR%\temp_build\NvToolsExt.7z" if errorlevel 1 exit /b 1 @@ -183,12 +137,12 @@ if not exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION popd echo Installing VS integration... - if "%VC_YEAR%" == "2017" ( - xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations" - ) if "%VC_YEAR%" == "2019" ( xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations" ) + if "%VC_YEAR%" == "2022" ( + xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations" + ) echo Installing NvToolsExt... 
7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt" diff --git a/windows/internal/env_fix.bat b/windows/internal/env_fix.bat index dd0aaf5f2d..2a53198a99 100644 --- a/windows/internal/env_fix.bat +++ b/windows/internal/env_fix.bat @@ -5,12 +5,19 @@ setlocal -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows +if not exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( + echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows exit /b 1 ) -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( set "VS15INSTALLDIR=%%i" set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" @@ -20,8 +27,8 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio :vswhere -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows +if "%VS15VCVARSALL%"=="" ( + echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows exit /b 1 ) diff --git a/windows/internal/install_nightly_package.bat b/windows/internal/install_nightly_package.bat deleted file mode 100644 index 7db23ebd60..0000000000 --- a/windows/internal/install_nightly_package.bat +++ /dev/null @@ -1,67 +0,0 @@ -if "%PACKAGE_TYPE%" == "wheel" goto wheel -if "%PACKAGE_TYPE%" == "conda" goto conda -if "%PACKAGE_TYPE%" 
== "libtorch" goto libtorch - -:wheel -echo "install pytorch wheel from nightly" - -set pip_url="https://download.pytorch.org/whl/nightly/%DESIRED_CUDA%/torch_nightly.html" -if "%DESIRED_CUDA%" == "cu102" ( - set package_name_and_version="torch==%NIGHTLIES_DATE_PREAMBLE%%DATE%" -) else ( - set package_name_and_version="torch==%NIGHTLIES_DATE_PREAMBLE%%DATE%+%DESIRED_CUDA%" -) -pip install "%package_name_and_version%" -f "%pip_url%" --no-cache-dir --no-index -q -if errorlevel 1 exit /b 1 - -exit /b 0 - -:conda -echo "install pytorch conda from nightly" -set package_name_and_version="pytorch==%NIGHTLIES_DATE_PREAMBLE%%DATE%" - -if "%DESIRED_CUDA%" == "cpu" ( - call conda install -yq -c pytorch-nightly %package_name_and_version% cpuonly -) else ( - call conda install -yq -c pytorch-nightly "cudatoolkit=%CUDA_VERSION_STR%" %package_name_and_version% -) -if ERRORLEVEL 1 exit /b 1 - -FOR /f %%i in ('python -c "import sys;print(sys.version)"') do set cur_python=%%i - -if not %cur_python:~0,3% == %DESIRED_PYTHON% ( - echo "The Python version has changed to %cur_python%" - echo "Probably the package for the version we want does not exist" - echo "conda will change the Python version even if it was explicitly declared" -) - -if "%DESIRED_CUDA%" == "cpu" ( - call conda list torch | findstr cuda || exit /b 0 - echo "The installed package is built for CUDA, the full package is" - call conda list torch -) else ( - call conda list torch | findstr cuda%CUDA_VERSION% && exit /b 0 - echo "The installed package doesn't seem to be built for CUDA "%CUDA_VERSION_STR% - echo "the full package is " - call conda list torch -) -exit /b 1 - -:libtorch -echo "install libtorch from nightly" -if "%LIBTORCH_CONFIG%" == "debug" ( - set NAME_PREFIX=libtorch-win-shared-with-deps-debug -) else ( - set NAME_PREFIX=libtorch-win-shared-with-deps -) -if "%DESIRED_CUDA%" == "cu102" ( - set package_name=%NAME_PREFIX%-%NIGHTLIES_DATE_PREAMBLE%%DATE%.zip -) else ( - set 
package_name=%NAME_PREFIX%-%NIGHTLIES_DATE_PREAMBLE%%DATE%%%2B%DESIRED_CUDA%.zip -) -set libtorch_url="https://download.pytorch.org/libtorch/nightly/%DESIRED_CUDA%/%package_name%" -curl --retry 3 -k "%libtorch_url%" -o %package_name% -if ERRORLEVEL 1 exit /b 1 - -7z x %package_name% -otmp -if ERRORLEVEL 1 exit /b 1 diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index 836a04311b..2e1b1b243a 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -30,6 +30,7 @@ exit /b 1 echo "install wheel package" set PYTHON_INSTALLER_URL= +if "%DESIRED_PYTHON%" == "3.11" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.11.0/python-3.11.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.10" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.9" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.8" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.8.2/python-3.8.2-amd64.exe" @@ -51,13 +52,8 @@ set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" pip install -q numpy protobuf "mkl>=2019" if errorlevel 1 exit /b 1 -if "%TEST_NIGHTLY_PACKAGE%" == "1" ( - call internal\install_nightly_package.bat - if errorlevel 1 exit /b 1 -) else ( - for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" - if errorlevel 1 exit /b 1 -) +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" +if errorlevel 1 exit /b 1 goto smoke_test @@ -68,15 +64,15 @@ echo "install conda package" set "CONDA_HOME=%CD%\conda" set "tmp_conda=%CONDA_HOME%" set "miniconda_exe=%CD%\miniconda.exe" -set "CONDA_EXTRA_ARGS=" -if "%CUDA_VERSION%" == "115" ( - set "CONDA_EXTRA_ARGS=-c=nvidia" -) +set "CONDA_EXTRA_ARGS=cpuonly -c pytorch-nightly" if "%CUDA_VERSION%" == "116" ( - set "CONDA_EXTRA_ARGS=-c=nvidia" + set 
"CONDA_EXTRA_ARGS=pytorch-cuda=11.6 -c nvidia -c pytorch-nightly" ) if "%CUDA_VERSION%" == "117" ( - set "CONDA_EXTRA_ARGS=-c=nvidia" + set "CONDA_EXTRA_ARGS=pytorch-cuda=11.7 -c nvidia -c pytorch-nightly" +) +if "%CUDA_VERSION%" == "118" ( + set "CONDA_EXTRA_ARGS=pytorch-cuda=11.8 -c nvidia -c pytorch-nightly" ) rmdir /s /q conda @@ -93,9 +89,8 @@ if errorlevel 1 exit /b 1 call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 -call conda update -n base -y -c defaults conda - -call conda install %CONDA_EXTRA_ARGS% -yq protobuf numpy +:: do conda install to make sure all the dependencies are installed +call conda install -yq pytorch %CONDA_EXTRA_ARGS% if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% @@ -103,25 +98,8 @@ set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1% set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% -if "%TEST_NIGHTLY_PACKAGE%" == "1" ( - call internal\install_nightly_package.bat - if errorlevel 1 exit /b 1 - goto smoke_test -) - -for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install %CONDA_EXTRA_ARGS% -y "%%i" --offline -if ERRORLEVEL 1 exit /b 1 - -if "%CUDA_VERSION%" == "cpu" goto install_cpu_torch - -:: We do an update --all here since that will install the dependencies for any package that's installed offline -call conda update --all %CONDA_EXTRA_ARGS% -y -c pytorch -c defaults -c numba/label/dev -if ERRORLEVEL 1 exit /b 1 - -goto smoke_test - -:install_cpu_torch -call conda install %CONDA_EXTRA_ARGS% -y cpuonly -c pytorch +:: Install package we just build +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install -yq "%%i" --offline if ERRORLEVEL 1 exit /b 1 :smoke_test @@ -162,24 +140,21 @@ goto end :libtorch echo "install and test libtorch" -if "%VC_YEAR%" == "2017" powershell internal\vs2017_install.ps1 +if "%VC_YEAR%" == "2019" powershell internal\vs2019_install.ps1 +if "%VC_YEAR%" == 
"2022" powershell internal\vs2022_install.ps1 + if ERRORLEVEL 1 exit /b 1 -if "%TEST_NIGHTLY_PACKAGE%" == "1" ( - call internal\install_nightly_package.bat - if errorlevel 1 exit /b 1 -) else ( - for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do 7z x "%%i" -otmp - if ERRORLEVEL 1 exit /b 1 -) +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do 7z x "%%i" -otmp +if ERRORLEVEL 1 exit /b 1 pushd tmp\libtorch -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 -IF "%VC_YEAR%" == "2017" ( - set VC_VERSION_LOWER=15 - set VC_VERSION_UPPER=16 +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +IF "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 ) for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( @@ -192,7 +167,7 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio :vswhere IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch test on Windows + echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch test on Windows exit /b 1 ) call "%VS15VCVARSALL%" x64 @@ -202,13 +177,13 @@ set INCLUDE=%INCLUDE%;%install_root%\include;%install_root%\include\torch\csrc\a set LIB=%LIB%;%install_root%\lib set PATH=%PATH%;%install_root%\lib -cl %BUILDER_ROOT%\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc +cl %BUILDER_ROOT%\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc /std:c++17 if ERRORLEVEL 1 exit /b 1 .\simple-torch-test.exe if ERRORLEVEL 1 exit /b 1 -cl %BUILDER_ROOT%\test_example_code\check-torch-mkl.cpp c10.lib torch_cpu.lib /EHsc +cl %BUILDER_ROOT%\test_example_code\check-torch-mkl.cpp c10.lib torch_cpu.lib /EHsc /std:c++17 if ERRORLEVEL 1 exit /b 1 .\check-torch-mkl.exe @@ -223,9 +198,9 @@ set 
BUILD_SPLIT_CUDA= if exist "%install_root%\lib\torch_cuda_cu.lib" if exist "%install_root%\lib\torch_cuda_cpp.lib" set BUILD_SPLIT_CUDA=ON if "%BUILD_SPLIT_CUDA%" == "ON" ( - cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda_cu.lib torch_cuda_cpp.lib /EHsc /link /INCLUDE:?warp_size@cuda@at@@YAHXZ /INCLUDE:?_torch_cuda_cu_linker_symbol_op_cuda@native@at@@YA?AVTensor@2@AEBV32@@Z + cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda_cu.lib torch_cuda_cpp.lib /EHsc /std:c++17 /link /INCLUDE:?warp_size@cuda@at@@YAHXZ /INCLUDE:?_torch_cuda_cu_linker_symbol_op_cuda@native@at@@YA?AVTensor@2@AEBV32@@Z ) else ( - cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda.lib /EHsc /link /INCLUDE:?warp_size@cuda@at@@YAHXZ + cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda.lib /EHsc /std:c++17 /link /INCLUDE:?warp_size@cuda@at@@YAHXZ ) .\check-torch-cuda.exe if ERRORLEVEL 1 exit /b 1 diff --git a/windows/internal/vc_install_helper.bat b/windows/internal/vc_install_helper.bat index 6a2a0e0d99..61ab6d5f8c 100644 --- a/windows/internal/vc_install_helper.bat +++ b/windows/internal/vc_install_helper.bat @@ -1,7 +1,12 @@ if "%VC_YEAR%" == "2019" powershell windows/internal/vs2019_install.ps1 +if "%VC_YEAR%" == "2022" powershell windows/internal/vs2022_install.ps1 -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -products Microsoft.VisualStudio.Product.BuildTools -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( diff --git a/windows/internal/vs2017_install.ps1 b/windows/internal/vs2017_install.ps1 deleted 
file mode 100644 index 873e4eb17f..0000000000 --- a/windows/internal/vs2017_install.ps1 +++ /dev/null @@ -1,28 +0,0 @@ -$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" -$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.Component.MSBuild", - "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", - "--add Microsoft.VisualStudio.Component.TextTemplating", - "--add Microsoft.VisualStudio.Component.VC.CoreIde", - "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") - -if ($args.Count -ne 0) { - $VS_INSTALL_ARGS += "--add Microsoft.VisualStudio.Component.VC.Tools.$($args[0])" -} - -curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe -if ($LASTEXITCODE -ne 0) { - echo "Download of the VS 2017 installer failed" - exit 1 -} - -$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru -Remove-Item -Path vs_installer.exe -Force -$exitCode = $process.ExitCode -if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { - echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." 
- exit 1 -} diff --git a/windows/internal/vs2022_install.ps1 b/windows/internal/vs2022_install.ps1 new file mode 100644 index 0000000000..55fba47378 --- /dev/null +++ b/windows/internal/vs2022_install.ps1 @@ -0,0 +1,56 @@ +# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479 +# https://learn.microsoft.com/en-us/visualstudio/releases/2022/release-history#evergreen-bootstrappers + +# 17.4.3 BuildTools +$VS_DOWNLOAD_LINK = "https://download.visualstudio.microsoft.com/download/pr/8f480125-28b8-4a2c-847c-c2b02a8cdd1b/64be21d4ada005d7d07896ed0b004c322409bd04d6e8eba4c03c9fa39c928e7a/vs_BuildTools.exe" +$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.TextTemplating", + "--add Microsoft.VisualStudio.Component.VC.CoreIde", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS $VC_YEAR Version $VS_VERSION installer failed" + exit 1 +} + +if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") { + $existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[17, 18)" -property installationPath + if ($existingPath -ne $null) { + if (!${env:CIRCLECI}) { + echo "Found correctly versioned existing BuildTools installation in $existingPath" + exit 0 + } + echo "Found existing BuildTools installation in $existingPath" + $VS_UNINSTALL_ARGS = 
@("uninstall", "--installPath", "`"$existingPath`"", "--quiet","--wait") + $process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_UNINSTALL_ARGS -NoNewWindow -Wait -PassThru + $exitCode = $process.ExitCode + if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "Original BuildTools uninstall failed with code $exitCode" + exit 1 + } + echo "Original BuildTools uninstalled" + } +} + +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS $VC_YEAR installer exited with code $exitCode, which should be one of [0, 3010]." + curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe + if ($LASTEXITCODE -ne 0) { + echo "Download of the VS Collect tool failed." + exit 1 + } + Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru + New-Item -Path "C:\w\build-results" -ItemType "directory" -Force + Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\" + exit 1 +} diff --git a/windows/internal/vs_install.bat b/windows/internal/vs_install.bat index 624227f0be..221ec33136 100644 --- a/windows/internal/vs_install.bat +++ b/windows/internal/vs_install.bat @@ -1,12 +1,12 @@ @echo off -set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_buildtools.exe +set VS_DOWNLOAD_LINK=https://download.visualstudio.microsoft.com/download/pr/8f480125-28b8-4a2c-847c-c2b02a8cdd1b/64be21d4ada005d7d07896ed0b004c322409bd04d6e8eba4c03c9fa39c928e7a/vs_BuildTools.exe IF "%VS_LATEST%" == "1" ( set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools set VSDEVCMD_ARGS= ) ELSE ( set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ + --add Microsoft.VisualStudio.Component.VC.Tools.14.34 ^ --add 
Microsoft.Component.MSBuild ^ --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^ --add Microsoft.VisualStudio.Component.TextTemplating ^ @@ -14,9 +14,9 @@ IF "%VS_LATEST%" == "1" ( --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^ --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^ --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ + --add Microsoft.VisualStudio.Component.VC.Tools.14.34 ^ --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81 - set VSDEVCMD_ARGS=-vcvars_ver=14.11 + set VSDEVCMD_ARGS=-vcvars_ver=14.34 ) curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe