diff --git a/.circleci/scripts/binary_checkout.sh b/.circleci/scripts/binary_checkout.sh
index f06eb8a808..b634f5c9a2 100755
--- a/.circleci/scripts/binary_checkout.sh
+++ b/.circleci/scripts/binary_checkout.sh
@@ -41,8 +41,8 @@ echo "export BUILDER_ROOT=${BUILDER_ROOT}" >> ${BASH_ENV}
 retry git clone --depth 1 https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT"
 # Removed checking out pytorch/pytorch using CIRCLE_PR_NUMBER and CIRCLE_SHA1 as
 # those environment variables are tied to the host repo where the build is being
-# triggered. 
-retry git submodule update --init --recursive --jobs 0
+# triggered.
+retry git submodule update --init --recursive
 pushd "$PYTORCH_ROOT"
 echo "Using Pytorch from "
 git --no-pager log --max-count 1
diff --git a/.github/actions/validate-binary/action.yml b/.github/actions/validate-binary/action.yml
deleted file mode 100644
index 76531010ad..0000000000
--- a/.github/actions/validate-binary/action.yml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: 'validate-binary'
-description: 'Binary Conda or Wheel Validation for Linux and MacOS'
-inputs:
-  gpu_arch_type:
-    description: 'GPU arch type'
-    required: true
-    default: 'cpu'
-  gpu_arch_ver:
-    description: 'GPU arch version'
-    required: true
-    default: 'cpu'
-  installation:
-    description: 'Installation instructions'
-    required: true
-    default: ''
-  python_version:
-    description: 'Python version'
-    required: true
-    default: '3.9'
-  target_os:
-    description: 'Target OS linux or macos'
-    required: false
-    default: 'linux'
-runs:
-  using: "composite"
-  steps:
-    - name: Checkout PyTorch builder
-      uses: actions/checkout@v2
-    - name: Check nvidia smi
-      if: ${{ inputs.gpu_arch_type == 'cuda' }}
-      shell: bash
-      run: |
-        nvidia-smi
-    - name: Install Conda Linux
-      if: ${{ inputs.target_os == 'linux' }}
-      uses: conda-incubator/setup-miniconda@v2
-      with:
-        python-version: ${{ inputs.python_version }}
-        auto-update-conda: true
-        miniconda-version: "latest"
-        activate-environment: testenv
-    - name: Install Conda MacOS
-      if: ${{ inputs.target_os == 'macos' }}
-      uses: pytorch/test-infra/.github/actions/setup-miniconda@main
-    - name: Install PyTorch and run tests
-      shell: bash
-      env:
-        GPU_ARCH_VER: ${{ inputs.gpu_arch_ver }}
-        GPU_ARCH_TYPE: ${{ inputs.gpu_arch_type }}
-        INSTALLATION: ${{ inputs.installation }}
-        ENV_NAME: conda-env-${{ github.run_id }}
-      run: |
-        set -ex
-        conda create -yp ${ENV_NAME} python=${{ inputs.python_version }} numpy
-        conda run -p ${ENV_NAME} $INSTALLATION
-        conda run -p ${ENV_NAME} python3  ./test/smoke_test/smoke_test.py
-        conda env remove -p ${ENV_NAME}
diff --git a/.github/actions/validate-windows-binary/action.yml b/.github/actions/validate-windows-binary/action.yml
deleted file mode 100644
index 7214a813bd..0000000000
--- a/.github/actions/validate-windows-binary/action.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-name: 'validate-windows-binary'
-description: 'Windows Binary Conda or Wheel Validation'
-inputs:
-  gpu_arch_type:
-    description: 'GPU arch type'
-    required: true
-    default: 'cpu'
-  gpu_arch_ver:
-    description: 'GPU arch version'
-    required: true
-    default: 'cpu'
-  installation:
-    description: 'Installation instructions'
-    required: true
-    default: ''
-  python_version:
-    description: 'Python version'
-    required: true
-    default: '3.9'
-runs:
-  using: "composite"
-  steps:
-    - name: Check nvidia smi
-      if: ${{ inputs.gpu_arch_type == 'cuda' }}
-      shell: powershell
-      run: |
-        nvidia-smi
-    - name: Install conda
-      if: ${{ inputs.gpu_arch_type == 'cpu' }}
-      uses: conda-incubator/setup-miniconda@v2
-      with:
-        python-version: ${{ inputs.python_version }}
-        auto-update-conda: true
-        miniconda-version: "latest"
-        activate-environment: conda-env-${{ github.run_id }}
-    - name: Conda Install pytorch and smoke test
-      shell: powershell
-      env:
-        GPU_ARCH_VER: ${{ inputs.gpu_arch_ver }}
-        GPU_ARCH_TYPE: ${{ inputs.gpu_arch_type }}
-        CUDA_VER: ${{ inputs.desired_cuda }}
-      run: |
-        conda install numpy pillow python=${{ inputs.python_version }}
-        $install = '${{ inputs.installation }}'
-        Invoke-Expression $install
-        python  ./test/smoke_test/smoke_test.py
diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh
new file mode 100755
index 0000000000..6ce3dd70b3
--- /dev/null
+++ b/.github/scripts/validate_binaries.sh
@@ -0,0 +1,50 @@
+# Smoke-tests one PyTorch binary build. Meant to be sourced; `conda activate` needs conda's shell integration loaded.
+# Reads ENV_NAME, TARGET_OS and MATRIX_{PACKAGE_TYPE,INSTALLATION,PYTHON_VERSION,CHANNEL,GPU_ARCH_VERSION}.
+if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then
+    curl "${MATRIX_INSTALLATION}" -o libtorch.zip
+    unzip libtorch.zip
+else
+    # Special case for Python 3.11
+    if [[ ${MATRIX_PYTHON_VERSION} == '3.11' ]]; then
+        conda create -y -n "${ENV_NAME}" python="${MATRIX_PYTHON_VERSION}"
+        conda activate "${ENV_NAME}"
+
+        INSTALLATION=${MATRIX_INSTALLATION/"-c pytorch"/"-c malfet -c pytorch"}
+        INSTALLATION=${INSTALLATION/"pytorch-cuda"/"pytorch-${MATRIX_CHANNEL}::pytorch-cuda"}
+        INSTALLATION=${INSTALLATION/"conda install"/"conda install -y"}
+
+        eval "${INSTALLATION}"
+        python ./test/smoke_test/smoke_test.py
+        conda deactivate
+        conda env remove -n "${ENV_NAME}"
+    else
+        # Special case Pypi installation package, only applicable to linux non-nightly CUDA 11.7 builds, wheel package
+        if [[ ${TARGET_OS} == 'linux' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' && ${MATRIX_CHANNEL} != 'nightly' ]]; then
+            conda create -yp "${ENV_NAME}_pypi" python="${MATRIX_PYTHON_VERSION}" numpy ffmpeg
+            INSTALLATION_PYPI=${MATRIX_INSTALLATION/"cu117"/"cu117_pypi_cudnn"}
+            INSTALLATION_PYPI=${INSTALLATION_PYPI/"torchvision torchaudio"/""}
+            INSTALLATION_PYPI=${INSTALLATION_PYPI/"index-url"/"extra-index-url"}
+            # INSTALLATION_PYPI stays unquoted on purpose: it has to word-split into the command that conda runs.
+            conda run -p "${ENV_NAME}_pypi" ${INSTALLATION_PYPI}
+            conda run -p "${ENV_NAME}_pypi" python ./test/smoke_test/smoke_test.py --package torchonly
+            conda deactivate
+            conda env remove -p "${ENV_NAME}_pypi"
+        fi
+
+        # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
+        conda create -y -n "${ENV_NAME}" python="${MATRIX_PYTHON_VERSION}" numpy ffmpeg
+        conda activate "${ENV_NAME}"
+        INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
+        eval "${INSTALLATION}"
+
+        if [[ ${TARGET_OS} == 'linux' ]]; then
+            export CONDA_LIBRARY_PATH="$(dirname "$(command -v python)")/../lib"
+            export LD_LIBRARY_PATH="${CONDA_LIBRARY_PATH}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+            "${PWD}/check_binary.sh"
+        fi
+
+        python  ./test/smoke_test/smoke_test.py
+        conda deactivate
+        conda env remove -n "${ENV_NAME}"
+    fi
+fi
diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml
index 92567d7bd3..43626533e6 100644
--- a/.github/workflows/build-conda-images.yml
+++ b/.github/workflows/build-conda-images.yml
@@ -19,19 +19,19 @@ env:
   DOCKER_BUILDKIT: 1
   DOCKER_ID: ${{ secrets.DOCKER_ID }}
   DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
-  WITH_PUSH: ${{ github.event_name == 'push' }}
+  WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
 
 jobs:
   build-docker:
-    runs-on: linux.2xlarge
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        cuda_version: ["10.2", "11.3", "11.5", "11.6", "11.7", "cpu"]
+        cuda_version: ["11.6", "11.7", "11.8", "cpu"]
     env:
       CUDA_VERSION: ${{ matrix.cuda_version }}
     steps:
       - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml
index 9526434e4e..49069557a9 100644
--- a/.github/workflows/build-libtorch-images.yml
+++ b/.github/workflows/build-libtorch-images.yml
@@ -21,20 +21,20 @@ env:
   DOCKER_BUILDKIT: 1
   DOCKER_ID: ${{ secrets.DOCKER_ID }}
   DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
-  WITH_PUSH: ${{ github.event_name == 'push' }}
+  WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
 
 jobs:
   build-docker-cuda:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        cuda_version: ["11.7", "11.6", "11.5", "11.3", "10.2"]
+        cuda_version: ["11.8", "11.7", "11.6"]
     env:
       GPU_ARCH_TYPE: cuda
       GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
     steps:
       - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
@@ -44,16 +44,16 @@ jobs:
         run: |
           libtorch/build_docker.sh
   build-docker-rocm:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        rocm_version: ["5.1.1", "5.2"]
+        rocm_version: ["5.3", "5.4.2"]
     env:
       GPU_ARCH_TYPE: rocm
       GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
     steps:
       - name: Checkout PyTorch
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
@@ -63,10 +63,10 @@ jobs:
         run: |
           libtorch/build_docker.sh
   build-docker-cpu:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout PyTorch
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
diff --git a/.github/workflows/build-llvm-images.yml b/.github/workflows/build-llvm-images.yml
index 5b24abf5fa..a89230891b 100644
--- a/.github/workflows/build-llvm-images.yml
+++ b/.github/workflows/build-llvm-images.yml
@@ -17,7 +17,7 @@ env:
   DOCKER_BUILDKIT: 1
   DOCKER_ID: ${{ secrets.DOCKER_ID }}
   DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
-  WITH_PUSH: ${{ github.event_name == 'push' }}
+  WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
   FORCE_PUSH: yes
 
 jobs:
@@ -25,7 +25,7 @@ jobs:
     runs-on: linux.2xlarge
     steps:
       - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
diff --git a/.github/workflows/build-magma-linux.yml b/.github/workflows/build-magma-linux.yml
index 655d02c6ee..eb1d67a70a 100644
--- a/.github/workflows/build-magma-linux.yml
+++ b/.github/workflows/build-magma-linux.yml
@@ -30,10 +30,10 @@ jobs:
     runs-on: linux.2xlarge
     strategy:
       matrix:
-        cuda_version: ["117", "116", "115"]
+        cuda_version: ["118", "117", "116"]
     steps:
       - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Build Magma Cuda
         working-directory: magma
         run: |
@@ -54,7 +54,7 @@ jobs:
         run: |
           conda install -y conda-build anaconda-client
       - name: Push MAGMA to anaconda
-        if: ${{ github.event_name == 'push' }}
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
         run: |
           anaconda --token $ANACONDA_TOKEN upload -u pytorch --force magma/output/linux-64/magma-cuda*.bz2
         env:
diff --git a/.github/workflows/build-magma-windows.yml b/.github/workflows/build-magma-windows.yml
index 87fdb22c5a..5ad6ba29a6 100644
--- a/.github/workflows/build-magma-windows.yml
+++ b/.github/workflows/build-magma-windows.yml
@@ -17,14 +17,14 @@ jobs:
     runs-on: windows-2019
     strategy:
       matrix:
-        cuda_version: ["117", "116"]
+        cuda_version: ["118", "117", "116"]
         config: ["Release", "Debug"]
     env:
       CUDA_VERSION: ${{ matrix.cuda_version }}
       CONFIG: ${{ matrix.config }}
     steps:
       - name: Checkout pytorch/builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Enable MSVC dev commands to enable cl.exe  # FYI incompatible with shell: bash
         uses: ilammy/msvc-dev-cmd@dd5e2fa0a7de1e7929605d9ecc020e749d9856a3
       - name: Install CUDA Toolkit
@@ -36,9 +36,9 @@ jobs:
         with:
           path: magma_*_cuda*_*.7z
   push-windows-magma:
-    if: ${{ github.event_name == 'push' }}
+    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
     environment: magma
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     needs: build-windows-magma
     steps:
       - name: Download all artifacts
diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index b62507bbd0..153f501bc7 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -7,12 +7,14 @@ on:
     paths:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
+      - manywheel/Dockerfile_cxx11-abi
       - manywheel/build_docker.sh
       - 'common/*'
   pull_request:
     paths:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
+      - manywheel/Dockerfile_cxx11-abi
       - 'common/*'
       - manywheel/build_docker.sh
 
@@ -21,20 +23,20 @@ env:
   DOCKER_BUILDKIT: 1
   DOCKER_ID: ${{ secrets.DOCKER_ID }}
   DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
-  WITH_PUSH: ${{ github.event_name == 'push' }}
+  WITH_PUSH: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
 
 jobs:
   build-docker-cuda:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        cuda_version: ["11.7", "11.6", "11.5", "11.3", "10.2"]
+        cuda_version: ["11.8", "11.7", "11.6"]
     env:
       GPU_ARCH_TYPE: cuda
       GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
     steps:
       - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
@@ -44,16 +46,16 @@ jobs:
         run: |
           manywheel/build_docker.sh
   build-docker-rocm:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        rocm_version: ["5.1.1", "5.2"]
+        rocm_version: ["5.3", "5.4.2"]
     env:
       GPU_ARCH_TYPE: rocm
       GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
     steps:
       - name: Checkout PyTorch
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
@@ -63,10 +65,25 @@ jobs:
         run: |
           manywheel/build_docker.sh
   build-docker-cpu:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout PyTorch
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
+  build-docker-cpu-cxx11-abi:
+    runs-on: ubuntu-22.04
+    env:
+      GPU_ARCH_TYPE: cpu-cxx11-abi
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
       - name: Authenticate if WITH_PUSH
         run: |
           if [[ "${WITH_PUSH}" == true ]]; then
diff --git a/.github/workflows/test-validate-domain-library.yml b/.github/workflows/test-validate-domain-library.yml
new file mode 100644
index 0000000000..6c651e709a
--- /dev/null
+++ b/.github/workflows/test-validate-domain-library.yml
@@ -0,0 +1,19 @@
+name: Test validate domain library
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/validate-domain-library.yml
+      - .github/workflows/test-validate-domain-library.yml
+  workflow_dispatch:
+
+jobs:
+  test-validate-domain-library:
+    uses: ./.github/workflows/validate-domain-library.yml
+    with:
+      package_type: "conda,wheel"
+      os: "all"
+      channel: "release"
+      repository: "pytorch/builder"
+      ref: main
+      smoke_test: "echo test"
diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml
new file mode 100644
index 0000000000..4ae2605386
--- /dev/null
+++ b/.github/workflows/validate-binaries.yml
@@ -0,0 +1,90 @@
+name: Validate binaries
+
+# A reusable workflow that triggers a set of jobs that perform a smoke test / validation of pytorch binaries.
+# Optionally restricts validation to the specified OS and channel.
+# For the details about parameter values, see:
+#   pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+# For an example of the `workflow_call` usage see:
+#   https://github.com/pytorch/builder/pull/1144
+on:
+  workflow_call:
+    inputs:
+      os:
+        description: "Operating system to generate for (linux, windows, macos, macos-arm64)"
+        required: true
+        type: string
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+      limit-win-builds:
+        description: "Limit windows builds to single python/cuda config"
+        default: "disable"
+        type: string
+  workflow_dispatch:
+    inputs:
+      os:
+        description: "Operating system to generate for (linux, windows, macos, macos-arm64)"
+        required: true
+        type: choice
+        default: all
+        options:
+          - windows
+          - linux
+          - macos
+          - all
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: choice
+        default: all
+        options:
+          - release
+          - nightly
+          - test
+          - all
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+      limit-win-builds:
+        description: "Limit windows builds to single python/cuda config"
+        default: "disable"
+        required: false
+        type: string
+
+jobs:
+  win:
+    if:  inputs.os == 'windows' || inputs.os == 'all'
+    uses: ./.github/workflows/validate-windows-binaries.yml
+    with:
+      channel: ${{ inputs.channel }}
+      ref: ${{ inputs.ref || github.ref }}
+      limit-win-builds: ${{ inputs.limit-win-builds }}
+
+  linux:
+    if:  inputs.os == 'linux' || inputs.os == 'all'
+    uses: ./.github/workflows/validate-linux-binaries.yml
+    with:
+      channel: ${{ inputs.channel }}
+      ref: ${{ inputs.ref || github.ref }}
+
+  mac:
+    if:  inputs.os == 'macos' || inputs.os == 'all'
+    uses: ./.github/workflows/validate-macos-binaries.yml
+    with:
+      channel: ${{ inputs.channel }}
+      ref: ${{ inputs.ref || github.ref }}
+
+  mac-arm64:  # Apple-silicon leg; still runs for os=macos so existing callers keep their coverage
+    if:  inputs.os == 'macos' || inputs.os == 'macos-arm64' || inputs.os == 'all'
+    uses: ./.github/workflows/validate-macos-arm64-binaries.yml
+    with:
+      channel: ${{ inputs.channel }}
+      ref: ${{ inputs.ref || github.ref }}
diff --git a/.github/workflows/validate-domain-library.yml b/.github/workflows/validate-domain-library.yml
new file mode 100644
index 0000000000..149b8335b4
--- /dev/null
+++ b/.github/workflows/validate-domain-library.yml
@@ -0,0 +1,153 @@
+name: Validate domain libary
+
+# A reusable workflow that triggers a set of jobs that perform a smoke test / validation of pytorch binaries.
+# Optionally restricts validation to the specified OS and channel.
+# For the details about parameter values, see:
+#   pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+on:
+  workflow_call:
+    inputs:
+      os:
+        description: "Operating system to generate for (linux, windows, macos, macos-arm64)"
+        required: false
+        type: string
+        default: "all"
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+      package_type:
+        description: "Package type (conda, wheel, all)"
+        required: false
+        type: string
+        default: "all"
+      repository:
+        description: "Path to repository to checkout"
+        required: true
+        type: string
+      smoke_test:
+        description: "Path to a smoke test script"
+        required: true
+        type: string
+      with_cuda:
+        description: "With cuda enable/disable"
+        required: false
+        type: string
+        default: disable
+
+jobs:
+  generate-linux-matrix:
+    if:  (inputs.os == 'linux' || inputs.os == 'all')
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: ${{ inputs.package_type }}
+      os: linux
+      channel: ${{ inputs.channel }}
+      with-cuda: ${{ inputs.with_cuda }}
+  generate-windows-matrix:
+    if:  (inputs.os == 'windows' || inputs.os == 'all')
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: ${{ inputs.package_type }}
+      os: windows
+      channel: ${{ inputs.channel }}
+      with-cuda: ${{ inputs.with_cuda }}
+  generate-macos-matrix:
+    if:  (inputs.os == 'macos' || inputs.os == 'all')
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: ${{ inputs.package_type }}
+      os: macos
+      channel: ${{ inputs.channel }}
+      with-cuda: ${{ inputs.with_cuda }}
+  generate-macos-arm64-matrix:
+    if:  (inputs.os == 'macos-arm64' || inputs.os == 'all')
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: ${{ inputs.package_type }}
+      os: macos-arm64
+      channel: ${{ inputs.channel }}
+      with-cuda: ${{ inputs.with_cuda }}
+  validate-linux:  # runs the caller-supplied smoke test once per generated linux matrix entry
+    if:  (inputs.os == 'linux' || inputs.os == 'all')
+    needs: generate-linux-matrix
+    strategy:
+      matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: "linux-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+    with:
+      runner: ${{ matrix.validation_runner }}
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: "linux-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}" TARGET_OS="linux"
+        export SMOKE_TEST="${{ inputs.smoke_test }}"
+        eval $SMOKE_TEST
+  validate-windows:
+    if:  (inputs.os == 'windows' || inputs.os == 'all')
+    needs: generate-windows-matrix
+    strategy:
+      matrix: ${{ fromJson(needs.generate-windows-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+    name: "windows-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+    with:
+      runner: ${{ matrix.validation_runner }}
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: "windows-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export SMOKE_TEST="${{ inputs.smoke_test }}"
+        export TARGET_OS="windows"
+        eval $SMOKE_TEST
+  validate-macos:
+    if:  (inputs.os == 'macos' || inputs.os == 'all')
+    needs: generate-macos-matrix
+    strategy:
+      matrix: ${{ fromJson(needs.generate-macos-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    name: "macos-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+    with:
+      runner:  ${{ matrix.validation_runner }}
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: "macos-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export TARGET_OS="macos"
+        export SMOKE_TEST="${{ inputs.smoke_test }}"
+        eval $SMOKE_TEST
+  validate-macos-arm64:  # runs the caller-supplied smoke test once per generated macos-arm64 matrix entry
+    if:  (inputs.os == 'macos-arm64' || inputs.os == 'all')
+    needs: generate-macos-arm64-matrix  # must be the job whose outputs the matrix expression below reads
+    strategy:
+      matrix: ${{ fromJson(needs.generate-macos-arm64-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    name: "macos-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+    with:
+      runner:  ${{ matrix.validation_runner }}
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: "macos-arm64-${{ matrix.package_type }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }}"
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}" TARGET_OS="macos-arm64"
+        export SMOKE_TEST="${{ inputs.smoke_test }}"
+        eval $SMOKE_TEST
diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml
index 43c1a484d5..438062f91a 100644
--- a/.github/workflows/validate-linux-binaries.yml
+++ b/.github/workflows/validate-linux-binaries.yml
@@ -1,79 +1,58 @@
 name: Validate linux binaries
 
 on:
-  push:
-    branches:
-      main
-    paths:
-      - .github/workflows/validate-linux-binaries.yml
-  pull_request:
-    paths:
-      - .github/workflows/validate-linux-binaries.yml
+  workflow_call:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: choice
+        options:
+          - release
+          - nightly
+          - test
+          - all
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+
 jobs:
-  generate-conda-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: conda
-      os: linux
-      channel: nightly
-  generate-wheel-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: wheel
-      os: linux
-      channel: nightly
-  generate-libtorch-matrix:
+  generate-linux-matrix:
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
-      package-type: libtorch
+      package-type: all
       os: linux
-      channel: nightly
-  validate-linux-binaries-conda:
-    needs: generate-conda-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-conda-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary conda
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-  validate-linux-binaries-wheels:
-    needs: generate-wheel-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-wheel-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary wheel
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-  validate-linux-libtorch-binaries:
-    needs: generate-libtorch-matrix
+      channel: ${{ inputs.channel }}
+
+  linux:
+    needs: generate-linux-matrix
     strategy:
-      matrix:
-        ${{ fromJson(needs.generate-libtorch-matrix.outputs.matrix) }}
+      matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }}
       fail-fast: false
-    runs-on: "ubuntu-20.04"
-    env:
-      PYTHON_VERSION: ${{ matrix.python_version }}
-    steps:
-      - name: Install pytorch and smoke test
-        env:
-          INSTALLATION: ${{ matrix.installation }}
-          ENV_NAME: conda-env-${{ github.run_id }}
-        run: |
-          sudo apt-get install unzip -y
-          set -ex
-          curl ${INSTALLATION} -o libtorch.zip
-          unzip libtorch.zip
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: ${{ matrix.build_name }}
+    with:
+      runner: ${{ matrix.validation_runner }}
+      repository: "pytorch/builder"
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: ${{ matrix.build_name }}
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export TARGET_OS="linux"
+        eval "$(conda shell.bash hook)"
+        source ./.github/scripts/validate_binaries.sh
diff --git a/.github/workflows/validate-macos-arm64-binaries.yml b/.github/workflows/validate-macos-arm64-binaries.yml
new file mode 100644
index 0000000000..f321022d42
--- /dev/null
+++ b/.github/workflows/validate-macos-arm64-binaries.yml
@@ -0,0 +1,56 @@
+name: Validate MacOS ARM64 Binaries
+
+on:
+  workflow_call:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: choice
+        options:
+          - release
+          - nightly
+          - test
+          - all
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+
+jobs:
+  generate-macos-arm64-matrix:
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: all
+      os: macos-arm64
+      channel: ${{ inputs.channel }}
+  macos-arm64:
+    needs: generate-macos-arm64-matrix
+    strategy:
+      matrix: ${{ fromJson(needs.generate-macos-arm64-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    name: ${{ matrix.build_name }}
+    with:
+      runner: ${{ matrix.validation_runner }}
+      repository: "pytorch/builder"
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: ${{ matrix.build_name }}
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export TARGET_OS="macos-arm64"
+        source ./.github/scripts/validate_binaries.sh
diff --git a/.github/workflows/validate-macos-binaries.yml b/.github/workflows/validate-macos-binaries.yml
index 3bc3ea0cdc..0e3f38ff86 100644
--- a/.github/workflows/validate-macos-binaries.yml
+++ b/.github/workflows/validate-macos-binaries.yml
@@ -1,96 +1,56 @@
 name: Validate MacOS Binaries
 
 on:
-  pull_request:
-    paths:
-      - .github/workflows/validate-macos-binaries.yml
+  workflow_call:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: choice
+        options:
+          - release
+          - nightly
+          - test
+          - all
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+
 jobs:
-  generate-arm64-conda-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: conda
-      os: macos-arm64
-      channel: all
-  generate-arm64-wheel-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: wheel
-      os: macos-arm64
-      channel: all
-  generate-x86_64-conda-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: conda
-      os: macos-x86_64
-      channel: all
-  generate-x86_64-wheel-matrix:
+  generate-macos-matrix:
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
-      package-type: wheel
-      os: macos-x86_64
-      channel: all
-
-  validate-macos-arm64-binaries-conda:
-    needs: generate-arm64-conda-matrix
+      package-type: all
+      os: macos
+      channel: ${{ inputs.channel }}
+  macos:
+    needs: generate-macos-matrix
     strategy:
-      matrix:
-        ${{ fromJson(needs.generate-arm64-conda-matrix.outputs.matrix) }}
+      matrix: ${{ fromJson(needs.generate-macos-matrix.outputs.matrix) }}
       fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary conda
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-          target_os: macos
-  validate-macos-arm64-binaries-wheel:
-    needs: generate-arm64-wheel-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-arm64-wheel-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary wheel
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-          target_os: macos
-  validate-macos-x86_64-binaries-conda:
-    needs: generate-x86_64-conda-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-x86_64-conda-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary conda
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-          target_os: macos
-  validate-macos-x86_64-binaries-wheel:
-    needs: generate-x86_64-wheel-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-x86_64-wheel-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Validate binary wheel
-        uses: pytorch/builder/.github/actions/validate-binary@main
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-          target_os: macos
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    name: ${{ matrix.build_name }}
+    with:
+      runner: ${{ matrix.validation_runner }}
+      repository: "pytorch/builder"
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: ${{ matrix.build_name }}
+      binary-matrix: ${{ toJSON(matrix) }}
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export TARGET_OS="macos"
+        source ./.github/scripts/validate_binaries.sh
diff --git a/.github/workflows/validate-nightly-binaries.yml b/.github/workflows/validate-nightly-binaries.yml
new file mode 100644
index 0000000000..c252e0433b
--- /dev/null
+++ b/.github/workflows/validate-nightly-binaries.yml
@@ -0,0 +1,35 @@
+# Scheduled validation of the nightly binaries
+name: cron
+
+on:
+  schedule:
+    # At 2:30 pm UTC (7:30 am PDT)
+    - cron: "30 14 * * *"
+  # Have the ability to trigger this job manually through the API
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/validate-nightly-binaries.yml
+      - .github/workflows/validate-linux-binaries.yml
+      - .github/workflows/validate-windows-binaries.yml
+      - .github/workflows/validate-macos-binaries.yml
+      - .github/workflows/validate-macos-arm64-binaries.yml
+      - test/smoke_test/*
+  pull_request:
+    paths:
+      - .github/workflows/validate-nightly-binaries.yml
+      - .github/workflows/validate-linux-binaries.yml
+      - .github/workflows/validate-windows-binaries.yml
+      - .github/workflows/validate-macos-binaries.yml
+      - .github/workflows/validate-macos-arm64-binaries.yml
+      - .github/scripts/validate_binaries.sh
+      - test/smoke_test/*
+jobs:
+  nightly:
+    uses: ./.github/workflows/validate-binaries.yml
+    with:
+      channel: nightly
+      os: all
+      limit-win-builds: enable
diff --git a/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml
new file mode 100644
index 0000000000..a995ec817a
--- /dev/null
+++ b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml
@@ -0,0 +1,26 @@
+name: Validate Nightly PyPI Wheel Binary Size
+on:
+  pull_request:
+    paths:
+      - .github/workflows/validate-nightly-pypi-wheel-binary-size.yml
+  workflow_dispatch:
+  schedule:
+    # At 2:30 pm UTC (7:30 am PDT)
+    - cron: "30 14 * * *"
+
+jobs:
+  nightly-pypi-binary-size-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/test-infra
+      - name: Install requirements
+        run: |
+          pip3 install -r tools/binary_size_validation/requirements.txt
+      - name: Run validation
+        run: |
+          python tools/binary_size_validation/binary_size_validation.py \
+              --url https://download.pytorch.org/whl/nightly/torch/ \
+              --include "pypi" --only-latest-version --threshold 750
\ No newline at end of file
diff --git a/.github/workflows/validate-release-binaries.yml b/.github/workflows/validate-release-binaries.yml
new file mode 100644
index 0000000000..9549e1e33e
--- /dev/null
+++ b/.github/workflows/validate-release-binaries.yml
@@ -0,0 +1,27 @@
+# Scheduled validation of the release binaries
+name: cron
+
+on:
+  schedule:
+    # At 3 am and 2 pm UTC (8 pm and 7 am PDT)
+    - cron: "0 3,14 * * *"
+  # Have the ability to trigger this job manually through the API
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/validate-release-binaries.yml
+      - .github/workflows/validate-linux-binaries.yml
+      - .github/workflows/validate-windows-binaries.yml
+      - .github/workflows/validate-macos-binaries.yml
+      - .github/workflows/validate-macos-arm64-binaries.yml
+      - test/smoke_test/*
+
+jobs:
+  release:
+    uses: ./.github/workflows/validate-binaries.yml
+    with:
+      channel: release
+      os: all
+      limit-win-builds: enable
diff --git a/.github/workflows/validate-repackaged-binary-sizes.yml b/.github/workflows/validate-repackaged-binary-sizes.yml
new file mode 100644
index 0000000000..695c68d3aa
--- /dev/null
+++ b/.github/workflows/validate-repackaged-binary-sizes.yml
@@ -0,0 +1,88 @@
+name: Validate manywheel binaries
+
+# This workflow validates the size of the manywheel binaries after repackaging for PyPi
+# Specify the direct URLs to the binaries (from https://download.pytorch.org/whl/test/torch/) in the matrix
+# along with the python version.
+#
+# The workflow will:
+#  * download the binaries,
+#  * run release/pypi/prep_binary_for_pypi.sh
+#  * run smoke tests on the repackaged binaries
+#  * display the size before and after repackaging as the workflow annotation
+#  * optionally upload the repackaged binaries as artifacts (for debug or promotion)
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/validate-repackaged-binary-sizes.yml
+      - release/pypi/prep_binary_for_pypi.sh
+
+jobs:
+  validate-binary-size:
+    strategy:
+      fail-fast: false
+      matrix:
+        whl:
+          - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp310-cp310-linux_x86_64.whl
+            python: "3.10"  # python version to use for smoke tests
+            upload_artifact: false # upload the repackaged binary as an artifact
+          - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp37-cp37m-linux_x86_64.whl
+            python: "3.7"
+            upload_artifact: false
+          - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp38-cp38-linux_x86_64.whl
+            python: "3.8"
+            upload_artifact: false
+          - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp39-cp39-linux_x86_64.whl
+            python: "3.9"
+            upload_artifact: false
+     #    - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp311-cp311-linux_x86_64.whl
+     #      python: "3.11"
+     #      upload_artifact: false
+
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.4xlarge.nvidia.gpu
+      job-name: "Validate binary size"
+      upload-artifact: ${{ matrix.whl.upload_artifact && 'repackaged-binary' || '' }}  # truthiness test: a boolean matrix value never equals the string 'true'
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export GPU_ARCH_VER="11.7"
+        export GPU_ARCH_TYPE="cuda"
+        export CUDA_VER="11.7"
+        export DESIRED_PYTHON="${{ matrix.whl.python }}"
+        export DESIRED_CUDA="cu117"
+        export PACKAGE_TYPE="wheel"
+        export TARGET_OS="linux"
+        export INSTALLATION=""
+
+        # install zip
+        sudo yum install zip -y
+
+        # install patchelf
+        chmod a+x common/install_patchelf.sh
+        sudo common/install_patchelf.sh
+
+        # download torch whl
+        wget "${{ matrix.whl.url }}"
+        FILENAME=$(ls -1 *.whl | head -n 1)
+        SIZE_BEFORE=$(du -h "$FILENAME" | cut -f1)
+
+        # repackage into manywheel
+        release/pypi/prep_binary_for_pypi.sh "$FILENAME"
+
+        NEW_FILENAME=$(ls -1 *.whl | head -n 1)
+        echo "::notice:: $FILENAME before: $SIZE_BEFORE after: $(du -h "$NEW_FILENAME" | cut -f1)"
+
+        # cp to ${RUNNER_ARTIFACT_DIR}
+        cp "$NEW_FILENAME" "${RUNNER_ARTIFACT_DIR}/"
+
+        # create conda env
+        conda create -y -n "$ENV_NAME" python="$DESIRED_PYTHON"
+        conda activate "$ENV_NAME"
+
+        # install torch
+        pip install numpy pillow "$NEW_FILENAME"
+
+        # run smoke test
+        python ./test/smoke_test/smoke_test.py --package=torchonly
\ No newline at end of file
diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml
index 1dad91db06..6833e55b20 100644
--- a/.github/workflows/validate-windows-binaries.yml
+++ b/.github/workflows/validate-windows-binaries.yml
@@ -1,80 +1,69 @@
-name: Validate binary images
+name: Validate Windows binary images
 
 on:
-  push:
-    branches:
-      main
-    paths:
-      - .github/workflows/validate-windows-binaries.yml
-  pull_request:
-    paths:
-      - .github/workflows/validate-windows-binaries.yml
+  workflow_call:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+      limit-win-builds:
+        description: "Limit windows builds to single python/cuda config"
+        default: "disable"
+        type: string
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: "Channel to use (nightly, test, release, all)"
+        required: true
+        type: choice
+        options:
+          - release
+          - nightly
+          - test
+          - all
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+      limit-win-builds:
+        description: "Limit windows builds to single python/cuda config"
+        default: "disable"
+        required: false
+        type: string
+
 jobs:
-  generate-conda-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: conda
-      os: windows
-      channel: nightly
-  generate-wheel-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
-    with:
-      package-type: wheel
-      os: windows
-      channel: nightly
-  generate-libtorch-matrix:
+  generate-windows-matrix:
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
-      package-type: libtorch
+      package-type: all
       os: windows
-      channel: nightly
-  validate-windows-binaries-conda:
-    needs: generate-conda-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-conda-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
-      - name: Validate binary conda
-        uses: ./.github/actions/validate-windows-binary
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-  validate-windows-binaries-wheel:
-    needs: generate-wheel-matrix
-    strategy:
-      matrix:
-        ${{ fromJson(needs.generate-wheel-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.validation_runner }}
-    steps:
-      - name: Checkout PyTorch builder
-        uses: actions/checkout@v2
-      - name: Validate binary wheel
-        uses: ./.github/actions/validate-windows-binary
-        with:
-          gpu_arch_type: ${{ matrix.gpu_arch_type }}
-          gpu_arch_ver: ${{ matrix.gpu_arch_version }}
-          installation: ${{ matrix.installation }}
-          python_version: ${{ matrix.python_version }}
-  validate-linux-libtorch-binaries:
-    needs: generate-libtorch-matrix
+      channel: ${{ inputs.channel }}
+      limit-win-builds: ${{ inputs.limit-win-builds }}
+
+  win:
+    needs: generate-windows-matrix
     strategy:
-      matrix:
-        ${{ fromJson(needs.generate-libtorch-matrix.outputs.matrix) }}
+      matrix: ${{ fromJson(needs.generate-windows-matrix.outputs.matrix) }}
       fail-fast: false
-    runs-on: "windows-2019"
-    env:
-      PYTHON_VERSION: ${{ matrix.python_version }}
-    steps:
-      - name: Install pytorch and smoke test
-        shell: powershell
-        run: |
-          $install = '${{ matrix.installation }}'
-          Invoke-WebRequest -Uri $install -OutFile 'libtorch.zip'
-          Expand-Archive -Force libtorch.zip .
+    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+    name: ${{ matrix.build_name }}
+    with:
+      runner: ${{ matrix.package_type == 'libtorch' && 'windows.4xlarge' || matrix.validation_runner }}
+      repository: "pytorch/builder"
+      ref: ${{ inputs.ref || github.ref }}
+      job-name: ${{ matrix.build_name }}
+      binary-matrix: ${{ toJSON(matrix) }}
+      timeout: 60
+      script: |
+        set -ex
+        export ENV_NAME="conda-env-${{ github.run_id }}"
+        export TARGET_OS="windows"
+        source /c/Jenkins/Miniconda3/etc/profile.d/conda.sh
+        source ./.github/scripts/validate_binaries.sh
diff --git a/CUDA_UPGRADE_GUIDE.MD b/CUDA_UPGRADE_GUIDE.MD
index 4a725c0f06..ae3f158d31 100644
--- a/CUDA_UPGRADE_GUIDE.MD
+++ b/CUDA_UPGRADE_GUIDE.MD
@@ -9,9 +9,8 @@ Here is the supported matrix for CUDA and CUDNN
 
 | CUDA | CUDNN | additional details |
 | --- | --- | --- |
-| 10.2 | 7.6.5.32 | Needed for publishing CUDA enabled binaries to PyPi since CUDA 11.x binaries don’t meet the space requirements (<750MB) |
-| 11.3 | 8.3.2.44 | Stable CUDA Release |
-| 11.6 | 8.3.2.44 | Latest CUDA Release |
+| 11.6 | 8.3.2.44 | Stable CUDA Release |
+| 11.7 | 8.5.0.96 | Latest CUDA Release |
 
 
 ### B. Check the package availability
@@ -72,12 +71,13 @@ Add setup for our Docker `libtorch` and `manywheel`:
 
 1. Follow this [PR 999](https://github.com/pytorch/builder/pull/999) for all steps in this section
 2. To get the CUDA install link, just like with Linux, go [here](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exe_local) and upload that `.exe` file to our S3 bucket [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects).
-3. To get the cuDNN install link, you could ask NVIDIA, but you could also just sign up for an NVIDIA account and access the needed `.zip` file at this [link](https://developer.nvidia.com/rdp/cudnn-download). First click on `cuDNN Library for Windows (x86)` and then upload that zip file to our S3 bucket.
-4. NOTE: When you upload files to S3, make sure to make these objects publicly readable so that our CI can access them!
-5. Most times, you have to upgrade the driver install for newer versions, which would look like [updating the `windows/internal/driver_update.bat` file](https://github.com/pytorch/builder/commit/9b997037e16eb3bc635e28d101c3297d7e4ead29)
+3. Review "Table 3. Possible Subpackage Names" of CUDA installation guide for windows [link](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) to make sure the Subpackage Names have not changed. These are specified in [cuda_install.bat file](https://github.com/pytorch/builder/pull/999/files#diff-92a9c40963159c9d8f88fa2987057a65a2370737bd4ecc233498ebdfa02021e6)
+4. To get the cuDNN install link, you could ask NVIDIA, but you could also just sign up for an NVIDIA account and access the needed `.zip` file at this [link](https://developer.nvidia.com/rdp/cudnn-download). First click on `cuDNN Library for Windows (x86)` and then upload that zip file to our S3 bucket.
+5. NOTE: When you upload files to S3, make sure to make these objects publicly readable so that our CI can access them!
+6. Most times, you have to upgrade the driver install for newer versions, which would look like [updating the `windows/internal/driver_update.bat` file](https://github.com/pytorch/builder/commit/9b997037e16eb3bc635e28d101c3297d7e4ead29)
     1. Please check the CUDA Toolkit and Minimum Required Driver Version for CUDA minor version compatibility table  in [the release notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) to see if a driver update is necessary.
-6. Compile MAGMA with the new CUDA version. Update `.github/workflows/build-magma-windows.yml` to include new version.
-7. Validate Magma builds by going to S3 [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects). And querying for ```magma_```
+7. Compile MAGMA with the new CUDA version. Update `.github/workflows/build-magma-windows.yml` to include new version.
+8. Validate Magma builds by going to S3 [ossci-windows](https://s3.console.aws.amazon.com/s3/buckets/ossci-windows?region=us-east-1&tab=objects) and querying for ```magma_```
 
 ## 6. Generate new Windows AMI, test and deploy to canary and prod.
 
diff --git a/README.md b/README.md
index 01e18dcbea..70d902ac32 100644
--- a/README.md
+++ b/README.md
@@ -10,3 +10,7 @@ Folders:
 - **windows** : scripts to build Windows wheels
 - **cron** : scripts to drive all of the above scripts across multiple configurations together
 - **analytics** : scripts to pull wheel download count from our AWS s3 logs
+
+## Testing
+
+To test builds triggered by the PyTorch repo's GitHub Actions, see [these instructions](https://github.com/pytorch/pytorch/blob/master/.github/scripts/README.md#testing-pytorchbuilder-changes).
diff --git a/aarch64_linux/README.md b/aarch64_linux/README.md
new file mode 100644
index 0000000000..583ed4af99
--- /dev/null
+++ b/aarch64_linux/README.md
@@ -0,0 +1,19 @@
+# Aarch64 (ARM/Graviton) Support Scripts
+Scripts for building aarch64 PyTorch PIP Wheels. These scripts build the following wheels:
+* torch
+* torchvision
+* torchaudio
+* torchtext
+* torchdata
+## Aarch64_ci_build.sh
+This script is designed to support CD operations within the PyPA manylinux aarch64 container and to be executed inside that container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script "assumes" the PyTorch repo is located at: ```/pytorch``` and will put the wheels into ```/artifacts```.
+### Usage
+```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh```
+
+__NOTE:__ CI build is currently __EXPERIMENTAL__
+
+## Build_aarch64_wheel.py
+This app allows a person to build using AWS EC2 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. Can be used in a codebuild CD or from a local system.
+
+### Usage
+```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh
new file mode 100644
index 0000000000..c72698389c
--- /dev/null
+++ b/aarch64_linux/aarch64_ci_build.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -eux -o pipefail
+
+# Prepares the Docker container for aarch64_wheel_ci_build.py: installs conda and the
+# Python version given by DESIRED_PYTHON (required; `set -u` aborts if it is unset).
+
+CONDA_PYTHON_EXE=/opt/conda/bin/python
+CONDA_EXE=/opt/conda/bin/conda
+PATH=/opt/conda/bin:$PATH
+
+###############################################################################
+# Install OS dependent packages
+###############################################################################
+yum -y install epel-release
+yum -y install less zstd
+
+###############################################################################
+# Install conda
+# ssl_verify is disabled because of "Could not find a suitable TLS CA certificate bundle,
+# invalid path" errors when the requested Python version is older than conda's latest
+###############################################################################
+echo 'Installing conda-forge'
+curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh
+chmod +x /mambaforge.sh
+/mambaforge.sh -b -p /opt/conda
+rm /mambaforge.sh
+/opt/conda/bin/conda config --set ssl_verify False
+/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf
+python --version
+conda --version
+
+###############################################################################
+# libgfortran.a hack
+#
+# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC.
+# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get
+# ubuntu's libgfortran.a which is compiled with -fPIC
+###############################################################################
+cd ~/
+curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.4.0-6ubuntu1_arm64.deb
+ar x ~/libgfortran-10-dev.deb
+tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/
+cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/
+
+###############################################################################
+# Run aarch64 builder python
+###############################################################################
+cd /
+# Register /pytorch as a git safe.directory, since the repo is mounted into the
+# container and carries the permissions of the mount
+git config --global --add safe.directory /pytorch
+python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
new file mode 100755
index 0000000000..c76f6d6474
--- /dev/null
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+
+import os
+import subprocess
+from typing import Dict, List, Optional, Tuple
+
+
+def list_dir(path: str) -> List[str]:
+    """Return the entries of ``path`` as printed by ``ls -1``.
+
+    The output is split on newlines, so the final element is an empty string."""
+    return subprocess.check_output(["ls", "-1", path]).decode().split("\n")
+
+
+def checkout_repo(branch: str = "main",
+                  url: str = "",
+                  git_clone_flags: str = "",
+                  mapping: Optional[Dict[str, Tuple[str, str]]] = None) -> Optional[str]:
+    """Clone ``url``, pinning a release tag when ``branch`` matches a key of ``mapping``.
+
+    ``mapping`` maps a branch prefix to ``(version, rc)``; the first key that ``branch``
+    starts with selects tag ``v{version}-{rc}`` and ``version`` is returned. Without a
+    match the repository's default branch is cloned and ``None`` is returned.
+    """
+    for prefix, (version, rc) in (mapping or {}).items():
+        if branch.startswith(prefix):
+            os.system(f"git clone {url} -b v{version}-{rc} {git_clone_flags}")
+            return version
+    os.system(f"git clone {url} {git_clone_flags}")
+    return None
+
+
+def build_OpenBLAS(git_clone_flags: str = "") -> None:
+    """Clone OpenBLAS v0.3.21 into /OpenBLAS, build and install it, then delete the checkout."""
+    print('Building OpenBLAS')
+    os.system(f"cd / && git clone https://github.com/xianyi/OpenBLAS -b v0.3.21 {git_clone_flags}")
+    make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8 "
+    # Absolute path and `&&`: each os.system() call is a fresh shell, and a failed step
+    # must not let later ones (e.g. `make install`) run in the wrong directory.
+    os.system(f"cd /OpenBLAS && make {make_flags} -j8 && make {make_flags} install && cd / && rm -rf /OpenBLAS")
+
+
+def build_ArmComputeLibrary(git_clone_flags: str = "") -> None:
+    """Clone Arm Compute Library v22.11 into /ComputeLibrary and build it with scons.
+
+    Build output goes to /acl/build; the arm_compute, include, utils, support and src
+    trees are then copied into /acl.
+    """
+    print('Building Arm Compute Library')
+    acl_dir = "/acl"
+    os.system(f"mkdir {acl_dir}")
+    os.system(f"cd / && git clone https://github.com/ARM-software/ComputeLibrary.git -b v22.11 {git_clone_flags}")
+    # One shell invocation chained with `&&` so a failed scons build stops the copies.
+    os.system(f"cd /ComputeLibrary && scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 "
+              f"arch=armv8.2-a multi_isa=1 build=native build_dir={acl_dir}/build && "
+              f"cp -r arm_compute include utils support src {acl_dir}")
+
+
+def embed_libgomp(wheel_name) -> None:
+    """Run embed_library.py with --update-tag on the given wheel (callers pass its full path)."""
+    print('Embedding libgomp into wheel')
+    embed_script = "/builder/aarch64_linux/embed_library.py"
+    embed_cmd = f"python3 {embed_script} {wheel_name} --update-tag"
+    os.system(embed_cmd)
+
+
+def build_torchvision(branch: str = "main",
+                      git_clone_flags: str = "") -> str:
+    """Check out TorchVision, build its wheel in /vision, embed libgomp and move the
+    wheel to /artifacts/. Returns the wheel's file name.
+    """
+    print('Checking out TorchVision repo')
+    build_version = checkout_repo(branch=branch,
+                                  url="https://github.com/pytorch/vision",
+                                  git_clone_flags=git_clone_flags,
+                                  mapping={
+                                      "v1.7.1": ("0.8.2", "rc2"),
+                                      "v1.8.0": ("0.9.0", "rc3"),
+                                      "v1.8.1": ("0.9.1", "rc1"),
+                                      "v1.9.0": ("0.10.0", "rc1"),
+                                      "v1.10.0": ("0.11.1", "rc1"),
+                                      "v1.10.1": ("0.11.2", "rc1"),
+                                      "v1.10.2": ("0.11.3", "rc1"),
+                                      "v1.11.0": ("0.12.0", "rc1"),
+                                      "v1.12.0": ("0.13.0", "rc4"),
+                                      "v1.12.1": ("0.13.1", "rc6"),
+                                      "v1.13.0": ("0.14.0", "rc4"),
+                                      "v1.13.1": ("0.14.1", "rc2"),
+                                      "v2.0.0": ("0.15.0", "rc2"),
+                                  })
+    print('Building TorchVision wheel')
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    if branch == 'nightly':
+        version = ''
+        if os.path.exists('/vision/version.txt'):
+            version = subprocess.check_output(['cat', '/vision/version.txt']).decode().strip()
+        if len(version) == 0:
+            # In older revisions, version was embedded in setup.py (absolute path: the cwd is not /vision)
+            version = subprocess.check_output(['grep', "version = '", '/vision/setup.py']).decode().strip().split('\'')[1][:-2]
+        build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/vision').decode().replace('-','')
+        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
+    elif build_version is not None:
+        build_vars += f"BUILD_VERSION={build_version}"
+
+    os.system(f"cd /vision; {build_vars} python3 setup.py bdist_wheel")
+    wheel_name = list_dir("/vision/dist")[0]
+    embed_libgomp(f"/vision/dist/{wheel_name}")
+
+    print('Move TorchVision wheel to artfacts')
+    os.system(f"mv /vision/dist/{wheel_name} /artifacts/")
+    return wheel_name
+
+
+def build_torchaudio(branch: str = "main",
+                     git_clone_flags: str = "") -> str:
+    """Check out TorchAudio (with submodules), build its wheel in /audio, embed libgomp
+    and move the wheel to /artifacts/. Returns the wheel's file name.
+    """
+    print('Checking out TorchAudio repo')
+    # Branch prefix -> (TorchAudio version, release-candidate suffix), consumed by checkout_repo
+    release_tags = {
+        "v1.9.0": ("0.9.0", "rc2"),
+        "v1.10.0": ("0.10.0", "rc5"),
+        "v1.10.1": ("0.10.1", "rc1"),
+        "v1.10.2": ("0.10.2", "rc1"),
+        "v1.11.0": ("0.11.0", "rc1"),
+        "v1.12.0": ("0.12.0", "rc3"),
+        "v1.12.1": ("0.12.1", "rc5"),
+        "v1.13.0": ("0.13.0", "rc4"),
+        "v1.13.1": ("0.13.1", "rc2"),
+        "v2.0.0": ("2.0.0", "rc2"),
+    }
+    pinned_version = checkout_repo(branch=branch, url="https://github.com/pytorch/audio", mapping=release_tags,
+                                   git_clone_flags=git_clone_flags + " --recurse-submodules")
+    print('Building TorchAudio wheel')
+    env_prefix = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    if branch == 'nightly':
+        # Nightly version string: contents of version.txt (if present) + ".dev" + last commit date
+        version_file = '/audio/version.txt'
+        base = ''
+        if os.path.exists(version_file):
+            base = subprocess.check_output(['cat', version_file]).decode().strip()
+        commit_day = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/audio').decode()
+        env_prefix += f"BUILD_VERSION={base}.dev{commit_day.replace('-','')}"
+    elif pinned_version is not None:
+        env_prefix += f"BUILD_VERSION={pinned_version}"
+    os.system(f"cd /audio; {env_prefix} python3 setup.py bdist_wheel")
+    built_wheel = list_dir("/audio/dist")[0]
+    embed_libgomp(f"/audio/dist/{built_wheel}")
+
+    print('Move TorchAudio wheel to artfacts')
+    os.system(f"mv /audio/dist/{built_wheel} /artifacts/")
+    return built_wheel
+
+
+def build_torchtext(branch: str = "main",
+                    git_clone_flags: str = "") -> str:
+    """Check out TorchText (with submodules), build its wheel in /text, embed libgomp
+    and move the wheel to /artifacts/. Returns the wheel's file name.
+    """
+    print('Checking out TorchText repo')
+    git_clone_flags += " --recurse-submodules"
+    build_version = checkout_repo(branch=branch,
+                                  url="https://github.com/pytorch/text",
+                                  git_clone_flags=git_clone_flags,
+                                  mapping={
+                                      "v1.9.0": ("0.10.0", "rc1"),
+                                      "v1.10.0": ("0.11.0", "rc2"),
+                                      "v1.10.1": ("0.11.1", "rc1"),
+                                      "v1.10.2": ("0.11.2", "rc1"),
+                                      "v1.11.0": ("0.12.0", "rc1"),
+                                      "v1.12.0": ("0.13.0", "rc2"),
+                                      "v1.12.1": ("0.13.1", "rc5"),
+                                      "v1.13.0": ("0.14.0", "rc3"),
+                                      "v1.13.1": ("0.14.1", "rc1"),
+                                      "v2.0.0": ("0.15.0", "rc2"),
+                                  })
+    print('Building TorchText wheel')
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    if branch == 'nightly':
+        version = ''
+        if os.path.exists('/text/version.txt'):
+            version = subprocess.check_output(['cat', '/text/version.txt']).decode().strip()
+        build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/text').decode().replace('-','')
+        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
+    elif build_version is not None:
+        build_vars += f"BUILD_VERSION={build_version}"
+
+    # Absolute path, like the other domain builds, so the build step does not depend on the cwd
+    os.system(f"cd /text; {build_vars} python3 setup.py bdist_wheel")
+    wheel_name = list_dir("/text/dist")[0]
+    embed_libgomp(f"/text/dist/{wheel_name}")
+
+    print('Move TorchText wheel to artfacts')
+    os.system(f"mv /text/dist/{wheel_name} /artifacts/")
+    return wheel_name
+
+
+def build_torchdata(branch: str = "main",
+                    git_clone_flags: str = "") -> str:
+    """Check out TorchData (with submodules), build its wheel in /data, embed libgomp
+    and move the wheel to /artifacts/. Returns the wheel's file name.
+    """
+    print('Checking out TorchData repo')
+    git_clone_flags += " --recurse-submodules"
+    build_version = checkout_repo(branch=branch,
+                                  url="https://github.com/pytorch/data",
+                                  git_clone_flags=git_clone_flags,
+                                  mapping={
+                                      "v1.11.0": ("0.3.0", "rc1"),
+                                      "v1.12.0": ("0.4.0", "rc3"),
+                                      "v1.12.1": ("0.4.1", "rc5"),
+                                      "v1.13.1": ("0.5.1", "rc2"),
+                                      "v2.0.0": ("0.6.0", "rc2"),
+                                  })
+    print('Building TorchData wheel')
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    if branch == 'nightly':
+        version = ''
+        if os.path.exists('/data/version.txt'):
+            version = subprocess.check_output(['cat', '/data/version.txt']).decode().strip()
+        build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/data').decode().replace('-','')
+        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
+    elif build_version is not None:
+        build_vars += f"BUILD_VERSION={build_version}"
+
+    os.system(f"cd /data; {build_vars} python3 setup.py bdist_wheel")
+    wheel_name = list_dir("/data/dist")[0]
+    embed_libgomp(f"/data/dist/{wheel_name}")
+
+    print('Move TorchData wheel to artfacts')
+    os.system(f"mv /data/dist/{wheel_name} /artifacts/")
+    return wheel_name
+
+
+def parse_arguments():
+    """Parse sys.argv into a Namespace with debug, build_only, test_only and enable_mkldnn."""
+    import argparse
+    cli = argparse.ArgumentParser("AARCH64 wheels python CD")
+    for flag, opts in (("--debug", {"action": "store_true"}), ("--build-only", {"action": "store_true"}),
+                       ("--test-only", {"type": str}), ("--enable-mkldnn", {"action": "store_true"})):
+        cli.add_argument(flag, **opts)
+    return cli.parse_args()
+
+
+if __name__ == '__main__':
+    # Entry point: build OpenBLAS (plus Arm Compute Library with --enable-mkldnn), then the
+    # PyTorch wheel from /pytorch and the vision/audio/text/data wheels, all moved to /artifacts.
+    args = parse_arguments()
+    enable_mkldnn = args.enable_mkldnn
+    # argv list + cwd + decode: a command string without shell=True is not runnable, a `cd`
+    # issued through os.system() does not persist, and check_output() returns bytes.
+    branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                                     cwd="/pytorch").decode().strip()
+
+    git_clone_flags = " --depth 1 --shallow-submodules"
+    os.system("conda install -y ninja scons")
+
+    print("Build and Install OpenBLAS")
+    build_OpenBLAS(git_clone_flags)
+
+    print('Building PyTorch wheel')
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    os.system("cd /pytorch && pip install -r requirements.txt")
+    os.system("pip install auditwheel")
+    os.system("cd /pytorch && python setup.py clean")
+
+    if branch == 'nightly' or branch == 'master':
+        build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/pytorch').decode().replace('-','')
+        version = subprocess.check_output(['cat','version.txt'], cwd='/pytorch').decode().strip()[:-2]
+        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
+    if branch.startswith("v1.") or branch.startswith("v2."):
+        # split('-') also copes with tags lacking an "-rcN" suffix (find('-') would return -1)
+        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:].split('-')[0]} PYTORCH_BUILD_NUMBER=1"
+    if enable_mkldnn:
+        build_ArmComputeLibrary(git_clone_flags)
+        print("build pytorch with mkldnn+acl backend")
+        # An `export` run via os.system() is lost when that shell exits, so the ACL settings
+        # are passed as environment assignments on the build command itself.
+        build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON ACL_ROOT_DIR=/acl ACL_LIBRARY=/acl/build" \
+                      " LD_LIBRARY_PATH=/acl/build:$LD_LIBRARY_PATH"
+        os.system(f"cd /pytorch && {build_vars} python3 setup.py bdist_wheel")
+        print('Repair the wheel')
+        pytorch_wheel_name = list_dir("/pytorch/dist")[0]
+        os.system("cd / && LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
+                  f"auditwheel repair /pytorch/dist/{pytorch_wheel_name}")
+        print('replace the original wheel with the repaired one')
+        pytorch_repaired_wheel_name = list_dir("/wheelhouse")[0]
+        os.system(f"cp /wheelhouse/{pytorch_repaired_wheel_name} /pytorch/dist/{pytorch_wheel_name}")
+    else:
+        print("build pytorch without mkldnn backend")
+        os.system(f"cd /pytorch && {build_vars} python3 setup.py bdist_wheel")
+
+    print("Deleting build folder")
+    os.system("cd /pytorch; rm -rf build")
+    pytorch_wheel_name = list_dir("/pytorch/dist")[0]
+    embed_libgomp(f"/pytorch/dist/{pytorch_wheel_name}")
+    print('Move PyTorch wheel to artfacts')
+    os.system(f"mv /pytorch/dist/{pytorch_wheel_name} /artifacts/")
+    print("Installing Pytorch wheel")
+    os.system(f"pip install /artifacts/{pytorch_wheel_name}")
+
+    vision_wheel_name = build_torchvision(branch=branch, git_clone_flags=git_clone_flags)
+    audio_wheel_name = build_torchaudio(branch=branch, git_clone_flags=git_clone_flags)
+    text_wheel_name = build_torchtext(branch=branch, git_clone_flags=git_clone_flags)
+    data_wheel_name = build_torchdata(branch=branch, git_clone_flags=git_clone_flags)
+
+    wheels = (pytorch_wheel_name, vision_wheel_name, audio_wheel_name, text_wheel_name, data_wheel_name)
+    print("Wheels Created:\n" + "".join(f"{wheel}\n" for wheel in wheels))
diff --git a/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py
similarity index 74%
rename from build_aarch64_wheel.py
rename to aarch64_linux/build_aarch64_wheel.py
index ee3fa54d10..f7b70208a2 100755
--- a/build_aarch64_wheel.py
+++ b/aarch64_linux/build_aarch64_wheel.py
@@ -4,7 +4,7 @@
 # To generate binaries for the release follow these steps:
 # 1. Update mappings for each of the Domain Libraries by adding new row to a table like this:  "v1.11.0": ("0.11.0", "rc1"),
 # 2. Run script with following arguments for each of the supported python versions and specify required RC tag for example: v1.11.0-rc3:
-# build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.7 --branch <RCtag>
+# build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>
 
 
 import boto3
@@ -15,11 +15,11 @@
 from typing import Dict, List, Optional, Tuple, Union
 
 
-
 # AMI images for us-east-1, change the following based on your ~/.aws/config
 os_amis = {
-    'ubuntu18_04': "ami-0f2b111fdc1647918",  # login_name: ubuntu
-    'ubuntu20_04': "ami-0ea142bd244023692",  # login_name: ubuntu
+    'ubuntu18_04': "ami-078eece1d8119409f",  # login_name: ubuntu
+    'ubuntu20_04': "ami-052eac90edaa9d08f",  # login_name: ubuntu
+    'ubuntu22_04': "ami-0c6c29c5125214c77",  # login_name: ubuntu
     'redhat8': "ami-0698b90665a2ddcf1",  # login_name: ec2-user
 }
 ubuntu18_04_ami = os_amis['ubuntu18_04']
@@ -128,7 +128,7 @@ def run_cmd(self, args: Union[str, List[str]]) -> None:
         assert self.container_id is not None
         docker_cmd = self._gen_ssh_prefix() + ['docker', 'exec', '-i', self.container_id, 'bash']
         p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE)
-        p.communicate(input=" ".join(["source .bashrc;"] + self._split_cmd(args)).encode("utf-8"))
+        p.communicate(input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode("utf-8"))
         rc = p.wait()
         if rc != 0:
             raise subprocess.CalledProcessError(rc, docker_cmd)
@@ -139,7 +139,7 @@ def check_output(self, args: Union[str, List[str]]) -> str:
         assert self.container_id is not None
         docker_cmd = self._gen_ssh_prefix() + ['docker', 'exec', '-i', self.container_id, 'bash']
         p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        (out, err) = p.communicate(input=" ".join(["source .bashrc;"] + self._split_cmd(args)).encode("utf-8"))
+        (out, err) = p.communicate(input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode("utf-8"))
         rc = p.wait()
         if rc != 0:
             raise subprocess.CalledProcessError(rc, docker_cmd, output=out, stderr=err)
@@ -211,8 +211,12 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None:
         # Python-3.6 EOLed and not compatible with conda-4.11
         install_condaforge(host, suffix="download/4.10.3-10/Miniforge3-4.10.3-10-Linux-aarch64.sh")
         host.run_cmd(f"conda install -y python={python_version} numpy pyyaml")
+    elif python_version == "3.11":
+        install_condaforge(host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh")
+        # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer
+        host.run_cmd(f"conda install -y python={python_version} numpy pyyaml setuptools=59.8.0 -c malfet")
     else:
-        install_condaforge(host)
+        install_condaforge(host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh")
         # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer
         host.run_cmd(f"conda install -y python={python_version} numpy pyyaml setuptools=59.5.0")
 
@@ -221,16 +225,16 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None:
     print('Building OpenBLAS')
     host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.19 {git_clone_flags}")
     make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8"
-    host.run_cmd(f"pushd OpenBLAS; make {make_flags} -j8; sudo make {make_flags} install; popd; rm -rf OpenBLAS")
+    host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS")
 
 
-def build_FFTW(host: RemoteHost, git_clone_flags: str = "") -> None:
-    print("Building FFTW3")
-    host.run_cmd("sudo apt-get install -y ocaml ocamlbuild autoconf automake indent libtool fig2dev texinfo")
-    # TODO: fix a version to build
-    # TODO: consider adding flags --host=arm-linux-gnueabi --enable-single --enable-neon CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp
-    host.run_cmd(f"git clone https://github.com/FFTW/fftw3 {git_clone_flags}")
-    host.run_cmd("pushd fftw3; sh bootstrap.sh; make -j8; sudo make install; popd")
+def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None:
+    """Clone Arm Compute Library v22.11 on ``host`` and build it with scons into ${HOME}/acl/build."""
+    print('Building Arm Compute Library')
+    install_root = "${HOME}/acl"
+    host.run_cmd(f"mkdir {install_root}")
+    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v22.11 {git_clone_flags}")
+    host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir={install_root}/build")
 
 
 def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None:
@@ -250,7 +254,7 @@ def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None:
 
 
 def checkout_repo(host: RemoteHost, *,
-                  branch: str = "master",
+                  branch: str = "main",
                   url: str,
                   git_clone_flags: str,
                   mapping: Dict[str, Tuple[str, str]]) -> Optional[str]:
@@ -261,14 +265,19 @@ def checkout_repo(host: RemoteHost, *,
         host.run_cmd(f"git clone {url} -b {tag} {git_clone_flags}")
         return mapping[prefix][0]
 
-    host.run_cmd(f"git clone {url} {git_clone_flags}")
+    # Map master to main
+    if branch == "master" and url.rsplit("/")[-1] in ['vision', 'text', 'audio', 'data']:
+        branch = "main"
+
+    host.run_cmd(f"git clone {url} -b {branch} {git_clone_flags}")
     return None
 
 
 def build_torchvision(host: RemoteHost, *,
-                      branch: str = "master",
+                      branch: str = "main",
                       use_conda: bool = True,
-                      git_clone_flags: str) -> str:
+                      git_clone_flags: str,
+                      run_smoke_tests: bool = True) -> str:
     print('Checking out TorchVision repo')
     build_version = checkout_repo(host,
                                   branch=branch,
@@ -284,33 +293,84 @@ def build_torchvision(host: RemoteHost, *,
                                       "v1.10.2": ("0.11.3", "rc1"),
                                       "v1.11.0": ("0.12.0", "rc1"),
                                       "v1.12.0": ("0.13.0", "rc4"),
+                                      "v1.12.1": ("0.13.1", "rc6"),
+                                      "v1.13.0": ("0.14.0", "rc4"),
+                                      "v1.13.1": ("0.14.1", "rc2"),
+                                      "v2.0.0": ("0.15.1", "rc2"),
                                   })
-    print('Building TorchVision wheel')
+    print("Building TorchVision wheel")
+
+    # Please note libpng and jpeg are required to build image.so extension
+    if use_conda:
+        host.run_cmd("conda install -y libpng jpeg")
+        # Remove .so files to force static linking
+        host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so")
+        # And patch setup.py to include libz dependency for libpng
+        host.run_cmd(['sed -i -e \'s/image_link_flags\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py'])
+
     build_vars = ""
-    if branch == 'nightly':
+    if branch == "nightly":
         version = host.check_output(["if [ -f vision/version.txt ]; then cat vision/version.txt; fi"]).strip()
         if len(version) == 0:
             # In older revisions, version was embedded in setup.py
             version = host.check_output(["grep", "\"version = '\"", "vision/setup.py"]).strip().split("'")[1][:-2]
-        build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
+        build_date = host.check_output("cd vision && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
         build_vars += f"BUILD_VERSION={version}.dev{build_date}"
     elif build_version is not None:
-        build_vars += f"BUILD_VERSION={build_version}"
+        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd vision; {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel")
     vision_wheel_name = host.list_dir("vision/dist")[0]
     embed_libgomp(host, use_conda, os.path.join('vision', 'dist', vision_wheel_name))
 
     print('Copying TorchVision wheel')
     host.download_wheel(os.path.join('vision', 'dist', vision_wheel_name))
+    if run_smoke_tests:
+        host.run_cmd(f"pip3 install {os.path.join('vision', 'dist', vision_wheel_name)}")
+        host.run_cmd("python3 vision/test/smoke_test.py")
     print("Delete vision checkout")
     host.run_cmd("rm -rf vision")
 
     return vision_wheel_name
 
 
+def build_torchdata(host: RemoteHost, *,
+                    branch: str = "master",
+                    use_conda: bool = True,
+                    git_clone_flags: str = "") -> str:
+    """Build the TorchData wheel on ``host``, embed libgomp, download it and return its file name."""
+    print('Checking out TorchData repo')
+    # Release table handed to checkout_repo as its ``mapping`` argument
+    release_tags = {
+        "v1.13.1": ("0.5.1", ""),
+        "v2.0.0": ("0.6.0", "rc5"),
+    }
+    pinned_version = checkout_repo(host,
+                                   branch=branch,
+                                   url="https://github.com/pytorch/data",
+                                   git_clone_flags=git_clone_flags + " --recurse-submodules",
+                                   mapping=release_tags)
+    print('Building TorchData wheel')
+    env_parts = []
+    if branch == 'nightly':
+        base = host.check_output(["if [ -f data/version.txt ]; then cat data/version.txt; fi"]).strip()
+        stamp = host.check_output("cd data && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
+        env_parts.append(f"BUILD_VERSION={base}.dev{stamp}")
+    elif pinned_version is not None:
+        env_parts.append(f"BUILD_VERSION={pinned_version} PYTORCH_VERSION={branch[1:].split('-')[0]}")
+    if host.using_docker():
+        env_parts.append(" CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000")
+    host.run_cmd(f"cd data && {''.join(env_parts)} python3 setup.py bdist_wheel")
+    wheel_name = host.list_dir("data/dist")[0]
+    wheel_path = os.path.join('data', 'dist', wheel_name)
+    embed_libgomp(host, use_conda, wheel_path)
+    print('Copying TorchData wheel')
+    host.download_wheel(wheel_path)
+    return wheel_name
+
+
 def build_torchtext(host: RemoteHost, *,
                     branch: str = "master",
                     use_conda: bool = True,
@@ -328,19 +388,23 @@ def build_torchtext(host: RemoteHost, *,
                                       "v1.10.2": ("0.11.2", "rc1"),
                                       "v1.11.0": ("0.12.0", "rc1"),
                                       "v1.12.0": ("0.13.0", "rc2"),
+                                      "v1.12.1": ("0.13.1", "rc5"),
+                                      "v1.13.0": ("0.14.0", "rc3"),
+                                      "v1.13.1": ("0.14.1", "rc1"),
+                                      "v2.0.0": ("0.15.1", "rc2"),
                                   })
     print('Building TorchText wheel')
     build_vars = ""
     if branch == 'nightly':
         version = host.check_output(["if [ -f text/version.txt ]; then cat text/version.txt; fi"]).strip()
-        build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
+        build_date = host.check_output("cd text && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
         build_vars += f"BUILD_VERSION={version}.dev{build_date}"
     elif build_version is not None:
-        build_vars += f"BUILD_VERSION={build_version}"
+        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd text; {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel")
     wheel_name = host.list_dir("text/dist")[0]
     embed_libgomp(host, use_conda, os.path.join('text', 'dist', wheel_name))
 
@@ -367,19 +431,23 @@ def build_torchaudio(host: RemoteHost, *,
                                       "v1.10.2": ("0.10.2", "rc1"),
                                       "v1.11.0": ("0.11.0", "rc1"),
                                       "v1.12.0": ("0.12.0", "rc3"),
+                                      "v1.12.1": ("0.12.1", "rc5"),
+                                      "v1.13.0": ("0.13.0", "rc4"),
+                                      "v1.13.1": ("0.13.1", "rc2"),
+                                      "v2.0.0": ("2.0.1", "rc3"),
                                   })
     print('Building TorchAudio wheel')
     build_vars = ""
     if branch == 'nightly':
         version = host.check_output(["grep", "\"version = '\"", "audio/setup.py"]).strip().split("'")[1][:-2]
-        build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
+        build_date = host.check_output("cd audio && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
         build_vars += f"BUILD_VERSION={version}.dev{build_date}"
     elif build_version is not None:
-        build_vars += f"BUILD_VERSION={build_version}"
+        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd audio; {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd audio && {build_vars} python3 setup.py bdist_wheel")
     wheel_name = host.list_dir("audio/dist")[0]
     embed_libgomp(host, use_conda, os.path.join('audio', 'dist', wheel_name))
 
@@ -390,9 +458,9 @@ def build_torchaudio(host: RemoteHost, *,
 
 
 def configure_system(host: RemoteHost, *,
-                     compiler="gcc-8",
-                     use_conda=True,
-                     python_version="3.8") -> None:
+                     compiler: str = "gcc-8",
+                     use_conda: bool = True,
+                     python_version: str = "3.8") -> None:
     if use_conda:
         install_condaforge_python(host, python_version)
 
@@ -402,7 +470,7 @@ def configure_system(host: RemoteHost, *,
         host.run_cmd("sudo apt-get install -y ninja-build g++ git cmake gfortran unzip")
     else:
         host.run_cmd("yum install -y sudo")
-        host.run_cmd("conda install -y ninja")
+        host.run_cmd("conda install -y ninja scons")
 
     if not use_conda:
         host.run_cmd("sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip")
@@ -419,23 +487,39 @@ def configure_system(host: RemoteHost, *,
         host.run_cmd("sudo pip3 install numpy")
 
 
+def build_domains(host: RemoteHost, *,
+                  branch: str = "master",
+                  use_conda: bool = True,
+                  git_clone_flags: str = "") -> Tuple[str, str, str, str]:
+    """Build the vision, audio, data and text wheels (in that order) and return their names."""
+    common = dict(branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags)
+    domain_builders = (build_torchvision, build_torchaudio, build_torchdata, build_torchtext)
+    vision_whl, audio_whl, data_whl, text_whl = (build(host, **common) for build in domain_builders)
+    return (vision_whl, audio_whl, data_whl, text_whl)
+
+
 def start_build(host: RemoteHost, *,
-                branch="master",
-                compiler="gcc-8",
-                use_conda=True,
-                python_version="3.8",
-                shallow_clone=True) -> Tuple[str, str]:
+                branch: str = "master",
+                compiler: str = "gcc-8",
+                use_conda: bool = True,
+                python_version: str = "3.8",
+                pytorch_only: bool = False,
+                pytorch_build_number: Optional[str] = None,
+                shallow_clone: bool = True,
+                enable_mkldnn: bool = False) -> Tuple[str, str, str, str, str]:
     git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else ""
     if host.using_docker() and not use_conda:
         print("Auto-selecting conda option for docker images")
         use_conda = True
+    if not host.using_docker():
+        print("Disable mkldnn for host builds")
+        enable_mkldnn = False
 
     configure_system(host,
                      compiler=compiler,
                      use_conda=use_conda,
                      python_version=python_version)
     build_OpenBLAS(host, git_clone_flags)
-    # build_FFTW(host, git_clone_flags)
 
     if host.using_docker():
         print("Move libgfortant.a into a standard location")
@@ -452,19 +536,36 @@ def start_build(host: RemoteHost, *,
     host.run_cmd(f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}")
 
     print('Building PyTorch wheel')
+    build_opts = ""
+    if pytorch_build_number is not None:
+        build_opts += f" --build-number {pytorch_build_number}"
     # Breakpad build fails on aarch64
     build_vars = "USE_BREAKPAD=0 "
     if branch == 'nightly':
-        build_date = host.check_output("cd pytorch ; git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
+        build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
         version = host.check_output("cat pytorch/version.txt").strip()[:-2]
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
-    if branch.startswith("v1."):
+    if branch.startswith("v1.") or branch.startswith("v2."):
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
-    host.run_cmd(f"cd pytorch ; {build_vars} python3 setup.py bdist_wheel")
+    if enable_mkldnn:
+        build_ArmComputeLibrary(host, git_clone_flags)
+        print("build pytorch with mkldnn+acl backend")
+        build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
+        host.run_cmd(f"cd pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary:$HOME/acl && {build_vars} python3 setup.py bdist_wheel{build_opts}")
+        print('Repair the wheel')
+        pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
+        host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}")
+        print('replace the original wheel with the repaired one')
+        pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0]
+        host.run_cmd(f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}")
+    else:
+        print("build pytorch without mkldnn backend")
+        host.run_cmd(f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}")
+
     print("Deleting build folder")
-    host.run_cmd("cd pytorch; rm -rf build")
+    host.run_cmd("cd pytorch && rm -rf build")
     pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
     embed_libgomp(host, use_conda, os.path.join('pytorch', 'dist', pytorch_wheel_name))
     print('Copying the wheel')
@@ -473,11 +574,11 @@ def start_build(host: RemoteHost, *,
     print('Installing PyTorch wheel')
     host.run_cmd(f"pip3 install pytorch/dist/{pytorch_wheel_name}")
 
-    vision_wheel_name = build_torchvision(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags)
-    build_torchaudio(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags)
-    build_torchtext(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags)
+    if pytorch_only:
+        return (pytorch_wheel_name, None, None, None, None)
+    domain_wheels = build_domains(host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags)
 
-    return pytorch_wheel_name, vision_wheel_name
+    return (pytorch_wheel_name, *domain_wheels)
 
 
 embed_library_script = """
@@ -602,10 +703,11 @@ def parse_arguments():
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--build-only", action="store_true")
     parser.add_argument("--test-only", type=str)
-    parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu18_04')
-    parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10'], default=None)
+    parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu20_04')
+    parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], default=None)
     parser.add_argument("--alloc-instance", action="store_true")
     parser.add_argument("--list-instances", action="store_true")
+    parser.add_argument("--pytorch-only", action="store_true")
     parser.add_argument("--keep-running", action="store_true")
     parser.add_argument("--terminate-instances", action="store_true")
     parser.add_argument("--instance-type", type=str, default="t4g.2xlarge")
@@ -613,6 +715,8 @@ def parse_arguments():
     parser.add_argument("--use-docker", action="store_true")
     parser.add_argument("--compiler", type=str, choices=['gcc-7', 'gcc-8', 'gcc-9', 'clang'], default="gcc-8")
     parser.add_argument("--use-torch-from-pypi", action="store_true")
+    parser.add_argument("--pytorch-build-number", type=str, default=None)
+    parser.add_argument("--disable-mkldnn", action="store_true")
     return parser.parse_args()
 
 
@@ -639,7 +743,7 @@ def parse_arguments():
             check `~/.ssh/` folder or manually set SSH_KEY_PATH environment variable.""")
 
     # Starting the instance
-    inst = start_instance(key_name, ami=ami)
+    inst = start_instance(key_name, ami=ami, instance_type=args.instance_type)
     instance_name = f'{args.key_name}-{args.os}'
     if args.python_version is not None:
         instance_name += f'-py{args.python_version}'
@@ -673,14 +777,17 @@ def parse_arguments():
                          python_version=python_version)
         print("Installing PyTorch wheel")
         host.run_cmd("pip3 install torch")
-        build_torchvision(host,
-                          branch=args.branch,
-                          git_clone_flags=" --depth 1 --shallow-submodules")
+        build_domains(host,
+                      branch=args.branch,
+                      git_clone_flags=" --depth 1 --shallow-submodules")
     else:
         start_build(host,
                     branch=args.branch,
                     compiler=args.compiler,
-                    python_version=python_version)
+                    python_version=python_version,
+                    pytorch_only=args.pytorch_only,
+                    pytorch_build_number=args.pytorch_build_number,
+                    enable_mkldnn=not args.disable_mkldnn)
     if not args.keep_running:
         print(f'Waiting for instance {inst.id} to terminate')
         inst.terminate()
diff --git a/aarch64_linux/embed_library.py b/aarch64_linux/embed_library.py
new file mode 100644
index 0000000000..978970d45f
--- /dev/null
+++ b/aarch64_linux/embed_library.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+from auditwheel.patcher import Patchelf
+from auditwheel.wheeltools import InWheelCtx
+from auditwheel.elfutils import elf_file_filter
+from auditwheel.repair import copylib
+from auditwheel.lddtree import lddtree
+from subprocess import check_call
+import os
+import shutil
+import sys
+from tempfile import TemporaryDirectory
+
+
+def replace_tag(filename):  # Rewrite every "Tag: " line of the given file in place, turning -linux_ into -manylinux2014_
+    with open(filename, 'r') as f:
+        lines = f.read().split("\n")  # split on real newlines; "\\n" here would be a literal backslash followed by n
+    for i, line in enumerate(lines):
+        if not line.startswith("Tag: "):
+            continue
+        lines[i] = line.replace("-linux_", "-manylinux2014_")
+        print(f'Updated tag from {line} to {lines[i]}')
+
+    with open(filename, 'w') as f:
+        f.write("\n".join(lines))
+
+
+class AlignedPatchelf(Patchelf):  # Patchelf subclass whose two overrides always pass --page-size 65536 to the patchelf CLI
+    def set_soname(self, file_name: str, new_soname: str) -> None:  # runs `patchelf --set-soname`; check_call raises on a non-zero exit
+        check_call(['patchelf', '--page-size', '65536', '--set-soname', new_soname, file_name])
+
+    def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None:  # runs `patchelf --replace-needed soname new_soname`
+        check_call(['patchelf', '--page-size', '65536', '--replace-needed', soname, new_soname, file_name])
+
+
+def embed_library(whl_path, lib_soname, update_tag=False):  # Embed lib_soname into the wheel at whl_path (replaced in place); optionally retag WHEEL
+    patcher = AlignedPatchelf()
+    out_dir = TemporaryDirectory()
+    whl_name = os.path.basename(whl_path)
+    tmp_whl_name = os.path.join(out_dir.name, whl_name)
+    with InWheelCtx(whl_path) as ctx:
+        torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib')  # NOTE(review): relies on the private _tmpdir attribute of InWheelCtx
+        ctx.out_wheel=tmp_whl_name  # presumably tells InWheelCtx where to write the repacked wheel on exit - confirm against auditwheel
+        new_lib_path, new_lib_soname = None, None
+        for filename, elf in elf_file_filter(ctx.iter_files()):
+            if not filename.startswith('torch/lib'):
+                continue
+            libtree = lddtree(filename)
+            if lib_soname not in libtree['needed']:
+                continue
+            lib_path = libtree['libs'][lib_soname]['path']
+            if lib_path is None:
+                print(f"Can't embed {lib_soname} as it could not be found")
+                break
+            if lib_path.startswith(torchlib_path):  # already resolved from inside the unpacked wheel, nothing to embed
+                continue
+
+            if new_lib_path is None:  # call copylib only once, for the first file that needs the library
+                new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher)
+            patcher.replace_needed(filename, lib_soname, new_lib_soname)
+            print(f'Replacing {lib_soname} with {new_lib_soname} for {filename}')
+        if update_tag:
+            # Add manylinux2014 tag
+            for filename in ctx.iter_files():
+                if os.path.basename(filename) != 'WHEEL':
+                    continue
+                replace_tag(filename)
+    shutil.move(tmp_whl_name, whl_path)  # overwrite the input wheel with the file written to the temporary directory
+
+
+if __name__ == '__main__':
+    embed_library(sys.argv[1], 'libgomp.so.1', len(sys.argv) > 2 and sys.argv[2] == '--update-tag')
diff --git a/analytics/github_analyze.py b/analytics/github_analyze.py
index c255c6c8f1..47330208c2 100755
--- a/analytics/github_analyze.py
+++ b/analytics/github_analyze.py
@@ -161,9 +161,12 @@ def __init__(self, path, remote='upstream'):
         self.repo_dir = path
         self.remote = remote
 
+    def _run_git_cmd(self, *args) -> str:  # run `git -C <repo_dir> <args...>` through _check_output and return its result
+        return _check_output(['git', '-C', self.repo_dir] + list(args))
+
     def _run_git_log(self, revision_range) -> List[GitCommit]:
-        log = _check_output(['git', '-C', self.repo_dir, 'log',
-                             '--format=fuller', '--date=unix', revision_range, '--', '.']).split("\n")
+        log = self._run_git_cmd('log', '--format=fuller',
+                             '--date=unix', revision_range, '--', '.').split("\n")
         rc: List[GitCommit] = []
         cur_msg: List[str] = []
         for line in log:
@@ -179,6 +182,18 @@ def _run_git_log(self, revision_range) -> List[GitCommit]:
     def get_commit_list(self, from_ref, to_ref) -> List[GitCommit]:
         return self._run_git_log(f"{self.remote}/{from_ref}..{self.remote}/{to_ref}")
 
+    def get_ghstack_orig_branches(self) -> List[str]:  # remote branches matching <remote>/gh/*/orig; [] (not [""]) when git prints nothing
+        return [x.strip() for x in self._run_git_cmd("branch", "--remotes", "--list", self.remote + "/gh/*/orig").splitlines() if x.strip()]
+
+    def show_ref(self, ref) -> str:  # first space-separated field of the `git show-ref <ref>` output
+        return self._run_git_cmd("show-ref", ref).split(" ")[0]
+
+    def merge_base(self, ref1, ref2) -> str:  # whitespace-stripped output of `git merge-base ref1 ref2`
+        return self._run_git_cmd("merge-base", ref1, ref2).strip()
+
+    def rev_list(self, ref) -> List[str]:  # whitespace-split output of `git rev-list <remote>/master..<ref>`; the base branch is hard-coded
+        return self._run_git_cmd("rev-list", f"{self.remote}/master..{ref}").strip().split()
+
 
 def build_commit_dict(commits: List[GitCommit]) -> Dict[str, GitCommit]:
     rc = {}
@@ -358,6 +373,22 @@ def get_commits_dict(x, y):
         print(f'{html_url};{issue["title"]};{state}')
 
 
+def analyze_stacks(repo: GitRepo) -> None:
+    # Print, per author, how many ghstack "orig" branches they have and how many commits each is ahead of master.
+    from tqdm.contrib.concurrent import thread_map
+    branches = [b for b in repo.get_ghstack_orig_branches() if b]  # drop blank names so the split below cannot fail
+    stacks_by_author: Dict[str, List[int]] = {}
+    for branch, rv_commits in thread_map(lambda x: (x, repo.rev_list(x)), branches, max_workers=10):
+        author = branch.split("/")[2]  # third path component of the branch name is used as the author key
+        stacks_by_author.setdefault(author, []).append(len(rv_commits))
+    for author, slen in sorted(stacks_by_author.items(), key=lambda x: len(x[1]), reverse=True):
+        if len(slen) == 1:
+            print(f"{author} has 1 stack of depth {slen[0]}")
+            continue
+        # Sort before taking the middle element: slen is in branch order, so the raw middle entry is not a median.
+        print(f"{author} has {len(slen)} stacks max depth is {max(slen)} avg depth is {sum(slen)/len(slen):.2f} median is {sorted(slen)[len(slen)//2]}")
+
+
 def parse_arguments():
     from argparse import ArgumentParser
     parser = ArgumentParser(description="Print GitHub repo stats")
@@ -375,6 +406,7 @@ def parse_arguments():
     parser.add_argument("--print-reverts", action="store_true")
     parser.add_argument("--contributor-stats", action="store_true")
     parser.add_argument("--missing-in-branch", action="store_true")
+    parser.add_argument("--analyze-stacks", action="store_true")
     return parser.parse_args()
 
 
@@ -392,6 +424,10 @@ def main():
 
     repo = GitRepo(args.repo_path, remote)
 
+    if args.analyze_stacks:
+        analyze_stacks(repo)
+        return
+
     if args.missing_in_branch:
         # Use milestone idx or search it along milestone titles
         try:
diff --git a/analytics/validate_binaries.py b/analytics/validate_binaries.py
index c3fd4ff2bf..65965c59ad 100644
--- a/analytics/validate_binaries.py
+++ b/analytics/validate_binaries.py
@@ -6,10 +6,10 @@
 PLATFORMS = ["osx-64", "linux-64", "win-64"]
 PYTHON_VERSIONS = ["3.10", "3.9", "3.8", "3.7"]
 CUDA_CUDNN_VERSION = [
-    ("11.5", "8.3.2"), ("11.3", "8.2.0"), ("11.1", "8.0.5"), ("10.2", "7.6.5"), ("cpu", None)
+    ("11.7", "8.5.0"), ("cpu", None)
 ]
 CHANNEL = "pytorch-test"
-VERSION = "1.11.*"
+VERSION = "1.13.*"
 
 
 def generate_expected_builds(platform: str) -> set:
@@ -22,9 +22,6 @@ def generate_expected_builds(platform: str) -> set:
 
         for cuda_version, cudnn_version in CUDA_CUDNN_VERSION:
             if platform == "win-64":
-                if cuda_version == "10.2":
-                    # win does not support cuda 10.2
-                    continue
                 cudnn_version = "8"
 
             if cuda_version == "cpu":
diff --git a/check_binary.sh b/check_binary.sh
index 153fca7451..80dd1e5cac 100755
--- a/check_binary.sh
+++ b/check_binary.sh
@@ -22,6 +22,19 @@ set -eux -o pipefail
 # libtorch package.
 
 
+if [[ -z ${DESIRED_PYTHON:-} ]]; then  # each block: when the variable is unset or empty, export it from its MATRIX_* counterpart
+  export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-}  # the ':-' forms expand to empty instead of tripping `set -u`
+fi
+if [[ -z ${DESIRED_CUDA:-} ]]; then
+  export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-}
+fi
+if [[ -z ${DESIRED_DEVTOOLSET:-} ]]; then
+  export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-}
+fi
+if [[ -z ${PACKAGE_TYPE:-} ]]; then
+  export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-}
+fi
+
 # The install root depends on both the package type and the os
 # All MacOS packages use conda, even for the wheel packages.
 if [[ "$PACKAGE_TYPE" == libtorch ]]; then
@@ -38,7 +51,7 @@ else
   install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
 fi
 
-if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
+if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* ]]; then
   # cu90, cu92, cu100, cu101
   if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
     CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
@@ -328,7 +341,7 @@ fi
 if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
   echo "Checking that MKL is available"
   build_and_run_example_cpp check-torch-mkl
-else
+elif [[ "$(uname -m)" != "arm64" ]]; then
   if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
     echo "Checking that MKL is available"
     pushd /tmp
@@ -366,7 +379,7 @@ if [[ "$OSTYPE" == "msys" ]]; then
 fi
 
 # Test that CUDA builds are setup correctly
-if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
+if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* ]]; then
   if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
     build_and_run_example_cpp check-torch-cuda
   else
@@ -392,6 +405,9 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
     echo "Checking that basic CNN works"
     python ${TEST_CODE_DIR}/cnn_smoke.py
 
+    echo "Test that linalg works"
+    python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))"
+
     popd
   fi # if libtorch
 fi # if cuda
@@ -418,8 +434,8 @@ fi
 ###############################################################################
 # Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries
 ###############################################################################
-if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel') ]]; then
+if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel')]]; then
   pushd /tmp
-  python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1)"
+  python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))"
   popd
 fi
diff --git a/common/install_conda.sh b/common/install_conda.sh
index 43dd193972..bd06075257 100644
--- a/common/install_conda.sh
+++ b/common/install_conda.sh
@@ -5,8 +5,11 @@ set -ex
 # Anaconda
 wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
 chmod +x  Miniconda3-latest-Linux-x86_64.sh
-bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda
+# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
+bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda
 rm Miniconda3-latest-Linux-x86_64.sh
 export PATH=/opt/conda/bin:$PATH
-conda install -y conda-build anaconda-client git ninja
+# The cmake version here needs to match with the minimum version of cmake
+# supported by PyTorch (3.18). There is only 3.18.2 on anaconda
+conda install -y conda-build anaconda-client git ninja cmake=3.18.2
 conda remove -y --force patchelf
diff --git a/common/install_cpython.sh b/common/install_cpython.sh
index f393de2025..b06fe27c16 100755
--- a/common/install_cpython.sh
+++ b/common/install_cpython.sh
@@ -14,37 +14,35 @@ function check_var {
     fi
 }
 
-function lex_pyver {
-    # Echoes Python version string padded with zeros
-    # Thus:
-    # 3.2.1 -> 003002001
-    # 3     -> 003000000
-    echo $1 | awk -F "." '{printf "%03d%03d%03d", $1, $2, $3}'
-}
-
 function do_cpython_build {
     local py_ver=$1
     check_var $py_ver
-    local ucs_setting=$2
-    check_var $ucs_setting
     tar -xzf Python-$py_ver.tgz
     pushd Python-$py_ver
-    if [ "$ucs_setting" = "none" ]; then
-        unicode_flags=""
-        dir_suffix=""
+
+    local prefix="/opt/_internal/cpython-${py_ver}"
+    mkdir -p ${prefix}/lib
+    if [[ -n $(which patchelf) ]]; then
+        local shared_flags="--enable-shared"
     else
-        local unicode_flags="--enable-unicode=$ucs_setting"
-        local dir_suffix="-$ucs_setting"
+        local shared_flags="--disable-shared"
+    fi
+    if [[ -z  "${WITH_OPENSSL+x}" ]]; then
+        local openssl_flags=""
+    else
+        local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
     fi
-    local prefix="/opt/_internal/cpython-${py_ver}${dir_suffix}"
-    mkdir -p ${prefix}/lib
 
     # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
-    CFLAGS="-Wformat" ./configure --prefix=${prefix} --disable-shared $unicode_flags > /dev/null
+    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null
 
     make -j40 > /dev/null
     make install > /dev/null
 
+    if [[ "${shared_flags}" == "--enable-shared" ]]; then
+        patchelf --set-rpath '$ORIGIN/../lib' ${prefix}/bin/python3
+    fi
+
     popd
     rm -rf Python-$py_ver
     # Some python's install as bin/python3. Make them available as
@@ -61,27 +59,16 @@ function do_cpython_build {
     ln -s ${prefix} /opt/python/${abi_tag}
 }
 
-
 function build_cpython {
     local py_ver=$1
     check_var $py_ver
     check_var $PYTHON_DOWNLOAD_URL
     local py_ver_folder=$py_ver
-    # Only beta version of 3.11 is available right now
-    if [ "$py_ver" = "3.11.0" ]; then
-        py_ver=$py_ver"b1"
-    fi
     wget -q $PYTHON_DOWNLOAD_URL/$py_ver_folder/Python-$py_ver.tgz
-    if [ $(lex_pyver $py_ver) -lt $(lex_pyver 3.3) ]; then
-        do_cpython_build $py_ver ucs2
-        do_cpython_build $py_ver ucs4
-    else
-        do_cpython_build $py_ver none
-    fi
+    do_cpython_build $py_ver none  # NOTE(review): 'none' was the old ucs_setting argument; the visible part of do_cpython_build no longer reads $2 - confirm and drop
     rm -f Python-$py_ver.tgz
 }
 
-
 function build_cpythons {
     check_var $GET_PIP_URL
     curl -sLO $GET_PIP_URL
@@ -91,7 +78,6 @@ function build_cpythons {
     rm -f get-pip.py
 }
 
-
 mkdir -p /opt/python
 mkdir -p /opt/_internal
 build_cpythons $CPYTHON_VERSIONS
diff --git a/common/install_cuda.sh b/common/install_cuda.sh
index 77d1900113..359df5b3bb 100644
--- a/common/install_cuda.sh
+++ b/common/install_cuda.sh
@@ -2,80 +2,6 @@
 
 set -ex
 
-function install_102 {
-    echo "Installing CUDA 10.2 and CuDNN"
-    rm -rf /usr/local/cuda-10.2 /usr/local/cuda
-    # # install CUDA 10.2 in the same container
-    wget -q http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run
-    chmod +x cuda_10.2.89_440.33.01_linux.run
-    ./cuda_10.2.89_440.33.01_linux.run    --extract=/tmp/cuda
-    rm -f cuda_10.2.89_440.33.01_linux.run
-    mv /tmp/cuda/cuda-toolkit /usr/local/cuda-10.2
-    rm -rf /tmp/cuda
-    rm -f /usr/local/cuda && ln -s /usr/local/cuda-10.2 /usr/local/cuda
-
-    # install CUDA 10.2 CuDNN
-    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-    mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.6.5.32-1+cuda10.2_amd64.deb -O cudnn-dev.deb
-    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.6.5.32-1+cuda10.2_amd64.deb -O cudnn.deb
-    ar -x cudnn-dev.deb && tar -xvf data.tar.xz
-    ar -x cudnn.deb && tar -xvf data.tar.xz
-    mkdir -p cuda/include && mkdir -p cuda/lib64
-    cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h
-    cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64
-    mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a
-    ln -s libcudnn.so.7 cuda/lib64/libcudnn.so
-    chmod +x cuda/lib64/*.so
-    cp -a cuda/include/* /usr/local/cuda/include/
-    cp -a cuda/lib64/* /usr/local/cuda/lib64/
-    cd ..
-    rm -rf tmp_cudnn
-    ldconfig
-}
-
-function install_113 {
-    echo "Installing CUDA 11.3 and CuDNN 8.3"
-    rm -rf /usr/local/cuda-11.3 /usr/local/cuda
-    # install CUDA 11.3.1 in the same container
-    wget -q https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
-    chmod +x cuda_11.3.1_465.19.01_linux.run
-    ./cuda_11.3.1_465.19.01_linux.run --toolkit --silent
-    rm -f cuda_11.3.1_465.19.01_linux.run
-    rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.3 /usr/local/cuda
-
-    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-    mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz -O cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/lib/* /usr/local/cuda/lib64/
-    cd ..
-    rm -rf tmp_cudnn
-    ldconfig
-}
-
-function install_115 {
-    echo "Installing CUDA 11.5 and CuDNN 8.3"
-    rm -rf /usr/local/cuda-11.5 /usr/local/cuda
-    # install CUDA 11.5.0 in the same container
-    wget -q https://developer.download.nvidia.com/compute/cuda/11.5.0/local_installers/cuda_11.5.0_495.29.05_linux.run
-    chmod +x cuda_11.5.0_495.29.05_linux.run
-    ./cuda_11.5.0_495.29.05_linux.run --toolkit --silent
-    rm -f cuda_11.5.0_495.29.05_linux.run
-    rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.5 /usr/local/cuda
-
-    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-    mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz -O cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/lib/* /usr/local/cuda/lib64/
-    cd ..
-    rm -rf tmp_cudnn
-    ldconfig
-}
-
 function install_116 {
     echo "Installing CUDA 11.6 and CuDNN 8.3"
     rm -rf /usr/local/cuda-11.6 /usr/local/cuda
@@ -98,7 +24,7 @@ function install_116 {
 }
 
 function install_117 {
-    echo "Installing CUDA 11.7 and CuDNN 8.3"
+    echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14"
     rm -rf /usr/local/cuda-11.7 /usr/local/cuda
     # install CUDA 11.7.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
@@ -116,79 +42,56 @@ function install_117 {
     cd ..
     rm -rf tmp_cudnn
     ldconfig
-}
-
-function prune_102 {
-    echo "Pruning CUDA 10.2 and CuDNN"
-    #####################################################################################
-    # CUDA 10.2 prune static libs
-    #####################################################################################
-    export NVPRUNE="/usr/local/cuda-10.2/bin/nvprune"
-    export CUDA_LIB_DIR="/usr/local/cuda-10.2/lib64"
-
-    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75"
-    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75"
-
-    if [[ -n "$OVERRIDE_GENCODE" ]]; then
-        export GENCODE=$OVERRIDE_GENCODE
-    fi
-
-    # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
-    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \
-	| xargs -I {} bash -c \
-		"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
-
-    # prune CuDNN and CuBLAS
-    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a
-    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
-    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
-
-    #####################################################################################
-    # CUDA 10.2 prune visual tools
-    #####################################################################################
-    export CUDA_BASE="/usr/local/cuda-10.2/"
-    rm -rf $CUDA_BASE/libnsight $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2019.5.0 $CUDA_BASE/nsight-systems-2019.5.2
 
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz
+    tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz
+    cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
 }
 
-function prune_113 {
-    echo "Pruning CUDA 11.3 and CuDNN"
-    #####################################################################################
-    # CUDA 11.3 prune static libs
-    #####################################################################################
-    export NVPRUNE="/usr/local/cuda-11.3/bin/nvprune"
-    export CUDA_LIB_DIR="/usr/local/cuda-11.3/lib64"
-
-    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
-    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
-
-    if [[ -n "$OVERRIDE_GENCODE" ]]; then
-        export GENCODE=$OVERRIDE_GENCODE
-    fi
-
-    # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
-    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \
-      | xargs -I {} bash -c \
-		"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+function install_118 {
+    echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15"
+    rm -rf /usr/local/cuda-11.8 /usr/local/cuda
+    # install CUDA 11.8.0 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
+    chmod +x cuda_11.8.0_520.61.05_linux.run
+    ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent
+    rm -f cuda_11.8.0_520.61.05_linux.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda
 
-    # prune CuDNN and CuBLAS
-    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
-    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
+    tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
+    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
 
-    #####################################################################################
-    # CUDA 11.3 prune visual tools
-    #####################################################################################
-    export CUDA_BASE="/usr/local/cuda-11.3/"
-    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2021.1.0 $CUDA_BASE/nsight-systems-2021.1.3
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz
+    tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz
+    cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
 }
 
-function prune_115 {
-    echo "Pruning CUDA 11.5 and CuDNN"
+function prune_116 {
+    echo "Pruning CUDA 11.6 and CuDNN"
     #####################################################################################
-    # CUDA 11.3 prune static libs
+    # CUDA 11.6 prune static libs
     #####################################################################################
-    export NVPRUNE="/usr/local/cuda-11.5/bin/nvprune"
-    export CUDA_LIB_DIR="/usr/local/cuda-11.5/lib64"
+    export NVPRUNE="/usr/local/cuda-11.6/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-11.6/lib64"
 
     export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
     export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
@@ -200,26 +103,26 @@ function prune_115 {
     # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
     ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \
       | xargs -I {} bash -c \
-		"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+                "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
 
     # prune CuDNN and CuBLAS
     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
 
     #####################################################################################
-    # CUDA 11.5 prune visual tools
+    # CUDA 11.6 prune visual tools
     #####################################################################################
-    export CUDA_BASE="/usr/local/cuda-11.5/"
-    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2021.3.0 $CUDA_BASE/nsight-systems-2021.3.3
+    export CUDA_BASE="/usr/local/cuda-11.6/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.1.1 $CUDA_BASE/nsight-systems-2021.5.2
 }
 
-function prune_116 {
-    echo "Pruning CUDA 11.6 and CuDNN"
+function prune_117 {
+    echo "Pruning CUDA 11.7 and CuDNN"
     #####################################################################################
-    # CUDA 11.6 prune static libs
+    # CUDA 11.7 prune static libs
     #####################################################################################
-    export NVPRUNE="/usr/local/cuda-11.6/bin/nvprune"
-    export CUDA_LIB_DIR="/usr/local/cuda-11.6/lib64"
+    export NVPRUNE="/usr/local/cuda-11.7/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-11.7/lib64"
 
     export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
     export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
@@ -240,20 +143,20 @@ function prune_116 {
     #####################################################################################
     # CUDA 11.6 prune visual tools
     #####################################################################################
-    export CUDA_BASE="/usr/local/cuda-11.6/"
-    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.1.1 $CUDA_BASE/nsight-systems-2021.5.2
+    export CUDA_BASE="/usr/local/cuda-11.7/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.2.0 $CUDA_BASE/nsight-systems-2022.1.3
 }
 
-function prune_117 {
-    echo "Pruning CUDA 11.7 and CuDNN"
+function prune_118 {
+    echo "Pruning CUDA 11.8 and cuDNN"
     #####################################################################################
-    # CUDA 11.7 prune static libs
+    # CUDA 11.8 prune static libs
     #####################################################################################
-    export NVPRUNE="/usr/local/cuda-11.7/bin/nvprune"
-    export CUDA_LIB_DIR="/usr/local/cuda-11.7/lib64"
+    export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64"
 
-    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
-    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86"
+    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
 
     if [[ -n "$OVERRIDE_GENCODE" ]]; then
         export GENCODE=$OVERRIDE_GENCODE
@@ -269,26 +172,22 @@ function prune_117 {
     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
 
     #####################################################################################
-    # CUDA 11.6 prune visual tools
+    # CUDA 11.8 prune visual tools
     #####################################################################################
-    export CUDA_BASE="/usr/local/cuda-11.7/"
-    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.2.0 $CUDA_BASE/nsight-systems-2022.1.3
+    export CUDA_BASE="/usr/local/cuda-11.8/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
 }
 
 # idiomatic parameter and option handling in sh
 while test $# -gt 0
 do
     case "$1" in
-	10.2) install_102; prune_102
-		;;
-    11.3) install_113; prune_113
-		;;
-    11.5) install_115; prune_115
-		;;
     11.6) install_116; prune_116
 	        ;;
     11.7) install_117; prune_117
 	        ;;
+    11.8) install_118; prune_118
+	        ;;
 	*) echo "bad argument $1"; exit 1
 	   ;;
     esac
diff --git a/common/install_magma.sh b/common/install_magma.sh
index 5d14dbfe2f..b524c920e9 100644
--- a/common/install_magma.sh
+++ b/common/install_magma.sh
@@ -7,17 +7,10 @@ MAGMA_VERSION="2.5.2"
 function do_install() {
     cuda_version=$1
     cuda_version_nodot=${1/./}
-    
-    if [[ ${cuda_version_nodot} == 116 ]]; then
-        MAGMA_VERSION="2.6.1"
-        magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-0.tar.bz2"
-    elif [[ ${cuda_version_nodot} == 117 ]]; then
-	MAGMA_VERSION="2.6.1"
-	magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-0.tar.bz2"
-    else
-        magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
-    fi
-    
+
+    MAGMA_VERSION="2.6.1"
+    magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
+
     cuda_dir="/usr/local/cuda-${cuda_version}"
     (
         set -x
diff --git a/common/install_miopen.sh b/common/install_miopen.sh
index 27521c429d..a5166c0974 100644
--- a/common/install_miopen.sh
+++ b/common/install_miopen.sh
@@ -33,8 +33,9 @@ if [[ $ROCM_INT -lt 40001 ]]; then
     exit 0
 fi
 
+# CHANGED: Do not uninstall. To avoid running out of disk space, we copy the lib over the existing one.
 # Uninstall existing package, to avoid errors during later yum install indicating packages did not change.
-yum remove -y miopen-hip
+#yum remove -y miopen-hip
 
 # Function to retry functions that sometimes timeout or have flaky failures
 retry () {
@@ -91,8 +92,25 @@ fi
 
 git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH}
 pushd MIOpen
+# remove .git to save disk space since CI runner was running out
+rm -rf .git
+# Don't build MLIR to save docker build time
+# since we are disabling MLIR backend for MIOpen anyway
+if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
+    sed -i '/rocMLIR/d' requirements.txt
+elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then
+    sed -i '/llvm-project-mlir/d' requirements.txt
+fi
 ## MIOpen minimum requirements
 cmake -P install_deps.cmake --minimum
+
+# clean up since CI runner was running out of disk space
+rm -rf /tmp/*
+yum clean all
+rm -rf /var/cache/yum
+rm -rf /var/lib/yum/yumdb
+rm -rf /var/lib/yum/history
+
 ## Build MIOpen
 mkdir -p build
 cd build
@@ -101,13 +119,19 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang
     ${MIOPEN_CMAKE_DB_FLAGS} \
     -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
 make MIOpen -j $(nproc)
-make -j $(nproc) package
-yum install -y miopen-*.rpm
+
+# CHANGED: Do not build package.
+# Build MIOpen package
+#make -j $(nproc) package
+
+# clean up since CI runner was running out of disk space
+rm -rf /usr/local/cget
+
+# CHANGED: Do not install package, just copy lib over existing.
+#yum install -y miopen-*.rpm
+dest=$(ls ${ROCM_INSTALL_PATH}/lib/libMIOpen.so.1.0.*)
+rm -f ${dest}
+cp lib/libMIOpen.so.1.0 ${dest}
+
 popd
 rm -rf MIOpen
-
-# Cleanup
-yum clean all
-rm -rf /var/cache/yum
-rm -rf /var/lib/yum/yumdb
-rm -rf /var/lib/yum/history
diff --git a/common/install_mkl.sh b/common/install_mkl.sh
index 5ebdd94b1b..5889dc1f0e 100644
--- a/common/install_mkl.sh
+++ b/common/install_mkl.sh
@@ -3,8 +3,8 @@
 set -ex
 
 # MKL
-MKL_VERSION=2020.0
-MKL_BUILD=166
+MKL_VERSION=2022.2.1
+MKL_BUILD=16993
 mkdir -p /opt/intel/lib
 pushd /tmp
 curl -fsSL https://anaconda.org/intel/mkl-static/${MKL_VERSION}/download/linux-64/mkl-static-${MKL_VERSION}-intel_${MKL_BUILD}.tar.bz2 | tar xjv
diff --git a/common/install_patchelf.sh b/common/install_patchelf.sh
index 032e3cc27a..37b69415e8 100644
--- a/common/install_patchelf.sh
+++ b/common/install_patchelf.sh
@@ -2,7 +2,9 @@
 
 set -ex
 
-git clone https://github.com/NixOS/patchelf
+# Pin the version to the latest release, 0.17.2; building newer commits
+# started failing on the current image
+git clone -b 0.17.2 --single-branch https://github.com/NixOS/patchelf
 cd patchelf
 sed -i 's/serial/parallel/g' configure.ac
 ./bootstrap.sh
diff --git a/common/install_rocm.sh b/common/install_rocm.sh
index d4352c21c7..4323cebd29 100644
--- a/common/install_rocm.sh
+++ b/common/install_rocm.sh
@@ -47,6 +47,10 @@ install_ubuntu() {
         ROCM_REPO="xenial"
     fi
 
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+        ROCM_REPO="${UBUNTU_VERSION_NAME}"
+    fi
+
     # Add rocm repository
     wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
     local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh
index c651a6e4e2..00540fbecd 100644
--- a/common/install_rocm_magma.sh
+++ b/common/install_rocm_magma.sh
@@ -37,5 +37,8 @@ make -f make.gen.hipMAGMA -j $(nproc)
 LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
 make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
 popd
-mv magma /opt/rocm
+mkdir -p /opt/rocm/magma
+mv magma/include /opt/rocm/magma
+mv magma/lib /opt/rocm/magma
+rm -rf magma
 
diff --git a/conda/Dockerfile b/conda/Dockerfile
index f4f4c834a6..c65e1ad99e 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -41,20 +41,12 @@ RUN bash ./install_conda.sh && rm install_conda.sh
 
 # Install CUDA
 FROM base as cuda
+ARG CUDA_VERSION=10.2
 RUN rm -rf /usr/local/cuda-*
 ADD ./common/install_cuda.sh install_cuda.sh
-
-FROM cuda as cuda10.2
-RUN bash ./install_cuda.sh 10.2
-ENV DESIRED_CUDA=10.2
-
-FROM cuda as cuda11.3
-RUN bash ./install_cuda.sh 11.3
-ENV DESIRED_CUDA=11.3
-
-FROM cuda as cuda11.5
-RUN bash ./install_cuda.sh 11.5
-ENV DESIRED_CUDA=11.5
+ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
+# Make things in our path by default
+ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH
 
 FROM cuda as cuda11.6
 RUN bash ./install_cuda.sh 11.6
@@ -64,17 +56,19 @@ FROM cuda as cuda11.7
 RUN bash ./install_cuda.sh 11.7
 ENV DESIRED_CUDA=11.7
 
+FROM cuda as cuda11.8
+RUN bash ./install_cuda.sh 11.8
+ENV DESIRED_CUDA=11.8
+
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
 RUN bash ./install_mnist.sh
 
 FROM base as all_cuda
-COPY --from=cuda10.2  /usr/local/cuda-10.2 /usr/local/cuda-10.2
-COPY --from=cuda11.3  /usr/local/cuda-11.3 /usr/local/cuda-11.3
-COPY --from=cuda11.5  /usr/local/cuda-11.5 /usr/local/cuda-11.5
 COPY --from=cuda11.6  /usr/local/cuda-11.6 /usr/local/cuda-11.6
 COPY --from=cuda11.7  /usr/local/cuda-11.7 /usr/local/cuda-11.7
+COPY --from=cuda11.8  /usr/local/cuda-11.8 /usr/local/cuda-11.8
 
 FROM ${BASE_TARGET} as final
 # Install LLVM
diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh
index bc43976750..1dc5ffe4f9 100755
--- a/conda/build_all_docker.sh
+++ b/conda/build_all_docker.sh
@@ -4,6 +4,6 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for CUDA_VERSION in 11.7 11.6 11.5 11.3 10.2 cpu; do
+for CUDA_VERSION in 11.8 11.7 11.6 cpu; do
   CUDA_VERSION="${CUDA_VERSION}" conda/build_docker.sh
 done
diff --git a/conda/build_docker.sh b/conda/build_docker.sh
index db7c5be62c..a3385ff38c 100755
--- a/conda/build_docker.sh
+++ b/conda/build_docker.sh
@@ -52,6 +52,11 @@ if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
     set -x
     docker tag ${DOCKER_IMAGE} "pytorch/conda-builder:cuda${CUDA_VERSION/./}"
   )
+  # Test that we're using the right CUDA compiler
+  (
+    set -x
+    docker run --rm "${DOCKER_IMAGE}" nvcc --version | grep "cuda_${CUDA_VERSION}"
+  )
 fi
 
 if [[ -n ${GITHUB_REF} ]]; then
diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh
index e430538c3f..30986b4088 100755
--- a/conda/build_pytorch.sh
+++ b/conda/build_pytorch.sh
@@ -31,7 +31,7 @@ retry () {
     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
 }
 
-# Parse arguments and determmine version
+# Parse arguments and determine version
 ###########################################################
 if [[ -n "$DESIRED_CUDA" && -n "$PYTORCH_BUILD_VERSION" && -n "$PYTORCH_BUILD_NUMBER" ]]; then
     desired_cuda="$DESIRED_CUDA"
@@ -106,7 +106,7 @@ if [[ -z "$DESIRED_PYTHON" ]]; then
     fi
 fi
 if [[ "$OSTYPE" == "darwin"* ]]; then
-    DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer
+    DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer
 fi
 if [[ "$desired_cuda" == 'cpu' ]]; then
     cpu_only=1
@@ -190,7 +190,7 @@ if [[ ! -d "$pytorch_rootdir" ]]; then
     popd
 fi
 pushd "$pytorch_rootdir"
-git submodule update --init --recursive --jobs 0
+git submodule update --init --recursive
 echo "Using Pytorch from "
 git --no-pager log --max-count 1
 popd
@@ -207,8 +207,6 @@ if [[ "$(uname)" == 'Darwin' ]]; then
         rm "$miniconda_sh"
     export PATH="$tmp_conda/bin:$PATH"
     retry conda install -yq conda-build
-    # Install py-lief=0.12.0 containing https://github.com/lief-project/LIEF/pull/579 to speed up the builds
-    retry conda install -yq  py-lief==0.12.0 -c malfet
 elif [[ "$OSTYPE" == "msys" ]]; then
     export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda"
     export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe"
@@ -245,12 +243,13 @@ fi
 meta_yaml="$build_folder/meta.yaml"
 echo "Using conda-build folder $build_folder"
 
-# Switch between CPU or CUDA configerations
+# Switch between CPU or CUDA configurations
 ###########################################################
 build_string_suffix="$PYTORCH_BUILD_NUMBER"
 if [[ -n "$cpu_only" ]]; then
     export USE_CUDA=0
     export CONDA_CUDATOOLKIT_CONSTRAINT=""
+    export CONDA_TRITON_CONSTRAINT=""
     export MAGMA_PACKAGE=""
     export CUDA_VERSION="0.0"
     export CUDNN_VERSION="0.0"
@@ -266,22 +265,24 @@ else
     . ./switch_cuda_version.sh "$desired_cuda"
     # TODO, simplify after anaconda fixes their cudatoolkit versioning inconsistency.
     # see: https://github.com/conda-forge/conda-forge.github.io/issues/687#issuecomment-460086164
-    if [[ "$desired_cuda" == "11.7" ]]; then
-	    export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=11.7,<11.8 # [not osx]"
-	    export MAGMA_PACKAGE="    - magma-cuda117 # [not osx and not win]"
+    if [[ "$desired_cuda" == "11.8" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=11.8,<11.9 # [not osx]"
+        export MAGMA_PACKAGE="    - magma-cuda118 # [not osx and not win]"
+    elif [[ "$desired_cuda" == "11.7" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=11.7,<11.8 # [not osx]"
+        export MAGMA_PACKAGE="    - magma-cuda117 # [not osx and not win]"
     elif [[ "$desired_cuda" == "11.6" ]]; then
         export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=11.6,<11.7 # [not osx]"
         export MAGMA_PACKAGE="    - magma-cuda116 # [not osx and not win]"
-    elif [[ "$desired_cuda" == "11.3" ]]; then
-        export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=11.3,<11.4 # [not osx]"
-        export MAGMA_PACKAGE="    - magma-cuda113 # [not osx and not win]"
-    elif [[ "$desired_cuda" == "10.2" ]]; then
-        export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=10.2,<10.3 # [not osx]"
-        export MAGMA_PACKAGE="    - magma-cuda102 # [not osx and not win]"
     else
         echo "unhandled desired_cuda: $desired_cuda"
         exit 1
     fi
+    if [[ "$OSTYPE" != "msys" ]]; then
+        # TODO: Remove me when Triton has a proper release channel
+        TRITON_SHORTHASH=$(cut -c1-10 $pytorch_rootdir/.github/ci_commit_pins/triton.txt)
+        export CONDA_TRITON_CONSTRAINT="    - torchtriton==2.1.0+${TRITON_SHORTHASH}"
+    fi
 
     build_string_suffix="cuda${CUDA_VERSION}_cudnn${CUDNN_VERSION}_${build_string_suffix}"
 fi
@@ -298,6 +299,12 @@ else
     export CONDA_BUILD_EXTRA_ARGS=""
 fi
 
+if [[ "$DESIRED_PYTHON" == "3.11" ]]; then
+    # TODO: Remove me when numpy is available in default channel
+    # or copy numpy to pytorch channel
+    export CONDA_BUILD_EXTRA_ARGS="-c malfet ${CONDA_BUILD_EXTRA_ARGS}"
+fi
+
 # Build PyTorch with Gloo's TCP_TLS transport
 if [[ "$(uname)" == 'Linux' ]]; then
     export USE_GLOO_WITH_OPENSSL=1
@@ -339,13 +346,14 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do
     # Build the package
     echo "Build $build_folder for Python version $py_ver"
     conda config --set anaconda_upload no
-    conda install -y conda-package-handling
-    # NS: To be removed after conda docker images are updated
-    conda update -y conda-build
+    conda install -y conda-package-handling conda==22.9.0
 
     if [[ "$OSTYPE" == "msys" ]]; then
       # Don't run tests on windows (they were ignored mostly anyways)
       NO_TEST="--no-test"
+    else
+      # NS: To be removed after conda docker images are updated
+      conda update -y conda-build
     fi
 
     echo "Calling conda-build at $(date)"
@@ -386,7 +394,18 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do
 
     # Install the built package and run tests, unless it's for mac cross compiled arm64
     if [[ -z "$CROSS_COMPILE_ARM64" ]]; then
-        conda install -y "$built_package"
+        # Install the package as if from local repo instead of tar.bz2 directly in order
+        # to trigger runtime dependency installation. See https://github.com/conda/conda/issues/1884
+        # Notes:
+        # - pytorch-nightly is included to install torchtriton
+        # - nvidia is included for cuda builds, there's no harm in listing the channel for cpu builds
+        if [[ "$OSTYPE" == "msys" ]]; then
+          # note the extra slash: `pwd -W` returns `c:/path/to/dir`, we need to add an extra slash for the URI
+          local_channel="/$(pwd -W)/$output_folder"
+        else
+          local_channel="$(pwd)/$output_folder"
+        fi
+        conda install -y -c "file://$local_channel" pytorch==$PYTORCH_BUILD_VERSION -c pytorch -c numba/label/dev -c pytorch-nightly -c nvidia
 
         echo "$(date) :: Running tests"
         pushd "$pytorch_rootdir"
diff --git a/conda/build_vision.sh b/conda/build_vision.sh
deleted file mode 100755
index 3061e4740b..0000000000
--- a/conda/build_vision.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-if [[ -x "/remote/anaconda_token" ]]; then
-    . /remote/anaconda_token || true
-fi
-
-set -ex
-
-# Function to retry functions that sometimes timeout or have flaky failures
-retry () {
-    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
-
-if [ "$#" -ne 1 ]; then
-    echo "Illegal number of parameters. Pass cuda version"
-    echo "CUDA version should be M.m with no dot, e.g. '8.0' or 'cpu'"
-    exit 1
-fi
-desired_cuda="$1"
-
-export TORCHVISION_BUILD_VERSION="0.3.0"
-export TORCHVISION_BUILD_NUMBER=1
-
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
-
-if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then
-    WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)"
-fi
-
-if [[ "$OSTYPE" == "msys" ]]; then
-    mkdir -p "$WIN_PACKAGE_WORK_DIR" || true
-    vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src"
-    git config --system core.longpaths true
-else
-    vision_rootdir="$(pwd)/torchvision-src"
-fi
-
-if [[ ! -d "$vision_rootdir" ]]; then
-    rm -rf "$vision_rootdir"
-    git clone "https://github.com/pytorch/vision" "$vision_rootdir"
-    pushd "$vision_rootdir"
-    git checkout v$TORCHVISION_BUILD_VERSION
-    popd
-fi
-
-cd "$SOURCE_DIR"
-
-if [[ "$OSTYPE" == "msys" ]]; then
-    export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda"
-    export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe"
-    rm -rf "$tmp_conda"
-    rm -f "$miniconda_exe"
-    curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe"
-    "$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe"
-    pushd $tmp_conda
-    export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH"
-    popd
-    # We have to skip 3.17 because of the following bug.
-    # https://github.com/conda/conda-build/issues/3285
-    retry conda install -yq conda-build
-fi
-
-ANACONDA_USER=pytorch
-conda config --set anaconda_upload no
-
-
-export TORCHVISION_PACKAGE_SUFFIX=""
-if [[ "$desired_cuda" == 'cpu' ]]; then
-    export CONDA_CUDATOOLKIT_CONSTRAINT=""
-    export CUDA_VERSION="None"
-    if [[ "$OSTYPE" != "darwin"* ]]; then
-        export TORCHVISION_PACKAGE_SUFFIX="-cpu"
-    fi
-else
-    . ./switch_cuda_version.sh $desired_cuda
-    if [[ "$desired_cuda" == "10.0" ]]; then
-	export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=10.0,<10.1 # [not osx]"
-    elif [[ "$desired_cuda" == "9.0" ]]; then
-	export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=9.0,<9.1 # [not osx]"
-    else
-	echo "unhandled desired_cuda: $desired_cuda"
-	exit 1
-    fi
-fi
-
-if [[ "$OSTYPE" == "msys" ]]; then
-    time conda build -c $ANACONDA_USER --no-anaconda-upload vs2017
-else
-    time conda build -c $ANACONDA_USER --no-anaconda-upload --python 2.7 torchvision
-fi
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.5 torchvision
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.6 torchvision
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.7 torchvision
-
-set +e
diff --git a/conda/debugging_pytorch.sh b/conda/debugging_pytorch.sh
index e79567acb5..4cce4f225e 100644
--- a/conda/debugging_pytorch.sh
+++ b/conda/debugging_pytorch.sh
@@ -14,7 +14,7 @@ export USE_CUDA_STATIC_LINK=1
 . ./switch_cuda_version.sh 9.0
 
 
-conda install -y cmake numpy=1.17 setuptools pyyaml cffi mkl=2018 mkl-include typing_extension ninja magma-cuda80 -c pytorch
+conda install -y cmake numpy=1.17 setuptools pyyaml mkl=2018 mkl-include typing_extension ninja magma-cuda80 -c pytorch
 
 export CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
 git clone https://github.com/pytorch/pytorch -b nightly2 --recursive
diff --git a/conda/pytorch-cuda/conda_build_config.yaml b/conda/pytorch-cuda/conda_build_config.yaml
index 802638b8eb..67d14f2b17 100644
--- a/conda/pytorch-cuda/conda_build_config.yaml
+++ b/conda/pytorch-cuda/conda_build_config.yaml
@@ -1,3 +1,7 @@
 version:
     - 11.6
     - 11.7
+    - 11.8
+target_platform:
+    - win-64
+    - linux-64
diff --git a/conda/pytorch-cuda/meta.yaml b/conda/pytorch-cuda/meta.yaml
index 92c970654f..ecb438ca86 100644
--- a/conda/pytorch-cuda/meta.yaml
+++ b/conda/pytorch-cuda/meta.yaml
@@ -1,7 +1,40 @@
-{% set build = 0 %}
+# Package to manage cuda version in PyTorch.
+#
+# Windows anaconda packages are packaged differently:
+# all DLLs are kept within *-dev packages, hence we need to
+# include the dev packages for Windows. See:
+# https://github.com/pytorch/vision/issues/7185#issuecomment-1420002413
+#
+# Please note: the build number should be advanced with
+# every deployment. After deploying to production,
+# use the following links to validate the correctness of
+# the deployment:
+# https://conda.anaconda.org/pytorch/noarch/
+# https://conda.anaconda.org/pytorch/noarch/repodata.json
+{% set build = 3 %}
 {% set cuda_constraints=">=11.6,<11.7" %}
+{% set libcufft_constraints=">=10.7.0.55,<10.7.2.50" %}
+{% set libcublas_constraints=">=11.8.1.74,<11.10.1.25" %}
+{% set libcusolver_constraints=">=11.3.2.55,<11.3.5.50" %}
+{% set libcusparse_constraints=">=11.7.1.55,<11.7.3.50" %}
+{% set libnpp_constraints=">=11.6.0.55,<11.7.3.21" %}
+{% set libnvjpeg_constraints=">=11.6.0.55,<11.7.2.34" %}
 {% if version == '11.7' %}
 {% set cuda_constraints=">=11.7,<11.8" %}
+{% set libcufft_constraints=">=10.7.2.50,<10.9.0.58" %}
+{% set libcublas_constraints=">=11.10.1.25,<11.11.3.6" %}
+{% set libcusolver_constraints=">=11.3.5.50,<11.4.1.48" %}
+{% set libcusparse_constraints=">=11.7.3.50,<11.7.5.86" %}
+{% set libnpp_constraints=">=11.7.3.21,<11.8.0.86" %}
+{% set libnvjpeg_constraints=">=11.7.2.34,<11.9.0.86" %}
+{% elif version == '11.8' %}
+{% set cuda_constraints=">=11.8,<12.0" %}
+{% set libcufft_constraints=">=10.9.0.58,<11.0.0.21" %}
+{% set libcublas_constraints=">=11.11.3.6,<12.0.1.189" %}
+{% set libcusolver_constraints=">=11.4.1.48,<11.4.2.57" %}
+{% set libcusparse_constraints=">=11.7.5.86,<12.0.0.76" %}
+{% set libnpp_constraints=">=11.8.0.86,<12.0.0.30" %}
+{% set libnvjpeg_constraints=">=11.9.0.86,<12.0.0.28" %}
 {% endif %}
 
 package:
@@ -9,33 +42,29 @@ package:
   version: {{ version }}
 build:
   number: {{ build }}
-  noarch: generic
 requirements:
   run:
-    - cuda={{ version }}
-  run_constrained:
-    - cuda-cccl {{ cuda_constraints }}
-    - cuda-command-line-tools {{ cuda_constraints }}
-    - cuda-compiler {{ cuda_constraints }}
+    - cuda-libraries {{ cuda_constraints }}
+    - cuda-nvtx {{ cuda_constraints }}
+    - libnvjpeg {{ libnvjpeg_constraints }}
     - cuda-cudart {{ cuda_constraints }}
-    - cuda-cudart-dev {{ cuda_constraints }}
-    - cuda-cuobjdump {{ cuda_constraints }}
     - cuda-cupti {{ cuda_constraints }}
-    - cuda-cuxxfilt {{ cuda_constraints }}
-    - cuda-driver-dev {{ cuda_constraints }}
-    - cuda-libraries {{ cuda_constraints }}
-    - cuda-libraries-dev {{ cuda_constraints }}
-    - cuda-cudaart-dev {{ cuda_constraints }}
-    - cuda-nvcc {{ cuda_constraints }}
-    - cuda-nvml-dev {{ cuda_constraints }}
-    - cuda-nvprune {{ cuda_constraints }}
     - cuda-nvrtc {{ cuda_constraints }}
-    - cuda-nvrtc-dev {{ cuda_constraints }}
-    - cuda-nvtx {{ cuda_constraints }}
     - cuda-runtime {{ cuda_constraints }}
-    - cuda-toolkit {{ cuda_constraints }}
-    - cuda-tools {{ cuda_constraints }}
-  # None, pytorch should depend on pytorch-cuda
+    - libcufft {{ libcufft_constraints }}
+    - libcublas {{ libcublas_constraints }}
+    - libcusolver {{ libcusolver_constraints }}
+    - libcusparse {{ libcusparse_constraints }}
+    - libnpp {{ libnpp_constraints }}
+    - cuda-libraries-dev {{ cuda_constraints }} # [win64]
+    - libnvjpeg-dev {{ libnvjpeg_constraints }} # [win64]
+    - cuda-cudart-dev {{ cuda_constraints }} # [win64]
+    - cuda-nvrtc-dev {{ cuda_constraints }} # [win64]
+    - libcufft-dev {{ libcufft_constraints }} # [win64]
+    - libcublas-dev {{ libcublas_constraints }} # [win64]
+    - libcusolver-dev {{ libcusolver_constraints }} # [win64]
+    - libcusparse-dev {{ libcusparse_constraints }} # [win64]
+    - libnpp-dev {{ libnpp_constraints }} # [win64]
 test:
   commands:
     - echo "pytorch-cuda metapackage is created."
diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat
index 09dc1a8167..18850f758a 100644
--- a/conda/pytorch-nightly/bld.bat
+++ b/conda/pytorch-nightly/bld.bat
@@ -20,22 +20,18 @@ if "%build_with_cuda%" == "" goto cuda_flags_end
 set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
 set CUDA_BIN_PATH=%CUDA_PATH%\bin
 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
-set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0
-if "%desired_cuda%" == "10.2" set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5
-if "%desired_cuda%" == "11.3" (
-    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6
-    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
-)
+set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6
 if "%desired_cuda%" == "11.5" (
-    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
 )
 if "%desired_cuda%" == "11.6" (
-    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
 )
 if "%desired_cuda%" == "11.7" (
-    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6
+    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
+)
+if "%desired_cuda%" == "11.8" (
+    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;9.0
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
 )
 
@@ -112,6 +108,7 @@ IF "%USE_SCCACHE%" == "1" (
 
 if NOT "%build_with_cuda%" == "" (
     copy "%CUDA_BIN_PATH%\cudnn*64_*.dll*" %SP_DIR%\torch\lib
+    copy "%NVTOOLSEXT_PATH%\bin\x64\nvToolsExt64_*.dll*" %SP_DIR%\torch\lib
     :: cupti library file name changes aggressively, bundle it to avoid
     :: potential file name mismatch.
     copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" %SP_DIR%\torch\lib
diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh
index 05a496fc69..ad1871ac4c 100755
--- a/conda/pytorch-nightly/build.sh
+++ b/conda/pytorch-nightly/build.sh
@@ -8,6 +8,7 @@ export PYTORCH_BUILD_VERSION=$PKG_VERSION
 export PYTORCH_BUILD_NUMBER=$PKG_BUILDNUM
 export USE_LLVM="/opt/llvm_no_cxx11_abi"
 export LLVM_DIR="$USE_LLVM/lib/cmake/llvm"
+export PACKAGE_TYPE="conda"
 
 # set OPENSSL_ROOT_DIR=/opt/openssl if it exists
 if [[ -e /opt/openssl ]]; then
@@ -51,41 +52,35 @@ if [[ -z "$USE_CUDA" || "$USE_CUDA" == 1 ]]; then
 fi
 if [[ -n "$build_with_cuda" ]]; then
     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-    export TORCH_CUDA_ARCH_LIST="3.7+PTX;5.0"
+    TORCH_CUDA_ARCH_LIST="3.7+PTX;5.0"
     export USE_STATIC_CUDNN=1 # links cudnn statically (driven by tools/setup_helpers/cudnn.py)
 
-    if [[ $CUDA_VERSION == 10* ]]; then
-        export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5"
-        DEPS_LIST=(/usr/local/cuda-10.2/extras/CUPTI/lib64/libcupti.so.10.2)
-    elif [[ $CUDA_VERSION == 11.3* ]]; then
-        export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
-	      #for cuda 11.3 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html
-        #which does not have single static libcudnn_static.a deliverable to link with
-        export USE_STATIC_CUDNN=0
-        #for cuda 11.3 include all dynamic loading libraries
-        DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.3/extras/CUPTI/lib64/libcupti.so.11.3)
-    elif [[ $CUDA_VERSION == 11.5* ]]; then
-        export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
-        #for cuda 11.5 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html
-        #which does not have single static libcudnn_static.a deliverable to link with
-        export USE_STATIC_CUDNN=0
-        #for cuda 11.5 include all dynamic loading libraries
-        DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.5/extras/CUPTI/lib64/libcupti.so.11.5)
-    elif [[ $CUDA_VERSION == 11.6* ]]; then
-        export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
+    if [[ $CUDA_VERSION == 11.6* ]]; then
+        TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
         #for cuda 11.5 we use cudnn 8.3.2.44 https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_8.html
         #which does not have single static libcudnn_static.a deliverable to link with
         export USE_STATIC_CUDNN=0
         #for cuda 11.5 include all dynamic loading libraries
         DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.6/extras/CUPTI/lib64/libcupti.so.11.6)
     elif [[ $CUDA_VERSION == 11.7* ]]; then
-        export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
+        TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6"
         #for cuda 11.7 we use cudnn 8.5
         #which does not have single static libcudnn_static.a deliverable to link with
         export USE_STATIC_CUDNN=0
         #for cuda 11.7 include all dynamic loading libraries
         DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.7/extras/CUPTI/lib64/libcupti.so.11.7)
+    elif [[ $CUDA_VERSION == 11.8* ]]; then
+        TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0"
+        #for cuda 11.8 we use cudnn 8.7
+        #which does not have single static libcudnn_static.a deliverable to link with
+        export USE_STATIC_CUDNN=0
+        #for cuda 11.8 include all dynamic loading libraries
+        DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8)
+    fi
+    if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then
+        TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST"
     fi
+    export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST"
     export NCCL_ROOT_DIR=/usr/local/cuda
     export USE_STATIC_NCCL=1  # links nccl statically (driven by tools/setup_helpers/nccl.py, some of the NCCL cmake files such as FindNCCL.cmake and gloo/FindNCCL.cmake)
 
diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml
index 36e92d5e38..9416a1ed84 100644
--- a/conda/pytorch-nightly/meta.yaml
+++ b/conda/pytorch-nightly/meta.yaml
@@ -21,15 +21,17 @@ requirements:
     - pyyaml
     {% if cross_compile_arm64 == 0 %}
     - mkl-include # [x86_64]
-    - mkl=2020.2 # [x86_64 and (not win or py <= 39)]
-    - mkl=2021.4 # [x86_64 and win and py >= 310]
+    - mkl=2020.2 # [x86_64 and not win]
+    - mkl=2021.4 # [x86_64 and win]
     {% endif %}
     - typing_extensions
     - ninja
     - libuv # [win]
     - numpy=1.19 # [py <= 39]
-    - numpy=1.21.5 # [py >= 310]
-    - openssl=1.1.1l # [py >= 310 and linux]
+    - numpy=1.21.5 # [py == 310]
+    - numpy=1.23.5 # [py >= 311]
+    - openssl=1.1.1l # [py == 310 and linux]
+    - openssl=1.1.1s # [py >= 311 and linux]
 {{ environ.get('PYTORCH_LLVM_PACKAGE', '    - llvmdev=9') }}
 {{ environ.get('MAGMA_PACKAGE', '') }}
 
@@ -41,11 +43,16 @@ requirements:
     - libuv # [win]
     - intel-openmp # [win]
     - typing_extensions
+    - sympy
+    - filelock
+    - networkx
+    - jinja2 # [py <= 310]
     {% if cross_compile_arm64 == 0 %}
     - blas * mkl
     {% endif %}
     - pytorch-mutex 1.0 {{ build_variant }}  # [not osx ]
 {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }}
+{{ environ.get('CONDA_TRITON_CONSTRAINT', '') }}
 
   {% if build_variant == 'cpu' %}
   run_constrained:
@@ -64,6 +71,7 @@ build:
     - CUDA_VERSION
     - CUDNN_VERSION
     - CONDA_CUDATOOLKIT_CONSTRAINT
+    - CONDA_TRITON_CONSTRAINT
     - USE_CUDA
     - CMAKE_ARGS
     - EXTRA_CAFFE2_CMAKE_FLAGS
@@ -81,6 +89,7 @@ build:
     - USE_PYTORCH_METAL_EXPORT # [osx]
     - USE_COREML_DELEGATE # [osx]
     - _GLIBCXX_USE_CXX11_ABI # [unix]
+    - OVERRIDE_TORCH_CUDA_ARCH_LIST
 
 test:
  imports:
diff --git a/conda/torchvision/bld.bat b/conda/torchvision/bld.bat
deleted file mode 100644
index 14f6935fba..0000000000
--- a/conda/torchvision/bld.bat
+++ /dev/null
@@ -1,24 +0,0 @@
-@echo on
-
-set TORCHVISION_BUILD_VERSION=%PKG_VERSION%
-set TORCHVISION_BUILD_NUMBER=%PKG_BUILDNUM%
-
-if not "%CUDA_VERSION%" == "None" (
-    set build_with_cuda=1
-    set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1%
-) else (
-    set build_with_cuda=
-)
-
-if "%build_with_cuda%" == "" goto cuda_flags_end
-
-set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
-set CUDA_BIN_PATH=%CUDA_PATH%\bin
-set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr
-if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50
-if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50
-
-:cuda_flags_end
-
-python setup.py install --single-version-externally-managed --record=record.txt
-if errorlevel 1 exit /b 1
diff --git a/conda/torchvision/meta.yaml b/conda/torchvision/meta.yaml
deleted file mode 100644
index caa439c7d2..0000000000
--- a/conda/torchvision/meta.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-package:
-  name: torchvision{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }}
-  version: "{{ environ.get('TORCHVISION_BUILD_VERSION') }}"
-
-source:
- git_rev: v{{ environ.get('TORCHVISION_BUILD_VERSION') }}
- git_url: https://github.com/pytorch/vision.git
-   
-
-requirements:
-  build:
-    - {{ compiler('c') }} # [win]
-
-  host:
-    - python
-    - setuptools
-    - pytorch{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }} >=1.1.0
-{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
-
-  run:
-    - python
-    - pillow >=4.1.1
-    - numpy >=1.11
-    - pytorch{{ environ.get('TORCHVISION_PACKAGE_SUFFIX') }} >=1.1.0
-    - six
-{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
-
-build:
-  number: {{ environ.get('TORCHVISION_BUILD_NUMBER') }}
-  string: py{{py}}_cu{{ environ['CUDA_VERSION'] }}_{{environ.get('TORCHVISION_BUILD_NUMBER')}}
-  script: python setup.py install --single-version-externally-managed --record=record.txt # [not win]
-  script_env:
-    - CUDA_VERSION
-
-test:
-  imports:
-    - torchvision
-    - torchvision.datasets
-    - torchvision.transforms
-  source_files:
-    - test
-  requires:
-    - pytest
-    - scipy
-  commands:
-    pytest .
-
-
-about:
-  home: https://github.com/pytorch/vision
-  license: BSD
-  license_file: LICENSE
-  summary: 'image and video datasets and models for torch deep learning'
diff --git a/conda/vs2017/conda_build_config.yaml b/conda/vs2017/conda_build_config.yaml
deleted file mode 100755
index 5188bb0ebe..0000000000
--- a/conda/vs2017/conda_build_config.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-blas_impl:
-  - mkl                        # [x86_64]
-c_compiler:
-  - vs2017                     # [win]
-cxx_compiler:
-  - vs2017                     # [win]
-python:
-  - 3.5
-  - 3.6
-# This differs from target_platform in that it determines what subdir the compiler
-#    will target, not what subdir the compiler package will be itself.
-#    For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
-#    code on win-64 miniconda.
-cross_compiler_target_platform:
-  - win-64                     # [win]
-target_platform:
-  - win-64                     # [win]
-vc:
-  - 14
-zip_keys:
-  -                             # [win]
-    - vc                        # [win]
-    - c_compiler                # [win]
-    - cxx_compiler              # [win]
diff --git a/conda/vs2017/activate.bat b/conda/vs2022/activate.bat
old mode 100755
new mode 100644
similarity index 57%
rename from conda/vs2017/activate.bat
rename to conda/vs2022/activate.bat
index ccecfc2544..fe18f77230
--- a/conda/vs2017/activate.bat
+++ b/conda/vs2022/activate.bat
@@ -1,19 +1,26 @@
 :: Set env vars that tell distutils to use the compiler that we put on path
-SET DISTUTILS_USE_SDK=1
-SET MSSdk=1
+set DISTUTILS_USE_SDK=1
+set MSSdk=1
 
-SET "VS_VERSION=15.0"
-SET "VS_MAJOR=15"
-SET "VS_YEAR=2017"
+set "VS_VERSION=17.4"
+set "VS_MAJOR=17"
+set "VC_YEAR=2022"
+set "VC_VERSION_LOWER=17"
+set "VC_VERSION_UPPER=18"
 
 set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out"
 set "MSYS2_ENV_CONV_EXCL=CL"
 
 :: For Python 3.5+, ensure that we link with the dynamic runtime.  See
 :: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info
-set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll"
+set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime143.dll"
 
-for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+if not "%VS15INSTALLDIR%" == "" if exist "%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat" (
+    set "VSINSTALLDIR=%VS15INSTALLDIR%\"
+    goto :vswhere
+)
+
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
     if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
         set "VSINSTALLDIR=%%i\"
         goto :vswhere
@@ -23,15 +30,15 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio
 :vswhere
 
 :: Shorten PATH to avoid the `input line too long` error.
-SET MyPath=%PATH%
+set MyPath=%PATH%
 
 setlocal EnableDelayedExpansion
 
-SET TempPath="%MyPath:;=";"%"
-SET var=
-FOR %%a IN (%TempPath%) DO (
-    IF EXIST %%~sa (
-        SET "var=!var!;%%~sa"
+set TempPath="%MyPath:;=";"%"
+set var=
+for %%a in (%TempPath%) do (
+    if exist %%~sa (
+        set "var=!var!;%%~sa"
     )
 )
 
@@ -39,6 +46,6 @@ set "TempPath=!var:~1!"
 endlocal & set "PATH=%TempPath%"
 
 :: Shorten current directory too
-FOR %%A IN (.) DO CD "%%~sA"
+for %%A in (.) do cd "%%~sA"
 
 :: other things added by install_activate.bat at package build time
diff --git a/conda/torchvision/conda_build_config.yaml b/conda/vs2022/conda_build_config.yaml
similarity index 86%
rename from conda/torchvision/conda_build_config.yaml
rename to conda/vs2022/conda_build_config.yaml
index 5188bb0ebe..e2a4de3c2e 100644
--- a/conda/torchvision/conda_build_config.yaml
+++ b/conda/vs2022/conda_build_config.yaml
@@ -1,12 +1,13 @@
 blas_impl:
   - mkl                        # [x86_64]
 c_compiler:
-  - vs2017                     # [win]
+  - vs2022                     # [win]
 cxx_compiler:
-  - vs2017                     # [win]
+  - vs2022                     # [win]
 python:
-  - 3.5
-  - 3.6
+  - 3.8
+  - 3.9
+  - 3.10
 # This differs from target_platform in that it determines what subdir the compiler
 #    will target, not what subdir the compiler package will be itself.
 #    For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
diff --git a/conda/vs2017/install_activate.bat b/conda/vs2022/install_activate.bat
old mode 100755
new mode 100644
similarity index 98%
rename from conda/vs2017/install_activate.bat
rename to conda/vs2022/install_activate.bat
index 2ca223ebc8..eb85767d67
--- a/conda/vs2017/install_activate.bat
+++ b/conda/vs2022/install_activate.bat
@@ -1,5 +1,5 @@
-set YEAR=2017
-set VER=15
+set YEAR=2022
+set VER=17
 
 mkdir "%PREFIX%\etc\conda\activate.d"
 copy "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
diff --git a/conda/vs2017/install_runtime.bat b/conda/vs2022/install_runtime.bat
old mode 100755
new mode 100644
similarity index 92%
rename from conda/vs2017/install_runtime.bat
rename to conda/vs2022/install_runtime.bat
index 5163c16cf2..bac684dae6
--- a/conda/vs2017/install_runtime.bat
+++ b/conda/vs2022/install_runtime.bat
@@ -3,7 +3,7 @@ if "%ARCH%"=="64" (
    set VC_PATH=x64
 )
 
-set MSC_VER=2017
+set MSC_VER=2022
 
 rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
 rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
@@ -23,10 +23,10 @@ robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%
 robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
 if %ERRORLEVEL% GEQ 8 exit 1
 
-REM ========== This one comes from visual studio 2017
-set "VC_VER=141"
+REM ========== This one comes from visual studio 2022
+set "VC_VER=143"
 
-for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [17^,18^) -property installationPath`) do (
     if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
         set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
         goto :eof
diff --git a/conda/vs2017/meta.yaml b/conda/vs2022/meta.yaml
old mode 100755
new mode 100644
similarity index 66%
rename from conda/vs2017/meta.yaml
rename to conda/vs2022/meta.yaml
index 1f569525ee..184c4c32df
--- a/conda/vs2017/meta.yaml
+++ b/conda/vs2022/meta.yaml
@@ -1,7 +1,7 @@
-{% set vcver="14.1" %}
-{% set vcfeature="14" %}
-{% set vsyear="2017" %}
-{% set fullver="15.4.27004.2010" %}
+{% set vcver="17.4" %}
+{% set vcfeature="17" %}
+{% set vsyear="2022" %}
+{% set fullver="17.4.33110.190" %}
 
 package:
   name: vs{{ vsyear }}
@@ -16,7 +16,7 @@ outputs:
   - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
     script: install_activate.bat
     track_features:
-      # VS 2017 is binary-compatible with VS 2015/vc14.  Tools are "v141".
+      # VS 2022 is binary-compatible with VS 2019/vc 14.2, VS 2017/vc 14.1 and 2015/vc14. Tools are "v143".
       strong:
         - vc{{ vcfeature }}
     about:
diff --git a/cron/nightly_defaults.sh b/cron/nightly_defaults.sh
index 0f6532adc1..d8b6f5ee04 100755
--- a/cron/nightly_defaults.sh
+++ b/cron/nightly_defaults.sh
@@ -120,7 +120,7 @@ if [[ ! -d "$NIGHTLIES_PYTORCH_ROOT" ]]; then
         export PYTORCH_BRANCH="$last_commit"
     fi
     git checkout "$PYTORCH_BRANCH"
-    git submodule update --jobs 0
+    git submodule update
     popd
 fi
 
@@ -229,7 +229,7 @@ if [[ "$DAYS_TO_KEEP" < '1' ]]; then
 fi
 
 # PYTORCH_NIGHTLIES_TIMEOUT
-#   Timeout in seconds. 
+#   Timeout in seconds.
 #   When full testing is enabled, condas builds often take up to 2 hours 20
 #   minutes, so the default is set to (2 * 60 + 20 + 40 [buffer]) * 60 == 10800
 #   seconds.
diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile
index f38aca0632..c5eb904ce6 100644
--- a/libtorch/Dockerfile
+++ b/libtorch/Dockerfile
@@ -42,19 +42,7 @@ ENV CUDA_HOME /usr/local/cuda
 FROM base as conda
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
-RUN /opt/conda/bin/conda install -y cmake=3.14
-
-FROM cuda as cuda10.2
-RUN bash ./install_cuda.sh 10.2
-RUN bash ./install_magma.sh 10.2
-
-FROM cuda as cuda11.3
-RUN bash ./install_cuda.sh 11.3
-RUN bash ./install_magma.sh 11.3
-
-FROM cuda as cuda11.5
-RUN bash ./install_cuda.sh 11.5
-RUN bash ./install_magma.sh 11.5
+RUN /opt/conda/bin/conda install -y cmake=3.18
 
 FROM cuda as cuda11.6
 RUN bash ./install_cuda.sh 11.6
@@ -64,6 +52,10 @@ FROM cuda as cuda11.7
 RUN bash ./install_cuda.sh 11.7
 RUN bash ./install_magma.sh 11.7
 
+FROM cuda as cuda11.8
+RUN bash ./install_cuda.sh 11.8
+RUN bash ./install_magma.sh 11.8
+
 FROM cpu as rocm
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
@@ -77,23 +69,13 @@ RUN apt-get update -y && \
     apt-get install python -y && \
     apt-get clean
 
-FROM rocm as rocm5.2
-RUN ROCM_VERSION=5.2 bash ./install_rocm.sh && rm install_rocm.sh
-RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
-
-FROM rocm as rocm5.1.3
-RUN ROCM_VERSION=5.1.3 bash ./install_rocm.sh && rm install_rocm.sh
-RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
-
-FROM rocm as rocm5.1.1
-RUN ROCM_VERSION=5.1.1 bash ./install_rocm.sh && rm install_rocm.sh
+FROM rocm as rocm5.4.2
+RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
 
-FROM rocm as rocm5.1
-RUN ROCM_VERSION=5.1 bash ./install_rocm.sh && rm install_rocm.sh
+FROM rocm as rocm5.3
+RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
 
diff --git a/libtorch/build.sh b/libtorch/build.sh
index b2551a6be3..88c8c6f9a8 100644
--- a/libtorch/build.sh
+++ b/libtorch/build.sh
@@ -7,4 +7,4 @@ set -ex
 
 SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.7" ${SCRIPTPATH}/../manywheel/build.sh
+BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.8" ${SCRIPTPATH}/../manywheel/build.sh
diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh
index 5703ca41cc..8d25da9bcd 100755
--- a/libtorch/build_all_docker.sh
+++ b/libtorch/build_all_docker.sh
@@ -4,10 +4,10 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for cuda_version in 11.7 11.6 11.5 11.3 10.2; do
+for cuda_version in 11.8 11.7 11.6; do
     GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh"
 done
 
-for rocm_version in 5.1.1 5.2; do
+for rocm_version in 5.3 5.4.2; do
     GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh"
 done
diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh
index fe441bb9a6..bbf42b1d02 100755
--- a/libtorch/build_docker.sh
+++ b/libtorch/build_docker.sh
@@ -27,7 +27,7 @@ case ${GPU_ARCH_TYPE} in
     rocm)
         BASE_TARGET=rocm${GPU_ARCH_VERSION}
         DOCKER_TAG=rocm${GPU_ARCH_VERSION}
-        GPU_IMAGE=rocm/dev-ubuntu-18.04:${GPU_ARCH_VERSION}
+        GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}
         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908"
         ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
         if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
diff --git a/magma/Makefile b/magma/Makefile
index 2d690c14c8..4a90a43e2b 100644
--- a/magma/Makefile
+++ b/magma/Makefile
@@ -1,8 +1,8 @@
 SHELL=/usr/bin/env bash
 
-DESIRED_CUDA ?= 11.3
-PACKAGE_NAME ?= magma-cuda113
-CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70
+DESIRED_CUDA ?= 11.6
+PACKAGE_NAME ?= magma-cuda116
+CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
 
 DOCKER_RUN = set -eou pipefail; docker run --rm -i \
 	-v $(shell git rev-parse --show-toplevel):/builder \
@@ -14,47 +14,30 @@ DOCKER_RUN = set -eou pipefail; docker run --rm -i \
 	magma/build_magma.sh
 
 .PHONY: all
+all: magma-cuda118
 all: magma-cuda117
 all: magma-cuda116
-all: magma-cuda115
-all: magma-cuda113
-all: magma-cuda102
 
 .PHONY:
 clean:
 	$(RM) -r magma-*
 	$(RM) -r output
 
+.PHONY: magma-cuda118
+magma-cuda118: DESIRED_CUDA := 11.8
+magma-cuda118: PACKAGE_NAME := magma-cuda118
+magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_90,code=sm_90
+magma-cuda118:
+	$(DOCKER_RUN)
+
 .PHONY: magma-cuda117
 magma-cuda117: DESIRED_CUDA := 11.7
 magma-cuda117: PACKAGE_NAME := magma-cuda117
-magma-cuda117: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
 magma-cuda117:
 	$(DOCKER_RUN)
 
 .PHONY: magma-cuda116
 magma-cuda116: DESIRED_CUDA := 11.6
 magma-cuda116: PACKAGE_NAME := magma-cuda116
-magma-cuda116: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
 magma-cuda116:
 	$(DOCKER_RUN)
-
-.PHONY: magma-cuda115
-magma-cuda115: DESIRED_CUDA := 11.5
-magma-cuda115: PACKAGE_NAME := magma-cuda115
-magma-cuda115: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
-magma-cuda115:
-	$(DOCKER_RUN)
-
-.PHONY: magma-cuda113
-magma-cuda113: DESIRED_CUDA := 11.3
-magma-cuda113: PACKAGE_NAME := magma-cuda113
-magma-cuda113: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
-magma-cuda113:
-	$(DOCKER_RUN)
-
-.PHONY: magma-cuda102
-magma-cuda102: DESIRED_CUDA := 10.2
-magma-cuda102: PACKAGE_NAME := magma-cuda102
-magma-cuda102:
-	$(DOCKER_RUN)
diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile
index 3140f98638..43a7d0568b 100644
--- a/manywheel/Dockerfile
+++ b/manywheel/Dockerfile
@@ -21,9 +21,10 @@ RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm &
     rpm -ivh epel-release-latest-7.noarch.rpm && \
     rm -f epel-release-latest-7.noarch.rpm
 
-# cmake
-RUN yum install -y cmake3 && \
-    ln -s /usr/bin/cmake3 /usr/bin/cmake
+# cmake-3.18.4 from pip
+RUN yum install -y python3-pip && \
+    python3 -mpip install cmake==3.18.4 && \
+    ln -s /usr/local/bin/cmake /usr/bin/cmake
 
 RUN yum install -y autoconf aclocal automake make
 
@@ -35,18 +36,19 @@ FROM base as openssl
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 
-FROM base as python
+# patchelf stage: installs patchelf and copies the binary to /patchelf
+FROM base as patchelf
+# Install patchelf
+ADD ./common/install_patchelf.sh install_patchelf.sh
+RUN bash ./install_patchelf.sh && rm install_patchelf.sh
+RUN cp $(which patchelf) /patchelf
+
+FROM patchelf as python
 # build python
 COPY manywheel/build_scripts /build_scripts
 ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
 RUN bash build_scripts/build.sh && rm -r build_scripts
 
-# remove unncessary python versions
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
-RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
-RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
-RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
-
 FROM base as cuda
 ARG BASE_CUDA_VERSION=10.2
 # Install CUDA
@@ -58,13 +60,6 @@ FROM base as intel
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 
-# EPEL for cmake
-FROM base as patchelf
-# Install patchelf
-ADD ./common/install_patchelf.sh install_patchelf.sh
-RUN bash ./install_patchelf.sh && rm install_patchelf.sh
-RUN cp $(which patchelf) /patchelf
-
 FROM base as magma
 ARG BASE_CUDA_VERSION=10.2
 # Install magma
@@ -142,9 +137,12 @@ RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_
 ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 
-# cmake
-RUN yum install -y cmake3 && \
-    ln -s /usr/bin/cmake3 /usr/bin/cmake
+# cmake is already installed inside the rocm base image, so remove if present
+RUN rpm -e cmake || true
+# cmake-3.18.4 from pip
+RUN yum install -y python3-pip && \
+    python3 -mpip install cmake==3.18.4 && \
+    ln -s /usr/local/bin/cmake /usr/bin/cmake
 
 # ninja
 RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-5.el7.noarch.rpm
@@ -155,7 +153,7 @@ RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 
-FROM common as rocm_final
+FROM cpu_final as rocm_final
 ARG ROCM_VERSION=3.7
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
@@ -166,3 +164,5 @@ ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
+# cmake3 is needed for the MIOpen build
+RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
diff --git a/manywheel/Dockerfile_cxx11-abi b/manywheel/Dockerfile_cxx11-abi
index 966d570869..a5b0673e97 100644
--- a/manywheel/Dockerfile_cxx11-abi
+++ b/manywheel/Dockerfile_cxx11-abi
@@ -13,7 +13,6 @@ RUN yum -y update
 RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel
 RUN yum install -y autoconf automake make cmake gdb gcc gcc-c++
 
-
 FROM base as openssl
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh
index d50eea49d7..395f71be36 100644
--- a/manywheel/build_all_docker.sh
+++ b/manywheel/build_all_docker.sh
@@ -9,12 +9,12 @@ MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh"
 
 GPU_ARCH_TYPE=cpu-cxx11-abi "${TOPDIR}/manywheel/build_docker.sh"
 
-for cuda_version in 11.5 11.3 10.2; do
+for cuda_version in 11.7 11.6; do
     GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh"
     MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh"
 done
 
-for rocm_version in 5.1.1 5.2; do
+for rocm_version in 5.3 5.4.2; do
     GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
     MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
 done
diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
index 878d81628b..cc56e695a4 100644
--- a/manywheel/build_common.sh
+++ b/manywheel/build_common.sh
@@ -371,15 +371,15 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip;
 
     # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
     find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib'
-        $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile
+        echo "Setting rpath of $sofile to ${C_SO_RPATH:-\$ORIGIN:\$ORIGIN/lib}"
+        $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile
         $PATCHELF_BIN --print-rpath $sofile
     done
 
     # set RPATH of lib/ files to $ORIGIN
     find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to " '$ORIGIN'
-        $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile
+        echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-\$ORIGIN}"
+        $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile
         $PATCHELF_BIN --print-rpath $sofile
     done
 
@@ -387,10 +387,10 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip;
     record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
     if [[ -e $record_file ]]; then
         echo "Generating new record file $record_file"
-        rm -f $record_file
+        : > "$record_file"
         # generate records for folders in wheel
         find * -type f | while read fname; do
-            echo $(make_wheel_record $fname) >>$record_file
+            make_wheel_record "$fname" >>"$record_file"
         done
     fi
 
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index efea1ae93d..6b5cd91117 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -58,12 +58,12 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
 
 TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0"
 case ${CUDA_VERSION} in
-    11.[3567])
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6"
+    11.8)
+        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0"
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
-    10.*)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
+    11.[67])
+        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6"
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
     *)
@@ -108,96 +108,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
     LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
 fi
 
-if [[ $CUDA_VERSION == "10.2" ]]; then
-DEPS_LIST=(
-    "/usr/local/cuda/lib64/libcudart.so.10.2"
-    "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-    "/usr/local/cuda/lib64/libnvrtc.so.10.2"
-    "/usr/local/cuda/lib64/libnvrtc-builtins.so"
-    "/usr/local/cuda/lib64/libcublas.so.10"
-    "/usr/local/cuda/lib64/libcublasLt.so.10"
-    "$LIBGOMP_PATH"
-)
-
-DEPS_SONAME=(
-    "libcudart.so.10.2"
-    "libnvToolsExt.so.1"
-    "libnvrtc.so.10.2"
-    "libnvrtc-builtins.so"
-    "libcublas.so.10"
-    "libcublasLt.so.10"
-    "libgomp.so.1"
-)
-elif [[ $CUDA_VERSION == "11.3" ]]; then
-export USE_STATIC_CUDNN=0
-DEPS_LIST=(
-    "/usr/local/cuda/lib64/libcudart.so.11.0"
-    "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-    "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake for 11.3, it links to 11.3.58
-    "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.3"
-    "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn.so.8"
-    "/usr/local/cuda/lib64/libcublas.so.11"
-    "/usr/local/cuda/lib64/libcublasLt.so.11"
-    "$LIBGOMP_PATH"
-)
-
-DEPS_SONAME=(
-    "libcudart.so.11.0"
-    "libnvToolsExt.so.1"
-    "libnvrtc.so.11.2"
-    "libnvrtc-builtins.so.11.3"
-    "libcudnn_adv_infer.so.8"
-    "libcudnn_adv_train.so.8"
-    "libcudnn_cnn_infer.so.8"
-    "libcudnn_cnn_train.so.8"
-    "libcudnn_ops_infer.so.8"
-    "libcudnn_ops_train.so.8"
-    "libcudnn.so.8"
-    "libcublas.so.11"
-    "libcublasLt.so.11"
-    "libgomp.so.1"
-)
-elif [[ $CUDA_VERSION == "11.5" ]]; then
-export USE_STATIC_CUDNN=0
-DEPS_LIST=(
-    "/usr/local/cuda/lib64/libcudart.so.11.0"
-    "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-    "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake for 11.5, it links to 11.5.50
-    "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.5"
-    "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn.so.8"
-    "/usr/local/cuda/lib64/libcublas.so.11"
-    "/usr/local/cuda/lib64/libcublasLt.so.11"
-    "$LIBGOMP_PATH"
-)
-DEPS_SONAME=(
-    "libcudart.so.11.0"
-    "libnvToolsExt.so.1"
-    "libnvrtc.so.11.2"
-    "libnvrtc-builtins.so.11.5"
-    "libcudnn_adv_infer.so.8"
-    "libcudnn_adv_train.so.8"
-    "libcudnn_cnn_infer.so.8"
-    "libcudnn_cnn_train.so.8"
-    "libcudnn_ops_infer.so.8"
-    "libcudnn_ops_train.so.8"
-    "libcudnn.so.8"
-    "libcublas.so.11"
-    "libcublasLt.so.11"
-    "libgomp.so.1"
-)
-elif [[ $CUDA_VERSION == "11.6" ]]; then
+if [[ $CUDA_VERSION == "11.6" ]]; then
 export USE_STATIC_CUDNN=0
 DEPS_LIST=(
     "/usr/local/cuda/lib64/libcudart.so.11.0"
@@ -231,48 +142,101 @@ DEPS_SONAME=(
     "libcublasLt.so.11"
     "libgomp.so.1"
 )
-elif [[ $CUDA_VERSION == "11.7" ]]; then
-export USE_STATIC_CUDNN=0
-DEPS_LIST=(
-    "/usr/local/cuda/lib64/libcudart.so.11.0"
-    "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-    "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake for 11.7, it links to 11.7.50
-    "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
-    "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
-    "/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
-    "/usr/local/cuda/lib64/libcudnn.so.8"
-    "/usr/local/cuda/lib64/libcublas.so.11"
-    "/usr/local/cuda/lib64/libcublasLt.so.11"
-    "$LIBGOMP_PATH"
-)
-DEPS_SONAME=(
-    "libcudart.so.11.0"
-    "libnvToolsExt.so.1"
-    "libnvrtc.so.11.2"
-    "libnvrtc-builtins.so.11.7"
-    "libcudnn_adv_infer.so.8"
-    "libcudnn_adv_train.so.8"
-    "libcudnn_cnn_infer.so.8"
-    "libcudnn_cnn_train.so.8"
-    "libcudnn_ops_infer.so.8"
-    "libcudnn_ops_train.so.8"
-    "libcudnn.so.8"
-    "libcublas.so.11"
-    "libcublasLt.so.11"
-    "libgomp.so.1"
-)
+elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
+    export USE_STATIC_CUDNN=0
+    # Try parallelizing nvcc as well
+    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
+    DEPS_LIST=(
+        "$LIBGOMP_PATH"
+    )
+    DEPS_SONAME=(
+        "libgomp.so.1"
+    )
 
-# Try parallelizing nvcc as well
-export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
+    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
+        echo "Bundling with cudnn and cublas."
+        DEPS_LIST+=(
+            "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
+            "/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
+            "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
+            "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
+            "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
+            "/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
+            "/usr/local/cuda/lib64/libcudnn.so.8"
+            "/usr/local/cuda/lib64/libcublas.so.11"
+            "/usr/local/cuda/lib64/libcublasLt.so.11"
+            "/usr/local/cuda/lib64/libcudart.so.11.0"
+            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
+            "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake, it links to more specific cuda version
+        )
+        DEPS_SONAME+=(
+            "libcudnn_adv_infer.so.8"
+            "libcudnn_adv_train.so.8"
+            "libcudnn_cnn_infer.so.8"
+            "libcudnn_cnn_train.so.8"
+            "libcudnn_ops_infer.so.8"
+            "libcudnn_ops_train.so.8"
+            "libcudnn.so.8"
+            "libcublas.so.11"
+            "libcublasLt.so.11"
+            "libcudart.so.11.0"
+            "libnvToolsExt.so.1"
+            "libnvrtc.so.11.2"
+        )
+        if [[ $CUDA_VERSION == "11.7" ]]; then
+            DEPS_LIST+=(
+                "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
+            )
+            DEPS_SONAME+=(
+                "libnvrtc-builtins.so.11.7"
+            )
+        fi
+        if [[ $CUDA_VERSION == "11.8" ]]; then
+            DEPS_LIST+=(
+                "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
+            )
+            DEPS_SONAME+=(
+                "libnvrtc-builtins.so.11.8"
+            )
+        fi
+    else
+        echo "Using nvidia libs from pypi."
+        CUDA_RPATHS=(
+            '$ORIGIN/../../nvidia/cublas/lib'
+            '$ORIGIN/../../nvidia/cuda_cupti/lib'
+            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
+            '$ORIGIN/../../nvidia/cuda_runtime/lib'
+            '$ORIGIN/../../nvidia/cudnn/lib'
+            '$ORIGIN/../../nvidia/cufft/lib'
+            '$ORIGIN/../../nvidia/curand/lib'
+            '$ORIGIN/../../nvidia/cusolver/lib'
+            '$ORIGIN/../../nvidia/cusparse/lib'
+            '$ORIGIN/../../nvidia/nccl/lib'
+            '$ORIGIN/../../nvidia/nvtx/lib'
+        )
+        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")  # join the entries with ':' (the subshell keeps the IFS change local)
+        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'  # single quotes keep $ORIGIN literal
+        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
+        export FORCE_RPATH="--force-rpath"
+        export USE_STATIC_NCCL=0
+        export USE_SYSTEM_NCCL=1
+        export ATEN_STATIC_CUDA=0
+        export USE_CUDA_STATIC_LINK=0
+        export USE_CUPTI_SO=1
+        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
+        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
+    fi
 else
     echo "Unknown cuda version $CUDA_VERSION"
     exit 1
 fi
 
+# On Linux, default the extra install requirement to the pinned pytorch-triton build. TODO: Remove me when Triton has a proper release channel
+if [[ $(uname) == "Linux" && -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
+    TRITON_SHORTHASH=$(cut -c1-10 "$PYTORCH_ROOT/.github/ci_commit_pins/triton.txt")  # first 10 characters of the pin file
+    export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton==2.1.0+${TRITON_SHORTHASH}"
+fi
+
 # builder/test.sh requires DESIRED_CUDA to know what tests to exclude
 export DESIRED_CUDA="$cuda_version_nodot"
 
diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh
index 1b8b04e706..9b0480210f 100755
--- a/manywheel/build_docker.sh
+++ b/manywheel/build_docker.sh
@@ -56,7 +56,7 @@ case ${GPU_ARCH_TYPE} in
         if [[ $ROCM_VERSION_INT -ge 40300 ]]; then
             PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030"
         fi
-        DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
+        DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
         ;;
     *)
         echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
diff --git a/manywheel/build_libtorch.sh b/manywheel/build_libtorch.sh
index 855d4bcc83..32e0f7a1a1 100644
--- a/manywheel/build_libtorch.sh
+++ b/manywheel/build_libtorch.sh
@@ -65,13 +65,11 @@ fi
 # ever pass one python version, so we assume that DESIRED_PYTHON is not a list
 # in this case
 if [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
-    if [[ "$DESIRED_PYTHON" == '2.7mu' ]]; then
-      DESIRED_PYTHON='cp27-cp27mu'
-    elif [[ "$DESIRED_PYTHON" == '3.8m' ]]; then
-      DESIRED_PYTHON='cp38-cp38'
+    if [[ "$DESIRED_PYTHON" == '3.7' ]]; then
+      DESIRED_PYTHON='cp37-cp37m'
     else
       python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
-      DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}m"
+      DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
     fi
 fi
 pydir="/opt/python/$DESIRED_PYTHON"
diff --git a/release/promote.sh b/release/promote.sh
index 984788e42b..1147dc0c98 100644
--- a/release/promote.sh
+++ b/release/promote.sh
@@ -6,10 +6,11 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 source "${DIR}/release_versions.sh"
 
 # Make sure to update these versions when doing a release first
-PYTORCH_VERSION=${PYTORCH_VERSION:-1.12.0}
-TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.13.0}
-TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-0.12.0}
-TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.13.0}
+PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0}
+TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0}
+TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0}
+TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0}
+TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0}
 
 DRY_RUN=${DRY_RUN:-enabled}
 
@@ -70,16 +71,48 @@ promote_pypi() {
     echo
 }
 
+# Promote s3 dependencies
+# promote_s3 "certifi" whl "2022.12.7"
+# promote_s3 "charset_normalizer" whl "2.1.1"
+# promote_s3 "cmake" whl "3.25"
+# promote_s3 "colorama" whl "0.4.6"
+# promote_s3 "triton" whl "2.0.0"
+# promote_s3 "pytorch_triton_rocm" whl "2.0.1"
+# promote_s3 "tqdm" whl "4.64.1"
+# promote_s3 "Pillow" whl "9.3.0"
+# for python 3.8-3.11
+# promote_s3 "numpy" whl "1.24.1"
+# for python 3.7 older pytorch versions
+# promote_s3 "numpy" whl "1.21.6"
+# promote_s3 "urllib3" whl "1.26.13"
+# promote_s3 "lit" whl "15.0.7"
+# promote_s3 "sympy" whl "1.11.1"
+# promote_s3 "typing_extensions" whl "4.4.0"
+# promote_s3 "filelock" whl "3.9.0"
+# promote_s3 "mpmath" whl "1.2.1"
+# promote_s3 "MarkupSafe" whl "2.1.2"
+# promote_s3 "Jinja2" whl "3.1.2"
+# promote_s3 "idna" whl "3.4"
+# promote_s3 "networkx" whl "3.0"
+# promote_s3 "packaging" whl "22.0"
+# promote_s3 "requests" whl "2.28.1"
+
 # promote_s3 torch whl "${PYTORCH_VERSION}"
 # promote_s3 torchvision whl "${TORCHVISION_VERSION}"
 # promote_s3 torchaudio whl "${TORCHAUDIO_VERSION}"
 # promote_s3 torchtext whl "${TORCHTEXT_VERSION}"
+# promote_s3 torchdata whl "${TORCHDATA_VERSION}"
 # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}"
 
+# promote_conda torchtriton conda "2.0.0"
+# promote_conda pytorch-cuda conda "11.7"
+# promote_conda pytorch-cuda conda "11.8"
+
 # promote_conda pytorch conda "${PYTORCH_VERSION}"
 # promote_conda torchvision conda "${TORCHVISION_VERSION}"
 # promote_conda torchaudio conda "${TORCHAUDIO_VERSION}"
 # promote_conda torchtext conda "${TORCHTEXT_VERSION}"
+# promote_conda torchdata conda "${TORCHDATA_VERSION}"
 
 # Uncomment these to promote to pypi
 LINUX_VERSION_SUFFIX="%2Bcu102"
diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh
old mode 100644
new mode 100755
index 201e4b9ac5..fdd9bf4a0e
--- a/release/pypi/prep_binary_for_pypi.sh
+++ b/release/pypi/prep_binary_for_pypi.sh
@@ -12,6 +12,19 @@
 set -eou pipefail
 shopt -s globstar
 
+# Print the wheel RECORD line for file $1 as "path,sha256=<url-safe base64 digest, unpadded>,size" (copied from manywheel/build_common.sh)
+make_wheel_record() {
+    local FPATH=$1 HASH FSIZE  # keep the scratch variables out of the caller's scope
+    if [[ "$FPATH" == *RECORD* ]]; then
+        # the RECORD file lists itself with empty hash and size fields
+        echo "$FPATH,,"
+    else
+        HASH=$(openssl dgst -sha256 -binary "$FPATH" | openssl base64 | sed -e 's/+/-/g' -e 's/\//_/g' -e 's/=//g')
+        FSIZE=$(ls -nl "$FPATH" | awk '{print $5}')
+        echo "$FPATH,sha256=$HASH,$FSIZE"
+    fi
+}
+
 OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)}
 
 tmp_dir="$(mktemp -d)"
@@ -27,8 +40,9 @@ for whl_file in "$@"; do
         set -x
         unzip -q "${whl_file}" -d "${whl_dir}"
     )
-    version_with_suffix=$(grep '^Version:' "${whl_dir}"/*/METADATA | cut -d' ' -f2)
+    version_with_suffix=$(grep '^Version:' "${whl_dir}"/*/METADATA | cut -d' ' -f2 | tr -d "[:space:]")
     version_with_suffix_escaped=${version_with_suffix/+/%2B}
+
     # Remove all suffixed +bleh versions
     version_no_suffix=${version_with_suffix/+*/}
     new_whl_file=${OUTPUT_DIR}/$(basename "${whl_file/${version_with_suffix_escaped}/${version_no_suffix}}")
@@ -37,11 +51,37 @@ for whl_file in "$@"; do
     dirname_dist_info_folder=$(dirname "${dist_info_folder}")
     (
         set -x
+
+        # Special build with pypi cudnn: drop caffe2 and the bundled libnvrtc libs, and strip the suffix from the version
+        if [[ $whl_file == *"with.pypi.cudnn"* ]]; then
+            rm -rf "${whl_dir}/caffe2"
+            rm -rf "${whl_dir}"/torch/lib/libnvrtc*
+
+            sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py"
+        fi
+
         find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;
         # Moves distinfo from one with a version suffix to one without
         # Example: torch-1.8.0+cpu.dist-info => torch-1.8.0.dist-info
         mv "${dist_info_folder}" "${dirname_dist_info_folder}/${basename_dist_info_folder/${version_with_suffix}/${version_no_suffix}}"
         cd "${whl_dir}"
-        zip -qr "${new_whl_file}" .
+
+        (
+            set +x
+            # copied from manywheel/build_common.sh
+            # the edits above changed file contents, so regenerate the RECORD file with new hashes
+            record_file="${dirname_dist_info_folder}/${basename_dist_info_folder/${version_with_suffix}/${version_no_suffix}}/RECORD"
+            if [[ -e "$record_file" ]]; then
+                echo "Generating new record file $record_file"
+                : > "$record_file"
+                # one record per file; bare 'find *' keeps paths relative to the wheel root (cwd) without a './' prefix
+                find * -type f | while IFS= read -r fname; do
+                    make_wheel_record "$fname" >>"$record_file"
+                done
+            fi
+        )
+
+        rm -rf "${new_whl_file}"
+        zip -qr9 "${new_whl_file}" .
     )
 done
diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh
index 02ebe4833c..74f139680e 100644
--- a/release/pypi/promote_pypi_to_staging.sh
+++ b/release/pypi/promote_pypi_to_staging.sh
@@ -21,16 +21,17 @@ upload_pypi_to_staging() {
 }
 
 # Uncomment these to promote to pypi
-LINUX_VERSION_SUFFIX="%2Bcu102"
+PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu117.with.pypi.cudnn"
+LINUX_VERSION_SUFFIX="%2Bcu117"
 WIN_VERSION_SUFFIX="%2Bcpu"
 MACOS_X86_64="macosx_.*_x86_64"
 MACOS_ARM64="macosx_.*_arm64"
 
-PLATFORM="linux_x86_64"          VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}"
-PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX=""                        upload_pypi_to_staging torch "${PYTORCH_VERSION}"
-PLATFORM="win_amd64"             VERSION_SUFFIX="${WIN_VERSION_SUFFIX}"   upload_pypi_to_staging torch "${PYTORCH_VERSION}"
-PLATFORM="${MACOS_X86_64}"       VERSION_SUFFIX=""                        upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac
-PLATFORM="${MACOS_ARM64}"        VERSION_SUFFIX=""                        upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac
+PLATFORM="linux_x86_64"          VERSION_SUFFIX="${PYTORCH_LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}"
+PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX=""                                upload_pypi_to_staging torch "${PYTORCH_VERSION}"
+PLATFORM="win_amd64"             VERSION_SUFFIX="${WIN_VERSION_SUFFIX}"           upload_pypi_to_staging torch "${PYTORCH_VERSION}"
+PLATFORM="${MACOS_X86_64}"       VERSION_SUFFIX=""                                upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac
+PLATFORM="${MACOS_ARM64}"        VERSION_SUFFIX=""                                upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac
 
 PLATFORM="linux_x86_64"          VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}"
 PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX=""                        upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}"
diff --git a/release/release_versions.sh b/release/release_versions.sh
index 95ebfa363b..f0db2a0895 100644
--- a/release/release_versions.sh
+++ b/release/release_versions.sh
@@ -1,7 +1,8 @@
 #!/usr/bin/env bash
 
 # Make sure to update these versions when doing a release first
-PYTORCH_VERSION=${PYTORCH_VERSION:-1.12.0}
-TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.13.0}
-TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-0.12.0}
-TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.13.0}
+PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0}
+TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0}
+TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0}
+TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0}
+TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0}
diff --git a/run_tests.sh b/run_tests.sh
index 18b00f00b3..fd66835e23 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -72,21 +72,6 @@ fi
 
 # Environment initialization
 if [[ "$package_type" == conda || "$(uname)" == Darwin ]]; then
-    # Why are there two different ways to install dependencies after installing an offline package?
-    # The "cpu" conda package for pytorch doesn't actually depend on "cpuonly" which means that
-    # when we attempt to update dependencies using "conda update --all" it will attempt to install
-    # whatever "cudatoolkit" your current computer relies on (which is sometimes none). When conda
-    # tries to install this cudatoolkit that correlates with your current hardware it will also
-    # overwrite the currently installed "local" pytorch package meaning you aren't actually testing
-    # the right package.
-    # TODO (maybe): Make the "cpu" package of pytorch depend on "cpuonly"
-    if [[ "$cuda_ver" = 'cpu' ]]; then
-      # Installing cpuonly will also install dependencies as well
-      retry conda install -y -c pytorch cpuonly
-    else
-      # Install dependencies from installing the pytorch conda package offline
-      retry conda update -yq --all -c defaults -c pytorch -c numba/label/dev
-    fi
     # Install the testing dependencies
     retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml
 else
@@ -140,15 +125,21 @@ python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else
 
 # Test that CUDA builds are setup correctly
 if [[ "$cuda_ver" != 'cpu' ]]; then
-    # Test CUDA archs
-    echo "Checking that CUDA archs are setup correctly"
-    timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
-
-    # These have to run after CUDA is initialized
-    echo "Checking that magma is available"
-    python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
-    echo "Checking that CuDNN is available"
-    python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
+    cuda_installed=1
+    nvidia-smi || cuda_installed=0
+    if [[ "$cuda_installed" == 0 ]]; then
+      echo "Skip CUDA tests for machines without a Nvidia GPU card"
+    else
+      # Test CUDA archs
+      echo "Checking that CUDA archs are setup correctly"
+      timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
+
+      # These have to run after CUDA is initialized
+      echo "Checking that magma is available"
+      python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
+      echo "Checking that CuDNN is available"
+      python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
+    fi
 fi
 
 # Check that OpenBlas is not linked to on Macs
diff --git a/s3_management/backup_conda.py b/s3_management/backup_conda.py
index a75c23407d..06926589d3 100644
--- a/s3_management/backup_conda.py
+++ b/s3_management/backup_conda.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
-# Downloads domain library packages from channel
+# Downloads pytorch and domain library packages from channel
 # And backs them up to S3
 # Do not use unless you know what you are doing
+# Usage:  python backup_conda.py --version 1.6.0
 
 import conda.api
 import boto3
@@ -9,6 +10,7 @@
 import urllib
 import os
 import hashlib
+import argparse
 
 S3 = boto3.resource('s3')
 BUCKET = S3.Bucket('pytorch-backup')
@@ -23,11 +25,13 @@ def compute_md5(path:str) -> str:
 def download_conda_package(package:str, version:Optional[str] = None, depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]:
     packages = conda.api.SubdirData.query_all(package, channels = [channel] if channel is not None else None, subdirs = _known_subdirs)
     rc = []
+
     for pkg in packages:
         if version is not None and pkg.version != version:
             continue
         if depends is not None and depends not in pkg.depends:
             continue
+
         print(f"Downloading {pkg.url}...")
         os.makedirs(pkg.subdir, exist_ok = True)
         fname = f"{pkg.subdir}/{pkg.fn}"
@@ -50,6 +54,18 @@ def upload_to_s3(prefix: str, fnames: List[str]) -> None:
 
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--version",
+        help="PyTorch Version to backup",
+        type=str,
+        required = True
+    )
+    options = parser.parse_args()
+    rc = download_conda_package("pytorch", channel = "pytorch", version = options.version)
+    upload_to_s3(f"v{options.version}/conda", rc)
+
     for libname in ["torchvision", "torchaudio", "torchtext"]:
-        rc = download_conda_package(libname, channel = "pytorch", depends = "pytorch 1.9.0")
-        upload_to_s3("v1.9.0-rc4/conda", rc)
+        print(f"processing {libname}")
+        rc = download_conda_package(libname, channel = "pytorch", depends = f"pytorch {options.version}")
+        upload_to_s3(f"v{options.version}/conda", rc)
diff --git a/s3_management/manage.py b/s3_management/manage.py
index 6b6d0c6faa..15b37cf3da 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -1,16 +1,15 @@
 #!/usr/bin/env python
 
 import argparse
-import tempfile
 import time
 
 from os import path, makedirs
+from datetime import datetime
 from collections import defaultdict
 from typing import Iterator, List, Type, Dict, Set, TypeVar, Optional
-from re import sub, match
+from re import sub, match, search
 from packaging.version import parse
 
-import botocore
 import boto3
 
 
@@ -18,7 +17,7 @@
 CLIENT = boto3.client('s3')
 BUCKET = S3.Bucket('pytorch')
 
-ACCEPTED_FILE_EXTENSIONS = ("whl", "zip")
+ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz")
 ACCEPTED_SUBDIR_PATTERNS = [
     r"cu[0-9]+",           # for cuda
     r"rocm[0-9]+\.[0-9]+", # for rocm
@@ -31,11 +30,71 @@
     "whl/test": "torch_test.html",
 }
 
+# NOTE: This refers to the name on the wheels themselves and not the name of
+# package as specified by setuptools, for packages with "-" (hyphens) in their
+# names you need to convert them to "_" (underscores) in order for them to be
+# allowed here since the name of the wheels is compared here
+PACKAGE_ALLOW_LIST = {
+    "Pillow",
+    "certifi",
+    "charset_normalizer",
+    "cmake",
+    "colorama",
+    "filelock",
+    "idna",
+    "Jinja2",
+    "lit",
+    "MarkupSafe",
+    "mpmath",
+    "nestedtensor",
+    "networkx",
+    "numpy",
+    "packaging",
+    "portalocker",
+    "pytorch_triton",
+    "pytorch_triton_rocm",
+    "requests",
+    "sympy",
+    "torch",
+    "torcharrow",
+    "torchaudio",
+    "torchcsprng",
+    "torchdata",
+    "torchdistx",
+    "torchrec",
+    "torchtext",
+    "torchvision",
+    "triton",
+    "tqdm",
+    "typing_extensions",
+    "urllib3",
+}
+
+# Should match torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl as:
+# Group 1: torch-2.0.0.dev (letters/underscores, "-", dotted version, then a literal ".dev")
+# Group 2: 20221221
+PACKAGE_DATE_REGEX = r"([a-zA-Z_]*-[0-9.]*\.dev)([0-9]*)"
+
 # How many packages should we keep of a specific package?
 KEEP_THRESHOLD = 60
 
 S3IndexType = TypeVar('S3IndexType', bound='S3Index')
 
+def extract_package_build_time(full_package_name: str) -> datetime:
+    result = search(PACKAGE_DATE_REGEX, full_package_name)
+    if result is not None:
+        try:
+            return datetime.strptime(result.group(2), "%Y%m%d")  # group 2 holds the digits that follow ".dev"
+        except ValueError:
+            # Unparsable date: ignore it and fall back to the current time; such packages probably shouldn't be hidden anyway
+            pass
+    return datetime.now()  # no parsable nightly date in the name
+
+def between_bad_dates(package_build_time: datetime) -> bool:
+    start_bad = datetime(year=2022, month=8, day=17)
+    end_bad = datetime(year=2022, month=12, day=30)
+    return start_bad <= package_build_time <= end_bad  # inclusive at both ends
+
 
 class S3Index:
     def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None:
@@ -70,9 +129,17 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]:
         packages: Dict[str, int] = defaultdict(int)
         to_hide: Set[str] = set()
         for obj in all_sorted_packages:
-            package_name = path.basename(obj).split('-')[0]
+            full_package_name = path.basename(obj)
+            package_name = full_package_name.split('-')[0]
+            package_build_time = extract_package_build_time(full_package_name)
+            # Hard pass on packages that are not included in our allow list
+            if package_name not in PACKAGE_ALLOW_LIST:
+                to_hide.add(obj)
+                continue
             if packages[package_name] >= KEEP_THRESHOLD:
                 to_hide.add(obj)
+            elif between_bad_dates(package_build_time):
+                to_hide.add(obj)
             else:
                 packages[package_name] += 1
         return set(self.objects).difference({
@@ -162,7 +229,7 @@ def to_simple_package_html(
         out.append('<!DOCTYPE html>')
         out.append('<html>')
         out.append('  <body>')
-        out.append('    <h1>Links for {}</h1>'.format(package_name))
+        out.append('    <h1>Links for {}</h1>'.format(package_name.lower().replace("_","-")))
         for obj in sorted(self.gen_file_list(subdir, package_name)):
             out.append(f'    <a href="/{obj}">{path.basename(obj).replace("%2B","+")}</a><br/>')
         # Adding html footer
@@ -183,7 +250,7 @@ def to_simple_packages_html(
         out.append('<html>')
         out.append('  <body>')
         for pkg_name in sorted(self.get_package_names(subdir)):
-            out.append(f'    <a href="{pkg_name}/">{pkg_name}</a><br/>')
+            out.append(f'    <a href="{pkg_name.lower().replace("_","-")}/">{pkg_name.replace("_","-")}</a><br/>')  # href is lowercased to match the key the per-package index is uploaded under
         # Adding html footer
         out.append('  </body>')
         out.append('</html>')
@@ -214,9 +281,10 @@ def upload_pep503_htmls(self) -> None:
                 Body=self.to_simple_packages_html(subdir=subdir)
             )
             for pkg_name in self.get_package_names(subdir=subdir):
-                print(f"INFO Uploading {subdir}/{pkg_name}/index.html")
+                compat_pkg_name = pkg_name.lower().replace("_", "-")
+                print(f"INFO Uploading {subdir}/{compat_pkg_name}/index.html")
                 BUCKET.Object(
-                    key=f"{subdir}/{pkg_name}/index.html"
+                    key=f"{subdir}/{compat_pkg_name}/index.html"
                 ).put(
                     ACL='public-read',
                     CacheControl='no-cache,no-store,must-revalidate',
diff --git a/s3_management/requirements.txt b/s3_management/requirements.txt
index 86199dbc6e..d9fe7f1f00 100644
--- a/s3_management/requirements.txt
+++ b/s3_management/requirements.txt
@@ -1,2 +1,2 @@
-boto3
-packaging
+boto3==1.12.7
+packaging==21.3
diff --git a/smoke_test.sh b/smoke_test.sh
deleted file mode 100755
index e2459b49d6..0000000000
--- a/smoke_test.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-set -eux -o pipefail
-SOURCE_DIR=$(cd $(dirname $0) && pwd)
-
-# This is meant to be run in either a docker image or in a Mac. This assumes an
-# environment that will be teared down after execution is finishes, so it will
-# probably mess up what environment it runs in.
-
-# This is now only meant to be run in CircleCI, after calling the
-# .circleci/scripts/binary_populate_env.sh . You can call this manually if you
-# make sure all the needed variables are still populated.
-
-# Function to retry functions that sometimes timeout or have flaky failures
-retry () {
-    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
-
-if ! [ -x "$(command -v curl)" ]; then
-    if [ -f /etc/lsb-release ]; then
-      # TODO: Remove this once nvidia package repos are back online
-      # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
-      # shellcheck disable=SC2046
-      sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
-
-      apt-get update
-      apt-get install -y curl
-    fi
-fi
-
-# Use today's date if none is given
-if [[ -z "${DATE:-}" || "${DATE:-}" == 'today' ]]; then
-    DATE="$(date +%Y%m%d)"
-fi
-
-# DESIRED_PYTHON is in format 2.7m?u?
-# DESIRED_CUDA is in format cu80 (or 'cpu')
-
-if [[ "$DESIRED_CUDA" == cpu ]]; then
-  export USE_CUDA=0
-else
-  export USE_CUDA=1
-fi
-
-# Generate M.m formats for CUDA and Python versions
-if [[ "$DESIRED_CUDA" != cpu ]]; then
-  cuda_dot="$(echo $DESIRED_CUDA | tr -d 'cpu')"
-  if [[ "${#cuda_dot}" == 2 ]]; then
-    cuda_dot="${cuda_dot:0:1}.${cuda_dot:1}"
-  else
-    cuda_dot="${cuda_dot:0:2}.${cuda_dot:2}"
-  fi
-fi
-py_dot="${DESIRED_PYTHON:0:3}"
-
-# Generate "long" python versions cp27-cp27mu
-py_long="cp${DESIRED_PYTHON:0:1}${DESIRED_PYTHON:2:1}-cp${DESIRED_PYTHON:0:1}${DESIRED_PYTHON:2}"
-# TODO: I know this is the wrong way to do this translation, we should probably fix it upstream, but this is the quickest way
-if [[ "${py_long}" = "cp38-cp38m" ]]; then
-  py_long="cp38-cp38"
-fi
-
-# Determine package name
-if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
-  if [[ "$(uname)" == Darwin ]]; then
-    libtorch_variant='macos'
-  elif [[ -z "${LIBTORCH_VARIANT:-}" ]]; then
-    echo "No libtorch variant given. This smoke test does not know which zip"
-    echo "to download."
-    exit 1
-  else
-    libtorch_variant="$LIBTORCH_VARIANT"
-  fi
-  if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
-      LIBTORCH_ABI="cxx11-abi-"
-  else
-      LIBTORCH_ABI=
-  fi
-  if [[ "$DESIRED_CUDA" == 'cu102' || "$libtorch_variant" == 'macos' ]]; then
-    package_name="libtorch-$LIBTORCH_ABI$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}.zip"
-  else
-    package_name="libtorch-$LIBTORCH_ABI$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}%2B${DESIRED_CUDA}.zip"
-  fi
-
-elif [[ "$PACKAGE_TYPE" == *wheel ]]; then
-  package_name='torch'
-else
-  package_name='pytorch'
-fi
-if [[ "$(uname)" == 'Darwin' ]] || [[ "$DESIRED_CUDA" == "cu102" ]] || [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-  package_name_and_version="${package_name}==${NIGHTLIES_DATE_PREAMBLE}${DATE}"
-else
-  # Linux binaries have the cuda version appended to them. This is only on
-  # linux, since all macos builds are cpu.  (NB: We also omit
-  # DESIRED_CUDA if it's the default)
-  package_name_and_version="${package_name}==${NIGHTLIES_DATE_PREAMBLE}${DATE}+${DESIRED_CUDA}"
-fi
-
-# Switch to the desired python
-if [[ "$PACKAGE_TYPE" == 'conda' || "$(uname)" == 'Darwin' ]]; then
-  # Create a new conda env in conda, or on MacOS
-  conda create -yn test python="$py_dot" && source activate test
-  python_version=$(python --version 2>&1)
-  dependencies="numpy protobuf six requests"
-  case ${python_version} in
-    *3.6.*)
-      dependencies="${dependencies} future dataclasses"
-      ;;
-  esac
-  conda install -yq ${dependencies}
-else
-  export PATH=/opt/python/${py_long}/bin:$PATH
-  if [[ "$(python --version 2>&1)" == *3.6.* ]]; then
-    retry pip install -q future numpy protobuf six requests dataclasses
-  else
-    retry pip install -q future numpy protobuf six requests
-  fi
-fi
-
-# Switch to the desired CUDA if using the conda-cuda Docker image
-if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-  rm -rf /usr/local/cuda || true
-  if [[ "$DESIRED_CUDA" != 'cpu' ]]; then
-    ln -s "/usr/local/cuda-${cuda_dot}" /usr/local/cuda
-    export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130
-    export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0
-    export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)
-  fi
-fi
-
-# Print some debugging info
-python --version
-pip --version
-which python
-# If you are debugging packages not found then run these commands.
-#if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-#  conda search -c pytorch "$package_name"
-#elif [[ "$PACKAGE_TYPE" == *wheel ]]; then
-#  retry curl "https://download.pytorch.org/whl/nightly/$DESIRED_CUDA/torch_nightly.html" -v
-#fi
-
-# Install the package for the requested date
-if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
-  mkdir tmp_libtorch
-  pushd tmp_libtorch
-  libtorch_url="https://download.pytorch.org/libtorch/nightly/$DESIRED_CUDA/$package_name"
-  retry curl -o libtorch_zip "${libtorch_url}"
-  unzip -q libtorch_zip
-  cd libtorch
-elif [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-    if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
-	if [[ "$(uname)" == 'Darwin' ]]; then
-	    retry conda install -yq -c pytorch-nightly "$package_name_and_version"
-	else
-	    retry conda install -yq -c pytorch-nightly "$package_name_and_version" cpuonly
-	fi
-  else
-    retry conda install -yq -c pytorch-nightly "cudatoolkit=$CUDA_VERSION_SHORT" "$package_name_and_version"
-  fi
-else
-  # We need to upgrade pip now that we have '+cuver' in the package name, as
-  # old pips do not correctly change the '+' to '%2B' in the url and fail to
-  # find the package.
-  pip install --upgrade pip -q
-  pip_url="https://download.pytorch.org/whl/nightly/$DESIRED_CUDA/torch_nightly.html"
-  retry pip install "$package_name_and_version" \
-      -f "$pip_url" \
-      --no-cache-dir \
-      --no-index \
-      -q
-fi
-
-# Check that all conda features are working
-if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-  # Check that conda didn't change the Python version out from under us. Conda
-  # will do this if it didn't find the requested package for the current Python
-  # version and if nothing else has been installed in the current env.
-  if [[ -z "$(python --version 2>&1 | grep -o $py_dot)" ]]; then
-    echo "The Python version has changed to $(python --version)"
-    echo "Probably the package for the version we want does not exist"
-    echo '(conda will change the Python version even if it was explicitly declared)'
-    exit 1
-  fi
-
-  # Check that the CUDA feature is working
-  if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
-    if [[ -n "$(conda list torch | grep -o cuda)" ]]; then
-      echo "The installed package is built for CUDA:: $(conda list torch)"
-      exit 1
-    fi
-  elif [[ -z "$(conda list torch | grep -o cuda$cuda_dot)" ]]; then
-    echo "The installed package doesn't seem to be built for CUDA $cuda_dot"
-    echo "The full package is $(conda list torch)"
-    exit 1
-  fi
-fi
-
-"${SOURCE_DIR}/check_binary.sh"
diff --git a/test/smoke_test/assets/dog2.jpg b/test/smoke_test/assets/dog2.jpg
new file mode 100644
index 0000000000..528dfec720
Binary files /dev/null and b/test/smoke_test/assets/dog2.jpg differ
diff --git a/test/smoke_test/assets/rgb_pytorch.jpg b/test/smoke_test/assets/rgb_pytorch.jpg
new file mode 100644
index 0000000000..d49e658b94
Binary files /dev/null and b/test/smoke_test/assets/rgb_pytorch.jpg differ
diff --git a/test/smoke_test/assets/rgb_pytorch.png b/test/smoke_test/assets/rgb_pytorch.png
new file mode 100644
index 0000000000..c9d08e6c7d
Binary files /dev/null and b/test/smoke_test/assets/rgb_pytorch.png differ
diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py
index bae7a5d29d..1a55cfed72 100644
--- a/test/smoke_test/smoke_test.py
+++ b/test/smoke_test/smoke_test.py
@@ -1,49 +1,243 @@
 import os
+import re
 import sys
+from pathlib import Path
+import argparse
 import torch
-import torchvision
-import torchaudio
-
-def smoke_test_cuda() -> None:
-    gpu_arch_ver = os.getenv('GPU_ARCH_VER')
-    gpu_arch_type = os.getenv('GPU_ARCH_TYPE')
-    is_cuda_system = gpu_arch_type == "cuda"
-
-    if(not torch.cuda.is_available() and is_cuda_system):
-        print(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")
-        sys.exit(1)
-    if(torch.cuda.is_available()):
-        if(torch.version.cuda != gpu_arch_ver):
-            print(f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}")
-            sys.exit(1)
-        y=torch.randn([3,5]).cuda()
+import platform
+import importlib
+import subprocess
+import torch._dynamo
+import torch.nn as nn
+import torch.nn.functional as F
+
+gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION")  # expected CUDA version; compared with torch.version.cuda
+gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")  # the value "cuda" marks a CUDA system (see is_cuda_system)
+channel = os.getenv("MATRIX_CHANNEL")  # "nightly" selects the build-date check; other values the stable-version check
+stable_version = os.getenv("MATRIX_STABLE_VERSION")  # required prefix of torch.__version__ on non-nightly channels
+package_type = os.getenv("MATRIX_PACKAGE_TYPE")  # read here; not referenced by the functions in this file
+
+is_cuda_system = gpu_arch_type == "cuda"  # True only when MATRIX_GPU_ARCH_TYPE is exactly "cuda"
+SCRIPT_DIR = Path(__file__).parent  # directory containing this script
+NIGHTLY_ALLOWED_DELTA = 3  # age threshold, in days, applied to nightly build dates
+
+MODULES = [  # domain libraries checked alongside torch when the package option is "all"
+    {
+        "name": "torchvision",  # passed to importlib.import_module
+        "repo": "https://github.com/pytorch/vision.git",  # cloned with git before "smoke_test" is run
+        "smoke_test": "python ./vision/test/smoke_test.py",  # shell command executed by smoke_test_modules
+        "extension": "extension",  # makes smoke_test_cuda use imported_module.extension
+    },
+    {
+        "name": "torchaudio",
+        "repo": "https://github.com/pytorch/audio.git",
+        "smoke_test": "python ./audio/test/smoke_test/smoke_test.py --no-ffmpeg",
+        "extension": "_extension",  # any value other than "extension" makes smoke_test_cuda use imported_module._extension
+    },
+]
+
+class Net(nn.Module):
+    """Small conv -> conv -> max-pool -> linear network used by smoke_test_compile."""
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 32, 3, 1)
+        self.conv2 = nn.Conv2d(32, 64, 3, 1)
+        # 9216 = 64 channels * 12 * 12, the flattened size for a 1x28x28 input
+        self.fc1 = nn.Linear(9216, 1)
+
+    def forward(self, x):
+        """Return one output value per sample in the batch."""
+        features = self.conv2(self.conv1(x))
+        pooled = F.max_pool2d(features, 2)
+        return self.fc1(torch.flatten(pooled, 1))
+
+def check_version(package: str) -> None:
+    """Validate torch's version: nightly builds by date, other channels by stable-version prefix."""
+    # only makes sense to check nightly package where dates are known
+    if channel == "nightly":
+        check_nightly_binaries_date(package)
+    elif not torch.__version__.startswith(stable_version):
+        raise RuntimeError(
+            f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
+        )
+
+def check_nightly_binaries_date(package: str) -> None:
+    """Raise RuntimeError if torch (and, for package "all", each MODULES entry) has a stale dev date."""
+    from datetime import datetime
+    format_dt = '%Y%m%d'
+    dev_tag = re.compile(r"dev\d+")  # raw string: "\d" is an invalid escape in a plain literal
+    date_t_str = dev_tag.findall(torch.__version__)
+    date_t_delta = datetime.now() - datetime.strptime(date_t_str[0][3:], format_dt)
+    if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA:
+        raise RuntimeError(
+            f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!"
+        )
+
+    if(package == "all"):
+        for module in MODULES:
+            imported_module = importlib.import_module(module["name"])
+            module_version = imported_module.__version__
+            date_m_str = dev_tag.findall(module_version)
+            date_m_delta = datetime.now() - datetime.strptime(date_m_str[0][3:], format_dt)
+            print(f"Nightly date check for {module['name']} version {module_version}")
+            if date_m_delta.days > NIGHTLY_ALLOWED_DELTA:
+                raise RuntimeError(
+                    f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {date_m_delta}"
+                )
+
+def test_cuda_runtime_errors_captured() -> None:
+    """Check that the torch._assert_async calls below surface as a RuntimeError mentioning CUDA."""
+    try:
+        print("Testing test_cuda_runtime_errors_captured")
+        torch._assert_async(torch.tensor(0, device="cuda"))
+        torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
+    except RuntimeError as err:
+        # A RuntimeError that does not mention CUDA is unrelated; let it propagate.
+        if not re.search("CUDA", f"{err}"):
+            raise err
+        print(f"Caught CUDA exception with success: {err}")
+        return
+    # Reaching this point means no exception was raised inside the try block.
+    raise RuntimeError( "Expected CUDA RuntimeError but have not received!")
+
+def smoke_test_cuda(package: str) -> None:
+    """Check CUDA availability and version for torch and, for package "all", the MODULES extensions."""
+    if not torch.cuda.is_available() and is_cuda_system:
+        raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")
+
+    if is_cuda_system and package == 'all':
+        for module in MODULES:
+            imported_module = importlib.import_module(module["name"])
+            # TBD for vision move extension module to private so it will
+            # be _extension.
+            if module["extension"] == "extension":
+                ext = imported_module.extension
+            else:
+                ext = imported_module._extension
+            print(f"{module['name']} CUDA: {ext._check_cuda_version()}")
+
+    if torch.cuda.is_available():
+        if torch.version.cuda != gpu_arch_ver:
+            raise RuntimeError(
+                f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
+            )
         print(f"torch cuda: {torch.version.cuda}")
-        #todo add cudnn version validation
+        # todo add cudnn version validation
         print(f"torch cudnn: {torch.backends.cudnn.version()}")
+        print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")
+
+        # torch.compile is available only on Linux and python 3.8-3.10
+        if sys.platform in ("linux", "linux2") and sys.version_info < (3, 11, 0):
+            smoke_test_compile()
+
+        test_cuda_runtime_errors_captured()
+
+
+def smoke_test_conv2d() -> None:
+    """Construct several nn.Conv2d variants and run forward passes on CPU and, on CUDA systems, on GPU."""
+
+    print("Testing smoke_test_conv2d")
+    # With square kernels and equal stride
+    nn.Conv2d(16, 33, 3, stride=2)
+    # non-square kernels and unequal stride and with padding
+    nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
+    # non-square kernels and unequal stride and with padding and dilation
+    basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
+    cpu_batch = torch.randn(20, 16, 50, 100)
+    basic_conv(cpu_batch)
+
+    if is_cuda_system:
+        print("Testing smoke_test_conv2d with cuda")
+        small_conv = nn.Conv2d(3, 3, 3).cuda()
+        small_batch = torch.randn(1, 3, 24, 24).cuda()
+        with torch.cuda.amp.autocast():
+            small_conv(small_batch)
+
+        # Re-run the dilated layer on the GPU once per floating-point dtype.
+        for dtype in (torch.float16, torch.float32, torch.float64):
+            print(f"Testing smoke_test_conv2d with cuda for {dtype}")
+            gpu_conv = basic_conv.to(dtype).cuda()
+            gpu_batch = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
+            gpu_conv(gpu_batch)
+
+def smoke_test_linalg() -> None:
+    """Run torch.linalg.svd on CPU matrices and, on CUDA systems, on float32/float64 GPU tensors."""
+    print("Testing smoke_test_linalg")
+    mat = torch.randn(5, 3)
+    # Reduced SVD (full_matrices=False)
+    u, s, vh = torch.linalg.svd(mat, full_matrices=False)
+    torch.dist(mat, u @ torch.diag(s) @ vh)
+
+    # Full SVD: only the first three columns of u are used in the product
+    u, s, vh = torch.linalg.svd(mat)
+    torch.dist(mat, u[:, :3] @ torch.diag(s) @ vh)
+
+    batch = torch.randn(7, 5, 3)
+    u, s, vh = torch.linalg.svd(batch, full_matrices=False)
+    torch.dist(batch, u @ torch.diag_embed(s) @ vh)
+
+    if is_cuda_system:
+        for dtype in (torch.float32, torch.float64):
+            print(f"Testing smoke_test_linalg with cuda for {dtype}")
+            gpu_input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
+            torch.linalg.svd(gpu_input)
+
+def smoke_test_compile() -> None:
+    """Compile a function and Net on CUDA; raise AssertionError if compiled and eager results differ."""
+    def foo(x: torch.Tensor) -> torch.Tensor:
+        return torch.sin(x) + torch.cos(x)
+    for dtype in (torch.float16, torch.float32, torch.float64):
+        print(f"Testing smoke_test_compile for {dtype}")
+        x = torch.rand(3, 3, device="cuda").type(dtype)
+        x_eager = foo(x)
+        x_pt2 = torch.compile(foo)(x)
+        torch.testing.assert_close(x_pt2, x_eager)  # fail on mismatch instead of printing a bool
+
+    # Reset torch dynamo since we are changing mode
+    torch._dynamo.reset()
+    dtype = torch.float32
+    torch.set_float32_matmul_precision('high')
+    print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}")
+    x = torch.rand(64, 1, 28, 28, device="cuda").type(dtype)
+    model = Net().to(device="cuda")
+    torch.compile(model, mode="max-autotune")(x)
+
+def smoke_test_modules():
+    """Clone each MODULES repo that has a "repo" URL and run its smoke-test command."""
+    for module in MODULES:
+        if module["repo"]:
+            subprocess.check_output(f"git clone --depth 1 {module['repo']}", stderr=subprocess.STDOUT, shell=True)
+            try:
+                report = subprocess.check_output(
+                    module["smoke_test"], shell=True, universal_newlines=True,
+                    stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError as err:
+                raise RuntimeError(
+                        f"Module {module['name']} FAIL: {err.returncode} Output: {err.output}"
+                    )
+            print("Output: \n{}\n".format(report))
 
-def smoke_test_torchvision() -> None:
-    import torchvision.datasets as dset
-    import torchvision.transforms
-    print('Is torchvision useable?', all(x is not None for x in [torch.ops.image.decode_png, torch.ops.torchvision.roi_align]))
-
-def smoke_test_torchaudio() -> None:
-    import torchaudio.compliance.kaldi  # noqa: F401
-    import torchaudio.datasets  # noqa: F401
-    import torchaudio.functional  # noqa: F401
-    import torchaudio.models  # noqa: F401
-    import torchaudio.pipelines  # noqa: F401
-    import torchaudio.sox_effects  # noqa: F401
-    import torchaudio.transforms  # noqa: F401
-    import torchaudio.utils  # noqa: F401
 
 def main() -> None:
-    #todo add torch, torchvision and torchaudio tests
+    """Parse --package, run the version check and conv2d/linalg smoke tests, then the CUDA checks."""
+    # --package torchonly skips the torchvision/torchaudio checks.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--package", type=str, default="all",
+        choices=["all", "torchonly"],
+        help="Package to include in smoke testing",
+    )
+    options = cli.parse_args()
     print(f"torch: {torch.__version__}")
-    print(f"torchvision: {torchvision.__version__}")
-    print(f"torchaudio: {torchaudio.__version__}")
-    smoke_test_cuda()
-    smoke_test_torchvision()
-    smoke_test_torchaudio()
+    check_version(options.package)
+    smoke_test_conv2d()
+    smoke_test_linalg()
+    # Domain-library smoke tests run only when all packages were requested.
+    if options.package == "all":
+        smoke_test_modules()
+    # CUDA validation runs last.
+    smoke_test_cuda(options.package)
+
 
 if __name__ == "__main__":
     main()
diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh
index 08b47335af..26df3d71d6 100755
--- a/wheel/build_wheel.sh
+++ b/wheel/build_wheel.sh
@@ -97,16 +97,7 @@ fi
 whl_tmp_dir="${MAC_PACKAGE_WORK_DIR}/dist"
 mkdir -p "$whl_tmp_dir"
 
-# Python 3.5 build against macOS 10.6, others build against 10.9
-# NB: Sometimes Anaconda revs the version, in which case you'll have to
-# update this!
-# An example of this happened on Aug 13, 2019, when osx-64/python-2.7.16-h97142e2_2.tar.bz2
-# was uploaded to https://anaconda.org/anaconda/python/files
-if [[ "$desired_python" == 3.5 ]]; then
-    mac_version='macosx_10_6_x86_64'
-elif [[ "$desired_python" == 2.7 ]]; then
-    mac_version='macosx_10_7_x86_64'
-elif [[ -n "$CROSS_COMPILE_ARM64" ]]; then
+if [[ -n "$CROSS_COMPILE_ARM64" || $(uname -m) == "arm64" ]]; then
     mac_version='macosx_11_0_arm64'
 else
     mac_version='macosx_10_9_x86_64'
@@ -128,7 +119,7 @@ if [[ ! -d "$pytorch_rootdir" ]]; then
     popd
 fi
 pushd "$pytorch_rootdir"
-git submodule update --init --recursive --jobs 0
+git submodule update --init --recursive
 popd
 
 ##########################
@@ -144,6 +135,11 @@ SETUPTOOLS_PINNED_VERSION="=46.0.0"
 PYYAML_PINNED_VERSION="=5.3"
 EXTRA_CONDA_INSTALL_FLAGS=""
 case ${desired_python} in
+    3.11)
+        SETUPTOOLS_PINNED_VERSION=">=46.0.0"
+        PYYAML_PINNED_VERSION=">=5.3"
+        NUMPY_PINNED_VERSION="==1.23.5"
+        ;;
     3.10)
         SETUPTOOLS_PINNED_VERSION=">=46.0.0"
         PYYAML_PINNED_VERSION=">=5.3"
@@ -167,8 +163,12 @@ tmp_env_name="wheel_py$python_nodot"
 conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python"
 source activate "$tmp_env_name"
 
-retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" cffi typing_extensions ninja requests
-retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq mkl-include==2020.1 mkl-static==2020.1 -c intel
+if [[ "$desired_python" == "3.11" ]]; then  # 3.11: numpy, setuptools, pyyaml, typing_extensions and requests are installed with pip
+  retry pip install -q "numpy${NUMPY_PINNED_VERSION}" "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests
+else  # other versions: the same packages plus nomkl, installed with conda
+  retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests
+fi
+retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja mkl-include==2022.2.1 mkl-static==2022.2.1 -c intel  # runs for every Python version
 retry pip install -qr "${pytorch_rootdir}/requirements.txt" || true
 
 # For USE_DISTRIBUTED=1 on macOS, need libuv and pkg-config to find libuv.
diff --git a/windows/build_all.bat b/windows/build_all.bat
index 0c1edcf655..f60da8c763 100755
--- a/windows/build_all.bat
+++ b/windows/build_all.bat
@@ -30,8 +30,8 @@ set "ORIG_PATH=%PATH%"
 conda remove -n py36 --all -y || rmdir %CONDA_HOME%\envs\py36 /s
 conda remove -n py37 --all -y || rmdir %CONDA_HOME%\envs\py37 /s
 
-conda create -n py36 -y -q numpy=1.11 mkl=2018 cffi pyyaml boto3 cmake ninja typing_extensions python=3.6
-conda create -n py37 -y -q numpy=1.11 mkl=2018 cffi pyyaml boto3 cmake ninja typing_extensions python=3.7
+conda create -n py36 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.6
+conda create -n py37 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.7
 
 REM Install MKL
 rmdir /s /q mkl
diff --git a/windows/condaenv.bat b/windows/condaenv.bat
index 470575340f..6d945badd1 100644
--- a/windows/condaenv.bat
+++ b/windows/condaenv.bat
@@ -9,10 +9,11 @@ FOR %%v IN (%DESIRED_PYTHON%) DO (
     set PYTHON_VERSION_STR=%%v
     set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=!
     conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s
-    if "%%v" == "3.7" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" cffi pyyaml boto3 cmake ninja typing_extensions python=%%v
+    if "%%v" == "3.7" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v
     if "%%v" == "3.8" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v
     if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y -q "numpy>=1.11" "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v
     if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR!  -y -q -c=conda-forge "numpy>=1.21.2" "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v
+    if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR!  -y -q -c=conda-forge "numpy>=1.21.2" "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v
     if "%%v" == "3" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v
 )
 endlocal
diff --git a/windows/cuda102.bat b/windows/cuda102.bat
deleted file mode 100644
index 1d90c86b81..0000000000
--- a/windows/cuda102.bat
+++ /dev/null
@@ -1,58 +0,0 @@
-@echo off
-
-set MODULE_NAME=pytorch
-
-IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
-    call internal\clone.bat
-    cd ..
-) ELSE (
-    call internal\clean.bat
-)
-IF ERRORLEVEL 1 goto :eof
-
-call internal\check_deps.bat
-IF ERRORLEVEL 1 goto :eof
-
-REM Check for optional components
-
-set USE_CUDA=
-set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
-
-IF "%NVTOOLSEXT_PATH%"=="" (
-    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib"  (
-        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
-    ) ELSE (
-        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
-        exit /b 1
-    )
-)
-
-IF "%CUDA_PATH_V10_2%"=="" (
-    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" (
-        set "CUDA_PATH_V10_2=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
-    ) ELSE (
-        echo CUDA 10.2 not found, failing
-        exit /b 1
-    )
-)
-
-IF "%BUILD_VISION%" == "" (
-    set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5
-    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
-) ELSE (
-    set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50
-)
-
-set "CUDA_PATH=%CUDA_PATH_V10_2%"
-set "PATH=%CUDA_PATH_V10_2%\bin;%PATH%"
-
-:optcheck
-
-call internal\check_opts.bat
-IF ERRORLEVEL 1 goto :eof
-
-call internal\copy.bat
-IF ERRORLEVEL 1 goto :eof
-
-call internal\setup.bat
-IF ERRORLEVEL 1 goto :eof
diff --git a/windows/cuda115.bat b/windows/cuda115.bat
deleted file mode 100644
index bf037b22cc..0000000000
--- a/windows/cuda115.bat
+++ /dev/null
@@ -1,58 +0,0 @@
-@echo off
-
-set MODULE_NAME=pytorch
-
-IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
-    call internal\clone.bat
-    cd ..
-) ELSE (
-    call internal\clean.bat
-)
-IF ERRORLEVEL 1 goto :eof
-
-call internal\check_deps.bat
-IF ERRORLEVEL 1 goto :eof
-
-REM Check for optional components
-
-set USE_CUDA=
-set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
-
-IF "%NVTOOLSEXT_PATH%"=="" (
-    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib"  (
-        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
-    ) ELSE (
-        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
-        exit /b 1
-    )
-)
-
-IF "%CUDA_PATH_V115%"=="" (
-    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin\nvcc.exe" (
-        set "CUDA_PATH_V115=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5"
-    ) ELSE (
-        echo CUDA 11.5 not found, failing
-        exit /b 1
-    )
-)
-
-IF "%BUILD_VISION%" == "" (
-    set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6
-    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
-) ELSE (
-    set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86
-)
-
-set "CUDA_PATH=%CUDA_PATH_V115%"
-set "PATH=%CUDA_PATH_V115%\bin;%PATH%"
-
-:optcheck
-
-call internal\check_opts.bat
-IF ERRORLEVEL 1 goto :eof
-
-call internal\copy.bat
-IF ERRORLEVEL 1 goto :eof
-
-call internal\setup.bat
-IF ERRORLEVEL 1 goto :eof
diff --git a/windows/cuda113.bat b/windows/cuda118.bat
similarity index 81%
rename from windows/cuda113.bat
rename to windows/cuda118.bat
index 568f1e754d..02d91adc38 100644
--- a/windows/cuda113.bat
+++ b/windows/cuda118.bat
@@ -27,24 +27,24 @@ IF "%NVTOOLSEXT_PATH%"=="" (
     )
 )
 
-IF "%CUDA_PATH_V113%"=="" (
-    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin\nvcc.exe" (
-        set "CUDA_PATH_V113=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3"
+IF "%CUDA_PATH_V118%"=="" (
+    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin\nvcc.exe" (
+        set "CUDA_PATH_V118=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
     ) ELSE (
-        echo CUDA 11.3 not found, failing
+        echo CUDA 11.8 not found, failing
         exit /b 1
     )
 )
 
 IF "%BUILD_VISION%" == "" (
-    set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6
+    set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
 ) ELSE (
-    set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86
+    set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
 )
 
-set "CUDA_PATH=%CUDA_PATH_V113%"
-set "PATH=%CUDA_PATH_V113%\bin;%PATH%"
+set "CUDA_PATH=%CUDA_PATH_V118%"
+set "PATH=%CUDA_PATH_V118%\bin;%PATH%"
 
 :optcheck
 
diff --git a/windows/internal/check_deps.bat b/windows/internal/check_deps.bat
index 25c4c4a51d..5e1f58e35e 100755
--- a/windows/internal/check_deps.bat
+++ b/windows/internal/check_deps.bat
@@ -16,18 +16,16 @@ IF "%BUILD_VISION%" == "" (
     )
 )
 
-IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
-    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows
+if not exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+    echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows
     exit /b 1
 )
 
-IF "%VC_YEAR%" == "" set VC_YEAR=2019
-
-set VC_VERSION_LOWER=16
-set VC_VERSION_UPPER=17
-IF "%VC_YEAR%" == "2017" (
-    set VC_VERSION_LOWER=15
-    set VC_VERSION_UPPER=16
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+if "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
 )
 
 if NOT "%VS15INSTALLDIR%" == "" if exist "%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat" (
diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat
index a79571014b..b4f11a58a4 100644
--- a/windows/internal/cuda_install.bat
+++ b/windows/internal/cuda_install.bat
@@ -17,69 +17,23 @@ set CUDNN_FOLDER="cuda"
 set CUDNN_LIB_FOLDER="lib\x64"
 
 :: Skip all of this if we already have cuda installed
-if exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" goto set_cuda_env_vars
+if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" goto set_cuda_env_vars
 
-if %CUDA_VER% EQU 102 goto cuda102
-if %CUDA_VER% EQU 113 goto cuda113
-if %CUDA_VER% EQU 115 goto cuda115
 if %CUDA_VER% EQU 116 goto cuda116
 if %CUDA_VER% EQU 117 goto cuda117
+if %CUDA_VER% EQU 118 goto cuda118
 
 echo CUDA %CUDA_VERSION_STR% is not supported
 exit /b 1
 
-:cuda102
-
-if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
-    set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
-)
-
-goto cuda_common
-
-:cuda113
-
-set CUDA_INSTALL_EXE=cuda_11.3.0_465.89_win10.exe
-if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
-    curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
-    set "ARGS=thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3"
-)
-
-set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive
-set CUDNN_LIB_FOLDER="lib"
-set CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip"
-if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
-    curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
-)
-
-@REM Cuda 8.3+ required zlib to be installed on the path
-echo Installing ZLIB dlls
-curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip"
-7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib"
-xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
-
-goto cuda_common
-
-:cuda115
+:cuda116
 
-set CUDA_INSTALL_EXE=cuda_11.5.0_496.13_win10.exe
+set CUDA_INSTALL_EXE=cuda_11.6.0_511.23_windows.exe
 if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
     if errorlevel 1 exit /b 1
     set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
-    set "ARGS=thrust_11.5 nvcc_11.5 cuobjdump_11.5 nvprune_11.5 nvprof_11.5 cupti_11.5 cublas_11.5 cublas_dev_11.5 cudart_11.5 cufft_11.5 cufft_dev_11.5 curand_11.5 curand_dev_11.5 cusolver_11.5 cusolver_dev_11.5 cusparse_11.5 cusparse_dev_11.5 npp_11.5 npp_dev_11.5 nvrtc_11.5 nvrtc_dev_11.5 nvml_dev_11.5"
+    set "ARGS=thrust_11.6 nvcc_11.6 cuobjdump_11.6 nvprune_11.6 nvprof_11.6 cupti_11.6 cublas_11.6 cublas_dev_11.6 cudart_11.6 cufft_11.6 cufft_dev_11.6 curand_11.6 curand_dev_11.6 cusolver_11.6 cusolver_dev_11.6 cusparse_11.6 cusparse_dev_11.6 npp_11.6 npp_dev_11.6 nvrtc_11.6 nvrtc_dev_11.6 nvml_dev_11.6"
 )
 
 set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive
@@ -99,17 +53,17 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
 
 goto cuda_common
 
-:cuda116
+:cuda117
 
-set CUDA_INSTALL_EXE=cuda_11.6.0_511.23_windows.exe
+set CUDA_INSTALL_EXE=cuda_11.7.0_516.01_windows.exe
 if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
     if errorlevel 1 exit /b 1
     set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
-    set "ARGS=thrust_11.6 nvcc_11.6 cuobjdump_11.6 nvprune_11.6 nvprof_11.6 cupti_11.6 cublas_11.6 cublas_dev_11.6 cudart_11.6 cufft_11.6 cufft_dev_11.6 curand_11.6 curand_dev_11.6 cusolver_11.6 cusolver_dev_11.6 cusparse_11.6 cusparse_dev_11.6 npp_11.6 npp_dev_11.6 nvrtc_11.6 nvrtc_dev_11.6 nvml_dev_11.6"
+    set "ARGS=thrust_11.7 nvcc_11.7 cuobjdump_11.7 nvprune_11.7 nvprof_11.7 cupti_11.7 cublas_11.7 cublas_dev_11.7 cudart_11.7 cufft_11.7 cufft_dev_11.7 curand_11.7 curand_dev_11.7 cusolver_11.7 cusolver_dev_11.7 cusparse_11.7 cusparse_dev_11.7 npp_11.7 npp_dev_11.7 nvrtc_11.7 nvrtc_dev_11.7 nvml_dev_11.7"
 )
 
-set CUDNN_FOLDER=cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive
+set CUDNN_FOLDER=cudnn-windows-x86_64-8.5.0.96_cuda11-archive
 set CUDNN_LIB_FOLDER="lib"
 set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip"
 if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
@@ -126,17 +80,17 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
 
 goto cuda_common
 
-:cuda117
+:cuda118
 
-set CUDA_INSTALL_EXE=cuda_11.7.0_516.01_windows.exe
+set CUDA_INSTALL_EXE=cuda_11.8.0_522.06_windows.exe
 if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
     if errorlevel 1 exit /b 1
     set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
-    set "ARGS=thrust_11.7 nvcc_11.7 cuobjdump_11.7 nvprune_11.7 nvprof_11.7 cupti_11.7 cublas_11.7 cublas_dev_11.7 cudart_11.7 cufft_11.7 cufft_dev_11.7 curand_11.7 curand_dev_11.7 cusolver_11.7 cusolver_dev_11.7 cusparse_11.7 cusparse_dev_11.7 npp_11.7 npp_dev_11.7 nvrtc_11.7 nvrtc_dev_11.7 nvml_dev_11.7"
+    set "ARGS=cuda_profiler_api_11.8 thrust_11.8 nvcc_11.8 cuobjdump_11.8 nvprune_11.8 nvprof_11.8 cupti_11.8 cublas_11.8 cublas_dev_11.8 cudart_11.8 cufft_11.8 cufft_dev_11.8 curand_11.8 curand_dev_11.8 cusolver_11.8 cusolver_dev_11.8 cusparse_11.8 cusparse_dev_11.8 npp_11.8 npp_dev_11.8 nvrtc_11.8 nvrtc_dev_11.8 nvml_dev_11.8"
 )
 
-set CUDNN_FOLDER=cudnn-windows-x86_64-8.5.0.96_cuda11-archive
+set CUDNN_FOLDER=cudnn-windows-x86_64-8.7.0.84_cuda11-archive
 set CUDNN_LIB_FOLDER="lib"
 set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip"
 if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
@@ -158,7 +112,7 @@ goto cuda_common
 :: With GHA runners these should be pre-installed as part of our AMI process
 :: If you cannot find the CUDA version you want to build for here then please
 :: add it @ https://github.com/pytorch/test-infra/tree/main/aws/ami/windows
-if not exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
+if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
     if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
         curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
         if errorlevel 1 exit /b 1
@@ -183,12 +137,12 @@ if not exist "C:\\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION
     popd
 
     echo Installing VS integration...
-    if "%VC_YEAR%" == "2017" (
-        xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations"
-    )
     if "%VC_YEAR%" == "2019" (
         xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations"
     )
+    if "%VC_YEAR%" == "2022" (
+        xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations"
+    )
 
     echo Installing NvToolsExt...
     7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt"
diff --git a/windows/internal/env_fix.bat b/windows/internal/env_fix.bat
index dd0aaf5f2d..2a53198a99 100644
--- a/windows/internal/env_fix.bat
+++ b/windows/internal/env_fix.bat
@@ -5,12 +5,19 @@
 
 setlocal
 
-IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
-    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows
+if not exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+    echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows
     exit /b 1
 )
 
-for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+if "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
+)
+
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
     if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
         set "VS15INSTALLDIR=%%i"
         set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
@@ -20,8 +27,8 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio
 
 :vswhere
 
-IF "%VS15VCVARSALL%"=="" (
-    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows
+if "%VS15VCVARSALL%"=="" (
+    echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows
     exit /b 1
 )
 
diff --git a/windows/internal/install_nightly_package.bat b/windows/internal/install_nightly_package.bat
deleted file mode 100644
index 7db23ebd60..0000000000
--- a/windows/internal/install_nightly_package.bat
+++ /dev/null
@@ -1,67 +0,0 @@
-if "%PACKAGE_TYPE%" == "wheel" goto wheel
-if "%PACKAGE_TYPE%" == "conda" goto conda
-if "%PACKAGE_TYPE%" == "libtorch" goto libtorch
-
-:wheel
-echo "install pytorch wheel from nightly"
-
-set pip_url="https://download.pytorch.org/whl/nightly/%DESIRED_CUDA%/torch_nightly.html"
-if "%DESIRED_CUDA%" == "cu102" (
-    set package_name_and_version="torch==%NIGHTLIES_DATE_PREAMBLE%%DATE%"
-) else (
-    set package_name_and_version="torch==%NIGHTLIES_DATE_PREAMBLE%%DATE%+%DESIRED_CUDA%"
-)
-pip install "%package_name_and_version%" -f "%pip_url%" --no-cache-dir --no-index -q
-if errorlevel 1 exit /b 1
-
-exit /b 0
-
-:conda
-echo "install pytorch conda from nightly"
-set package_name_and_version="pytorch==%NIGHTLIES_DATE_PREAMBLE%%DATE%"
-
-if "%DESIRED_CUDA%" == "cpu" (
-    call conda install -yq -c pytorch-nightly %package_name_and_version% cpuonly
-) else (
-    call conda install -yq -c pytorch-nightly "cudatoolkit=%CUDA_VERSION_STR%" %package_name_and_version%
-)
-if ERRORLEVEL 1 exit /b 1
-
-FOR /f %%i in ('python -c "import sys;print(sys.version)"') do set cur_python=%%i
-
-if not %cur_python:~0,3% == %DESIRED_PYTHON% (
-    echo "The Python version has changed to %cur_python%"
-    echo "Probably the package for the version we want does not exist"
-    echo "conda will change the Python version even if it was explicitly declared"
-)
-
-if "%DESIRED_CUDA%" == "cpu" (
-    call conda list torch | findstr cuda || exit /b 0
-	echo "The installed package is built for CUDA, the full package is"
-	call conda list torch
-) else (
-    call conda list torch | findstr cuda%CUDA_VERSION% && exit /b 0
-	echo "The installed package doesn't seem to be built for CUDA "%CUDA_VERSION_STR%
-	echo "the full package is "
-	call conda list torch
-)
-exit /b 1
-
-:libtorch
-echo "install libtorch from nightly"
-if "%LIBTORCH_CONFIG%" == "debug" (
-    set NAME_PREFIX=libtorch-win-shared-with-deps-debug
-) else (
-    set NAME_PREFIX=libtorch-win-shared-with-deps
-)
-if "%DESIRED_CUDA%" == "cu102" (
-    set package_name=%NAME_PREFIX%-%NIGHTLIES_DATE_PREAMBLE%%DATE%.zip
-) else (
-    set package_name=%NAME_PREFIX%-%NIGHTLIES_DATE_PREAMBLE%%DATE%%%2B%DESIRED_CUDA%.zip
-)
-set libtorch_url="https://download.pytorch.org/libtorch/nightly/%DESIRED_CUDA%/%package_name%"
-curl --retry 3 -k "%libtorch_url%" -o %package_name%
-if ERRORLEVEL 1 exit /b 1
-
-7z x %package_name% -otmp
-if ERRORLEVEL 1 exit /b 1
diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat
index 836a04311b..2e1b1b243a 100644
--- a/windows/internal/smoke_test.bat
+++ b/windows/internal/smoke_test.bat
@@ -30,6 +30,7 @@ exit /b 1
 echo "install wheel package"
 
 set PYTHON_INSTALLER_URL=
+if "%DESIRED_PYTHON%" == "3.11" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.11.0/python-3.11.0-amd64.exe"
 if "%DESIRED_PYTHON%" == "3.10" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe"
 if "%DESIRED_PYTHON%" == "3.9" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe"
 if "%DESIRED_PYTHON%" == "3.8" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.8.2/python-3.8.2-amd64.exe"
@@ -51,13 +52,8 @@ set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%"
 pip install -q numpy protobuf "mkl>=2019"
 if errorlevel 1 exit /b 1
 
-if "%TEST_NIGHTLY_PACKAGE%" == "1" (
-    call internal\install_nightly_package.bat
-    if errorlevel 1 exit /b 1
-) else (
-    for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i"
-    if errorlevel 1 exit /b 1
-)
+for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i"
+if errorlevel 1 exit /b 1
 
 goto smoke_test
 
@@ -68,15 +64,15 @@ echo "install conda package"
 set "CONDA_HOME=%CD%\conda"
 set "tmp_conda=%CONDA_HOME%"
 set "miniconda_exe=%CD%\miniconda.exe"
-set "CONDA_EXTRA_ARGS="
-if "%CUDA_VERSION%" == "115" (
-    set "CONDA_EXTRA_ARGS=-c=nvidia"
-)
+set "CONDA_EXTRA_ARGS=cpuonly -c pytorch-nightly"
 if "%CUDA_VERSION%" == "116" (
-    set "CONDA_EXTRA_ARGS=-c=nvidia"
+    set "CONDA_EXTRA_ARGS=pytorch-cuda=11.6 -c nvidia -c pytorch-nightly"
 )
 if "%CUDA_VERSION%" == "117" (
-    set "CONDA_EXTRA_ARGS=-c=nvidia"
+    set "CONDA_EXTRA_ARGS=pytorch-cuda=11.7 -c nvidia -c pytorch-nightly"
+)
+if "%CUDA_VERSION%" == "118" (
+    set "CONDA_EXTRA_ARGS=pytorch-cuda=11.8 -c nvidia -c pytorch-nightly"
 )
 
 rmdir /s /q conda
@@ -93,9 +89,8 @@ if errorlevel 1 exit /b 1
 call %CONDA_HOME%\condabin\activate.bat testenv
 if errorlevel 1 exit /b 1
 
-call conda update -n base -y -c defaults conda
-
-call conda install %CONDA_EXTRA_ARGS% -yq protobuf numpy
+:: do conda install to make sure all the dependencies are installed
+call conda install -yq pytorch %CONDA_EXTRA_ARGS%
 if ERRORLEVEL 1 exit /b 1
 
 set /a CUDA_VER=%CUDA_VERSION%
@@ -103,25 +98,8 @@ set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1%
 set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1%
 set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
 
-if "%TEST_NIGHTLY_PACKAGE%" == "1" (
-    call internal\install_nightly_package.bat
-    if errorlevel 1 exit /b 1
-    goto smoke_test
-)
-
-for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install %CONDA_EXTRA_ARGS% -y "%%i" --offline
-if ERRORLEVEL 1 exit /b 1
-
-if "%CUDA_VERSION%" == "cpu" goto install_cpu_torch
-
-:: We do an update --all here since that will install the dependencies for any package that's installed offline
-call conda update --all %CONDA_EXTRA_ARGS% -y -c pytorch -c defaults -c numba/label/dev
-if ERRORLEVEL 1 exit /b 1
-
-goto smoke_test
-
-:install_cpu_torch
-call conda install %CONDA_EXTRA_ARGS% -y cpuonly -c pytorch
+:: Install the package we just built
+for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install -yq "%%i" --offline
 if ERRORLEVEL 1 exit /b 1
 
 :smoke_test
@@ -162,24 +140,21 @@ goto end
 :libtorch
 echo "install and test libtorch"
 
-if "%VC_YEAR%" == "2017" powershell internal\vs2017_install.ps1
+if "%VC_YEAR%" == "2019" powershell internal\vs2019_install.ps1
+if "%VC_YEAR%" == "2022" powershell internal\vs2022_install.ps1
+
 if ERRORLEVEL 1 exit /b 1
 
-if "%TEST_NIGHTLY_PACKAGE%" == "1" (
-    call internal\install_nightly_package.bat
-    if errorlevel 1 exit /b 1
-) else (
-    for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do 7z x "%%i" -otmp
-    if ERRORLEVEL 1 exit /b 1
-)
+for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do 7z x "%%i" -otmp
+if ERRORLEVEL 1 exit /b 1
 
 pushd tmp\libtorch
 
-set VC_VERSION_LOWER=16
-set VC_VERSION_UPPER=17
-IF "%VC_YEAR%" == "2017" (
-    set VC_VERSION_LOWER=15
-    set VC_VERSION_UPPER=16
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+IF "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
 )
 
 for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
@@ -192,7 +167,7 @@ for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio
 
 :vswhere
 IF "%VS15VCVARSALL%"=="" (
-    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch test on Windows
+    echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch test on Windows
     exit /b 1
 )
 call "%VS15VCVARSALL%" x64
@@ -202,13 +177,13 @@ set INCLUDE=%INCLUDE%;%install_root%\include;%install_root%\include\torch\csrc\a
 set LIB=%LIB%;%install_root%\lib
 set PATH=%PATH%;%install_root%\lib
 
-cl %BUILDER_ROOT%\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc
+cl %BUILDER_ROOT%\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc /std:c++17
 if ERRORLEVEL 1 exit /b 1
 
 .\simple-torch-test.exe
 if ERRORLEVEL 1 exit /b 1
 
-cl %BUILDER_ROOT%\test_example_code\check-torch-mkl.cpp c10.lib torch_cpu.lib /EHsc
+cl %BUILDER_ROOT%\test_example_code\check-torch-mkl.cpp c10.lib torch_cpu.lib /EHsc /std:c++17
 if ERRORLEVEL 1 exit /b 1
 
 .\check-torch-mkl.exe
@@ -223,9 +198,9 @@ set BUILD_SPLIT_CUDA=
 if exist "%install_root%\lib\torch_cuda_cu.lib" if exist "%install_root%\lib\torch_cuda_cpp.lib" set BUILD_SPLIT_CUDA=ON
 
 if "%BUILD_SPLIT_CUDA%" == "ON" (
-    cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda_cu.lib torch_cuda_cpp.lib /EHsc /link /INCLUDE:?warp_size@cuda@at@@YAHXZ /INCLUDE:?_torch_cuda_cu_linker_symbol_op_cuda@native@at@@YA?AVTensor@2@AEBV32@@Z
+    cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda_cu.lib torch_cuda_cpp.lib /EHsc /std:c++17 /link /INCLUDE:?warp_size@cuda@at@@YAHXZ /INCLUDE:?_torch_cuda_cu_linker_symbol_op_cuda@native@at@@YA?AVTensor@2@AEBV32@@Z
 ) else (
-    cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda.lib /EHsc /link /INCLUDE:?warp_size@cuda@at@@YAHXZ
+    cl %BUILDER_ROOT%\test_example_code\check-torch-cuda.cpp torch_cpu.lib c10.lib torch_cuda.lib /EHsc /std:c++17 /link /INCLUDE:?warp_size@cuda@at@@YAHXZ
 )
 .\check-torch-cuda.exe
 if ERRORLEVEL 1 exit /b 1
diff --git a/windows/internal/vc_install_helper.bat b/windows/internal/vc_install_helper.bat
index 6a2a0e0d99..61ab6d5f8c 100644
--- a/windows/internal/vc_install_helper.bat
+++ b/windows/internal/vc_install_helper.bat
@@ -1,7 +1,12 @@
 if "%VC_YEAR%" == "2019" powershell windows/internal/vs2019_install.ps1
+if "%VC_YEAR%" == "2022" powershell windows/internal/vs2022_install.ps1
 
-set VC_VERSION_LOWER=16
-set VC_VERSION_UPPER=17
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+if "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
+)
 
 for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"  -products Microsoft.VisualStudio.Product.BuildTools -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
     if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
diff --git a/windows/internal/vs2017_install.ps1 b/windows/internal/vs2017_install.ps1
deleted file mode 100644
index 873e4eb17f..0000000000
--- a/windows/internal/vs2017_install.ps1
+++ /dev/null
@@ -1,28 +0,0 @@
-$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
-$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
-                                                     "--add Microsoft.Component.MSBuild",
-                                                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
-                                                     "--add Microsoft.VisualStudio.Component.TextTemplating",
-                                                     "--add Microsoft.VisualStudio.Component.VC.CoreIde",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
-                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
-                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
-
-if ($args.Count -ne 0) {
-    $VS_INSTALL_ARGS += "--add Microsoft.VisualStudio.Component.VC.Tools.$($args[0])"
-}
-
-curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
-if ($LASTEXITCODE -ne 0) {
-    echo "Download of the VS 2017 installer failed"
-    exit 1
-}
-
-$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
-Remove-Item -Path vs_installer.exe -Force
-$exitCode = $process.ExitCode
-if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
-    echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
-    exit 1
-}
diff --git a/windows/internal/vs2022_install.ps1 b/windows/internal/vs2022_install.ps1
new file mode 100644
index 0000000000..55fba47378
--- /dev/null
+++ b/windows/internal/vs2022_install.ps1
@@ -0,0 +1,80 @@
+# Installs the Visual Studio 2022 Build Tools unattended.
+#
+# Flow:
+#   1. Download the pinned Build Tools bootstrapper to .\vs_installer.exe.
+#   2. If vswhere reports an existing 17.x Build Tools installation, reuse it
+#      (exit 0) unless CIRCLECI is set, in which case it is uninstalled first.
+#   3. Run the bootstrapper with $VS_INSTALL_ARGS.
+#   4. On installer failure, run Microsoft's Collect.exe and copy the resulting
+#      vslogs.zip to C:\w\build-results.
+#
+# Exits 0 on success (installer exit codes 0 and 3010 are both accepted) and 1
+# on any download, uninstall, or install failure.
+#
+# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479
+# https://learn.microsoft.com/en-us/visualstudio/releases/2022/release-history#evergreen-bootstrappers
+
+# Used only in log messages. Environment variables such as the batch callers'
+# VC_YEAR are not visible as plain $variables in PowerShell, so the values are
+# defined here; otherwise the messages would render with blank fields.
+$VC_YEAR = "2022"
+$VS_VERSION = "17.4.3"
+
+# 17.4.3 BuildTools
+$VS_DOWNLOAD_LINK = "https://download.visualstudio.microsoft.com/download/pr/8f480125-28b8-4a2c-847c-c2b02a8cdd1b/64be21d4ada005d7d07896ed0b004c322409bd04d6e8eba4c03c9fa39c928e7a/vs_BuildTools.exe"
+$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
+$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
+                                                     "--add Microsoft.Component.MSBuild",
+                                                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
+                                                     "--add Microsoft.VisualStudio.Component.TextTemplating",
+                                                     "--add Microsoft.VisualStudio.Component.VC.CoreIde",
+                                                     "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
+                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
+                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
+                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
+
+curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
+if ($LASTEXITCODE -ne 0) {
+    echo "Download of the VS $VC_YEAR Version $VS_VERSION installer failed"
+    exit 1
+}
+
+if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") {
+    $existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[17, 18)" -property installationPath
+    # $null goes on the left so the test stays a scalar comparison even if vswhere
+    # returns several paths (an array on the left would be filtered instead).
+    if ($null -ne $existingPath) {
+        if (!${env:CIRCLECI}) {
+            echo "Found correctly versioned existing BuildTools installation in $existingPath"
+            # The bootstrapper is not needed on this path; do not leave it behind.
+            Remove-Item -Path vs_installer.exe -Force
+            exit 0
+        }
+        echo "Found existing BuildTools installation in $existingPath"
+        $VS_UNINSTALL_ARGS = @("uninstall", "--installPath", "`"$existingPath`"", "--quiet","--wait")
+        $process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_UNINSTALL_ARGS -NoNewWindow -Wait -PassThru
+        $exitCode = $process.ExitCode
+        if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
+            echo "Original BuildTools uninstall failed with code $exitCode"
+            exit 1
+        }
+        echo "Original BuildTools uninstalled"
+    }
+}
+
+$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
+Remove-Item -Path vs_installer.exe -Force
+$exitCode = $process.ExitCode
+if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
+    echo "VS $VC_YEAR installer exited with code $exitCode, which should be one of [0, 3010]."
+    curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
+    if ($LASTEXITCODE -ne 0) {
+        echo "Download of the VS Collect tool failed."
+        exit 1
+    }
+    Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
+    New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
+    # Collect.exe writes vslogs.zip to the current user's %TEMP% directory.
+    Copy-Item -Path "${env:TEMP}\vslogs.zip" -Destination "C:\w\build-results\"
+    exit 1
+}
diff --git a/windows/internal/vs_install.bat b/windows/internal/vs_install.bat
index 624227f0be..221ec33136 100644
--- a/windows/internal/vs_install.bat
+++ b/windows/internal/vs_install.bat
@@ -1,12 +1,12 @@
 @echo off
 
-set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_buildtools.exe
+set VS_DOWNLOAD_LINK=https://download.visualstudio.microsoft.com/download/pr/8f480125-28b8-4a2c-847c-c2b02a8cdd1b/64be21d4ada005d7d07896ed0b004c322409bd04d6e8eba4c03c9fa39c928e7a/vs_BuildTools.exe
 IF "%VS_LATEST%" == "1" (
    set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools
    set VSDEVCMD_ARGS=
 ) ELSE (
    set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^
-                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
+                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.34 ^
                                                 --add Microsoft.Component.MSBuild ^
                                                 --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^
                                                 --add Microsoft.VisualStudio.Component.TextTemplating ^
@@ -14,9 +14,9 @@ IF "%VS_LATEST%" == "1" (
                                                 --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^
                                                 --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^
                                                 --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^
-                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
+                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.34 ^
                                                 --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81
-   set VSDEVCMD_ARGS=-vcvars_ver=14.11
+   set VSDEVCMD_ARGS=-vcvars_ver=14.34
 )
 
 curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe