allenai · dirkgr · Oct 11, 2022 · Jun 29, 2022 · Jun 29, 2022 · Jun 29, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -18,10 +18,10 @@ on:
 
 env:
   # NOTE: Need to update `TORCH_VERSION`, and `TORCH_*_INSTALL` for new torch releases.
-  TORCH_VERSION: 1.11.0
+  TORCH_VERSION: 1.12.0
   # TORCH_CPU_INSTALL: conda install pytorch torchvision torchaudio cpuonly -c pytorch
   # TORCH_GPU_INSTALL: conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
-  TORCH_CPU_INSTALL: pip install torch torchvision torchaudio
+  TORCH_CPU_INSTALL: pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
   TORCH_GPU_INSTALL: pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
   # Change this to invalidate existing cache.
   CACHE_PREFIX: v11
@@ -32,6 +32,8 @@ env:
   # See https://github.com/pytorch/pytorch/issues/37377#issuecomment-677851112.
   MKL_THREADING_LAYER: 'GNU'
   DEFAULT_PYTHON_VERSION: 3.8
+  # Debugging aid for GPU tests: makes CUDA kernel launches synchronous so errors surface at the failing op.
+  CUDA_LAUNCH_BLOCKING: '1'
 
 defaults:
   run:
@@ -172,6 +174,7 @@ jobs:
         source .venv/bin/activate
         git clone https://github.com/allenai/allennlp-models.git
         cd allennlp-models
+        # To test against the models torch-pin branch, uncomment: git checkout dependabot/pip/torch-gte-1.7.0-and-lt-1.13.0
         pip install -e .[dev,all]
 
     - name: Debug info
@@ -409,7 +412,7 @@ jobs:
       env:
         CUDA: ${{ matrix.cuda }}
       run: |
-        echo "DOCKER_TORCH_VERSION=${TORCH_VERSION}-cuda${CUDA}" >> $GITHUB_ENV;
+        echo "DOCKER_TORCH_VERSION=${TORCH_VERSION}-cuda${CUDA}-python3.8" >> $GITHUB_ENV;
         if [[ $GITHUB_EVENT_NAME == 'release' ]]; then
             echo "DOCKER_IMAGE_NAME=allennlp/allennlp:${GITHUB_REF#refs/tags/}-cuda${CUDA}" >> $GITHUB_ENV;
         else

diff --git a/Dockerfile b/Dockerfile
@@ -2,14 +2,9 @@
 # It's built from a wheel installation of allennlp using the base images from
 # https://github.com/allenai/docker-images/pkgs/container/pytorch
 
-ARG TORCH=1.11.0-cuda11.3
+ARG TORCH=1.12.0-cuda11.3-python3.8
 FROM ghcr.io/allenai/pytorch:${TORCH}
 
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
 WORKDIR /stage/allennlp
 
 # Installing AllenNLP's dependencies is the most time-consuming part of building

diff --git a/Dockerfile.test b/Dockerfile.test
@@ -1,13 +1,8 @@
 # Used to build an image for running tests.
 
-ARG TORCH=1.11.0-cuda11.3
+ARG TORCH=1.12.0-cuda11.3-python3.8
 FROM ghcr.io/allenai/pytorch:${TORCH}
 
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
 # These environment variables are helpful for debugging.
 # See https://pytorch.org/docs/stable/distributed.html#common-environment-variables for more info.
 ENV NCCL_DEBUG INFO

diff --git a/Makefile b/Makefile
@@ -11,8 +11,8 @@ MD_DOCS_TGT = site/
 MD_DOCS_EXTRAS = $(addprefix $(MD_DOCS_ROOT),README.md CHANGELOG.md CONTRIBUTING.md)
 
 TORCH_INSTALL = pip install torch torchvision
-DOCKER_TORCH_VERSION = 1.11.0-cuda11.3
-DOCKER_TEST_TORCH_VERSION = 1.11.0-cuda11.3
+DOCKER_TORCH_VERSION = 1.12.0-cuda11.3-python3.8
+DOCKER_TEST_TORCH_VERSION = 1.12.0-cuda11.3-python3.8
 
 DOCKER_TAG = latest
 DOCKER_IMAGE_NAME = allennlp/allennlp:$(DOCKER_TAG)

diff --git a/allennlp/nn/util.py b/allennlp/nn/util.py
@@ -2237,7 +2237,7 @@ def _collect_state_dict(
             else:
                 missing_keys.append(key)
         logger.debug("Broadcasting distributed parameter '%s'", prefix + key)
-        tensor = tensor.to(dist_device)
+        tensor = tensor.to(dist_device).contiguous()  # presumably required by dist.broadcast
         dist.broadcast(tensor, 0)
         current_state_dict[key] = tensor.to(state_dict_device)
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,8 @@
 ################################
 ###### Core dependencies #######
 ################################
-torch>=1.10.0,<1.12.0
-torchvision>=0.8.1,<0.13.0
+torch>=1.10.0,<1.13.0
+torchvision>=0.8.1,<0.14.0
 cached-path>=1.1.3,<1.2.0
 fairscale==0.4.6
 jsonnet>=0.10.0 ; sys.platform != 'win32'
@@ -42,7 +42,7 @@ rich==12.1
 # pyasn1-modules>=0.2.8
 
 # Protobuf is a dependency of wandb and tensorboard, but they are missing this pin.
-protobuf>=3.20.0,<4.0.0
+protobuf>=3.12.0,<4.0.0
 
 # We need this for building the Docker image
 traitlets>5.1.1