Skip to content

Commit

Permalink
Limit build and test to TF 2.6+
Browse files Browse the repository at this point in the history
Signed-off-by: Max H. Gerlach <git@maxgerlach.de>
  • Loading branch information
maxhgerlach committed Aug 27, 2021
1 parent 16c6a5a commit 7970416
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 24 deletions.
27 changes: 15 additions & 12 deletions horovod/tensorflow/CMakeLists.txt
Expand Up @@ -61,23 +61,26 @@ set(Tensorflow_CXX11 ${Tensorflow_CXX11} PARENT_SCOPE)
list(APPEND TF_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/mpi_ops.cc")
list(APPEND TF_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/xla_mpi_ops.cc")

list(APPEND TF_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/dense_update_functor.cc"
"${PROJECT_SOURCE_DIR}/horovod/tensorflow/training_op_helpers.cc")
if(VERSION_DEC GREATER 2006000000 OR VERSION_DEC EQUAL 2006000000)
# dependencies for inplace broadcast with resource variables
list(APPEND TF_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/dense_update_functor.cc"
"${PROJECT_SOURCE_DIR}/horovod/tensorflow/training_op_helpers.cc")

if(HAVE_CUDA OR HAVE_SUB_PROJECT_CUDA)
list(APPEND TF_CUDA_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/dense_update_functor_gpu.cc.cu")
# dense_update_functor_gpu.cu.cc from TensorFlow source had to be renamed because it was not compiled via nvcc otherwise
if(HAVE_CUDA OR HAVE_SUB_PROJECT_CUDA)
list(APPEND TF_CUDA_SOURCES "${PROJECT_SOURCE_DIR}/horovod/tensorflow/dense_update_functor_gpu.cc.cu")
# dense_update_functor_gpu.cu.cc from TensorFlow source had to be renamed because it was not compiled via nvcc otherwise

set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})

set(ENV{PYTHONPATH} "${PROJECT_SOURCE_DIR}/cmake:$ENV{PYTHONPATH}")
execute_process(COMMAND ${PY_EXE} -c "import build_utils; print(' '.join(build_utils.get_nvcc_flags()))"
OUTPUT_VARIABLE HVD_NVCC_COMPILE_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE)
set(ENV{PYTHONPATH} "${PROJECT_SOURCE_DIR}/cmake:$ENV{PYTHONPATH}")
execute_process(COMMAND ${PY_EXE} -c "import build_utils; print(' '.join(build_utils.get_nvcc_flags()))"
OUTPUT_VARIABLE HVD_NVCC_COMPILE_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE)

list(APPEND CUDA_NVCC_FLAGS "${HVD_NVCC_COMPILE_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS "${HVD_NVCC_COMPILE_FLAGS}")

cuda_add_library(tensorflow_cuda_kernels ${TF_CUDA_SOURCES} STATIC OPTIONS -DGOOGLE_CUDA=1)
list(APPEND TF_LINKER_LIBS tensorflow_cuda_kernels)
cuda_add_library(tensorflow_cuda_kernels ${TF_CUDA_SOURCES} STATIC OPTIONS -DGOOGLE_CUDA=1)
list(APPEND TF_LINKER_LIBS tensorflow_cuda_kernels)
endif()
endif()

# Create library
Expand Down
8 changes: 4 additions & 4 deletions horovod/tensorflow/functions.py
Expand Up @@ -68,10 +68,10 @@ def broadcast_variables(variables, root_rank, process_set=global_process_set, in
Broadcasts variables from root rank to all other processes
in a process set (defaults to all Horovod processes).
Optionally, the broadcast may be performed in-place. Some restrictions apply then:
Reference variables (default in TensorFlow 1, legacy in TensorFlow 2) must all be
of the same data type. Resource variables (default in TensorFlow 2) are not supported
with TensorFlow 2.3, 2.4, or 2.5.
Optionally, the broadcast may be performed in-place. This is only supported with
TensorFlow 2.6 or later. Reference variables (legacy support in TF 2) must all
be of the same data type. There is no such restriction for resource variables
(default in TF 2).
Arguments:
variables: variables for broadcast
Expand Down
5 changes: 5 additions & 0 deletions horovod/tensorflow/mpi_ops.cc
Expand Up @@ -31,9 +31,12 @@
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/common_shape_fns.h"

#if TENSORFLOW_VERSION >= 2006000000
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/resource_var.h"
#include "tensorflow/core/kernels/training_op_helpers.h"
#endif // TENSORFLOW_VERSION >= 2006000000

#include "../common/common.h"

Expand Down Expand Up @@ -839,6 +842,7 @@ Output
`tensor` on root rank.
)doc");

#if TENSORFLOW_VERSION >= 2006000000
namespace {
std::string NormalizeNameForTensorFlow(const std::string& name) {
static const std::regex normalize_re(R"regex([^a-zA-Z0-9_])regex");
Expand Down Expand Up @@ -1087,6 +1091,7 @@ Input
resources: Variables to broadcast. They will be updated in-place
to the values from the root rank.
)doc");
#endif // TENSORFLOW_VERSION >= 2006000000

class HorovodJoinOp : public AsyncOpKernel {
public:
Expand Down
7 changes: 5 additions & 2 deletions horovod/tensorflow/mpi_ops.py
Expand Up @@ -305,12 +305,15 @@ def broadcast_(variables, root_rank, name=None, process_set=global_process_set):
are ready to send and receive all variables. In each process all variables need
to be located on the same device (CPU or GPU).
Note: Due to a bug in TensorFlow this will not work with resource variables in
TF 2.3, 2.4, or 2.5. Versions 1.15 and 2.6 should be fine, however.
Note: This is only supported with TensorFlow 2.6 or later.
Returns:
The tensor values of the updated `variables` as broadcasted from root rank.
"""
from distutils.version import LooseVersion
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
raise NotImplementedError("In-place broadcasts are only supported with TensorFlow 2.6 or later")

from tensorflow.python.ops import resource_variable_ops
if all(resource_variable_ops.is_resource_variable(var) for var in variables):
with tf.control_dependencies(
Expand Down
12 changes: 6 additions & 6 deletions test/parallel/test_tensorflow.py
Expand Up @@ -2227,8 +2227,8 @@ def test_horovod_broadcast_gpu(self):

def test_horovod_broadcast_inplace_cpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on CPU."""
if LooseVersion('2.3.0') <= LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables are known to be broken with TensorFlow 2.3, 2.4, and 2.5")
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables only work with TF 2.6+")

hvd.init()
rank = hvd.rank()
Expand Down Expand Up @@ -2271,8 +2271,8 @@ def test_horovod_broadcast_inplace_cpu(self):

def test_horovod_broadcast_inplace_gpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on GPU."""
if LooseVersion('2.3.0') <= LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables are known to be broken with TensorFlow 2.3, 2.4, and 2.5")
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables only work with TF 2.6+")

if not tf.test.is_gpu_available(cuda_only=True):
self.skipTest("No GPUs available")
Expand Down Expand Up @@ -2321,8 +2321,8 @@ def test_horovod_broadcast_inplace_gpu(self):

def test_horovod_broadcast_inplace_multiple(self):
"""Test that the inplace broadcast correctly broadcasts multiple variables on CPU."""
if LooseVersion('2.3.0') <= LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables are known to be broken with TensorFlow 2.3, 2.4, and 2.5")
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
self.skipTest("Custom Ops using resource variables only work with TF 2.6+")

hvd.init()
rank = hvd.rank()
Expand Down

0 comments on commit 7970416

Please sign in to comment.