From 1f952dae93b9c9e18be0ec623f397bd231fe10a2 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 29 Oct 2021 13:48:41 +0800 Subject: [PATCH 1/2] Support building with CTK11.5. * Require system cub installation for CTK11.4+. * Check thrust version for segmented sort. --- CMakeLists.txt | 4 ++++ doc/build.rst | 3 ++- src/common/device_helpers.cuh | 31 +++++++++++++++++++++---------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1705f3ad8cf..d56bc7a0cd77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,10 @@ if (USE_CUDA) set(GEN_CODE "") format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE) add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap) + + if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB)) + message(SEND_ERROR "`BUILD_WITH_CUDA_CUB` should be set to `ON` for CUDA >= 11.4") + endif () endif (USE_CUDA) if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND diff --git a/doc/build.rst b/doc/build.rst index d624379b4ded..d20fdacdca1f 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -148,7 +148,8 @@ From the command line on Linux starting from the XGBoost directory: mkdir build cd build - cmake .. -DUSE_CUDA=ON + # For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required. + cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON make -j4 .. note:: Specifying compute capability diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 61e0fd553659..50a5d9094d85 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -711,6 +711,12 @@ constexpr std::pair CUDAVersion() { constexpr std::pair ThrustVersion() { return std::make_pair(THRUST_MAJOR_VERSION, THRUST_MINOR_VERSION); } +// Whether do we have thrust 1.x with x >= minor +template +constexpr bool HasThrustMinorVer() { + return (ThrustVersion().first == 1 && ThrustVersion().second >= minor) || + ThrustVersion().first > 1; +} namespace detail { template @@ -725,10 +731,8 @@ class TypedDiscard : public thrust::discard_iterator { template using TypedDiscard = - std::conditional_t<((ThrustVersion().first == 1 && - ThrustVersion().second >= 12) || - ThrustVersion().first > 1), - detail::TypedDiscardCTK114, detail::TypedDiscard>; + std::conditional_t(), detail::TypedDiscardCTK114, + detail::TypedDiscard>; /** * \class AllReducer @@ -1442,20 +1446,27 @@ void ArgSort(xgboost::common::Span keys, xgboost::common::Span sorted_i namespace detail { // Wrapper around cub sort for easier `descending` sort. template + typename BeginOffsetIteratorT, typename EndOffsetIteratorT> void DeviceSegmentedRadixSortPair( void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, // NOLINT KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, - size_t num_items, size_t num_segments, OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, int begin_bit = 0, + size_t num_items, size_t num_segments, BeginOffsetIteratorT d_begin_offsets, + EndOffsetIteratorT d_end_offsets, int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) { cub::DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); cub::DoubleBuffer d_values(const_cast(d_values_in), d_values_out); - using OffsetT = int32_t; // num items in dispatch is also int32_t, no way to change. - CHECK_LE(num_items, std::numeric_limits::max()); + // In old version of cub, num_items in dispatch is also int32_t, no way to change. + using OffsetT = + std::conditional_t(), size_t, + int32_t>; + CHECK_LE(num_items, std::numeric_limits::max()); + // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation safe_cuda((cub::DispatchSegmentedRadixSort< - descending, KeyT, ValueT, OffsetIteratorT, + descending, KeyT, ValueT, BeginOffsetIteratorT, +#if (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) || THRUST_MAJOR_VERSION > 1 + EndOffsetIteratorT, +#endif OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, From 7c8626a6f9339c91c8ea1150a95b4f4cae61b1d5 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 29 Oct 2021 14:50:22 +0800 Subject: [PATCH 2/2] win build. --- src/common/device_helpers.cuh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 50a5d9094d85..ee0621c65085 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -1462,15 +1462,23 @@ void DeviceSegmentedRadixSortPair( int32_t>; CHECK_LE(num_items, std::numeric_limits::max()); // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation + +#if (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) || THRUST_MAJOR_VERSION > 1 + safe_cuda((cub::DispatchSegmentedRadixSort< + descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, + OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, + d_values, num_items, num_segments, + d_begin_offsets, d_end_offsets, begin_bit, + end_bit, false, nullptr, false))); +#else safe_cuda((cub::DispatchSegmentedRadixSort< descending, KeyT, ValueT, BeginOffsetIteratorT, -#if (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) || THRUST_MAJOR_VERSION > 1 - EndOffsetIteratorT, -#endif OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, nullptr, false))); +#endif + } } // namespace detail