Skip to content
This repository has been archived by the owner on Aug 7, 2023. It is now read-only.

Commit

Permalink
Support building with CTK11.5. (dmlc#7379)
Browse files Browse the repository at this point in the history
* Support building with CTK11.5.

* Require system cub installation for CTK11.4+.
* Check thrust version for segmented sort.
  • Loading branch information
trivialfis authored and ajschmidt8 committed Nov 2, 2021
1 parent 584b45a commit b5ac37c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Expand Up @@ -135,6 +135,10 @@ if (USE_CUDA)
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
message(SEND_ERROR "`BUILD_WITH_CUDA_CUB` should be set to `ON` for CUDA >= 11.4")
endif ()
endif (USE_CUDA)

if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
Expand Down
3 changes: 2 additions & 1 deletion doc/build.rst
Expand Up @@ -148,7 +148,8 @@ From the command line on Linux starting from the XGBoost directory:
mkdir build
cd build
cmake .. -DUSE_CUDA=ON
# For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required.
cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON
make -j4
.. note:: Specifying compute capability
Expand Down
39 changes: 29 additions & 10 deletions src/common/device_helpers.cuh
Expand Up @@ -711,6 +711,12 @@ constexpr std::pair<int, int> CUDAVersion() {
constexpr std::pair<int32_t, int32_t> ThrustVersion() {
return std::make_pair(THRUST_MAJOR_VERSION, THRUST_MINOR_VERSION);
}
// Whether do we have thrust 1.x with x >= minor
template <int32_t minor>
constexpr bool HasThrustMinorVer() {
return (ThrustVersion().first == 1 && ThrustVersion().second >= minor) ||
ThrustVersion().first > 1;
}

namespace detail {
template <typename T>
Expand All @@ -725,10 +731,8 @@ class TypedDiscard : public thrust::discard_iterator<T> {

template <typename T>
using TypedDiscard =
std::conditional_t<((ThrustVersion().first == 1 &&
ThrustVersion().second >= 12) ||
ThrustVersion().first > 1),
detail::TypedDiscardCTK114<T>, detail::TypedDiscard<T>>;
std::conditional_t<HasThrustMinorVer<12>(), detail::TypedDiscardCTK114<T>,
detail::TypedDiscard<T>>;

/**
* \class AllReducer
Expand Down Expand Up @@ -1442,24 +1446,39 @@ void ArgSort(xgboost::common::Span<U> keys, xgboost::common::Span<IdxT> sorted_i
namespace detail {
// Wrapper around cub sort for easier `descending` sort.
template <bool descending, typename KeyT, typename ValueT,
typename OffsetIteratorT>
typename BeginOffsetIteratorT, typename EndOffsetIteratorT>
void DeviceSegmentedRadixSortPair(
void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, // NOLINT
KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out,
size_t num_items, size_t num_segments, OffsetIteratorT d_begin_offsets,
OffsetIteratorT d_end_offsets, int begin_bit = 0,
size_t num_items, size_t num_segments, BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets, int begin_bit = 0,
int end_bit = sizeof(KeyT) * 8) {
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in),
d_values_out);
using OffsetT = int32_t; // num items in dispatch is also int32_t, no way to change.
CHECK_LE(num_items, std::numeric_limits<int32_t>::max());
// In old version of cub, num_items in dispatch is also int32_t, no way to change.
using OffsetT =
std::conditional_t<BuildWithCUDACub() && HasThrustMinorVer<13>(), size_t,
int32_t>;
CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
// For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation

#if (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) || THRUST_MAJOR_VERSION > 1
safe_cuda((cub::DispatchSegmentedRadixSort<
descending, KeyT, ValueT, OffsetIteratorT,
descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys,
d_values, num_items, num_segments,
d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, nullptr, false)));
#else
safe_cuda((cub::DispatchSegmentedRadixSort<
descending, KeyT, ValueT, BeginOffsetIteratorT,
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys,
d_values, num_items, num_segments,
d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, nullptr, false)));
#endif

}
} // namespace detail

Expand Down

0 comments on commit b5ac37c

Please sign in to comment.