diff --git a/gputreeshap b/gputreeshap
index acb5be3c17e9..787259b412c1 160000
--- a/gputreeshap
+++ b/gputreeshap
@@ -1 +1 @@
-Subproject commit acb5be3c17e9adae34ac0b176da6ea8e197cb17e
+Subproject commit 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3
diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
index 4ecf8b0f1da1..bf3f6a0db316 100644
--- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
+++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
@@ -1,7 +1,7 @@
 #include <jni.h>
 
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include "../../../../src/common/device_helpers.cuh"
+#include "../../../../src/common/cuda_pinned_allocator.h"
 #include "../../../../src/data/array_interface.h"
 #include "jvm_utils.h"
 #include
@@ -131,7 +131,7 @@ class DataIteratorProxy {
   bool cache_on_host_{true};  // TODO(Bobby): Make this optional.
 
   template <typename T>
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<T>;
   template <typename T>
   using HostVector = std::vector<T, Alloc<T>>;
diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h
new file mode 100644
index 000000000000..d11851d99d37
--- /dev/null
+++ b/src/common/cuda_pinned_allocator.h
@@ -0,0 +1,91 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ * \file common.h
+ * \brief cuda pinned allocator for usage with thrust containers
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+
+#include "common.h"
+
+namespace xgboost {
+namespace common {
+namespace cuda {
+
+// \p pinned_allocator is a CUDA-specific host memory allocator
+// that employs \c cudaMallocHost for allocation.
+//
+// This implementation is ported from the experimental/pinned_allocator
+// that Thrust used to provide.
+//
+// \see https://en.cppreference.com/w/cpp/memory/allocator
+template <typename T>
+class pinned_allocator;
+
+template <>
+class pinned_allocator<void> {
+ public:
+  using value_type      = void;            // NOLINT: The type of the elements in the allocator
+  using pointer         = void*;           // NOLINT: The type returned by address() / allocate()
+  using const_pointer   = const void*;     // NOLINT: The type returned by address()
+  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
+
+  template <typename U>
+  struct rebind {                       // NOLINT
+    using other = pinned_allocator<U>;  // NOLINT: The rebound type
+  };
+};
+
+
+template <typename T>
+class pinned_allocator {
+ public:
+  using value_type      = T;               // NOLINT: The type of the elements in the allocator
+  using pointer         = T*;              // NOLINT: The type returned by address() / allocate()
+  using const_pointer   = const T*;        // NOLINT: The type returned by address()
+  using reference       = T&;              // NOLINT: The parameter type for address()
+  using const_reference = const T&;        // NOLINT: The parameter type for address()
+  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
+
+  template <typename U>
+  struct rebind {                       // NOLINT
+    using other = pinned_allocator<U>;  // NOLINT: The rebound type
+  };
+
+  XGBOOST_DEVICE inline pinned_allocator() {};                         // NOLINT: host/device markup ignored on defaulted functions
+  XGBOOST_DEVICE inline ~pinned_allocator() {}                         // NOLINT: host/device markup ignored on defaulted functions
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {}   // NOLINT: host/device markup ignored on defaulted functions
+
+
+  template <typename U>
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {}  // NOLINT
+
+  XGBOOST_DEVICE inline pointer address(reference r) { return &r; }              // NOLINT
+  XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; }  // NOLINT
+
+  inline pointer allocate(size_type cnt, const_pointer = nullptr) {  // NOLINT
+    if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
+
+    pointer result(nullptr);
+    dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
+    return result;
+  }
+
+  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); }  // NOLINT
+
+  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }  // NOLINT
+
+  XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
+
+  XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
+    return !operator==(x);
+  }
+};
+}  // namespace cuda
+}  // namespace common
+}  // namespace xgboost
diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh
index 2da207e79104..d3174c4df651 100644
--- a/src/tree/gpu_hist/evaluate_splits.cuh
+++ b/src/tree/gpu_hist/evaluate_splits.cuh
@@ -3,10 +3,10 @@
  */
 #ifndef EVALUATE_SPLITS_CUH_
 #define EVALUATE_SPLITS_CUH_
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include
 
 #include "../../common/categorical.h"
+#include "../../common/cuda_pinned_allocator.h"
 #include "../split_evaluator.h"
 #include "../updater_gpu_common.cuh"
 #include "expand_entry.cuh"
@@ -57,7 +57,7 @@ struct CatAccessor {
 class GPUHistEvaluator {
   using CatST = common::CatBitField::value_type;  // categorical storage type
   // use pinned memory to stage the categories, used for sort based splits.
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
 
  private:
  TreeEvaluator tree_evaluator_;
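
Usage note (not part of the patch): xgboost::common::cuda::pinned_allocator is intended as a drop-in replacement for the removed thrust::system::cuda::experimental::pinned_allocator, so standard containers can keep staging data in page-locked (cudaMallocHost) host memory. A minimal sketch of how such a container might be declared, assuming the header path from the diff; the PinnedVector alias and main() below are illustrative only and not part of XGBoost:

    #include <vector>

    #include "src/common/cuda_pinned_allocator.h"  // illustrative include path

    // A std::vector whose storage is page-locked via cudaMallocHost, mirroring
    // the HostVector alias introduced in xgboost4j-gpu.cu above.
    template <typename T>
    using PinnedVector = std::vector<T, xgboost::common::cuda::pinned_allocator<T>>;

    int main() {
      PinnedVector<float> staging(1024);  // allocate() calls cudaMallocHost
      staging.front() = 1.0f;             // usable like ordinary host memory
      return 0;                           // deallocate() calls cudaFreeHost
    }

Pinned (page-locked) staging buffers like this allow asynchronous cudaMemcpyAsync transfers to overlap with device work, which is why the JVM data iterator and the categorical-split staging both use them.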