From 75c9735c198bb22a2fa234d572aa01fcff79f5ac Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 4 Nov 2022 14:29:44 -0400 Subject: [PATCH 1/6] Thrust 1.17 removes the experimental/pinned_allocator. When xgboost is brought into a large project it can be compiled against Thrust 1.17+ which don't offer this experimental allocator. To ensure that going forward xgboost works in all enviornments we provide a xgboost namespaced version of the pinned_allocator that previously was in Thrust. --- .../xgboost4j-gpu/src/native/xgboost4j-gpu.cu | 4 +- src/common/cuda_pinned_allocator.h | 91 +++++++++++++++++++ src/tree/gpu_hist/evaluate_splits.cuh | 4 +- 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 src/common/cuda_pinned_allocator.h diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu index 4ecf8b0f1da1..eea07207819d 100644 --- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu +++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu @@ -1,7 +1,7 @@ #include -#include #include "../../../../src/common/device_helpers.cuh" +#include "../../../../src/common/cuda_pinned_allocator.cuh" #include "../../../../src/data/array_interface.h" #include "jvm_utils.h" #include @@ -131,7 +131,7 @@ class DataIteratorProxy { bool cache_on_host_{true}; // TODO(Bobby): Make this optional. template - using Alloc = thrust::system::cuda::experimental::pinned_allocator; + using Alloc = xgboost::common::cuda::pinned_allocator; template using HostVector = std::vector>; diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h new file mode 100644 index 000000000000..dd34e8929937 --- /dev/null +++ b/src/common/cuda_pinned_allocator.h @@ -0,0 +1,91 @@ +/*! + * Copyright 2022 by XGBoost Contributors + * \file common.h + * \brief cuda pinned allocator for usage with thrust containers + */ + +#pragma once + +#include +#include + +#include "common.h" + +namespace xgboost { +namespace common { +namespace cuda { + +// \p pinned_allocator is a CUDA-specific host memory allocator +// that employs \c cudaMallocHost for allocation. +// +// This implementation is ported from the experimental/pinned_allocator +// that Thrust used to provide. +// +// \see https://en.cppreference.com/w/cpp/memory/allocator +template +class pinned_allocator; + +template <> +class pinned_allocator { + public: + using value_type = void; ///< The type of the elements in the allocator + using pointer = void*; ///< The type returned by address() / allocate() + using const_pointer = const void*; ///< The type returned by address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + template + struct rebind { + using other = pinned_allocator; ///< The rebound type + }; +}; + + +template +class pinned_allocator { + public: + using value_type = T; ///< The type of the elements in the allocator + using pointer = T*; ///< The type returned by address() / allocate() + using const_pointer = const T*; ///< The type returned by address() + using reference = T&; ///< The parameter type for address() + using const_reference = const T&; ///< The parameter type for address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + template + struct rebind { + using other = pinned_allocator; ///< The rebound type + }; + + XGBOOST_DEVICE inline pinned_allocator() {} + XGBOOST_DEVICE inline ~pinned_allocator() {} + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} + template + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} + + XGBOOST_DEVICE inline pointer address(reference r) { return &r; } + XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } + + inline pointer allocate(size_type cnt, const_pointer = 0) + { + if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if + + pointer result(0); + dh::safe_cuda(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); + return result; + } + + inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } + + inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } + + XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; } + + XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const + { + return !operator==(x); + } +}; +} +} +} diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh index 2da207e79104..d3174c4df651 100644 --- a/src/tree/gpu_hist/evaluate_splits.cuh +++ b/src/tree/gpu_hist/evaluate_splits.cuh @@ -3,10 +3,10 @@ */ #ifndef EVALUATE_SPLITS_CUH_ #define EVALUATE_SPLITS_CUH_ -#include #include #include "../../common/categorical.h" +#include "../../common/cuda_pinned_allocator.h" #include "../split_evaluator.h" #include "../updater_gpu_common.cuh" #include "expand_entry.cuh" @@ -57,7 +57,7 @@ struct CatAccessor { class GPUHistEvaluator { using CatST = common::CatBitField::value_type; // categorical storage type // use pinned memory to stage the categories, used for sort based splits. - using Alloc = thrust::system::cuda::experimental::pinned_allocator; + using Alloc = xgboost::common::cuda::pinned_allocator; private: TreeEvaluator tree_evaluator_; From 00dcb8c739f55dbc490c8e5db2420d4be6903dd6 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 7 Nov 2022 09:19:53 -0500 Subject: [PATCH 2/6] Update gputreeshap to work with libcudacxx 1.9 --- gputreeshap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gputreeshap b/gputreeshap index acb5be3c17e9..787259b412c1 160000 --- a/gputreeshap +++ b/gputreeshap @@ -1 +1 @@ -Subproject commit acb5be3c17e9adae34ac0b176da6ea8e197cb17e +Subproject commit 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3 From 89073765cf72f1aad5e95dd574ed65f48ef02063 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 8 Nov 2022 08:31:13 -0500 Subject: [PATCH 3/6] Correct style issues found by CI --- src/common/cuda_pinned_allocator.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h index dd34e8929937..d31a8267ffdc 100644 --- a/src/common/cuda_pinned_allocator.h +++ b/src/common/cuda_pinned_allocator.h @@ -66,8 +66,7 @@ class pinned_allocator { XGBOOST_DEVICE inline pointer address(reference r) { return &r; } XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } - inline pointer allocate(size_type cnt, const_pointer = 0) - { + inline pointer allocate(size_type cnt, const_pointer = 0) { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if pointer result(0); @@ -81,11 +80,10 @@ class pinned_allocator { XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; } - XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const - { + XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const { return !operator==(x); } }; -} -} -} +} // namespace cuda +} // namespace common +} // namespace xgboost From 85f375ad49971301b463b3d9e2a3f5f38b468838 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 8 Nov 2022 13:21:22 -0500 Subject: [PATCH 4/6] Correct header path to cuda_pinned_allocator --- jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu index eea07207819d..bf3f6a0db316 100644 --- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu +++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu @@ -1,7 +1,7 @@ #include #include "../../../../src/common/device_helpers.cuh" -#include "../../../../src/common/cuda_pinned_allocator.cuh" +#include "../../../../src/common/cuda_pinned_allocator.h" #include "../../../../src/data/array_interface.h" #include "jvm_utils.h" #include From aeebe6a02a6c8b1f2172f3b9480a01ab55ef3082 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 8 Nov 2022 13:37:09 -0500 Subject: [PATCH 5/6] Specify clang-tidy exclusions for pinned allocator --- src/common/cuda_pinned_allocator.h | 50 ++++++++++++++++-------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h index d31a8267ffdc..e42b7df3cc72 100644 --- a/src/common/cuda_pinned_allocator.h +++ b/src/common/cuda_pinned_allocator.h @@ -28,15 +28,15 @@ class pinned_allocator; template <> class pinned_allocator { public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = const void*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + using value_type = void; // NOLINT: The type of the elements in the allocator + using pointer = void*; // NOLINT: The type returned by address() / allocate() + using const_pointer = const void*; // NOLINT: The type returned by address() + using size_type = std::size_t; // NOLINT: The type used for the size of the allocation + using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers template struct rebind { - using other = pinned_allocator; ///< The rebound type + using other = pinned_allocator; // NOLINT: The rebound type }; }; @@ -44,39 +44,41 @@ class pinned_allocator { template class pinned_allocator { public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = const T*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = const T&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + using value_type = T; // NOLINT: The type of the elements in the allocator + using pointer = T*; // NOLINT: The type returned by address() / allocate() + using const_pointer = const T*; // NOLINT: The type returned by address() + using reference = T&; // NOLINT: The parameter type for address() + using const_reference = const T&; // NOLINT: The parameter type for address() + using size_type = std::size_t; // NOLINT: The type used for the size of the allocation + using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers template struct rebind { - using other = pinned_allocator; ///< The rebound type + using other = pinned_allocator; // NOLINT: The rebound type }; - XGBOOST_DEVICE inline pinned_allocator() {} - XGBOOST_DEVICE inline ~pinned_allocator() {} - XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} + XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions + XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions + + template - XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT - XGBOOST_DEVICE inline pointer address(reference r) { return &r; } - XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } + XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT + XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT - inline pointer allocate(size_type cnt, const_pointer = 0) { + inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - pointer result(0); + pointer result(nullptr); dh::safe_cuda(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); return result; } - inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } + inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT - inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } + inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } // NOLINT XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; } From b2186a6d5d2936ed9e8864dccd35c698ea55a63e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 9 Nov 2022 10:25:16 -0500 Subject: [PATCH 6/6] Specify clang-tidy exclusions for pinned allocator --- src/common/cuda_pinned_allocator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h index e42b7df3cc72..d11851d99d37 100644 --- a/src/common/cuda_pinned_allocator.h +++ b/src/common/cuda_pinned_allocator.h @@ -35,7 +35,7 @@ class pinned_allocator { using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers template - struct rebind { + struct rebind { // NOLINT using other = pinned_allocator; // NOLINT: The rebound type }; }; @@ -53,7 +53,7 @@ class pinned_allocator { using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers template - struct rebind { + struct rebind { // NOLINT using other = pinned_allocator; // NOLINT: The rebound type };