From 75c9735c198bb22a2fa234d572aa01fcff79f5ac Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 4 Nov 2022 14:29:44 -0400
Subject: [PATCH 1/6] Thrust 1.17 removes the experimental/pinned_allocator.

When xgboost is brought into a large project it can
be compiled against Thrust 1.17+ which don't offer
this experimental allocator.

To ensure that going forward xgboost works in all enviornments we provide a xgboost namespaced version of
the pinned_allocator that previously was in Thrust.
---
 .../xgboost4j-gpu/src/native/xgboost4j-gpu.cu |  4 +-
 src/common/cuda_pinned_allocator.h            | 91 +++++++++++++++++++
 src/tree/gpu_hist/evaluate_splits.cuh         |  4 +-
 3 files changed, 95 insertions(+), 4 deletions(-)
 create mode 100644 src/common/cuda_pinned_allocator.h
diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
index 4ecf8b0f1da1..eea07207819d 100644
--- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
+++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
@@ -1,7 +1,7 @@
 #include <jni.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 #include "../../../../src/common/device_helpers.cuh"
+#include "../../../../src/common/cuda_pinned_allocator.cuh"
 #include "../../../../src/data/array_interface.h"
 #include "jvm_utils.h"
 #include <xgboost/c_api.h>
@@ -131,7 +131,7 @@ class DataIteratorProxy {
   bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
 
   template <typename T>
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<T>;
   template <typename U>
   using HostVector = std::vector<U, Alloc<U>>;
 
diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h
new file mode 100644
index 000000000000..dd34e8929937
--- /dev/null
+++ b/src/common/cuda_pinned_allocator.h
@@ -0,0 +1,91 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ * \file common.h
+ * \brief cuda pinned allocator for usage with thrust containers
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+
+#include "common.h"
+
+namespace xgboost {
+namespace common {
+namespace cuda {
+
+// \p pinned_allocator is a CUDA-specific host memory allocator
+//  that employs \c cudaMallocHost for allocation.
+//
+// This implementation is ported from the experimental/pinned_allocator
+// that Thrust used to provide.
+//
+//  \see https://en.cppreference.com/w/cpp/memory/allocator
+template <typename T>
+class pinned_allocator;
+
+template <>
+class pinned_allocator<void> {
+ public:
+  using value_type      = void;            ///< The type of the elements in the allocator
+  using pointer         = void*;           ///< The type returned by address() / allocate()
+  using const_pointer   = const void*;     ///< The type returned by address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  template <typename U>
+  struct rebind {
+    using other = pinned_allocator<U>;  ///< The rebound type
+  };
+};
+
+
+template <typename T>
+class pinned_allocator {
+ public:
+  using value_type      = T;               ///< The type of the elements in the allocator
+  using pointer         = T*;              ///< The type returned by address() / allocate()
+  using const_pointer   = const T*;        ///< The type returned by address()
+  using reference       = T&;              ///< The parameter type for address()
+  using const_reference = const T&;        ///< The parameter type for address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  template <typename U>
+  struct rebind {
+    using other = pinned_allocator<U>;  ///< The rebound type
+  };
+
+  XGBOOST_DEVICE inline pinned_allocator() {}
+  XGBOOST_DEVICE inline ~pinned_allocator() {}
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {}
+  template <typename U>
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {}
+
+  XGBOOST_DEVICE inline pointer address(reference r) { return &r; }
+  XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; }
+
+  inline pointer allocate(size_type cnt, const_pointer = 0)
+  {
+    if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
+
+    pointer result(0);
+    dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
+    return result;
+  }
+
+  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); }
+
+  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }
+
+  XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
+
+  XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const
+  {
+    return !operator==(x);
+  }
+};
+}
+}
+}
diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh
index 2da207e79104..d3174c4df651 100644
--- a/src/tree/gpu_hist/evaluate_splits.cuh
+++ b/src/tree/gpu_hist/evaluate_splits.cuh
@@ -3,10 +3,10 @@
  */
 #ifndef EVALUATE_SPLITS_CUH_
 #define EVALUATE_SPLITS_CUH_
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include <xgboost/span.h>
 
 #include "../../common/categorical.h"
+#include "../../common/cuda_pinned_allocator.h"
 #include "../split_evaluator.h"
 #include "../updater_gpu_common.cuh"
 #include "expand_entry.cuh"
@@ -57,7 +57,7 @@ struct CatAccessor {
 class GPUHistEvaluator {
   using CatST = common::CatBitField::value_type;  // categorical storage type
   // use pinned memory to stage the categories, used for sort based splits.
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
 
  private:
   TreeEvaluator tree_evaluator_;

From 00dcb8c739f55dbc490c8e5db2420d4be6903dd6 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Mon, 7 Nov 2022 09:19:53 -0500
Subject: [PATCH 2/6] Update gputreeshap to work with libcudacxx 1.9

---
 gputreeshap | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gputreeshap b/gputreeshap
index acb5be3c17e9..787259b412c1 160000
--- a/gputreeshap
+++ b/gputreeshap
@@ -1 +1 @@
-Subproject commit acb5be3c17e9adae34ac0b176da6ea8e197cb17e
+Subproject commit 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3

From 89073765cf72f1aad5e95dd574ed65f48ef02063 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 8 Nov 2022 08:31:13 -0500
Subject: [PATCH 3/6] Correct style issues found by CI

---
 src/common/cuda_pinned_allocator.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h
index dd34e8929937..d31a8267ffdc 100644
--- a/src/common/cuda_pinned_allocator.h
+++ b/src/common/cuda_pinned_allocator.h
@@ -66,8 +66,7 @@ class pinned_allocator {
   XGBOOST_DEVICE inline pointer address(reference r) { return &r; }
   XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; }
 
-  inline pointer allocate(size_type cnt, const_pointer = 0)
-  {
+  inline pointer allocate(size_type cnt, const_pointer = 0) {
     if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
 
     pointer result(0);
@@ -81,11 +80,10 @@ class pinned_allocator {
 
   XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
 
-  XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const
-  {
+  XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
     return !operator==(x);
   }
 };
-}
-}
-}
+}  // namespace cuda
+}  // namespace common
+}  // namespace xgboost

From 85f375ad49971301b463b3d9e2a3f5f38b468838 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 8 Nov 2022 13:21:22 -0500
Subject: [PATCH 4/6] Correct header path to cuda_pinned_allocator

---
 jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
index eea07207819d..bf3f6a0db316 100644
--- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
+++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
@@ -1,7 +1,7 @@
 #include <jni.h>
 
 #include "../../../../src/common/device_helpers.cuh"
-#include "../../../../src/common/cuda_pinned_allocator.cuh"
+#include "../../../../src/common/cuda_pinned_allocator.h"
 #include "../../../../src/data/array_interface.h"
 #include "jvm_utils.h"
 #include <xgboost/c_api.h>

From aeebe6a02a6c8b1f2172f3b9480a01ab55ef3082 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 8 Nov 2022 13:37:09 -0500
Subject: [PATCH 5/6] Specify clang-tidy exclusions for pinned allocator

---
 src/common/cuda_pinned_allocator.h | 50 ++++++++++++++++--------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h
index d31a8267ffdc..e42b7df3cc72 100644
--- a/src/common/cuda_pinned_allocator.h
+++ b/src/common/cuda_pinned_allocator.h
@@ -28,15 +28,15 @@ class pinned_allocator;
 template <>
 class pinned_allocator<void> {
  public:
-  using value_type      = void;            ///< The type of the elements in the allocator
-  using pointer         = void*;           ///< The type returned by address() / allocate()
-  using const_pointer   = const void*;     ///< The type returned by address()
-  using size_type       = std::size_t;     ///< The type used for the size of the allocation
-  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+  using value_type      = void;            // NOLINT: The type of the elements in the allocator
+  using pointer         = void*;           // NOLINT: The type returned by address() / allocate()
+  using const_pointer   = const void*;     // NOLINT: The type returned by address()
+  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
 
   template <typename U>
   struct rebind {
-    using other = pinned_allocator<U>;  ///< The rebound type
+    using other = pinned_allocator<U>;  // NOLINT: The rebound type
   };
 };
 
@@ -44,39 +44,41 @@ class pinned_allocator<void> {
 template <typename T>
 class pinned_allocator {
  public:
-  using value_type      = T;               ///< The type of the elements in the allocator
-  using pointer         = T*;              ///< The type returned by address() / allocate()
-  using const_pointer   = const T*;        ///< The type returned by address()
-  using reference       = T&;              ///< The parameter type for address()
-  using const_reference = const T&;        ///< The parameter type for address()
-  using size_type       = std::size_t;     ///< The type used for the size of the allocation
-  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+  using value_type      = T;               // NOLINT: The type of the elements in the allocator
+  using pointer         = T*;              // NOLINT: The type returned by address() / allocate()
+  using const_pointer   = const T*;        // NOLINT: The type returned by address()
+  using reference       = T&;              // NOLINT: The parameter type for address()
+  using const_reference = const T&;        // NOLINT: The parameter type for address()
+  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
 
   template <typename U>
   struct rebind {
-    using other = pinned_allocator<U>;  ///< The rebound type
+    using other = pinned_allocator<U>;  // NOLINT: The rebound type
   };
 
-  XGBOOST_DEVICE inline pinned_allocator() {}
-  XGBOOST_DEVICE inline ~pinned_allocator() {}
-  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {}
+  XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
+  XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
+
+
   template <typename U>
-  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {}
+  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
 
-  XGBOOST_DEVICE inline pointer address(reference r) { return &r; }
-  XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; }
+  XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
+  XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
 
-  inline pointer allocate(size_type cnt, const_pointer = 0) {
+  inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
     if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
 
-    pointer result(0);
+    pointer result(nullptr);
     dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
     return result;
   }
 
-  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); }
+  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
 
-  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }
+  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
 
   XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
 

From b2186a6d5d2936ed9e8864dccd35c698ea55a63e Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Wed, 9 Nov 2022 10:25:16 -0500
Subject: [PATCH 6/6] Specify clang-tidy exclusions for pinned allocator

---
 src/common/cuda_pinned_allocator.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h
index e42b7df3cc72..d11851d99d37 100644
--- a/src/common/cuda_pinned_allocator.h
+++ b/src/common/cuda_pinned_allocator.h
@@ -35,7 +35,7 @@ class pinned_allocator<void> {
   using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
 
   template <typename U>
-  struct rebind {
+  struct rebind {                       // NOLINT
     using other = pinned_allocator<U>;  // NOLINT: The rebound type
   };
 };
@@ -53,7 +53,7 @@ class pinned_allocator {
   using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
 
   template <typename U>
-  struct rebind {
+  struct rebind {                       // NOLINT
     using other = pinned_allocator<U>;  // NOLINT: The rebound type
   };