Skip to content

Commit

Permalink
Got test passing.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Oct 13, 2020
1 parent 7f5dd73 commit 86edce9
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 46 deletions.
17 changes: 17 additions & 0 deletions src/common/device_helpers.cuh
Expand Up @@ -1107,4 +1107,21 @@ size_t SegmentedUnique(Inputs &&...inputs) {
dh::XGBCachingDeviceAllocator<char> alloc;
return SegmentedUnique(thrust::cuda::par(alloc), std::forward<Inputs&&>(inputs)...);
}

/**
 * \brief Batched wrapper around thrust::reduce for inputs longer than a single
 *        thrust::reduce call can address (thrust uses 32-bit offsets internally).
 *
 * The range is processed in batches of at most kLimit elements.  The running
 * aggregate is passed as the init of each batch, so \p init is incorporated into
 * the final result exactly once.
 *
 * \param policy    Thrust execution policy, e.g. thrust::cuda::par(alloc).
 * \param first     Iterator to the first element.
 * \param second    Iterator one past the last element.
 * \param init      Initial value, combined into the result exactly once.
 * \param reduce_op Associative binary reduction operator.
 * \return Reduction of [first, second) together with init.
 */
template <typename Policy, typename InputIt, typename Init, typename Func>
auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) {
  // Stay well below thrust's internal 32-bit indexing limit per call.
  size_t constexpr kLimit = std::numeric_limits<int32_t>::max() / 2;
  size_t size = std::distance(first, second);
  using Ty = std::remove_cv_t<Init>;
  Ty aggregate = init;
  for (size_t offset = 0; offset < size; offset += kLimit) {
    auto begin_it = first + offset;
    auto end_it = first + std::min(offset + kLimit, size);
    size_t batch_size = std::distance(begin_it, end_it);
    // Each batch must remain addressable with 32-bit offsets; the previous
    // check against `size` was vacuously true.
    CHECK_LE(batch_size, kLimit);
    // Seed each batch with the running aggregate rather than `init`: reducing
    // every batch with `init` and then combining again applied `init` more
    // than once, which is wrong for any non-identity initial value.
    aggregate = thrust::reduce(policy, begin_it, end_it, aggregate, reduce_op);
  }
  return aggregate;
}
} // namespace dh
4 changes: 1 addition & 3 deletions src/data/device_adapter.cuh
Expand Up @@ -221,9 +221,7 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
}
});
dh::XGBCachingDeviceAllocator<char> alloc;
CHECK_LE(offset.size(), std::numeric_limits<int32_t>::max() - 1000)
<< "Known limitation: Number of rows cannot exceed 1 ** 30 for quantle based DMatrix.";
size_t row_stride = thrust::reduce(
size_t row_stride = dh::Reduce(
thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(), size_t(0),
thrust::maximum<size_t>());
Expand Down
4 changes: 2 additions & 2 deletions src/tree/gpu_hist/histogram.cu
Expand Up @@ -53,7 +53,7 @@ struct Pair {
GradientPair first;
GradientPair second;
};
XGBOOST_DEV_INLINE Pair operator+(Pair const& lhs, Pair const& rhs) {
// Element-wise sum of two gradient-pair Pairs.  The __host__ qualifier is
// required in addition to the device inline: dh::Reduce combines per-batch
// results on the host, so this operator must be callable from host code.
__host__ XGBOOST_DEV_INLINE Pair operator+(Pair const& lhs, Pair const& rhs) {
  return {lhs.first + rhs.first, lhs.second + rhs.second};
}
} // anonymous namespace
Expand Down Expand Up @@ -86,7 +86,7 @@ GradientSumT CreateRoundingFactor(common::Span<GradientPair const> gpair) {
thrust::device_ptr<GradientPair const> gpair_end {gpair.data() + gpair.size()};
auto beg = thrust::make_transform_iterator(gpair_beg, Clip());
auto end = thrust::make_transform_iterator(gpair_end, Clip());
Pair p = thrust::reduce(thrust::cuda::par(alloc), beg, end, Pair{});
Pair p = dh::Reduce(thrust::cuda::par(alloc), beg, end, Pair{}, thrust::plus<Pair>{});
GradientPair positive_sum {p.first}, negative_sum {p.second};

auto histogram_rounding = GradientSumT {
Expand Down
5 changes: 3 additions & 2 deletions src/tree/updater_gpu_hist.cu
Expand Up @@ -642,10 +642,11 @@ struct GPUHistMakerDevice {
ExpandEntry InitRoot(RegTree* p_tree, dh::AllReducer* reducer) {
constexpr bst_node_t kRootNIdx = 0;
dh::XGBCachingDeviceAllocator<char> alloc;
GradientPair root_sum = thrust::reduce(
GradientPair root_sum = dh::Reduce(
thrust::cuda::par(alloc),
thrust::device_ptr<GradientPair const>(gpair.data()),
thrust::device_ptr<GradientPair const>(gpair.data() + gpair.size()));
thrust::device_ptr<GradientPair const>(gpair.data() + gpair.size()),
GradientPair{}, thrust::plus<GradientPair>{});
rabit::Allreduce<rabit::op::Sum, float>(reinterpret_cast<float*>(&root_sum),
2);

Expand Down
11 changes: 9 additions & 2 deletions tests/cpp/common/test_device_helpers.cu
@@ -1,4 +1,3 @@

/*!
* Copyright 2017 XGBoost contributors
*/
Expand Down Expand Up @@ -122,6 +121,14 @@ void TestSegmentedUniqueRegression(std::vector<SketchEntry> values, size_t n_dup
ASSERT_EQ(segments.at(1), d_segments_out[1] + n_duplicated);
}

// Regression test: dh::Reduce must handle ranges longer than a single
// thrust::reduce call can address (uint32 max elements) by batching.
TEST(DeviceHelpers, Reduce) {
  size_t constexpr kSize = std::numeric_limits<uint32_t>::max();
  auto it = thrust::make_counting_iterator(0ul);
  dh::XGBCachingDeviceAllocator<char> alloc;
  auto batched = dh::Reduce(thrust::cuda::par(alloc), it, it + kSize, 0ul,
                            thrust::maximum<size_t>{});
  // Maximum over the half-open range [0, kSize) is kSize - 1.  Use gtest's
  // ASSERT_EQ instead of dmlc CHECK_EQ so a failure is reported through the
  // test framework rather than aborting the whole process.
  ASSERT_EQ(batched, kSize - 1);
}


TEST(SegmentedUnique, Regression) {
{
Expand Down Expand Up @@ -157,4 +164,4 @@ TEST(SegmentedUnique, Regression) {
}
}
} // namespace common
} // namespace xgboost
} // namespace xgboost
37 changes: 0 additions & 37 deletions tests/cpp/data/test_ellpack_page.cu
Expand Up @@ -234,41 +234,4 @@ TEST(EllpackPage, Compact) {
}
}
}

// Identity transform over indices, usable from both host and device code.
// Asserts that the index stays within `size` (debug builds only).
struct KeyIter {
  size_t size;  // number of valid indices; operator() asserts idx < size
  size_t __host__ __device__ operator()(size_t idx) {
    assert(idx < size);
    return idx;
  }
};

// Exercises thrust::inclusive_scan with int32-max elements — at the boundary
// of 32-bit indexing — using two equivalent key transforms; results go to a
// discard iterator, so only clean completion is being checked.
void TestScan() {
  size_t size = std::numeric_limits<int32_t>::max();
  {
    // Variant 1: extended __host__ __device__ lambda as the transform.
    auto key_iter = thrust::make_transform_iterator(
        thrust::make_counting_iterator<size_t>(0ul),
        [=] __host__ __device__(size_t idx) {
          assert(idx < size);
          return idx;
        });
    auto end_it = key_iter + size;
    thrust::inclusive_scan(thrust::device, key_iter, end_it,
                           thrust::make_discard_iterator(),
                           [] XGBOOST_DEVICE(size_t a, size_t b) { return b; });
  }
  {
    // Variant 2: named functor (KeyIter) as the transform.
    auto key_iter = thrust::make_transform_iterator(
        thrust::make_counting_iterator<size_t>(0ul),
        KeyIter{size});
    auto end_it = key_iter + size;
    thrust::inclusive_scan(thrust::device, key_iter, end_it,
                           thrust::make_discard_iterator(),
                           [] XGBOOST_DEVICE(size_t a, size_t b) { return b; });
  }
}

// Smoke test wrapper: runs the large inclusive_scan stress test.
TEST(Thrust, Scan) {
  TestScan();
}
} // namespace xgboost

0 comments on commit 86edce9

Please sign in to comment.