From 03f68306724207463f79b8d2f44922f4fdb95f62 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 18:20:04 +0800 Subject: [PATCH 01/13] Prototype. --- include/xgboost/linalg.h | 47 ++++++++++++++++++++++++++++++--- tests/cpp/common/test_linalg.cc | 17 ++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index bb0486866f81..cfb78ca7e925 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -55,8 +55,16 @@ constexpr void CalcStride(size_t (&shape)[D], size_t (&stride)[D]) { } struct AllTag {}; + struct IntTag {}; +template +struct RangeTag { + I beg; + I end; + constexpr size_t Size() const { return end - beg; } +}; + /** * \brief Calculate the dimension of sliced tensor. */ @@ -83,7 +91,7 @@ template using RemoveCRType = std::remove_const_t>; template -using IndexToTag = std::conditional_t>::value, IntTag, AllTag>; +using IndexToTag = std::conditional_t>::value, IntTag, S>; template XGBOOST_DEVICE constexpr auto UnrollLoop(Fn fn) { @@ -192,9 +200,16 @@ XGBOOST_DEVICE decltype(auto) constexpr Apply(Fn &&f, Tup &&t) { } // namespace detail /** - * \brief Specify all elements in the axis is used for slice. + * \brief Specify all elements in the axis for slicing. */ constexpr detail::AllTag All() { return {}; } +/** + * \brief Specify a range of elements in the axis for slicing. + */ +template +constexpr detail::RangeTag Range(I beg, I end) { + return {beg, end}; +} /** * \brief A tensor view with static type and shape. It implements indexing and slicing. @@ -233,7 +248,33 @@ class TensorView { } } - template + template + XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], + detail::RangeTag &&range) const { + static_assert(new_dim < D, ""); + static_assert(old_dim < kDim, ""); + new_stride[new_dim] = stride_[old_dim]; + assert(range.Size() <= shape_[old_dim]); + new_shape[new_dim] = range.Size(); + return 0; + } + /** + * \brief Slice dimension for Range tag. + */ + template + XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], + detail::RangeTag &&range, S &&...slices) const { + static_assert(new_dim < D, ""); + static_assert(old_dim < kDim, ""); + new_stride[new_dim] = stride_[old_dim]; + assert(range.Size() <= shape_[old_dim]); + new_shape[new_dim] = range.Size(); + return MakeSliceDim(new_shape, new_stride, + std::forward(slices)...) 
+ + range.beg; + } + + template XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const { static_assert(new_dim < D, ""); diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 44a91c3f2c2e..aa982f6128e8 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -107,6 +107,23 @@ TEST(Linalg, TensorView) { auto s = t.Slice(1, 2, All()); static_assert(decltype(s)::kDimension == 1, ""); } + { + TensorView t{data, {2, 3, 4}, 0}; + auto s = t.Slice(1, linalg::All(), 1); + ASSERT_EQ(s(0), 13); + ASSERT_EQ(s(1), 17); + ASSERT_EQ(s(2), 21); + } + { + TensorView t{data, {2, 3, 4}, 0}; + auto s = t.Slice(linalg::All(), linalg::Range(0, 3), 2); + static_assert(decltype(s)::kDimension == 2, ""); + } + { + TensorView t{data, {2, 3, 4}, 0}; + auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); + static_assert(decltype(s)::kDimension == 3, ""); + } } TEST(Linalg, Tensor) { From 7a116bd79d23b72b723eb7e19e3b403c1cfc580e Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 19:36:33 +0800 Subject: [PATCH 02/13] remove iterators. --- include/xgboost/linalg.h | 16 ++++++++-------- tests/cpp/common/test_linalg.cc | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index cfb78ca7e925..55d29ea69010 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -254,9 +254,11 @@ class TensorView { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; - assert(range.Size() <= shape_[old_dim]); new_shape[new_dim] = range.Size(); - return 0; + assert(static_cast(range.end) <= shape_[old_dim]); + + auto offset = stride_[old_dim] * range.beg; + return offset; } /** * \brief Slice dimension for Range tag. @@ -267,11 +269,13 @@ class TensorView { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; - assert(range.Size() <= shape_[old_dim]); new_shape[new_dim] = range.Size(); + assert(static_cast(range.end) <= shape_[old_dim]); + + auto offset = stride_[old_dim] * range.beg; return MakeSliceDim(new_shape, new_stride, std::forward(slices)...) + - range.beg; + offset; } template @@ -439,10 +443,6 @@ class TensorView { */ XGBOOST_DEVICE auto Stride(size_t i) const { return stride_[i]; } - XGBOOST_DEVICE auto cbegin() const { return data_.cbegin(); } // NOLINT - XGBOOST_DEVICE auto cend() const { return data_.cend(); } // NOLINT - XGBOOST_DEVICE auto begin() { return data_.begin(); } // NOLINT - XGBOOST_DEVICE auto end() { return data_.end(); } // NOLINT /** * \brief Number of items in the tensor. 
*/ diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index aa982f6128e8..79654157f4a9 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -136,7 +136,8 @@ TEST(Linalg, Tensor) { size_t n = 2 * 3 * 4; ASSERT_EQ(t.Size(), n); - ASSERT_TRUE(std::equal(k_view.cbegin(), k_view.cbegin(), view.begin())); + ASSERT_TRUE( + std::equal(k_view.Values().cbegin(), k_view.Values().cbegin(), view.Values().begin())); Tensor t_0{std::move(t)}; ASSERT_EQ(t_0.Size(), n); @@ -190,7 +191,7 @@ TEST(Linalg, ArrayInterface) { auto cpu = kCpuId; auto t = Tensor{{3, 3}, cpu}; auto v = t.View(cpu); - std::iota(v.begin(), v.end(), 0); + std::iota(v.Values().begin(), v.Values().end(), 0); auto arr = Json::Load(StringView{v.ArrayInterfaceStr()}); ASSERT_EQ(get(arr["shape"][0]), 3); ASSERT_EQ(get(arr["strides"][0]), 3 * sizeof(double)); From 7913d942e69adcbd4a236aeb0f556b62a331c107 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 20:24:16 +0800 Subject: [PATCH 03/13] Helper functions. --- include/xgboost/linalg.h | 23 +++++++++++++++++++---- src/metric/auc.cc | 5 ++--- tests/cpp/common/test_linalg.cc | 29 ++++++++++++++++++++++++----- tests/cpp/common/test_linalg.cu | 20 ++++++++++++++++++++ 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 55d29ea69010..9d408e3b9713 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -212,7 +212,7 @@ constexpr detail::RangeTag Range(I beg, I end) { } /** - * \brief A tensor view with static type and shape. It implements indexing and slicing. + * \brief A tensor view with static type and dimension. It implements indexing and slicing. * * Most of the algorithms in XGBoost are implemented for both CPU and GPU without using * much linear algebra routines, this class is a helper intended to ease some high level @@ -224,7 +224,7 @@ constexpr detail::RangeTag Range(I beg, I end) { * some functions expect data types that can be used in everywhere (update prediction * cache for example). */ -template +template class TensorView { public: using ShapeT = size_t[kDim]; @@ -389,7 +389,7 @@ class TensorView { */ template XGBOOST_DEVICE T &operator()(Index &&...index) { - static_assert(sizeof...(index) <= kDim, "Invalid index."); + static_assert(sizeof...(index) == kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); return ptr_[offset]; @@ -399,7 +399,7 @@ class TensorView { */ template XGBOOST_DEVICE T const &operator()(Index &&...index) const { - static_assert(sizeof...(index) <= kDim, "Invalid index."); + static_assert(sizeof...(index) == kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); return ptr_[offset]; @@ -516,6 +516,21 @@ class TensorView { } }; +/** + * \brief Constructor for automatic type deduction. + */ +template ::value> * = nullptr> +auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device = 0) { // NOLINT + using T = typename Container::value_type; + return TensorView{data, shape, device}; +} + +template +auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device = 0) { + return TensorView{data, shape, device}; +} + /** * \brief Turns linear index into multi-dimension index. Similar to numpy unravel. 
*/ diff --git a/src/metric/auc.cc b/src/metric/auc.cc index ec8b6ee01bdd..5097116fbb78 100644 --- a/src/metric/auc.cc +++ b/src/metric/auc.cc @@ -85,9 +85,8 @@ double MultiClassOVR(common::Span predts, MetaInfo const &info, auto const &labels = info.labels_.ConstHostVector(); std::vector results_storage(n_classes * 3, 0); - linalg::TensorView results(results_storage, - {n_classes, static_cast(3)}, - GenericParameter::kCpuId); + linalg::TensorView results(results_storage, {n_classes, static_cast(3)}, + GenericParameter::kCpuId); auto local_area = results.Slice(linalg::All(), 0); auto tp = results.Slice(linalg::All(), 1); auto auc = results.Slice(linalg::All(), 2); diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 79654157f4a9..47d6e4622547 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -51,7 +51,7 @@ TEST(Linalg, TensorView) { std::vector data(2 * 3 * 4, 0); std::iota(data.begin(), data.end(), 0); - TensorView t{data, {2, 3, 4}, -1}; + auto t = MakeTensorView(data, {2, 3, 4}, -1); ASSERT_EQ(t.Shape()[0], 2); ASSERT_EQ(t.Shape()[1], 3); ASSERT_EQ(t.Shape()[2], 4); @@ -96,33 +96,52 @@ TEST(Linalg, TensorView) { // assignment TensorView t{data, {2, 3, 4}, 0}; double pi = 3.14159; + auto old = t(1, 2, 3); t(1, 2, 3) = pi; ASSERT_EQ(t(1, 2, 3), pi); + t(1, 2, 3) = old; + ASSERT_EQ(t(1, 2, 3), old); } { // Don't assign the initial dimension, tensor should be able to deduce the correct dim // for Slice. - TensorView t{data, {2, 3, 4}, 0}; + auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(1, 2, All()); static_assert(decltype(s)::kDimension == 1, ""); } { - TensorView t{data, {2, 3, 4}, 0}; + auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(1, linalg::All(), 1); ASSERT_EQ(s(0), 13); ASSERT_EQ(s(1), 17); ASSERT_EQ(s(2), 21); } { - TensorView t{data, {2, 3, 4}, 0}; + auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(linalg::All(), linalg::Range(0, 3), 2); static_assert(decltype(s)::kDimension == 2, ""); + std::vector sol{2, 6, 10, 14, 18, 22}; + auto k = 0; + for (size_t i = 0; i < s.Shape(0); ++i) { + for (size_t j = 0; j < s.Shape(1); ++j) { + ASSERT_EQ(s(i, j), sol.at(k)); + k++; + } + } } { - TensorView t{data, {2, 3, 4}, 0}; + auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); static_assert(decltype(s)::kDimension == 3, ""); + auto all = t.Slice(linalg::All(), linalg::All(), linalg::All()); + for (size_t i = 0; i < s.Shape(0); ++i) { + for (size_t j = 0; j < s.Shape(1); ++j) { + for (size_t k = 0; k < s.Shape(2); ++k) { + ASSERT_EQ(s(i, j, k), all(i,j, k)); + } + } + } } } diff --git a/tests/cpp/common/test_linalg.cu b/tests/cpp/common/test_linalg.cu index abfef8bfd353..9f5101c5b827 100644 --- a/tests/cpp/common/test_linalg.cu +++ b/tests/cpp/common/test_linalg.cu @@ -56,7 +56,27 @@ void TestElementWiseKernel() { } } } + +void TestSlice() { + thrust::device_vector data(2 * 3 * 4); + auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0); + dh::LaunchN(1, [=] __device__(size_t) { + auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); + auto all = t.Slice(linalg::All(), linalg::All(), linalg::All()); + static_assert(decltype(s)::kDimension == 3, ""); + for (size_t i = 0; i < s.Shape(0); ++i) { + for (size_t j = 0; j < s.Shape(1); ++j) { + for (size_t k = 0; k < s.Shape(2); ++k) { + SPAN_CHECK(s(i, j, k) == all(i, j, k)); + } + } + } + }); +} } // anonymous namespace + TEST(Linalg, GPUElementWise) 
{ TestElementWiseKernel(); } + +TEST(Linalg, GPUTensorView) { TestSlice(); } } // namespace linalg } // namespace xgboost From a0ef665bcac5c686f2f87317f1bd587a6eb4f3fb Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 21:02:29 +0800 Subject: [PATCH 04/13] Decouple. --- include/xgboost/linalg.h | 92 ++++++++++++++++++++------------- src/common/linalg_op.cuh | 2 +- src/common/linalg_op.h | 2 +- tests/cpp/common/test_linalg.cc | 25 +++++++-- tests/cpp/common/test_linalg.cu | 4 +- 5 files changed, 80 insertions(+), 45 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 9d408e3b9713..f68e6ad6dbbc 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -20,6 +20,15 @@ #include #include +// decouple it from xgboost. +#ifndef LINALG_HD +#if defined(__CUDA__) || defined(__NVCC__) +#define LINALG_HD __host__ __device__ +#else +#define LINALG_HD +#endif // defined (__CUDA__) || defined(__NVCC__) +#endif // LINALG_HD + namespace xgboost { namespace linalg { namespace detail { @@ -47,7 +56,7 @@ constexpr std::enable_if_t Offset(S (&strides)[D], } template -constexpr void CalcStride(size_t (&shape)[D], size_t (&stride)[D]) { +constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) { stride[D - 1] = 1; for (int32_t s = D - 2; s >= 0; --s) { stride[s] = shape[s + 1] * stride[s + 1]; @@ -94,7 +103,7 @@ template using IndexToTag = std::conditional_t>::value, IntTag, S>; template -XGBOOST_DEVICE constexpr auto UnrollLoop(Fn fn) { +LINALG_HD constexpr auto UnrollLoop(Fn fn) { #if defined __CUDA_ARCH__ #pragma unroll n #endif // defined __CUDA_ARCH__ @@ -110,7 +119,7 @@ int32_t NativePopc(T v) { return c; } -inline XGBOOST_DEVICE int Popc(uint32_t v) { +inline LINALG_HD int Popc(uint32_t v) { #if defined(__CUDA_ARCH__) return __popc(v); #elif defined(__GNUC__) || defined(__clang__) @@ -122,7 +131,7 @@ inline XGBOOST_DEVICE int Popc(uint32_t v) { #endif // compiler } -inline XGBOOST_DEVICE int Popc(uint64_t v) { +inline LINALG_HD int Popc(uint64_t v) { #if defined(__CUDA_ARCH__) return __popcll(v); #elif defined(__GNUC__) || defined(__clang__) @@ -148,7 +157,7 @@ constexpr auto Arr2Tup(T (&arr)[N]) { // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64 // bit when the index is smaller, then try to avoid division when it's exp of 2. template -XGBOOST_DEVICE auto UnravelImpl(I idx, common::Span shape) { +LINALG_HD auto UnravelImpl(I idx, common::Span shape) { size_t index[D]{0}; static_assert(std::is_signed::value, "Don't change the type without changing the for loop."); @@ -182,7 +191,7 @@ void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) { } template -XGBOOST_DEVICE decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence) { +LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence) { return f(std::get(t)...); } @@ -193,7 +202,7 @@ XGBOOST_DEVICE decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequen * \param t tuple of arguments */ template -XGBOOST_DEVICE decltype(auto) constexpr Apply(Fn &&f, Tup &&t) { +LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) { constexpr auto kSize = std::tuple_size::value; return Apply(std::forward(f), std::forward(t), std::make_index_sequence{}); } @@ -240,7 +249,7 @@ class TensorView { int32_t device_{-1}; // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view. 
- XGBOOST_DEVICE void CalcSize() { + LINALG_HD void CalcSize() { if (data_.empty()) { size_ = 0; } else { @@ -249,8 +258,8 @@ class TensorView { } template - XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], - detail::RangeTag &&range) const { + LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], + detail::RangeTag &&range) const { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; @@ -264,8 +273,8 @@ class TensorView { * \brief Slice dimension for Range tag. */ template - XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], - detail::RangeTag &&range, S &&...slices) const { + LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], + detail::RangeTag &&range, S &&...slices) const { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; @@ -279,8 +288,7 @@ class TensorView { } template - XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], - detail::AllTag) const { + LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; @@ -291,8 +299,8 @@ class TensorView { * \brief Slice dimension for All tag. */ template - XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag, - S &&...slices) const { + LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag, + S &&...slices) const { static_assert(new_dim < D, ""); static_assert(old_dim < kDim, ""); new_stride[new_dim] = stride_[old_dim]; @@ -302,7 +310,7 @@ class TensorView { } template - XGBOOST_DEVICE size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], Index i) const { + LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], Index i) const { static_assert(old_dim < kDim, ""); return stride_[old_dim] * i; } @@ -310,7 +318,7 @@ class TensorView { * \brief Slice dimension for Index tag. */ template - XGBOOST_DEVICE std::enable_if_t::value, size_t> MakeSliceDim( + LINALG_HD std::enable_if_t::value, size_t> MakeSliceDim( size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const { static_assert(old_dim < kDim, ""); auto offset = stride_[old_dim] * i; @@ -336,7 +344,7 @@ class TensorView { * \param device Device ordinal */ template - XGBOOST_DEVICE TensorView(common::Span data, I const (&shape)[D], int32_t device) + LINALG_HD TensorView(common::Span data, I const (&shape)[D], int32_t device) : data_{data}, ptr_{data_.data()}, device_{device} { static_assert(D > 0 && D <= kDim, "Invalid shape."); // shape @@ -355,8 +363,8 @@ class TensorView { * stride can be calculated from shape. 
*/ template - XGBOOST_DEVICE TensorView(common::Span data, I const (&shape)[D], I const (&stride)[D], - int32_t device) + LINALG_HD TensorView(common::Span data, I const (&shape)[D], I const (&stride)[D], + int32_t device) : data_{data}, ptr_{data_.data()}, device_{device} { static_assert(D == kDim, "Invalid shape & stride."); detail::UnrollLoop([&](auto i) { @@ -366,7 +374,7 @@ class TensorView { this->CalcSize(); } - XGBOOST_DEVICE TensorView(TensorView const &that) + LINALG_HD TensorView(TensorView const &that) : data_{that.data_}, ptr_{data_.data()}, size_{that.size_}, device_{that.device_} { detail::UnrollLoop([&](auto i) { stride_[i] = that.stride_[i]; @@ -388,7 +396,7 @@ class TensorView { * \endcode */ template - XGBOOST_DEVICE T &operator()(Index &&...index) { + LINALG_HD T &operator()(Index &&...index) { static_assert(sizeof...(index) == kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); @@ -398,7 +406,7 @@ class TensorView { * \brief Index the tensor to obtain a scalar value. */ template - XGBOOST_DEVICE T const &operator()(Index &&...index) const { + LINALG_HD T const &operator()(Index &&...index) const { static_assert(sizeof...(index) == kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); @@ -419,7 +427,7 @@ class TensorView { * \endcode */ template - XGBOOST_DEVICE auto Slice(S &&...slices) const { + LINALG_HD auto Slice(S &&...slices) const { static_assert(sizeof...(slices) <= kDim, "Invalid slice."); int32_t constexpr kNewDim{detail::CalcSliceDim...>()}; size_t new_shape[kNewDim]; @@ -432,33 +440,39 @@ class TensorView { return ret; } - XGBOOST_DEVICE auto Shape() const { return common::Span{shape_}; } + LINALG_HD auto Shape() const { return common::Span{shape_}; } /** * Get the shape for i^th dimension */ - XGBOOST_DEVICE auto Shape(size_t i) const { return shape_[i]; } - XGBOOST_DEVICE auto Stride() const { return common::Span{stride_}; } + LINALG_HD auto Shape(size_t i) const { return shape_[i]; } + LINALG_HD auto Stride() const { return common::Span{stride_}; } /** * Get the stride for i^th dimension, stride is specified as number of items instead of bytes. */ - XGBOOST_DEVICE auto Stride(size_t i) const { return stride_[i]; } + LINALG_HD auto Stride(size_t i) const { return stride_[i]; } /** * \brief Number of items in the tensor. */ - XGBOOST_DEVICE size_t Size() const { return size_; } + LINALG_HD size_t Size() const { return size_; } /** - * \brief Whether it's a contiguous array. (c and f contiguous are both contiguous) + * \brief Whether it's a c-contiguous array. */ - XGBOOST_DEVICE bool Contiguous() const { return size_ == data_.size(); } + LINALG_HD bool CContiguous() const { + StrideT stride; + static_assert(std::is_same::value, ""); + // It's contiguous if the stride can be calculated from shape. + detail::CalcStride(shape_, stride); + return common::Span{stride_} == common::Span{stride}; + } /** - * \brief Obtain the raw data. + * \brief Obtain a reference to the raw data. */ - XGBOOST_DEVICE auto Values() const { return data_; } + LINALG_HD auto Values() const -> decltype(data_) const & { return data_; } /** * \brief Obtain the CUDA device ordinal. 
*/ - XGBOOST_DEVICE auto DeviceIdx() const { return device_; } + LINALG_HD auto DeviceIdx() const { return device_; } /** * \brief Array Interface defined by @@ -527,7 +541,7 @@ auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device = 0) { } template -auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device = 0) { +LINALG_HD auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device = 0) { return TensorView{data, shape, device}; } @@ -535,7 +549,7 @@ auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device = * \brief Turns linear index into multi-dimension index. Similar to numpy unravel. */ template -XGBOOST_DEVICE auto UnravelIndex(size_t idx, common::Span shape) { +LINALG_HD auto UnravelIndex(size_t idx, common::Span shape) { if (idx > std::numeric_limits::max()) { return detail::UnravelImpl(static_cast(idx), shape); } else { @@ -730,4 +744,8 @@ void Stack(Tensor *l, Tensor const &r) { } } // namespace linalg } // namespace xgboost + +#if defined(LINALG_HD) +#undef LINALG_HD +#endif // defined(LINALG_HD) #endif // XGBOOST_LINALG_H_ diff --git a/src/common/linalg_op.cuh b/src/common/linalg_op.cuh index dfab58729b56..f0a18a912a22 100644 --- a/src/common/linalg_op.cuh +++ b/src/common/linalg_op.cuh @@ -10,7 +10,7 @@ namespace xgboost { namespace linalg { template void ElementWiseKernelDevice(linalg::TensorView t, Fn&& fn, cudaStream_t s = nullptr) { - if (t.Contiguous()) { + if (t.CContiguous()) { auto ptr = t.Values().data(); dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); }); } else { diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index a74b119e7947..987e07a59d77 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -10,7 +10,7 @@ namespace xgboost { namespace linalg { template void ElementWiseKernelHost(linalg::TensorView t, int32_t n_threads, Fn&& fn) { - if (t.Contiguous()) { + if (t.CContiguous()) { auto ptr = t.Values().data(); common::ParallelFor(t.Size(), n_threads, [&](size_t i) { ptr[i] = fn(i, ptr[i]); }); } else { diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 47d6e4622547..75a33a415e01 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -119,9 +119,9 @@ TEST(Linalg, TensorView) { } { auto t = MakeTensorView(data, {2, 3, 4}, 0); - auto s = t.Slice(linalg::All(), linalg::Range(0, 3), 2); + auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2); static_assert(decltype(s)::kDimension == 2, ""); - std::vector sol{2, 6, 10, 14, 18, 22}; + std::vector sol{6, 10, 18, 22}; auto k = 0; for (size_t i = 0; i < s.Shape(0); ++i) { for (size_t j = 0; j < s.Shape(1); ++j) { @@ -129,6 +129,21 @@ TEST(Linalg, TensorView) { k++; } } + ASSERT_FALSE(s.CContiguous()); + } + { + auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3)); + static_assert(decltype(s)::kDimension == 2, ""); + std::vector sol{17, 18, 21, 22}; + auto k = 0; + for (size_t i = 0; i < s.Shape(0); ++i) { + for (size_t j = 0; j < s.Shape(1); ++j) { + ASSERT_EQ(s(i, j), sol.at(k)); + k++; + } + } + ASSERT_FALSE(s.CContiguous()); } { auto t = MakeTensorView(data, {2, 3, 4}, 0); @@ -138,10 +153,12 @@ TEST(Linalg, TensorView) { for (size_t i = 0; i < s.Shape(0); ++i) { for (size_t j = 0; j < s.Shape(1); ++j) { for (size_t k = 0; k < s.Shape(2); ++k) { - ASSERT_EQ(s(i, j, k), all(i,j, k)); + ASSERT_EQ(s(i, j, k), all(i, j, k)); } } } + ASSERT_TRUE(s.CContiguous()); + 
ASSERT_TRUE(all.CContiguous()); } } @@ -156,7 +173,7 @@ TEST(Linalg, Tensor) { size_t n = 2 * 3 * 4; ASSERT_EQ(t.Size(), n); ASSERT_TRUE( - std::equal(k_view.Values().cbegin(), k_view.Values().cbegin(), view.Values().begin())); + std::equal(k_view.Values().cbegin(), k_view.Values().cend(), view.Values().cbegin())); Tensor t_0{std::move(t)}; ASSERT_EQ(t_0.Size(), n); diff --git a/tests/cpp/common/test_linalg.cu b/tests/cpp/common/test_linalg.cu index 9f5101c5b827..9ea6b22dd012 100644 --- a/tests/cpp/common/test_linalg.cu +++ b/tests/cpp/common/test_linalg.cu @@ -18,7 +18,7 @@ void TestElementWiseKernel() { */ // GPU view auto t = l.View(0).Slice(linalg::All(), 1, linalg::All()); - ASSERT_FALSE(t.Contiguous()); + ASSERT_FALSE(t.CContiguous()); ElementWiseKernelDevice(t, [] __device__(size_t i, float) { return i; }); // CPU view t = l.View(GenericParameter::kCpuId).Slice(linalg::All(), 1, linalg::All()); @@ -42,7 +42,7 @@ void TestElementWiseKernel() { */ auto t = l.View(0); ElementWiseKernelDevice(t, [] __device__(size_t i, float) { return i; }); - ASSERT_TRUE(t.Contiguous()); + ASSERT_TRUE(t.CContiguous()); // CPU view t = l.View(GenericParameter::kCpuId); From d2e3170fd49f4bc491e4152ae528f30fdf801fb4 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 21:26:05 +0800 Subject: [PATCH 05/13] Move array interface out. --- include/xgboost/linalg.h | 131 ++++++++++++++----------- src/data/file_iterator.h | 6 +- tests/cpp/common/test_linalg.cc | 6 +- tests/cpp/data/test_adapter.cc | 6 +- tests/cpp/data/test_array_interface.cc | 5 +- 5 files changed, 85 insertions(+), 69 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index f68e6ad6dbbc..b6e688f83a84 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -374,11 +374,14 @@ class TensorView { this->CalcSize(); } - LINALG_HD TensorView(TensorView const &that) - : data_{that.data_}, ptr_{data_.data()}, size_{that.size_}, device_{that.device_} { + template < + typename U, + std::enable_if_t::value> * = nullptr> + LINALG_HD TensorView(TensorView const &that) // NOLINT + : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.DeviceIdx()} { detail::UnrollLoop([&](auto i) { - stride_[i] = that.stride_[i]; - shape_[i] = that.shape_[i]; + stride_[i] = that.Stride(i); + shape_[i] = that.Shape(i); }); } @@ -473,61 +476,6 @@ class TensorView { * \brief Obtain the CUDA device ordinal. */ LINALG_HD auto DeviceIdx() const { return device_; } - - /** - * \brief Array Interface defined by - * numpy. - * - * `stream` is optionally included when data is on CUDA device. - */ - Json ArrayInterface() const { - Json array_interface{Object{}}; - array_interface["data"] = std::vector(2); - array_interface["data"][0] = Integer(reinterpret_cast(data_.data())); - array_interface["data"][1] = Boolean{true}; - if (this->DeviceIdx() >= 0) { - // Change this once we have different CUDA stream. 
- array_interface["stream"] = Null{}; - } - std::vector shape(Shape().size()); - std::vector stride(Stride().size()); - for (size_t i = 0; i < Shape().size(); ++i) { - shape[i] = Integer(Shape(i)); - stride[i] = Integer(Stride(i) * sizeof(T)); - } - array_interface["shape"] = Array{shape}; - array_interface["strides"] = Array{stride}; - array_interface["version"] = 3; - - char constexpr kT = detail::ArrayInterfaceHandler::TypeChar(); - static_assert(kT != '\0', ""); - if (DMLC_LITTLE_ENDIAN) { - array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))}; - } else { - array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))}; - } - return array_interface; - } - /** - * \brief Same as const version, but returns non-readonly data pointer. - */ - Json ArrayInterface() { - auto const &as_const = *this; - auto res = as_const.ArrayInterface(); - res["data"][1] = Boolean{false}; - return res; - } - - auto ArrayInterfaceStr() const { - std::string str; - Json::Dump(this->ArrayInterface(), &str); - return str; - } - auto ArrayInterfaceStr() { - std::string str; - Json::Dump(this->ArrayInterface(), &str); - return str; - } }; /** @@ -586,6 +534,71 @@ auto MakeVec(T *ptr, size_t s, int32_t device = -1) { template using MatrixView = TensorView; + + /** + * \brief Array Interface defined by + * numpy. + * + * `stream` is optionally included when data is on CUDA device. + */ +template +Json ArrayInterface(TensorView const& t) { + Json array_interface{Object{}}; + array_interface["data"] = std::vector(2); + array_interface["data"][0] = Integer(reinterpret_cast(t.Values().data())); + array_interface["data"][1] = Boolean{true}; + if (t.DeviceIdx() >= 0) { + // Change this once we have different CUDA stream. + array_interface["stream"] = Null{}; + } + std::vector shape(t.Shape().size()); + std::vector stride(t.Stride().size()); + for (size_t i = 0; i < t.Shape().size(); ++i) { + shape[i] = Integer(t.Shape(i)); + stride[i] = Integer(t.Stride(i) * sizeof(T)); + } + array_interface["shape"] = Array{shape}; + array_interface["strides"] = Array{stride}; + array_interface["version"] = 3; + + char constexpr kT = detail::ArrayInterfaceHandler::TypeChar(); + static_assert(kT != '\0', ""); + if (DMLC_LITTLE_ENDIAN) { + array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))}; + } else { + array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))}; + } + return array_interface; +} + +/** + * \brief Same as const version, but returns non-readonly data pointer. + */ +template +Json ArrayInterface(TensorView const &t) { + TensorView const &as_const = t; + auto res = ArrayInterface(as_const); + res["data"][1] = Boolean{false}; + return res; +} + +/** + * \brief Return string representation of array interface. + */ +template +auto ArrayInterfaceStr(TensorView const &t) { + std::string str; + Json::Dump(ArrayInterface(t), &str); + return str; +} + +template +auto ArrayInterfaceStr(TensorView const &t) { + std::string str; + Json::Dump(ArrayInterface(t), &str); + return str; +} + /** * \brief A tensor storage. To use it for other functionality like slicing one needs to * obtain a view first. This way we can use it on both host and device. 
diff --git a/src/data/file_iterator.h b/src/data/file_iterator.h index 70a5d51c30b9..96f0e09d485b 100644 --- a/src/data/file_iterator.h +++ b/src/data/file_iterator.h @@ -61,9 +61,9 @@ class FileIterator { row_block_ = parser_->Value(); using linalg::MakeVec; - indptr_ = MakeVec(row_block_.offset, row_block_.size + 1).ArrayInterfaceStr(); - values_ = MakeVec(row_block_.value, row_block_.offset[row_block_.size]).ArrayInterfaceStr(); - indices_ = MakeVec(row_block_.index, row_block_.offset[row_block_.size]).ArrayInterfaceStr(); + indptr_ = ArrayInterfaceStr(MakeVec(row_block_.offset, row_block_.size + 1)); + values_ = ArrayInterfaceStr(MakeVec(row_block_.value, row_block_.offset[row_block_.size])); + indices_ = ArrayInterfaceStr(MakeVec(row_block_.index, row_block_.offset[row_block_.size])); size_t n_columns = *std::max_element(row_block_.index, row_block_.index + row_block_.offset[row_block_.size]); diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 75a33a415e01..f13defeb8bd4 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -228,12 +228,16 @@ TEST(Linalg, ArrayInterface) { auto t = Tensor{{3, 3}, cpu}; auto v = t.View(cpu); std::iota(v.Values().begin(), v.Values().end(), 0); - auto arr = Json::Load(StringView{v.ArrayInterfaceStr()}); + auto arr = Json::Load(StringView{ArrayInterfaceStr(v)}); ASSERT_EQ(get(arr["shape"][0]), 3); ASSERT_EQ(get(arr["strides"][0]), 3 * sizeof(double)); ASSERT_FALSE(get(arr["data"][1])); ASSERT_EQ(reinterpret_cast(get(arr["data"][0])), v.Values().data()); + + TensorView as_const = v; + auto const_arr = ArrayInterface(as_const); + ASSERT_TRUE(get(const_arr["data"][1])); } TEST(Linalg, Popc) { diff --git a/tests/cpp/data/test_adapter.cc b/tests/cpp/data/test_adapter.cc index c6de226194f6..fa3ed61f6808 100644 --- a/tests/cpp/data/test_adapter.cc +++ b/tests/cpp/data/test_adapter.cc @@ -42,9 +42,9 @@ TEST(Adapter, CSRArrayAdapter) { size_t n_features = 100, n_samples = 10; RandomDataGenerator{n_samples, n_features, 0.5}.GenerateCSR(&values, &indptr, &indices); using linalg::MakeVec; - auto indptr_arr = MakeVec(indptr.HostPointer(), indptr.Size()).ArrayInterfaceStr(); - auto values_arr = MakeVec(values.HostPointer(), values.Size()).ArrayInterfaceStr(); - auto indices_arr = MakeVec(indices.HostPointer(), indices.Size()).ArrayInterfaceStr(); + auto indptr_arr = ArrayInterfaceStr(MakeVec(indptr.HostPointer(), indptr.Size())); + auto values_arr = ArrayInterfaceStr(MakeVec(values.HostPointer(), values.Size())); + auto indices_arr = ArrayInterfaceStr(MakeVec(indices.HostPointer(), indices.Size())); auto adapter = data::CSRArrayAdapter( StringView{indptr_arr.c_str(), indptr_arr.size()}, StringView{values_arr.c_str(), values_arr.size()}, diff --git a/tests/cpp/data/test_array_interface.cc b/tests/cpp/data/test_array_interface.cc index 8efd30eb75b2..3c2e0e38d5c3 100644 --- a/tests/cpp/data/test_array_interface.cc +++ b/tests/cpp/data/test_array_interface.cc @@ -19,9 +19,8 @@ TEST(ArrayInterface, Initialize) { ASSERT_EQ(arr_interface.type, ArrayInterfaceHandler::kF4); HostDeviceVector u64_storage(storage.Size()); - std::string u64_arr_str{linalg::TensorView{ - u64_storage.ConstHostSpan(), {kRows, kCols}, GenericParameter::kCpuId} - .ArrayInterfaceStr()}; + std::string u64_arr_str{ArrayInterfaceStr(linalg::TensorView{ + u64_storage.ConstHostSpan(), {kRows, kCols}, GenericParameter::kCpuId})}; std::copy(storage.ConstHostVector().cbegin(), storage.ConstHostVector().cend(), u64_storage.HostSpan().begin()); auto 
u64_arr = ArrayInterface<2>{u64_arr_str}; From c5d3bbe60450f9702e8ec84dbcf1f9843ad97ef4 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 21:29:19 +0800 Subject: [PATCH 06/13] format. --- include/xgboost/linalg.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index b6e688f83a84..8329320380ea 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -534,15 +534,14 @@ auto MakeVec(T *ptr, size_t s, int32_t device = -1) { template using MatrixView = TensorView; - - /** - * \brief Array Interface defined by - * numpy. - * - * `stream` is optionally included when data is on CUDA device. - */ +/** + * \brief Array Interface defined by + * numpy. + * + * `stream` is optionally included when data is on CUDA device. + */ template -Json ArrayInterface(TensorView const& t) { +Json ArrayInterface(TensorView const &t) { Json array_interface{Object{}}; array_interface["data"] = std::vector(2); array_interface["data"][0] = Integer(reinterpret_cast(t.Values().data())); From eeeb4412466ee67ee8d3b0ef841fbafcac9ad53d Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 21:38:26 +0800 Subject: [PATCH 07/13] Compile with master. --- include/xgboost/linalg.h | 4 ++-- src/data/data.cc | 6 +++--- tests/cpp/data/test_metainfo.cc | 8 ++++---- tests/cpp/data/test_metainfo.h | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 8329320380ea..b6b2584988c1 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -400,7 +400,7 @@ class TensorView { */ template LINALG_HD T &operator()(Index &&...index) { - static_assert(sizeof...(index) == kDim, "Invalid index."); + static_assert(sizeof...(index) <= kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); return ptr_[offset]; @@ -410,7 +410,7 @@ class TensorView { */ template LINALG_HD T const &operator()(Index &&...index) const { - static_assert(sizeof...(index) == kDim, "Invalid index."); + static_assert(sizeof...(index) <= kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); assert(offset < data_.size() && "Out of bound access."); return ptr_[offset]; diff --git a/src/data/data.cc b/src/data/data.cc index 3a2215180dce..205311c8f6d2 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -413,7 +413,7 @@ void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor* p_out) { } p_out->Reshape(array.shape); auto t = p_out->View(GenericParameter::kCpuId); - CHECK(t.Contiguous()); + CHECK(t.CContiguous()); // FIXME(jiamingy): Remove the use of this default thread. 
linalg::ElementWiseKernelHost(t, common::OmpGetNumThreads(0), [&](auto i, auto) { return linalg::detail::Apply(TypedIndex{array}, linalg::UnravelIndex(i, t.Shape())); @@ -531,8 +531,8 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t using T = std::remove_pointer_t; auto t = linalg::TensorView(common::Span{cast_d_ptr, num}, {num}, GenericParameter::kCpuId); - CHECK(t.Contiguous()); - Json interface { t.ArrayInterface() }; + CHECK(t.CContiguous()); + Json interface { linalg::ArrayInterface(t) }; assert(ArrayInterface<1>{interface}.is_contiguous); return interface; }; diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index 4f379f4fdd25..4a1ef7a7290f 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -127,7 +127,8 @@ TEST(MetaInfo, SaveLoadBinary) { auto orig_margin = info.base_margin_.View(xgboost::GenericParameter::kCpuId); auto read_margin = inforead.base_margin_.View(xgboost::GenericParameter::kCpuId); - EXPECT_TRUE(std::equal(orig_margin.cbegin(), orig_margin.cend(), read_margin.cbegin())); + EXPECT_TRUE(std::equal(orig_margin.Values().cbegin(), orig_margin.Values().cend(), + read_margin.Values().cbegin())); EXPECT_EQ(inforead.feature_type_names.size(), kCols); EXPECT_EQ(inforead.feature_types.Size(), kCols); @@ -259,9 +260,8 @@ TEST(MetaInfo, Validate) { xgboost::HostDeviceVector d_groups{groups}; d_groups.SetDevice(0); d_groups.DevicePointer(); // pull to device - std::string arr_interface_str{ - xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0) - .ArrayInterfaceStr()}; + std::string arr_interface_str{ArrayInterfaceStr( + xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0))}; EXPECT_THROW(info.SetInfo("group", xgboost::StringView{arr_interface_str}), dmlc::Error); #endif // defined(XGBOOST_USE_CUDA) } diff --git a/tests/cpp/data/test_metainfo.h b/tests/cpp/data/test_metainfo.h index 67da633d4be5..2b30e646593c 100644 --- a/tests/cpp/data/test_metainfo.h +++ b/tests/cpp/data/test_metainfo.h @@ -30,7 +30,7 @@ inline void TestMetaInfoStridedData(int32_t device) { is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device}; auto s = t.Slice(linalg::All(), 0); - auto str = s.ArrayInterfaceStr(); + auto str = ArrayInterfaceStr(s); ASSERT_EQ(s.Size(), 32); info.SetInfo("label", StringView{str}); @@ -48,7 +48,7 @@ inline void TestMetaInfoStridedData(int32_t device) { auto& h_qid = qid.Data()->HostVector(); std::iota(h_qid.begin(), h_qid.end(), 0); auto s = qid.View(device).Slice(linalg::All(), 0); - auto str = s.ArrayInterfaceStr(); + auto str = ArrayInterfaceStr(s); info.SetInfo("qid", StringView{str}); auto const& h_result = info.group_ptr_; ASSERT_EQ(h_result.size(), s.Size() + 1); @@ -62,7 +62,7 @@ inline void TestMetaInfoStridedData(int32_t device) { auto t_margin = base_margin.View(device).Slice(linalg::All(), linalg::All(), 0, linalg::All()); ASSERT_EQ(t_margin.Shape().size(), 3); - info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()}); + info.SetInfo("base_margin", StringView{ArrayInterfaceStr(t_margin)}); auto const& h_result = info.base_margin_.View(-1); ASSERT_EQ(h_result.Shape().size(), 3); auto in_margin = base_margin.View(-1); From f94b6fec661e4bf5710879ff68111be00d2a422e Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 21:51:35 +0800 Subject: [PATCH 08/13] Don't the default device. 
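
Requiring the ordinal at every call site presumably avoids a silent mistake: with int32_t device = 0 as the default, a caller that forgets the last argument gets a host buffer tagged as CUDA device 0. A minimal sketch of a call site after the change, mirroring tests/cpp/common/test_linalg.cc (-1, i.e. kCpuId, selects the host):

    std::vector<double> data(2 * 3 * 4, 0.0);
    // The device ordinal can no longer be omitted; use -1 (kCpuId) for host data.
    auto t = xgboost::linalg::MakeTensorView(data, {2, 3, 4}, -1);
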
--- include/xgboost/linalg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index b6b2584988c1..a3b8193603f9 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -483,13 +483,13 @@ class TensorView { */ template ::value> * = nullptr> -auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device = 0) { // NOLINT +auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) { // NOLINT using T = typename Container::value_type; return TensorView{data, shape, device}; } template -LINALG_HD auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device = 0) { +LINALG_HD auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device) { return TensorView{data, shape, device}; } From 9a520b1ea10b7510663a2525e80ace3947c0d566 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 22:06:59 +0800 Subject: [PATCH 09/13] Test. --- tests/cpp/common/test_linalg.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index f13defeb8bd4..4425cbf41e38 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -160,6 +160,18 @@ TEST(Linalg, TensorView) { ASSERT_TRUE(s.CContiguous()); ASSERT_TRUE(all.CContiguous()); } + + { + auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto copied = t; + auto moved = std::move(t); + for (size_t i = 0; i < t.Shape().size(); ++i) { + ASSERT_EQ(t.Shape(i), copied.Shape(i)); + ASSERT_EQ(t.Shape(i), moved.Shape(i)); + ASSERT_EQ(t.Stride(i), copied.Stride(i)); + ASSERT_EQ(t.Stride(i), moved.Stride(i)); + } + } } TEST(Linalg, Tensor) { From 88b2c52967b574d3917bb987a70f1706ca60a89e Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 19 Nov 2021 22:11:45 +0800 Subject: [PATCH 10/13] don't test moved object. --- tests/cpp/common/test_linalg.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 4425cbf41e38..e6d1e2414129 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -118,6 +118,7 @@ TEST(Linalg, TensorView) { ASSERT_EQ(s(2), 21); } { + // range slice auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2); static_assert(decltype(s)::kDimension == 2, ""); @@ -132,6 +133,7 @@ TEST(Linalg, TensorView) { ASSERT_FALSE(s.CContiguous()); } { + // range slice auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3)); static_assert(decltype(s)::kDimension == 2, ""); @@ -146,6 +148,7 @@ TEST(Linalg, TensorView) { ASSERT_FALSE(s.CContiguous()); } { + // same as no slice. auto t = MakeTensorView(data, {2, 3, 4}, 0); auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); static_assert(decltype(s)::kDimension == 3, ""); @@ -162,14 +165,13 @@ TEST(Linalg, TensorView) { } { + // copy and move constructor. 
auto t = MakeTensorView(data, {2, 3, 4}, 0); - auto copied = t; - auto moved = std::move(t); + auto from_copy = t; + auto from_move = std::move(t); for (size_t i = 0; i < t.Shape().size(); ++i) { - ASSERT_EQ(t.Shape(i), copied.Shape(i)); - ASSERT_EQ(t.Shape(i), moved.Shape(i)); - ASSERT_EQ(t.Stride(i), copied.Stride(i)); - ASSERT_EQ(t.Stride(i), moved.Stride(i)); + ASSERT_EQ(from_copy.Shape(i), from_move.Shape(i)); + ASSERT_EQ(from_copy.Stride(i), from_copy.Stride(i)); } } } From aef06970a3bceea5102820727b2c5a495b3208e4 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 23 Nov 2021 05:54:52 +0800 Subject: [PATCH 11/13] Restore contiguous. --- include/xgboost/linalg.h | 4 ++++ src/common/linalg_op.cuh | 2 +- src/common/linalg_op.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index a3b8193603f9..fd9597a6667e 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -458,6 +458,10 @@ class TensorView { * \brief Number of items in the tensor. */ LINALG_HD size_t Size() const { return size_; } + /** + * \brief Whether this is a contiguous array, both C and F contiguous returns true. + */ + LINALG_HD bool Contiguous() const { return this->Size() == data_.size(); } /** * \brief Whether it's a c-contiguous array. */ diff --git a/src/common/linalg_op.cuh b/src/common/linalg_op.cuh index f0a18a912a22..dfab58729b56 100644 --- a/src/common/linalg_op.cuh +++ b/src/common/linalg_op.cuh @@ -10,7 +10,7 @@ namespace xgboost { namespace linalg { template void ElementWiseKernelDevice(linalg::TensorView t, Fn&& fn, cudaStream_t s = nullptr) { - if (t.CContiguous()) { + if (t.Contiguous()) { auto ptr = t.Values().data(); dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); }); } else { diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index 987e07a59d77..a74b119e7947 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -10,7 +10,7 @@ namespace xgboost { namespace linalg { template void ElementWiseKernelHost(linalg::TensorView t, int32_t n_threads, Fn&& fn) { - if (t.CContiguous()) { + if (t.Contiguous()) { auto ptr = t.Values().data(); common::ParallelFor(t.Size(), n_threads, [&](size_t i) { ptr[i] = fn(i, ptr[i]); }); } else { From 2c6a57fa22faaa892f46be66956094fba3ba1cf0 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 23 Nov 2021 06:24:16 +0800 Subject: [PATCH 12/13] Extend to f-contiguous. --- include/xgboost/linalg.h | 29 ++++++++++++++++++++++++----- tests/cpp/common/test_linalg.cc | 22 +++++++++++++++++++++- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index fd9597a6667e..200610367616 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -55,11 +55,18 @@ constexpr std::enable_if_t Offset(S (&strides)[D], return Offset(strides, n + (head * strides[dim]), std::forward(rest)...); } -template +template constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) { - stride[D - 1] = 1; - for (int32_t s = D - 2; s >= 0; --s) { - stride[s] = shape[s + 1] * stride[s + 1]; + if (f_array) { + stride[0] = 1; + for (int32_t s = 1; s < D; ++s) { + stride[s] = shape[s - 1] * stride[s - 1]; + } + } else { + stride[D - 1] = 1; + for (int32_t s = D - 2; s >= 0; --s) { + stride[s] = shape[s + 1] * stride[s + 1]; + } } } @@ -461,7 +468,9 @@ class TensorView { /** * \brief Whether this is a contiguous array, both C and F contiguous returns true. 
*/ - LINALG_HD bool Contiguous() const { return this->Size() == data_.size(); } + LINALG_HD bool Contiguous() const { + return data_.size() == this->Size() || this->CContiguous() || this->FContiguous(); + } /** * \brief Whether it's a c-contiguous array. */ @@ -472,6 +481,16 @@ class TensorView { detail::CalcStride(shape_, stride); return common::Span{stride_} == common::Span{stride}; } + /** + * \brief Whether it's a f-contiguous array. + */ + LINALG_HD bool FContiguous() const { + StrideT stride; + static_assert(std::is_same::value, ""); + // It's contiguous if the stride can be calculated from shape. + detail::CalcStride(shape_, stride); + return common::Span{stride_} == common::Span{stride}; + } /** * \brief Obtain a reference to the raw data. */ diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index e6d1e2414129..bc4945219763 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -166,7 +166,7 @@ TEST(Linalg, TensorView) { { // copy and move constructor. - auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(data, {2, 3, 4}, kCpuId); auto from_copy = t; auto from_move = std::move(t); for (size_t i = 0; i < t.Shape().size(); ++i) { @@ -174,6 +174,26 @@ TEST(Linalg, TensorView) { ASSERT_EQ(from_copy.Stride(i), from_copy.Stride(i)); } } + + { + // multiple slices + auto t = MakeTensorView(data, {2, 3, 4}, kCpuId); + auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4)); + ASSERT_FALSE(s_0.CContiguous()); + auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2)); + ASSERT_EQ(s_1.Size(), 2); + ASSERT_TRUE(s_1.CContiguous()); + ASSERT_TRUE(s_1.Contiguous()); + ASSERT_EQ(s_1(0), 17); + ASSERT_EQ(s_1(1), 18); + } + { + // f-contiguous + TensorView t{data, {4, 3, 2}, {1, 4, 12}, kCpuId}; + ASSERT_TRUE(t.Contiguous()); + ASSERT_TRUE(t.FContiguous()); + ASSERT_FALSE(t.CContiguous()); + } } TEST(Linalg, Tensor) { From 6e66d7dcb20e6b7c8ae420733494de6c4ae238c0 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 23 Nov 2021 06:47:54 +0800 Subject: [PATCH 13/13] One more test. --- tests/cpp/common/test_linalg.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index bc4945219763..a4f3e6ab41fb 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -186,6 +186,16 @@ TEST(Linalg, TensorView) { ASSERT_TRUE(s_1.Contiguous()); ASSERT_EQ(s_1(0), 17); ASSERT_EQ(s_1(1), 18); + + auto s_2 = s_0.Slice(1, linalg::All(), linalg::Range(0, 2)); + std::vector sol{13, 14, 17, 18}; + auto k = 0; + for (size_t i = 0; i < s_2.Shape(0); i++) { + for (size_t j = 0; j < s_2.Shape(1); ++j) { + ASSERT_EQ(s_2(i, j), sol[k]); + k++; + } + } } { // f-contiguous
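
The contiguity queries restored and extended in patches 11 and 12 can be exercised directly against the layouts used in the tests above. A minimal sketch (assuming TensorView's element-type/dimension template arguments are <double, 3> here and that -1 denotes the CPU ordinal, both taken from the tests in this series):

    #include <xgboost/linalg.h>

    #include <numeric>
    #include <vector>

    int main() {
      std::vector<double> data(2 * 3 * 4);
      std::iota(data.begin(), data.end(), 0.0);

      // C (row-major) layout: strides are derived from the shape.
      auto c = xgboost::linalg::MakeTensorView(data, {2, 3, 4}, -1);
      // c.CContiguous() == true, c.FContiguous() == false, c.Contiguous() == true

      // F (column-major) layout: explicit strides {1, 4, 12} for shape {4, 3, 2}.
      xgboost::linalg::TensorView<double, 3> f{data, {4, 3, 2}, {1, 4, 12}, -1};
      // f.FContiguous() == true, f.CContiguous() == false, f.Contiguous() == true

      // A non-trivial slice is generally neither C- nor F-contiguous.
      auto s = c.Slice(xgboost::linalg::All(), xgboost::linalg::Range(1, 3), 2);
      // s.CContiguous() == false
      return 0;
    }
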