Add range-based slicing to tensor view. (#7453)
trivialfis committed Nov 27, 2021
1 parent 6f38f5a commit 85cbd32
Showing 10 changed files with 358 additions and 129 deletions.
309 changes: 209 additions & 100 deletions include/xgboost/linalg.h

Large diffs are not rendered by default.
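
Since include/xgboost/linalg.h is not rendered above, here is a minimal orientation sketch of the range-based slicing this commit adds, reconstructed from the tests below; treat the exact signatures as assumptions, since the header diff itself is not shown:

#include <xgboost/linalg.h>

#include <numeric>
#include <vector>

int main() {
  using namespace xgboost::linalg;
  std::vector<double> data(2 * 3 * 4);
  std::iota(data.begin(), data.end(), 0.0);

  // A 2x3x4 row-major view over the buffer; -1 selects the CPU device.
  auto t = MakeTensorView(data, {2, 3, 4}, -1);

  // An integer index drops a dimension, All() keeps a whole dimension, and
  // Range(beg, end) keeps the half-open interval [beg, end).
  auto s = t.Slice(All(), Range(1, 3), 2);
  static_assert(decltype(s)::kDimension == 2, "one dim dropped, two kept");

  // A range slice is a strided view over the same memory, so it is generally
  // not C-contiguous.
  return s.CContiguous() ? 1 : 0;
}

Slicing never copies: a view keeps the original pointer and only adjusts shape and strides, which is why contiguity has to be re-checked after each slice.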

6 changes: 3 additions & 3 deletions src/data/data.cc
@@ -413,7 +413,7 @@ void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor<T, D>* p_out) {
}
p_out->Reshape(array.shape);
auto t = p_out->View(GenericParameter::kCpuId);
- CHECK(t.Contiguous());
+ CHECK(t.CContiguous());
// FIXME(jiamingy): Remove the use of this default thread.
linalg::ElementWiseKernelHost(t, common::OmpGetNumThreads(0), [&](auto i, auto) {
return linalg::detail::Apply(TypedIndex<T, D>{array}, linalg::UnravelIndex<D>(i, t.Shape()));
@@ -531,8 +531,8 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
auto t =
linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, GenericParameter::kCpuId);
- CHECK(t.Contiguous());
- Json interface { t.ArrayInterface() };
+ CHECK(t.CContiguous());
+ Json interface { linalg::ArrayInterface(t) };
assert(ArrayInterface<1>{interface}.is_contiguous);
return interface;
};
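
The Contiguous() checks here become CContiguous() because contiguity is no longer a single property: the f-contiguous test below builds a column-major view, so row-major (C) and column-major (F) contiguity are now distinct, with Contiguous() accepting either. A stride-based sketch of the C-order check, assuming element (not byte) strides with the innermost dimension last — the real implementation lives in include/xgboost/linalg.h:

#include <array>
#include <cstddef>

template <std::size_t D>
bool CContiguousSketch(std::array<std::size_t, D> shape, std::array<std::size_t, D> stride) {
  std::size_t expected = 1;  // innermost stride of a C-contiguous view is 1
  for (std::size_t i = D; i-- > 0;) {
    if (stride[i] != expected) return false;
    expected *= shape[i];  // each outer stride is the product of inner shapes
  }
  return true;
}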
6 changes: 3 additions & 3 deletions src/data/file_iterator.h
@@ -61,9 +61,9 @@ class FileIterator {
row_block_ = parser_->Value();
using linalg::MakeVec;

- indptr_ = MakeVec(row_block_.offset, row_block_.size + 1).ArrayInterfaceStr();
- values_ = MakeVec(row_block_.value, row_block_.offset[row_block_.size]).ArrayInterfaceStr();
- indices_ = MakeVec(row_block_.index, row_block_.offset[row_block_.size]).ArrayInterfaceStr();
+ indptr_ = ArrayInterfaceStr(MakeVec(row_block_.offset, row_block_.size + 1));
+ values_ = ArrayInterfaceStr(MakeVec(row_block_.value, row_block_.offset[row_block_.size]));
+ indices_ = ArrayInterfaceStr(MakeVec(row_block_.index, row_block_.offset[row_block_.size]));

size_t n_columns = *std::max_element(row_block_.index,
row_block_.index + row_block_.offset[row_block_.size]);
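
Throughout the commit, member calls such as v.ArrayInterfaceStr() become free functions: ArrayInterfaceStr(v) and ArrayInterface(v). The commit message does not spell out why; a plausible motivation (an assumption, not stated in the diff) is that one free-function template serves TensorView<T, D> and TensorView<T const, D> alike, which the const-view test in test_linalg.cc below depends on:

// Sketch of the free-function shape; not the actual declaration in linalg.h.
template <typename T, int32_t kDim>
std::string ArrayInterfaceStr(TensorView<T, kDim> const &view);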
5 changes: 2 additions & 3 deletions src/metric/auc.cc
@@ -85,9 +85,8 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
auto const &labels = info.labels_.ConstHostVector();

std::vector<double> results_storage(n_classes * 3, 0);
- linalg::TensorView<double> results(results_storage,
- {n_classes, static_cast<size_t>(3)},
- GenericParameter::kCpuId);
+ linalg::TensorView<double, 2> results(results_storage, {n_classes, static_cast<size_t>(3)},
+ GenericParameter::kCpuId);
auto local_area = results.Slice(linalg::All(), 0);
auto tp = results.Slice(linalg::All(), 1);
auto auc = results.Slice(linalg::All(), 2);
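
Here results is an (n_classes, 3) matrix over contiguous storage, so each Slice(linalg::All(), j) is a length-n_classes column view with stride 3 rather than 1:

// Column j of a C-contiguous (n, 3) matrix aliases storage[i * 3 + j], so
// local_area(i), tp(i), and auc(i) read results_storage[i * 3 + 0 / 1 / 2].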
112 changes: 107 additions & 5 deletions tests/cpp/common/test_linalg.cc
@@ -51,7 +51,7 @@ TEST(Linalg, TensorView) {
std::vector<double> data(2 * 3 * 4, 0);
std::iota(data.begin(), data.end(), 0);

- TensorView<double> t{data, {2, 3, 4}, -1};
+ auto t = MakeTensorView(data, {2, 3, 4}, -1);
ASSERT_EQ(t.Shape()[0], 2);
ASSERT_EQ(t.Shape()[1], 3);
ASSERT_EQ(t.Shape()[2], 4);
@@ -96,17 +96,114 @@ TEST(Linalg, TensorView) {
// assignment
TensorView<double, 3> t{data, {2, 3, 4}, 0};
double pi = 3.14159;
+ auto old = t(1, 2, 3);
t(1, 2, 3) = pi;
ASSERT_EQ(t(1, 2, 3), pi);
+ t(1, 2, 3) = old;
+ ASSERT_EQ(t(1, 2, 3), old);
}

{
// Don't assign the initial dimension, tensor should be able to deduce the correct dim
// for Slice.
- TensorView<double> t{data, {2, 3, 4}, 0};
+ auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto s = t.Slice(1, 2, All());
static_assert(decltype(s)::kDimension == 1, "");
}
+ {
+ auto t = MakeTensorView(data, {2, 3, 4}, 0);
+ auto s = t.Slice(1, linalg::All(), 1);
+ ASSERT_EQ(s(0), 13);
+ ASSERT_EQ(s(1), 17);
+ ASSERT_EQ(s(2), 21);
+ }
+ {
+ // range slice
+ auto t = MakeTensorView(data, {2, 3, 4}, 0);
+ auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);
+ static_assert(decltype(s)::kDimension == 2, "");
+ std::vector<double> sol{6, 10, 18, 22};
+ auto k = 0;
+ for (size_t i = 0; i < s.Shape(0); ++i) {
+ for (size_t j = 0; j < s.Shape(1); ++j) {
+ ASSERT_EQ(s(i, j), sol.at(k));
+ k++;
+ }
+ }
+ ASSERT_FALSE(s.CContiguous());
+ }
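// Note: with C-order strides {12, 4, 1} on the 2x3x4 view, the slice above
// reads data[i * 12 + (j + 1) * 4 + 2], i.e. elements 6, 10, 18, 22.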
+ {
+ // range slice
+ auto t = MakeTensorView(data, {2, 3, 4}, 0);
+ auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));
+ static_assert(decltype(s)::kDimension == 2, "");
+ std::vector<double> sol{17, 18, 21, 22};
+ auto k = 0;
+ for (size_t i = 0; i < s.Shape(0); ++i) {
+ for (size_t j = 0; j < s.Shape(1); ++j) {
+ ASSERT_EQ(s(i, j), sol.at(k));
+ k++;
+ }
+ }
+ ASSERT_FALSE(s.CContiguous());
+ }
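// Note: here s(i, j) reads data[1 * 12 + (i + 1) * 4 + (j + 1)], giving
// 17, 18, 21, 22.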
+ {
+ // same as no slice.
+ auto t = MakeTensorView(data, {2, 3, 4}, 0);
+ auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
+ static_assert(decltype(s)::kDimension == 3, "");
+ auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
+ for (size_t i = 0; i < s.Shape(0); ++i) {
+ for (size_t j = 0; j < s.Shape(1); ++j) {
+ for (size_t k = 0; k < s.Shape(2); ++k) {
+ ASSERT_EQ(s(i, j, k), all(i, j, k));
+ }
+ }
+ }
+ ASSERT_TRUE(s.CContiguous());
+ ASSERT_TRUE(all.CContiguous());
+ }
+
+ {
+ // copy and move constructor.
+ auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
+ auto from_copy = t;
+ auto from_move = std::move(t);
+ for (size_t i = 0; i < t.Shape().size(); ++i) {
+ ASSERT_EQ(from_copy.Shape(i), from_move.Shape(i));
+ ASSERT_EQ(from_copy.Stride(i), from_move.Stride(i));
+ }
+ }
+
+ {
+ // multiple slices
+ auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
+ auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));
+ ASSERT_FALSE(s_0.CContiguous());
+ auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));
+ ASSERT_EQ(s_1.Size(), 2);
+ ASSERT_TRUE(s_1.CContiguous());
+ ASSERT_TRUE(s_1.Contiguous());
+ ASSERT_EQ(s_1(0), 17);
+ ASSERT_EQ(s_1(1), 18);
+
+ auto s_2 = s_0.Slice(1, linalg::All(), linalg::Range(0, 2));
+ std::vector<double> sol{13, 14, 17, 18};
+ auto k = 0;
+ for (size_t i = 0; i < s_2.Shape(0); i++) {
+ for (size_t j = 0; j < s_2.Shape(1); ++j) {
+ ASSERT_EQ(s_2(i, j), sol[k]);
+ k++;
+ }
+ }
+ }
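// Note: slices compose. s_0 already carries the offsets and strides of its
// Range arguments, so s_1 and s_2 are taken relative to s_0 rather than the
// raw buffer; s_1 covers data[17..18], showing that a sub-slice of a
// non-contiguous view can itself be contiguous.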
+ {
+ // f-contiguous
+ TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, kCpuId};
+ ASSERT_TRUE(t.Contiguous());
+ ASSERT_TRUE(t.FContiguous());
+ ASSERT_FALSE(t.CContiguous());
+ }
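// Note: strides {1, 4, 12} with shape {4, 3, 2} make the first index the
// fastest-moving one, i.e. column-major (Fortran) order: FContiguous() holds,
// CContiguous() does not, and Contiguous() accepts either layout.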
}

TEST(Linalg, Tensor) {
@@ -119,7 +216,8 @@ TEST(Linalg, Tensor) {

size_t n = 2 * 3 * 4;
ASSERT_EQ(t.Size(), n);
- ASSERT_TRUE(std::equal(k_view.cbegin(), k_view.cbegin(), view.begin()));
+ ASSERT_TRUE(
+ std::equal(k_view.Values().cbegin(), k_view.Values().cend(), view.Values().cbegin()));

Tensor<float, 3> t_0{std::move(t)};
ASSERT_EQ(t_0.Size(), n);
@@ -173,13 +271,17 @@ TEST(Linalg, ArrayInterface) {
auto cpu = kCpuId;
auto t = Tensor<double, 2>{{3, 3}, cpu};
auto v = t.View(cpu);
- std::iota(v.begin(), v.end(), 0);
- auto arr = Json::Load(StringView{v.ArrayInterfaceStr()});
+ std::iota(v.Values().begin(), v.Values().end(), 0);
+ auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});
ASSERT_EQ(get<Integer>(arr["shape"][0]), 3);
ASSERT_EQ(get<Integer>(arr["strides"][0]), 3 * sizeof(double));

ASSERT_FALSE(get<Boolean>(arr["data"][1]));
ASSERT_EQ(reinterpret_cast<double *>(get<Integer>(arr["data"][0])), v.Values().data());

+ TensorView<double const, 2> as_const = v;
+ auto const_arr = ArrayInterface(as_const);
+ ASSERT_TRUE(get<Boolean>(const_arr["data"][1]));
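// Note: "data" is (pointer, read-only flag); the TensorView<double const, 2>
// view flips the flag to true, while the mutable view above reports false.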
}

TEST(Linalg, Popc) {
24 changes: 22 additions & 2 deletions tests/cpp/common/test_linalg.cu
@@ -18,7 +18,7 @@ void TestElementWiseKernel() {
*/
// GPU view
auto t = l.View(0).Slice(linalg::All(), 1, linalg::All());
- ASSERT_FALSE(t.Contiguous());
+ ASSERT_FALSE(t.CContiguous());
ElementWiseKernelDevice(t, [] __device__(size_t i, float) { return i; });
// CPU view
t = l.View(GenericParameter::kCpuId).Slice(linalg::All(), 1, linalg::All());
@@ -42,7 +42,7 @@
*/
auto t = l.View(0);
ElementWiseKernelDevice(t, [] __device__(size_t i, float) { return i; });
- ASSERT_TRUE(t.Contiguous());
+ ASSERT_TRUE(t.CContiguous());
// CPU view
t = l.View(GenericParameter::kCpuId);

@@ -56,7 +56,27 @@
}
}
}

+ void TestSlice() {
+ thrust::device_vector<double> data(2 * 3 * 4);
+ auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0);
+ dh::LaunchN(1, [=] __device__(size_t) {
+ auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
+ auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
+ static_assert(decltype(s)::kDimension == 3, "");
+ for (size_t i = 0; i < s.Shape(0); ++i) {
+ for (size_t j = 0; j < s.Shape(1); ++j) {
+ for (size_t k = 0; k < s.Shape(2); ++k) {
+ SPAN_CHECK(s(i, j, k) == all(i, j, k));
+ }
+ }
+ }
+ });
+ }
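// Note: TensorView is a non-owning view, so it can be captured by value into
// the __device__ lambda and sliced on the device; SPAN_CHECK stands in for
// gtest assertions, which cannot execute in device code.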
} // anonymous namespace

TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }

+ TEST(Linalg, GPUTensorView) { TestSlice(); }
} // namespace linalg
} // namespace xgboost
6 changes: 3 additions & 3 deletions tests/cpp/data/test_adapter.cc
@@ -42,9 +42,9 @@ TEST(Adapter, CSRArrayAdapter) {
size_t n_features = 100, n_samples = 10;
RandomDataGenerator{n_samples, n_features, 0.5}.GenerateCSR(&values, &indptr, &indices);
using linalg::MakeVec;
- auto indptr_arr = MakeVec(indptr.HostPointer(), indptr.Size()).ArrayInterfaceStr();
- auto values_arr = MakeVec(values.HostPointer(), values.Size()).ArrayInterfaceStr();
- auto indices_arr = MakeVec(indices.HostPointer(), indices.Size()).ArrayInterfaceStr();
+ auto indptr_arr = ArrayInterfaceStr(MakeVec(indptr.HostPointer(), indptr.Size()));
+ auto values_arr = ArrayInterfaceStr(MakeVec(values.HostPointer(), values.Size()));
+ auto indices_arr = ArrayInterfaceStr(MakeVec(indices.HostPointer(), indices.Size()));
auto adapter = data::CSRArrayAdapter(
StringView{indptr_arr.c_str(), indptr_arr.size()},
StringView{values_arr.c_str(), values_arr.size()},
5 changes: 2 additions & 3 deletions tests/cpp/data/test_array_interface.cc
@@ -19,9 +19,8 @@ TEST(ArrayInterface, Initialize) {
ASSERT_EQ(arr_interface.type, ArrayInterfaceHandler::kF4);

HostDeviceVector<size_t> u64_storage(storage.Size());
- std::string u64_arr_str{linalg::TensorView<size_t const, 2>{
- u64_storage.ConstHostSpan(), {kRows, kCols}, GenericParameter::kCpuId}
- .ArrayInterfaceStr()};
+ std::string u64_arr_str{ArrayInterfaceStr(linalg::TensorView<size_t const, 2>{
+ u64_storage.ConstHostSpan(), {kRows, kCols}, GenericParameter::kCpuId})};
std::copy(storage.ConstHostVector().cbegin(), storage.ConstHostVector().cend(),
u64_storage.HostSpan().begin());
auto u64_arr = ArrayInterface<2>{u64_arr_str};
8 changes: 4 additions & 4 deletions tests/cpp/data/test_metainfo.cc
@@ -127,7 +127,8 @@ TEST(MetaInfo, SaveLoadBinary) {

auto orig_margin = info.base_margin_.View(xgboost::GenericParameter::kCpuId);
auto read_margin = inforead.base_margin_.View(xgboost::GenericParameter::kCpuId);
- EXPECT_TRUE(std::equal(orig_margin.cbegin(), orig_margin.cend(), read_margin.cbegin()));
+ EXPECT_TRUE(std::equal(orig_margin.Values().cbegin(), orig_margin.Values().cend(),
+ read_margin.Values().cbegin()));

EXPECT_EQ(inforead.feature_type_names.size(), kCols);
EXPECT_EQ(inforead.feature_types.Size(), kCols);
@@ -259,9 +260,8 @@ TEST(MetaInfo, Validate) {
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
d_groups.SetDevice(0);
d_groups.DevicePointer(); // pull to device
- std::string arr_interface_str{
- xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0)
- .ArrayInterfaceStr()};
+ std::string arr_interface_str{ArrayInterfaceStr(
+ xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0))};
EXPECT_THROW(info.SetInfo("group", xgboost::StringView{arr_interface_str}), dmlc::Error);
#endif // defined(XGBOOST_USE_CUDA)
}
6 changes: 3 additions & 3 deletions tests/cpp/data/test_metainfo.h
@@ -30,7 +30,7 @@ inline void TestMetaInfoStridedData(int32_t device) {
is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device};
auto s = t.Slice(linalg::All(), 0);

- auto str = s.ArrayInterfaceStr();
+ auto str = ArrayInterfaceStr(s);
ASSERT_EQ(s.Size(), 32);

info.SetInfo("label", StringView{str});
@@ -48,7 +48,7 @@
auto& h_qid = qid.Data()->HostVector();
std::iota(h_qid.begin(), h_qid.end(), 0);
auto s = qid.View(device).Slice(linalg::All(), 0);
- auto str = s.ArrayInterfaceStr();
+ auto str = ArrayInterfaceStr(s);
info.SetInfo("qid", StringView{str});
auto const& h_result = info.group_ptr_;
ASSERT_EQ(h_result.size(), s.Size() + 1);
@@ -62,7 +62,7 @@
auto t_margin = base_margin.View(device).Slice(linalg::All(), 0, linalg::All());
ASSERT_EQ(t_margin.Shape().size(), 2);

info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()});
info.SetInfo("base_margin", StringView{ArrayInterfaceStr(t_margin)});
auto const& h_result = info.base_margin_.View(-1);
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_margin = base_margin.View(-1);
