Convert labels into tensor. #7456

Merged
3 commits, merged Dec 16, 2021
8 changes: 4 additions & 4 deletions include/xgboost/data.h
@@ -56,7 +56,7 @@ class MetaInfo {
/*! \brief number of nonzero entries in the data */
uint64_t num_nonzero_{0}; // NOLINT
/*! \brief label of each instance */
HostDeviceVector<bst_float> labels_; // NOLINT
linalg::Tensor<float, 2> labels;
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking.
@@ -119,12 +119,12 @@ class MetaInfo {
}
/*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
inline const std::vector<size_t>& LabelAbsSort() const {
if (label_order_cache_.size() == labels_.Size()) {
if (label_order_cache_.size() == labels.Size()) {
return label_order_cache_;
}
label_order_cache_.resize(labels_.Size());
label_order_cache_.resize(labels.Size());
std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
const auto& l = labels_.HostVector();
const auto& l = labels.Data()->HostVector();
XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
[&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});

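The change above replaces the flat `MetaInfo::labels_` vector with a two-dimensional `linalg::Tensor<float, 2>`, so label consumers now go through `Data()` or a `TensorView`. Below is a minimal sketch of the migrated access pattern, assuming only the accessors visible in this diff (`HostView()`, `Shape()`, `operator()`); the helper name `SumFirstTarget` is made up for illustration.

```cpp
#include <cstddef>

#include "xgboost/data.h"    // xgboost::MetaInfo
#include "xgboost/linalg.h"  // xgboost::linalg::Tensor

// Hypothetical helper: sums the first target column of the label matrix.
double SumFirstTarget(xgboost::MetaInfo const& info) {
  auto labels = info.labels.HostView();  // host-side view, shape: rows x targets
  double sum = 0.0;
  for (std::size_t i = 0; i < labels.Shape(0); ++i) {
    sum += labels(i, 0);  // old code read info.labels_.HostVector()[i]
  }
  return sum;
}
```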
46 changes: 32 additions & 14 deletions include/xgboost/linalg.h
@@ -635,6 +635,20 @@ class Tensor {
HostDeviceVector<T> data_;
ShapeT shape_{0};

template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
if (device >= 0) {
data_.SetDevice(device);
data_.DevicePointer(); // Pull to device;
}
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
}

public:
Tensor() = default;

@@ -665,20 +679,20 @@
*/
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
// shape
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
if (device >= 0) {
data_.SetDevice(device);
data_.DevicePointer(); // Pull to device;
}
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
// shape
this->Initialize(shape, device);
}

template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
this->Initialize(shape, device);
}

/**
* \brief Get a \ref TensorView for this tensor.
*/
@@ -703,6 +717,9 @@
}
}

auto HostView() const { return this->View(-1); }
auto HostView() { return this->View(-1); }

size_t Size() const { return data_.Size(); }
auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
auto Shape(size_t i) const { return shape_[i]; }
@@ -756,14 +773,15 @@
/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) { data_.SetDevice(device); }
void SetDevice(int32_t device) const { data_.SetDevice(device); }
int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
if (r.Data()->DeviceIdx() >= 0) {
l->Data()->SetDevice(r.Data()->DeviceIdx());
if (r.DeviceIdx() >= 0) {
l->SetDevice(r.DeviceIdx());
}
l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
for (size_t i = 1; i < D; ++i) {
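For reference, a small standalone sketch of the two additions to `linalg.h`: the `std::initializer_list` constructor (which funnels into the new `Initialize()` helper) and the `HostView()` shortcut for `View(-1)`. The shape and device arguments are illustrative; `-1` follows the `device >= 0` check in `Initialize()` and means "stay on the host".

```cpp
#include <iostream>

#include "xgboost/linalg.h"

int main() {
  // A 2x3 tensor built from an initializer list, kept on the host (device = -1).
  xgboost::linalg::Tensor<float, 2> t{{0.f, 1.f, 2.f, 3.f, 4.f, 5.f}, {2, 3}, -1};
  auto view = t.HostView();                      // equivalent to t.View(-1)
  std::cout << view(1, 2) << std::endl;          // element at row 1, column 2 -> 5
  std::cout << t.Shape(0) << "x" << t.Shape(1)   // 2x3
            << std::endl;
  return 0;
}
```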
6 changes: 3 additions & 3 deletions plugin/example/custom_obj.cc
@@ -46,15 +46,15 @@ class MyLogistic : public ObjFunction {
out_gpair->Resize(preds.Size());
const std::vector<bst_float>& preds_h = preds.HostVector();
std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
const std::vector<bst_float>& labels_h = info.labels_.HostVector();
auto const labels_h = info.labels.HostView();
for (size_t i = 0; i < preds_h.size(); ++i) {
bst_float w = info.GetWeight(i);
// scale the negative examples!
if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
if (labels_h(i) == 0.0f) w *= param_.scale_neg_weight;
// logistic transformation
bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
// this is the gradient
bst_float grad = (p - labels_h[i]) * w;
bst_float grad = (p - labels_h(i)) * w;
// this is the second order gradient
bst_float hess = p * (1.0f - p) * w;
out_gpair_h.at(i) = GradientPair(grad, hess);
10 changes: 10 additions & 0 deletions src/common/device_helpers.cuh
@@ -956,11 +956,21 @@ thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) { // NOLINT
return thrust::device_ptr<T>(span.data());
}

template <typename T>
thrust::device_ptr<T> tbegin(xgboost::common::Span<T> const& span) { // NOLINT
return thrust::device_ptr<T>(span.data());
}

template <typename T>
thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) { // NOLINT
return tbegin(span) + span.size();
}

template <typename T>
thrust::device_ptr<T> tend(xgboost::common::Span<T> const& span) { // NOLINT
return tbegin(span) + span.size();
}

template <typename T>
XGBOOST_DEVICE auto trbegin(xgboost::common::Span<T> &span) { // NOLINT
return thrust::make_reverse_iterator(span.data() + span.size());
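The two const-reference overloads above let Thrust algorithms consume a `Span` that is only available as a const reference. A sketch of the intended call pattern, assuming the `dh` namespace that `device_helpers.cuh` already uses for `tbegin`/`tend`; the helper name and include path are illustrative:

```cpp
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>

#include "xgboost/span.h"
#include "../common/device_helpers.cuh"  // path relative to src/, illustrative

// Hypothetical helper: reduces a device span without needing a mutable reference.
float SumOnDevice(xgboost::common::Span<float> const& values) {
  return thrust::reduce(thrust::device, dh::tbegin(values), dh::tend(values), 0.0f);
}
```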
65 changes: 42 additions & 23 deletions src/data/data.cc
@@ -176,7 +176,7 @@ uint64_t constexpr MetaInfo::kNumField;
// implementation of inline functions
void MetaInfo::Clear() {
num_row_ = num_col_ = num_nonzero_ = 0;
labels_.HostVector().clear();
labels = decltype(labels){};
group_ptr_.clear();
weights_.HostVector().clear();
base_margin_ = decltype(base_margin_){};
@@ -213,8 +213,7 @@ void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
SaveScalarField(fo, u8"num_row", DataType::kUInt64, num_row_); ++field_cnt;
SaveScalarField(fo, u8"num_col", DataType::kUInt64, num_col_); ++field_cnt;
SaveScalarField(fo, u8"num_nonzero", DataType::kUInt64, num_nonzero_); ++field_cnt;
SaveVectorField(fo, u8"labels", DataType::kFloat32,
{labels_.Size(), 1}, labels_); ++field_cnt;
SaveTensorField(fo, u8"labels", DataType::kFloat32, labels); ++field_cnt;
SaveVectorField(fo, u8"group_ptr", DataType::kUInt32,
{group_ptr_.size(), 1}, group_ptr_); ++field_cnt;
SaveVectorField(fo, u8"weights", DataType::kFloat32,
@@ -291,7 +290,7 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) {
LoadScalarField(fi, u8"num_row", DataType::kUInt64, &num_row_);
LoadScalarField(fi, u8"num_col", DataType::kUInt64, &num_col_);
LoadScalarField(fi, u8"num_nonzero", DataType::kUInt64, &num_nonzero_);
LoadVectorField(fi, u8"labels", DataType::kFloat32, &labels_);
LoadTensorField(fi, u8"labels", DataType::kFloat32, &labels);
LoadVectorField(fi, u8"group_ptr", DataType::kUInt32, &group_ptr_);
LoadVectorField(fi, u8"weights", DataType::kFloat32, &weights_);
LoadTensorField(fi, u8"base_margin", DataType::kFloat32, &base_margin_);
@@ -326,7 +325,19 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
out.num_col_ = this->num_col_;
// Groups is maintained by a higher level Python function. We should aim at deprecating
// the slice function.
out.labels_.HostVector() = Gather(this->labels_.HostVector(), ridxs);
if (this->labels.Size() != this->num_row_) {
auto t_labels = this->labels.View(this->labels.Data()->DeviceIdx());
out.labels.Reshape(ridxs.size(), labels.Shape(1));
out.labels.Data()->HostVector() =
Gather(this->labels.Data()->HostVector(), ridxs, t_labels.Stride(0));
} else {
out.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
data->HostVector() = Gather(this->labels.Data()->HostVector(), ridxs);
shape[0] = data->Size();
shape[1] = 1;
});
}

out.labels_upper_bound_.HostVector() =
Gather(this->labels_upper_bound_.HostVector(), ridxs);
out.labels_lower_bound_.HostVector() =
@@ -343,13 +354,16 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
if (this->base_margin_.Size() != this->num_row_) {
CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0)
<< "Incorrect size of base margin vector.";
auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1]);
size_t stride = margin.Stride(0);
auto t_margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
out.base_margin_.Reshape(ridxs.size(), t_margin.Shape(1));
out.base_margin_.Data()->HostVector() =
Gather(this->base_margin_.Data()->HostVector(), ridxs, stride);
Gather(this->base_margin_.Data()->HostVector(), ridxs, t_margin.Stride(0));
} else {
out.base_margin_.Data()->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
out.base_margin_.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
data->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
shape[0] = data->Size();
shape[1] = 1;
});
}

out.feature_weights.Resize(this->feature_weights.Size());
@@ -460,6 +474,17 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
this->base_margin_.Reshape(this->num_row_, n_groups);
}
return;
} else if (key == "label") {
CopyTensorInfoImpl(arr, &this->labels);
if (this->num_row_ != 0 && this->labels.Shape(0) != this->num_row_) {
CHECK_EQ(this->labels.Size() % this->num_row_, 0) << "Incorrect size for labels.";
size_t n_targets = this->labels.Size() / this->num_row_;
this->labels.Reshape(this->num_row_, n_targets);
}
auto const& h_labels = labels.Data()->ConstHostVector();
auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
return;
}
// uint info
if (key == "group") {
@@ -500,12 +525,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
// float info
linalg::Tensor<float, 1> t;
CopyTensorInfoImpl<1>(arr, &t);
if (key == "label") {
this->labels_ = std::move(*t.Data());
auto const& h_labels = labels_.ConstHostVector();
auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
} else if (key == "weight") {
if (key == "weight") {
this->weights_ = std::move(*t.Data());
auto const& h_weights = this->weights_.ConstHostVector();
auto valid = std::none_of(h_weights.cbegin(), h_weights.cend(),
@@ -568,7 +588,7 @@ void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype,
if (dtype == DataType::kFloat32) {
const std::vector<bst_float>* vec = nullptr;
if (!std::strcmp(key, "label")) {
vec = &this->labels_.HostVector();
vec = &this->labels.Data()->HostVector();
} else if (!std::strcmp(key, "weight")) {
vec = &this->weights_.HostVector();
} else if (!std::strcmp(key, "base_margin")) {
@@ -649,8 +669,7 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
}
this->num_col_ = that.num_col_;

this->labels_.SetDevice(that.labels_.DeviceIdx());
this->labels_.Extend(that.labels_);
linalg::Stack(&this->labels, that.labels);

this->weights_.SetDevice(that.weights_.DeviceIdx());
this->weights_.Extend(that.weights_);
@@ -702,7 +721,7 @@ void MetaInfo::Validate(int32_t device) const {
<< "Invalid group structure. Number of rows obtained from groups "
"doesn't equal to actual number of rows given by data.";
}
auto check_device = [device](HostDeviceVector<float> const &v) {
auto check_device = [device](HostDeviceVector<float> const& v) {
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
device == GenericParameter::kCpuId ||
v.DeviceIdx() == device)
@@ -717,10 +736,10 @@
check_device(weights_);
return;
}
if (labels_.Size() != 0) {
CHECK_EQ(labels_.Size(), num_row_)
if (labels.Size() != 0) {
CHECK_EQ(labels.Size(), num_row_)
<< "Size of labels must equal to number of rows.";
check_device(labels_);
check_device(*labels.Data());
return;
}
if (labels_lower_bound_.Size() != 0) {
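Two behaviours in the section above are worth spelling out: `SetInfoFromHost` reshapes a flat label array of length `num_row_ * n_targets` into an `(num_row_, n_targets)` matrix, and `Extend` now concatenates label matrices with `linalg::Stack` along the row axis. A standalone sketch of the latter, under the assumption that `Stack` appends the rows of its second argument as its `ModifyInplace` body suggests; values and shapes are illustrative only.

```cpp
#include <iostream>

#include "xgboost/linalg.h"

int main() {
  xgboost::linalg::Tensor<float, 2> a{{0.f, 1.f}, {2, 1}, -1};  // 2 rows, 1 target
  xgboost::linalg::Tensor<float, 2> b{{2.f, 3.f}, {2, 1}, -1};  // 2 more rows
  xgboost::linalg::Stack(&a, b);                                // row-wise append
  std::cout << a.Shape(0) << "x" << a.Shape(1) << std::endl;    // 4x1
  return 0;
}
```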
13 changes: 7 additions & 6 deletions src/data/data.cu
@@ -119,6 +119,12 @@ void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
if (key == "base_margin") {
CopyTensorInfoImpl(array, &base_margin_);
return;
} else if (key == "label") {
CopyTensorInfoImpl(array, &labels);
auto ptr = labels.Data()->ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + labels.Size(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
return;
}
// uint info
if (key == "group") {
@@ -135,12 +141,7 @@
// float info
linalg::Tensor<float, 1> t;
CopyTensorInfoImpl(array, &t);
if (key == "label") {
this->labels_ = std::move(*t.Data());
auto ptr = labels_.ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + labels_.Size(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
} else if (key == "weight") {
if (key == "weight") {
this->weights_ = std::move(*t.Data());
auto ptr = weights_.ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(), data::WeightsCheck{});
2 changes: 1 addition & 1 deletion src/data/iterative_device_dmatrix.cu
@@ -153,7 +153,7 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
if (batches == 1) {
this->info_ = std::move(proxy->Info());
this->info_.num_nonzero_ = nnz;
CHECK_EQ(proxy->Info().labels_.Size(), 0);
CHECK_EQ(proxy->Info().labels.Size(), 0);
}

iter.Reset();
12 changes: 7 additions & 5 deletions src/data/simple_dmatrix.cc
@@ -127,14 +127,16 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
total_batch_size += batch.Size();
// Append meta information if available
if (batch.Labels() != nullptr) {
auto& labels = info_.labels_.HostVector();
labels.insert(labels.end(), batch.Labels(),
batch.Labels() + batch.Size());
info_.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
shape[1] = 1;
auto& labels = data->HostVector();
labels.insert(labels.end(), batch.Labels(), batch.Labels() + batch.Size());
shape[0] += batch.Size();
});
}
if (batch.Weights() != nullptr) {
auto& weights = info_.weights_.HostVector();
weights.insert(weights.end(), batch.Weights(),
batch.Weights() + batch.Size());
weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());
}
if (batch.BaseMargin() != nullptr) {
info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
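The adapter path above grows the label tensor through `ModifyInplace`, which hands the callback the backing `HostDeviceVector` plus a mutable shape span so the data and its shape stay in sync. Below is a sketch of that pattern on a standalone tensor, assuming the callback signature matches the one used in this diff; the helper name is hypothetical.

```cpp
#include <cstddef>
#include <vector>

#include "xgboost/host_device_vector.h"
#include "xgboost/linalg.h"
#include "xgboost/span.h"

// Hypothetical helper mirroring the batch-append loop above.
void AppendLabelBatch(xgboost::linalg::Tensor<float, 2>* labels,
                      std::vector<float> const& batch) {
  labels->ModifyInplace([&](xgboost::HostDeviceVector<float>* data,
                            xgboost::common::Span<std::size_t, 2> shape) {
    shape[1] = 1;  // single-target labels
    auto& h = data->HostVector();
    h.insert(h.end(), batch.begin(), batch.end());
    shape[0] += batch.size();  // row count tracks the appended values
  });
}
```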