Convert labels into tensor. #7456

Merged
3 commits, merged Dec 16, 2021
8 changes: 4 additions & 4 deletions include/xgboost/data.h
@@ -56,7 +56,7 @@ class MetaInfo {
/*! \brief number of nonzero entries in the data */
uint64_t num_nonzero_{0}; // NOLINT
/*! \brief label of each instance */
HostDeviceVector<bst_float> labels_; // NOLINT
linalg::Tensor<float, 2> labels;
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking.
@@ -119,12 +119,12 @@ class MetaInfo {
}
/*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
inline const std::vector<size_t>& LabelAbsSort() const {
if (label_order_cache_.size() == labels_.Size()) {
if (label_order_cache_.size() == labels.Size()) {
return label_order_cache_;
}
label_order_cache_.resize(labels_.Size());
label_order_cache_.resize(labels.Size());
std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
const auto& l = labels_.HostVector();
const auto& l = labels.Data()->HostVector();
XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
[&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});

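The change above replaces the flat `MetaInfo::labels_` vector with a two-dimensional `linalg::Tensor<float, 2>`, so label consumers now go through `Data()` or a `TensorView`. Below is a minimal sketch of the migrated access pattern, assuming only the accessors visible in this diff (`HostView()`, `Shape()`, `operator()`); the helper name `SumFirstTarget` is made up for illustration.

```cpp
#include <cstddef>

#include "xgboost/data.h"    // xgboost::MetaInfo
#include "xgboost/linalg.h"  // xgboost::linalg::Tensor

// Hypothetical helper: sums the first target column of the label matrix.
double SumFirstTarget(xgboost::MetaInfo const& info) {
  auto labels = info.labels.HostView();  // host-side view, shape: rows x targets
  double sum = 0.0;
  for (std::size_t i = 0; i < labels.Shape(0); ++i) {
    sum += labels(i, 0);  // old code read info.labels_.HostVector()[i]
  }
  return sum;
}
```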
46 changes: 32 additions & 14 deletions include/xgboost/linalg.h
@@ -635,6 +635,20 @@ class Tensor {
HostDeviceVector<T> data_;
ShapeT shape_{0};

template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
if (device >= 0) {
data_.SetDevice(device);
data_.DevicePointer(); // Pull to device;
}
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
}

public:
Tensor() = default;

@@ -665,20 +679,20 @@
*/
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
// shape
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
if (device >= 0) {
data_.SetDevice(device);
data_.DevicePointer(); // Pull to device;
}
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
// shape
this->Initialize(shape, device);
}

template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
this->Initialize(shape, device);
}

/**
* \brief Get a \ref TensorView for this tensor.
*/
@@ -703,6 +717,9 @@
}
}

auto HostView() const { return this->View(-1); }
auto HostView() { return this->View(-1); }

size_t Size() const { return data_.Size(); }
auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
auto Shape(size_t i) const { return shape_[i]; }
@@ -756,14 +773,15 @@
/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) { data_.SetDevice(device); }
void SetDevice(int32_t device) const { data_.SetDevice(device); }
int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
if (r.Data()->DeviceIdx() >= 0) {
l->Data()->SetDevice(r.Data()->DeviceIdx());
if (r.DeviceIdx() >= 0) {
l->SetDevice(r.DeviceIdx());
}
l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
for (size_t i = 1; i < D; ++i) {
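For reference, a small standalone sketch of the two additions to `linalg.h`: the `std::initializer_list` constructor (which funnels into the new `Initialize()` helper) and the `HostView()` shortcut for `View(-1)`. The shape and device arguments are illustrative; `-1` follows the `device >= 0` check in `Initialize()` and means "stay on the host".

```cpp
#include <iostream>

#include "xgboost/linalg.h"

int main() {
  // A 2x3 tensor built from an initializer list, kept on the host (device = -1).
  xgboost::linalg::Tensor<float, 2> t{{0.f, 1.f, 2.f, 3.f, 4.f, 5.f}, {2, 3}, -1};
  auto view = t.HostView();                      // equivalent to t.View(-1)
  std::cout << view(1, 2) << std::endl;          // element at row 1, column 2 -> 5
  std::cout << t.Shape(0) << "x" << t.Shape(1)   // 2x3
            << std::endl;
  return 0;
}
```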
6 changes: 3 additions & 3 deletions plugin/example/custom_obj.cc
@@ -46,15 +46,15 @@ class MyLogistic : public ObjFunction {
out_gpair->Resize(preds.Size());
const std::vector<bst_float>& preds_h = preds.HostVector();
std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
const std::vector<bst_float>& labels_h = info.labels_.HostVector();
auto const labels_h = info.labels.HostView();
for (size_t i = 0; i < preds_h.size(); ++i) {
bst_float w = info.GetWeight(i);
// scale the negative examples!
if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
if (labels_h(i) == 0.0f) w *= param_.scale_neg_weight;
// logistic transformation
bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
// this is the gradient
bst_float grad = (p - labels_h[i]) * w;
bst_float grad = (p - labels_h(i)) * w;
// this is the second order gradient
bst_float hess = p * (1.0f - p) * w;
out_gpair_h.at(i) = GradientPair(grad, hess);
10 changes: 10 additions & 0 deletions src/common/device_helpers.cuh
@@ -956,11 +956,21 @@ thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) { // NOLINT
return thrust::device_ptr<T>(span.data());
}

template <typename T>
thrust::device_ptr<T> tbegin(xgboost::common::Span<T> const& span) { // NOLINT
return thrust::device_ptr<T>(span.data());
}

template <typename T>
thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) { // NOLINT
return tbegin(span) + span.size();
}

template <typename T>
thrust::device_ptr<T> tend(xgboost::common::Span<T> const& span) { // NOLINT
return tbegin(span) + span.size();
}

template <typename T>
XGBOOST_DEVICE auto trbegin(xgboost::common::Span<T> &span) { // NOLINT
return thrust::make_reverse_iterator(span.data() + span.size());
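The two const-reference overloads above let Thrust algorithms consume a `Span` that is only available as a const reference. A sketch of the intended call pattern, assuming the `dh` namespace that `device_helpers.cuh` already uses for `tbegin`/`tend`; the helper name and include path are illustrative:

```cpp
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>

#include "xgboost/span.h"
#include "../common/device_helpers.cuh"  // path relative to src/, illustrative

// Hypothetical helper: reduces a device span without needing a mutable reference.
float SumOnDevice(xgboost::common::Span<float> const& values) {
  return thrust::reduce(thrust::device, dh::tbegin(values), dh::tend(values), 0.0f);
}
```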
65 changes: 42 additions & 23 deletions src/data/data.cc
@@ -176,7 +176,7 @@ uint64_t constexpr MetaInfo::kNumField;
// implementation of inline functions
void MetaInfo::Clear() {
num_row_ = num_col_ = num_nonzero_ = 0;
labels_.HostVector().clear();
labels = decltype(labels){};
group_ptr_.clear();
weights_.HostVector().clear();
base_margin_ = decltype(base_margin_){};
@@ -213,8 +213,7 @@ void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
SaveScalarField(fo, u8"num_row", DataType::kUInt64, num_row_); ++field_cnt;
SaveScalarField(fo, u8"num_col", DataType::kUInt64, num_col_); ++field_cnt;
SaveScalarField(fo, u8"num_nonzero", DataType::kUInt64, num_nonzero_); ++field_cnt;
SaveVectorField(fo, u8"labels", DataType::kFloat32,
{labels_.Size(), 1}, labels_); ++field_cnt;
SaveTensorField(fo, u8"labels", DataType::kFloat32, labels); ++field_cnt;
SaveVectorField(fo, u8"group_ptr", DataType::kUInt32,
{group_ptr_.size(), 1}, group_ptr_); ++field_cnt;
SaveVectorField(fo, u8"weights", DataType::kFloat32,
@@ -291,7 +290,7 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) {
LoadScalarField(fi, u8"num_row", DataType::kUInt64, &num_row_);
LoadScalarField(fi, u8"num_col", DataType::kUInt64, &num_col_);
LoadScalarField(fi, u8"num_nonzero", DataType::kUInt64, &num_nonzero_);
LoadVectorField(fi, u8"labels", DataType::kFloat32, &labels_);
LoadTensorField(fi, u8"labels", DataType::kFloat32, &labels);
LoadVectorField(fi, u8"group_ptr", DataType::kUInt32, &group_ptr_);
LoadVectorField(fi, u8"weights", DataType::kFloat32, &weights_);
LoadTensorField(fi, u8"base_margin", DataType::kFloat32, &base_margin_);
@@ -326,7 +325,19 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
out.num_col_ = this->num_col_;
// Groups is maintained by a higher level Python function. We should aim at deprecating
// the slice function.
out.labels_.HostVector() = Gather(this->labels_.HostVector(), ridxs);
if (this->labels.Size() != this->num_row_) {
auto t_labels = this->labels.View(this->labels.Data()->DeviceIdx());
out.labels.Reshape(ridxs.size(), labels.Shape(1));
out.labels.Data()->HostVector() =
Gather(this->labels.Data()->HostVector(), ridxs, t_labels.Stride(0));
} else {
out.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
data->HostVector() = Gather(this->labels.Data()->HostVector(), ridxs);
shape[0] = data->Size();
shape[1] = 1;
});
}

out.labels_upper_bound_.HostVector() =
Gather(this->labels_upper_bound_.HostVector(), ridxs);
out.labels_lower_bound_.HostVector() =
@@ -343,13 +354,16 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
if (this->base_margin_.Size() != this->num_row_) {
CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0)
<< "Incorrect size of base margin vector.";
auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1]);
size_t stride = margin.Stride(0);
auto t_margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
out.base_margin_.Reshape(ridxs.size(), t_margin.Shape(1));
out.base_margin_.Data()->HostVector() =
Gather(this->base_margin_.Data()->HostVector(), ridxs, stride);
Gather(this->base_margin_.Data()->HostVector(), ridxs, t_margin.Stride(0));
} else {
out.base_margin_.Data()->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
out.base_margin_.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
data->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
shape[0] = data->Size();
shape[1] = 1;
});
}

out.feature_weights.Resize(this->feature_weights.Size());
@@ -460,6 +474,17 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
this->base_margin_.Reshape(this->num_row_, n_groups);
}
return;
} else if (key == "label") {
CopyTensorInfoImpl(arr, &this->labels);
if (this->num_row_ != 0 && this->labels.Shape(0) != this->num_row_) {
CHECK_EQ(this->labels.Size() % this->num_row_, 0) << "Incorrect size for labels.";
size_t n_targets = this->labels.Size() / this->num_row_;
this->labels.Reshape(this->num_row_, n_targets);
}
auto const& h_labels = labels.Data()->ConstHostVector();
auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
return;
}
// uint info
if (key == "group") {
@@ -500,12 +525,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
// float info
linalg::Tensor<float, 1> t;
CopyTensorInfoImpl<1>(arr, &t);
if (key == "label") {
this->labels_ = std::move(*t.Data());
auto const& h_labels = labels_.ConstHostVector();
auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
} else if (key == "weight") {
if (key == "weight") {
this->weights_ = std::move(*t.Data());
auto const& h_weights = this->weights_.ConstHostVector();
auto valid = std::none_of(h_weights.cbegin(), h_weights.cend(),
@@ -568,7 +588,7 @@ void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype,
if (dtype == DataType::kFloat32) {
const std::vector<bst_float>* vec = nullptr;
if (!std::strcmp(key, "label")) {
vec = &this->labels_.HostVector();
vec = &this->labels.Data()->HostVector();
} else if (!std::strcmp(key, "weight")) {
vec = &this->weights_.HostVector();
} else if (!std::strcmp(key, "base_margin")) {
@@ -649,8 +669,7 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
}
this->num_col_ = that.num_col_;

this->labels_.SetDevice(that.labels_.DeviceIdx());
this->labels_.Extend(that.labels_);
linalg::Stack(&this->labels, that.labels);

this->weights_.SetDevice(that.weights_.DeviceIdx());
this->weights_.Extend(that.weights_);
@@ -702,7 +721,7 @@ void MetaInfo::Validate(int32_t device) const {
<< "Invalid group structure. Number of rows obtained from groups "
"doesn't equal to actual number of rows given by data.";
}
auto check_device = [device](HostDeviceVector<float> const &v) {
auto check_device = [device](HostDeviceVector<float> const& v) {
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
device == GenericParameter::kCpuId ||
v.DeviceIdx() == device)
@@ -717,10 +736,10 @@
check_device(weights_);
return;
}
if (labels_.Size() != 0) {
CHECK_EQ(labels_.Size(), num_row_)
if (labels.Size() != 0) {
CHECK_EQ(labels.Size(), num_row_)
<< "Size of labels must equal to number of rows.";
check_device(labels_);
check_device(*labels.Data());
return;
}
if (labels_lower_bound_.Size() != 0) {
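Two behaviours in the section above are worth spelling out: `SetInfoFromHost` reshapes a flat label array of length `num_row_ * n_targets` into an `(num_row_, n_targets)` matrix, and `Extend` now concatenates label matrices with `linalg::Stack` along the row axis. A standalone sketch of the latter, under the assumption that `Stack` appends the rows of its second argument as its `ModifyInplace` body suggests; values and shapes are illustrative only.

```cpp
#include <iostream>

#include "xgboost/linalg.h"

int main() {
  xgboost::linalg::Tensor<float, 2> a{{0.f, 1.f}, {2, 1}, -1};  // 2 rows, 1 target
  xgboost::linalg::Tensor<float, 2> b{{2.f, 3.f}, {2, 1}, -1};  // 2 more rows
  xgboost::linalg::Stack(&a, b);                                // row-wise append
  std::cout << a.Shape(0) << "x" << a.Shape(1) << std::endl;    // 4x1
  return 0;
}
```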
13 changes: 7 additions & 6 deletions src/data/data.cu
@@ -119,6 +119,12 @@ void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
if (key == "base_margin") {
CopyTensorInfoImpl(array, &base_margin_);
return;
} else if (key == "label") {
CopyTensorInfoImpl(array, &labels);
auto ptr = labels.Data()->ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + labels.Size(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
return;
}
// uint info
if (key == "group") {
@@ -135,12 +141,7 @@
// float info
linalg::Tensor<float, 1> t;
CopyTensorInfoImpl(array, &t);
if (key == "label") {
this->labels_ = std::move(*t.Data());
auto ptr = labels_.ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + labels_.Size(), data::LabelsCheck{});
CHECK(valid) << "Label contains NaN, infinity or a value too large.";
} else if (key == "weight") {
if (key == "weight") {
this->weights_ = std::move(*t.Data());
auto ptr = weights_.ConstDevicePointer();
auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(), data::WeightsCheck{});
2 changes: 1 addition & 1 deletion src/data/iterative_device_dmatrix.cu
@@ -153,7 +153,7 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
if (batches == 1) {
this->info_ = std::move(proxy->Info());
this->info_.num_nonzero_ = nnz;
CHECK_EQ(proxy->Info().labels_.Size(), 0);
CHECK_EQ(proxy->Info().labels.Size(), 0);
}

iter.Reset();
12 changes: 7 additions & 5 deletions src/data/simple_dmatrix.cc
@@ -127,14 +127,16 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
total_batch_size += batch.Size();
// Append meta information if available
if (batch.Labels() != nullptr) {
auto& labels = info_.labels_.HostVector();
labels.insert(labels.end(), batch.Labels(),
batch.Labels() + batch.Size());
info_.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
shape[1] = 1;
auto& labels = data->HostVector();
labels.insert(labels.end(), batch.Labels(), batch.Labels() + batch.Size());
shape[0] += batch.Size();
});
}
if (batch.Weights() != nullptr) {
auto& weights = info_.weights_.HostVector();
weights.insert(weights.end(), batch.Weights(),
batch.Weights() + batch.Size());
weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());
}
if (batch.BaseMargin() != nullptr) {
info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
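The adapter path above grows the label tensor through `ModifyInplace`, which hands the callback the backing `HostDeviceVector` plus a mutable shape span so the data and its shape stay in sync. Below is a sketch of that pattern on a standalone tensor, assuming the callback signature matches the one used in this diff; the helper name is hypothetical.

```cpp
#include <cstddef>
#include <vector>

#include "xgboost/host_device_vector.h"
#include "xgboost/linalg.h"
#include "xgboost/span.h"

// Hypothetical helper mirroring the batch-append loop above.
void AppendLabelBatch(xgboost::linalg::Tensor<float, 2>* labels,
                      std::vector<float> const& batch) {
  labels->ModifyInplace([&](xgboost::HostDeviceVector<float>* data,
                            xgboost::common::Span<std::size_t, 2> shape) {
    shape[1] = 1;  // single-target labels
    auto& h = data->HostVector();
    h.insert(h.end(), batch.begin(), batch.end());
    shape[0] += batch.size();  // row count tracks the appended values
  });
}
```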