From 47f421148fa86818dac4942e2906fbd6bf11a26d Mon Sep 17 00:00:00 2001 From: fis Date: Sat, 20 Nov 2021 17:37:05 +0800 Subject: [PATCH] Reduce base margin to 2 dim for now. --- include/xgboost/data.h | 2 +- src/data/data.cc | 32 +++++++++++------------ src/data/simple_dmatrix.cc | 8 +++--- src/predictor/predictor.cc | 3 ++- tests/cpp/data/test_metainfo.h | 15 +++++------ tests/cpp/data/test_simple_dmatrix.cc | 2 +- tests/cpp/predictor/test_gpu_predictor.cu | 4 +-- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/xgboost/data.h b/include/xgboost/data.h index c91451678856..8cafbf02813e 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -69,7 +69,7 @@ class MetaInfo { * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from. */ - linalg::Tensor<float, 3> base_margin_; // NOLINT + linalg::Tensor<float, 2> base_margin_; // NOLINT /*! * \brief lower bound of the label, to be used for survival analysis (censored regression) */ diff --git a/src/data/data.cc b/src/data/data.cc index 3a2215180dce..2ad4643c3689 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -185,20 +185,20 @@ void MetaInfo::Clear() { /* * Binary serialization format for MetaInfo: * - * | name | type | is_scalar | num_row | num_col | dim3 | value | - * |--------------------+----------+-----------+-------------+-------------+-------------+------------------------| - * | num_row | kUInt64 | True | NA | NA | NA | ${num_row_} | - * | num_col | kUInt64 | True | NA | NA | NA | ${num_col_} | - * | num_nonzero | kUInt64 | True | NA | NA | NA | ${num_nonzero_} | - * | labels | kFloat32 | False | ${size} | 1 | NA | ${labels_} | - * | group_ptr | kUInt32 | False | ${size} | 1 | NA | ${group_ptr_} | - * | weights | kFloat32 | False | ${size} | 1 | NA | ${weights_} | - * | base_margin | kFloat32 | False | ${Shape(0)} | ${Shape(1)} | ${Shape(2)} | ${base_margin_} | - * | labels_lower_bound | kFloat32 | False | ${size} | 1 | NA | 
${labels_lower_bound_} | - * | labels_upper_bound | kFloat32 | False | ${size} | 1 | NA | ${labels_upper_bound_} | - * | feature_names | kStr | False | ${size} | 1 | NA | ${feature_names} | - * | feature_types | kStr | False | ${size} | 1 | NA | ${feature_types} | - * | feature_types | kFloat32 | False | ${size} | 1 | NA | ${feature_weights} | + * | name | type | is_scalar | num_row | num_col | value | + * |--------------------+----------+-----------+-------------+-------------+------------------------| + * | num_row | kUInt64 | True | NA | NA | ${num_row_} | + * | num_col | kUInt64 | True | NA | NA | ${num_col_} | + * | num_nonzero | kUInt64 | True | NA | NA | ${num_nonzero_} | + * | labels | kFloat32 | False | ${size} | 1 | ${labels_} | + * | group_ptr | kUInt32 | False | ${size} | 1 | ${group_ptr_} | + * | weights | kFloat32 | False | ${size} | 1 | ${weights_} | + * | base_margin | kFloat32 | False | ${Shape(0)} | ${Shape(1)} | ${base_margin_} | + * | labels_lower_bound | kFloat32 | False | ${size} | 1 | ${labels_lower_bound_} | + * | labels_upper_bound | kFloat32 | False | ${size} | 1 | ${labels_upper_bound_} | + * | feature_names | kStr | False | ${size} | 1 | ${feature_names} | + * | feature_types | kStr | False | ${size} | 1 | ${feature_types} | + * | feature_weights | kFloat32 | False | ${size} | 1 | ${feature_weights} | * * Note that the scalar fields (is_scalar=True) will have num_row and num_col missing. 
* Also notice the difference between the saved name and the name used in `SetInfo': @@ -344,7 +344,7 @@ MetaInfo MetaInfo::Slice(common::Span ridxs) const { CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0) << "Incorrect size of base margin vector."; auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx()); - out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1], margin.Shape()[2]); + out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1]); size_t stride = margin.Stride(0); out.base_margin_.Data()->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs, stride); @@ -447,7 +447,7 @@ void MetaInfo::SetInfo(StringView key, StringView interface_str) { void MetaInfo::SetInfoFromHost(StringView key, Json arr) { // multi-dim float info if (key == "base_margin") { - CopyTensorInfoImpl<3>(arr, &this->base_margin_); + CopyTensorInfoImpl(arr, &this->base_margin_); // FIXME(jiamingy): Remove the deprecated API and let all language bindings aware of // input shape. This issue is CPU only since CUDA uses array interface from day 1. 
// diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index e83559d3958a..ce2e262c113f 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -137,10 +137,10 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) { batch.Weights() + batch.Size()); } if (batch.BaseMargin() != nullptr) { - info_.base_margin_ = linalg::Tensor{batch.BaseMargin(), - batch.BaseMargin() + batch.Size(), - {batch.Size()}, - GenericParameter::kCpuId}; + info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(), + batch.BaseMargin() + batch.Size(), + {batch.Size()}, + GenericParameter::kCpuId}; } if (batch.Qid() != nullptr) { qids.insert(qids.end(), batch.Qid(), batch.Qid() + batch.Size()); diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc index b86474184ccc..284d3b5992c9 100644 --- a/src/predictor/predictor.cc +++ b/src/predictor/predictor.cc @@ -61,7 +61,8 @@ Predictor* Predictor::Create( return p_predictor; } -void ValidateBaseMarginShape(linalg::Tensor const& margin, bst_row_t n_samples, +template +void ValidateBaseMarginShape(linalg::Tensor const& margin, bst_row_t n_samples, bst_group_t n_groups) { // FIXME: Bindings other than Python doesn't have shape. std::string expected{"Invalid shape of base_margin. 
Expected: (" + std::to_string(n_samples) + diff --git a/tests/cpp/data/test_metainfo.h b/tests/cpp/data/test_metainfo.h index 67da633d4be5..eb13a7916fc2 100644 --- a/tests/cpp/data/test_metainfo.h +++ b/tests/cpp/data/test_metainfo.h @@ -55,24 +55,23 @@ inline void TestMetaInfoStridedData(int32_t device) { } { // base margin - linalg::Tensor<float, 4> base_margin; - base_margin.Reshape(4, 3, 2, 3); + linalg::Tensor<float, 3> base_margin; + base_margin.Reshape(4, 2, 3); auto& h_margin = base_margin.Data()->HostVector(); std::iota(h_margin.begin(), h_margin.end(), 0.0); - auto t_margin = base_margin.View(device).Slice(linalg::All(), linalg::All(), 0, linalg::All()); - ASSERT_EQ(t_margin.Shape().size(), 3); + auto t_margin = base_margin.View(device).Slice(linalg::All(), 0, linalg::All()); + ASSERT_EQ(t_margin.Shape().size(), 2); info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()}); auto const& h_result = info.base_margin_.View(-1); - ASSERT_EQ(h_result.Shape().size(), 3); + ASSERT_EQ(h_result.Shape().size(), 2); auto in_margin = base_margin.View(-1); linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) { auto tup = linalg::UnravelIndex(i, h_result.Shape()); auto i0 = std::get<0>(tup); auto i1 = std::get<1>(tup); - auto i2 = std::get<2>(tup); - // Sliced at 3^th dimension. - auto v_1 = in_margin(i0, i1, 0, i2); + // Sliced at second dimension. 
+ auto v_1 = in_margin(i0, 0, i1); CHECK_EQ(v_0, v_1); return v_0; }); diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index c25e877079d2..40dd270a6b88 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -254,7 +254,7 @@ TEST(SimpleDMatrix, Slice) { std::iota(upper.begin(), upper.end(), 1.0f); auto& margin = p_m->Info().base_margin_; - margin = linalg::Tensor{{kRows, kClasses}, GenericParameter::kCpuId}; + margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, GenericParameter::kCpuId}; std::array ridxs {1, 3, 5}; std::unique_ptr out { p_m->Slice(ridxs) }; diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index b36df742da4f..1c7b5e124113 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -108,8 +108,8 @@ TEST(GPUPredictor, ExternalMemoryTest) { dmats.push_back(CreateSparsePageDMatrix(8000)); for (const auto& dmat: dmats) { - dmat->Info().base_margin_ = - linalg::Tensor{{dmat->Info().num_row_, static_cast(n_classes)}, 0}; + dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){ + {dmat->Info().num_row_, static_cast(n_classes)}, 0}; dmat->Info().base_margin_.Data()->Fill(0.5); PredictionCacheEntry out_predictions; gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);