Calculate base_score based on input labels. #8107

Merged
merged 34 commits into from Sep 20, 2022
Changes from 28 commits

34 commits
6671efb
Calculate `base_score` based on input labels.
trivialfis Jul 22, 2022
d041498
Custom objective.
trivialfis Jul 27, 2022
28739cc
Fixes.
trivialfis Jul 27, 2022
117b175
Use a tensor in learner.
trivialfis Jul 28, 2022
fefde60
fixes.
trivialfis Jul 28, 2022
697fd12
Fix.
trivialfis Jul 28, 2022
47cfa11
Lint.
trivialfis Jul 28, 2022
de46dc2
Remove.
trivialfis Jul 29, 2022
0b0616a
Cache the model.
trivialfis Jul 29, 2022
c243e66
Empty dmatrix.
trivialfis Aug 23, 2022
7dfa87a
Revert unnecessary changes.
trivialfis Aug 23, 2022
8db5676
Fix.
trivialfis Aug 23, 2022
f260759
Add serialization test.
trivialfis Aug 23, 2022
fba7245
CPU build.
trivialfis Aug 23, 2022
2fc0e60
revert.
trivialfis Aug 24, 2022
fa9d499
Better average.
trivialfis Sep 13, 2022
7bc63d1
Move configuration.
trivialfis Sep 13, 2022
7c457ad
Check for model initialized.
trivialfis Sep 13, 2022
052fff0
Merge dispatching into median.
trivialfis Sep 13, 2022
0c3c3a6
Split up the configuration.
trivialfis Sep 13, 2022
bbb30a0
Add a quick test.
trivialfis Sep 13, 2022
e78c608
check.
trivialfis Sep 13, 2022
964fc05
test.
trivialfis Sep 13, 2022
6c67acb
Don't change.
trivialfis Sep 13, 2022
6c07f98
check.
trivialfis Sep 14, 2022
bb1fc88
check.
trivialfis Sep 14, 2022
8890a2a
cleanup.
trivialfis Sep 14, 2022
bba1cd9
typo.
trivialfis Sep 16, 2022
644bbe2
Weighted average.
trivialfis Sep 19, 2022
dad7a37
Change name.
trivialfis Sep 19, 2022
9c2bdac
Add tests.
trivialfis Sep 19, 2022
79fab2b
CPU build.
trivialfis Sep 19, 2022
5099c3c
Fix.
trivialfis Sep 19, 2022
103c722
Add a test for distributed training.
trivialfis Sep 19, 2022
30 changes: 23 additions & 7 deletions include/xgboost/learner.h
@@ -8,10 +8,9 @@
#ifndef XGBOOST_LEARNER_H_
#define XGBOOST_LEARNER_H_

#include <dmlc/any.h>
#include <xgboost/base.h>
#include <xgboost/feature_map.h>
#include <xgboost/generic_parameters.h>
#include <xgboost/generic_parameters.h> // Context
#include <xgboost/host_device_vector.h>
#include <xgboost/model.h>
#include <xgboost/predictor.h>
@@ -274,7 +273,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
/**
* \brief Return the context object of this Booster.
*/
virtual GenericParameter const* Ctx() const = 0;
virtual Context const* Ctx() const = 0;
/*!
* \brief Get configuration arguments currently stored by the learner
* \return Key-value pairs representing configuration arguments
@@ -289,7 +288,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
/*! \brief The evaluation metrics used to evaluate the model. */
std::vector<std::unique_ptr<Metric> > metrics_;
/*! \brief Training parameter. */
GenericParameter generic_parameters_;
Context ctx_;
};

struct LearnerModelParamLegacy;
@@ -298,8 +297,14 @@ struct LearnerModelParamLegacy;
* \brief Basic Model Parameters, used to describe the booster.
*/
struct LearnerModelParam {
/* \brief global bias */
bst_float base_score { 0.5f };
private:
/**
* \brief Global bias. This is just a scalar value for now, but it can be extended to a
* vector once multi-class and multi-target models are supported.
*/
linalg::Tensor<float, 1> base_score_;

public:
/* \brief number of features */
uint32_t num_feature { 0 };
/* \brief number of classes, if it is multi-class classification */
@@ -310,7 +315,18 @@
LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
// this one as an immutable copy.
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, ObjInfo t);
LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
linalg::Tensor<float, 1> base_margin, ObjInfo t);
LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
uint32_t n_groups)
: base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}

linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
linalg::TensorView<float const, 1> BaseScore(int32_t device) const;

void Copy(LearnerModelParam const& that);

/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
bool Initialized() const { return num_feature != 0; }
};
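With `base_score` now private and tensor-valued, callers read it through `BaseScore()`. A minimal sketch of the access pattern, assuming an initialized `param` and a configured `Context` (the helper function itself is not part of this diff):

#include <xgboost/learner.h>

// Hypothetical caller: the base score is a read-only 1-D view now.
// Today it holds a single scalar; the tensor shape leaves room for
// multi-class and multi-target models later.
float ReadBaseScore(xgboost::Context const* ctx,
                    xgboost::LearnerModelParam const& param) {
  auto score = param.BaseScore(ctx);  // linalg::TensorView<float const, 1>
  return score(0);
}

The `int32_t device` overload serves code paths that hold a device ordinal but no `Context`.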
56 changes: 48 additions & 8 deletions include/xgboost/linalg.h
@@ -8,6 +8,7 @@

#include <dmlc/endian.h>
#include <xgboost/base.h>
#include <xgboost/generic_parameters.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/json.h>
#include <xgboost/span.h>
@@ -16,6 +17,7 @@
#include <cassert>
#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
@@ -213,6 +215,22 @@ LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
constexpr auto kSize = std::tuple_size<Tup>::value;
return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
}

/**
* C++17 backport of std::conjunction (the codebase targets C++14)
*/
template <class...>
struct Conjunction : std::true_type {};
template <class B1>
struct Conjunction<B1> : B1 {};
template <class B1, class... Bn>
struct Conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), Conjunction<Bn...>, B1> {};

template <typename... Index>
using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;

template <typename... Index>
using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
} // namespace detail

/**
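The `Conjunction` template is a C++14-compatible stand-in for `std::conjunction`, and `EnableIfIntegral` uses it to gate the index operators below. A standalone sketch of the effect; the `At` function is illustrative only:

#include <cstddef>
#include <type_traits>

template <class...>
struct Conjunction : std::true_type {};
template <class B1>
struct Conjunction<B1> : B1 {};
template <class B1, class... Bn>
struct Conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), Conjunction<Bn...>, B1> {};

template <typename... Index>
using EnableIfIntegral =
    std::enable_if_t<Conjunction<std::is_integral<std::remove_reference_t<Index>>...>::value>;

// Participates in overload resolution only when every index is integral.
template <typename... Index, EnableIfIntegral<Index...> * = nullptr>
constexpr std::size_t At(Index &&...idx) { return sizeof...(idx); }

static_assert(At(1, 2u, std::size_t{3}) == 3, "all-integral call compiles");
// At(1, 2.5);  // would not compile: the double argument removes the overload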
@@ -406,7 +424,7 @@ class TensorView {
*
* \endcode
*/
template <typename... Index>
template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
LINALG_HD T &operator()(Index &&...index) {
static_assert(sizeof...(index) <= kDim, "Invalid index.");
size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
@@ -416,7 +434,7 @@
/**
* \brief Index the tensor to obtain a scalar value.
*/
template <typename... Index>
template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
LINALG_HD T const &operator()(Index &&...index) const {
static_assert(sizeof...(index) <= kDim, "Invalid index.");
size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
@@ -656,7 +674,7 @@ class Tensor {
}
if (device >= 0) {
data_.SetDevice(device);
data_.DevicePointer(); // Pull to device;
data_.ConstDevicePointer(); // Pull to device.
}
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
}
@@ -702,12 +720,29 @@
}

template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
int32_t device = Context::kCpuId) {
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
this->Initialize(shape, device);
}
/**
* \brief Index operator. Not thread-safe; should not be used in performance-critical
* regions. For more efficient indexing, obtain a view first.
*/
template <typename... Index>
T &operator()(Index &&...idx) {
return this->HostView()(std::forward<Index>(idx)...);
}
/**
* \brief Index operator. Not thread-safe; should not be used in performance-critical
* regions. For more efficient indexing, obtain a view first.
*/
template <typename... Index>
T const &operator()(Index &&...idx) const {
return this->HostView()(std::forward<Index>(idx)...);
}
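A short usage sketch: the new operator makes one-off element access convenient, while the comment's advice still applies inside loops. Note that the constructor change above also lets the initializer-list form default to CPU; values here are illustrative.

void IndexingDemo() {
  // Two-argument construction now defaults to Context::kCpuId.
  xgboost::linalg::Tensor<float, 2> t{{1.f, 2.f, 3.f, 4.f}, {2, 2}};
  t(0, 1) = 5.f;           // one-off access: convenient, but re-creates a host view
  auto h = t.HostView();   // hot loops: take the view once
  for (std::size_t i = 0; i < h.Shape(0); ++i) {
    h(i, 1) += 1.f;
  }
}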

/**
* \brief Get a \ref TensorView for this tensor.
@@ -761,7 +796,7 @@ class Tensor {
*
* If the total size is changed, then data in this tensor is no longer valid.
*/
template <typename... S>
template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
void Reshape(S &&...s) {
static_assert(sizeof...(S) <= kDim, "Invalid shape.");
detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
@@ -777,15 +812,20 @@
*
* If the total size is changed, then data in this tensor is no longer valid.
*/
template <int32_t D>
void Reshape(size_t (&shape)[D]) {
template <size_t D>
void Reshape(common::Span<size_t const, D> shape) {
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, this->shape_);
std::copy(shape.data(), shape.data() + D, this->shape_);
std::fill(shape_ + D, shape_ + kDim, 1);
auto n = detail::CalcSize(shape_);
data_.Resize(n);
}

template <size_t D>
void Reshape(size_t (&shape)[D]) {
this->Reshape(common::Span<size_t const, D>{shape});
}
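The array-reference overload now simply forwards to the new `Span` overload, so both spellings share one implementation. A quick sketch of the three spellings (assumes `xgboost/span.h` is available, as it is in this header):

void ReshapeDemo() {
  xgboost::linalg::Tensor<float, 2> t;
  t.Reshape(3, 4);   // variadic overload; integral arguments only
  std::size_t shape[] = {6, 2};
  t.Reshape(shape);  // array reference, forwarded to the Span overload
  t.Reshape(xgboost::common::Span<std::size_t const, 2>{shape});
}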

/**
* \brief Set device ordinal for this tensor.
*/
12 changes: 11 additions & 1 deletion include/xgboost/objective.h
@@ -27,7 +27,10 @@ class RegTree;
/*! \brief interface of objective function */
class ObjFunction : public Configurable {
protected:
GenericParameter const* ctx_;
Context const* ctx_;

public:
static constexpr float DefaultBaseScore() { return 0.5f; }

public:
/*! \brief virtual destructor */
@@ -75,6 +78,13 @@
virtual bst_float ProbToMargin(bst_float base_score) const {
return base_score;
}
/**
* \brief Make an initial estimate of the prediction from the labels.
*
* \param info MetaInfo that contains the labels.
* \param base_score Output estimate.
*/
virtual void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const;
/*!
* \brief Return task of this objective.
*/
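This hook is the heart of the PR: an objective can now derive the initial `base_score` from the training labels instead of the fixed `DefaultBaseScore()` of 0.5. A hedged sketch of what an override might look like for a simple single-target regression objective; the field names (`info.labels`, `info.weights_`) and the `OptionalWeights` span constructor are assumptions from the surrounding codebase, and the merged code dispatches to a mean or median depending on the objective (see `src/common/stats.cu` below), so treat this purely as an illustration of the contract:

// Hypothetical override: weighted mean of the labels as the initial estimate.
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const override {
  auto labels = info.labels.HostView();  // assumed: 2-D label tensor
  auto weights = common::OptionalWeights{info.weights_.ConstHostSpan()};  // assumed ctor
  double sum = 0.0, wsum = 0.0;
  for (std::size_t i = 0; i < labels.Shape(0); ++i) {
    auto w = weights[i];
    sum += labels(i, 0) * w;  // single-target only, for brevity
    wsum += w;
  }
  base_score->Reshape(1);
  (*base_score)(0) = wsum > 0.0 ? static_cast<float>(sum / wsum) : DefaultBaseScore();
}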
7 changes: 2 additions & 5 deletions include/xgboost/predictor.h
@@ -102,13 +102,10 @@ class PredictionContainer {
*/
class Predictor {
protected:
/*
* \brief Runtime parameters.
*/
GenericParameter const* ctx_;
Context const* ctx_;

public:
explicit Predictor(GenericParameter const* ctx) : ctx_{ctx} {}
explicit Predictor(Context const* ctx) : ctx_{ctx} {}

virtual ~Predictor() = default;

12 changes: 10 additions & 2 deletions src/common/common.h
@@ -265,6 +265,7 @@ struct OptionalWeights {
explicit OptionalWeights(float w) : dft{w} {}

XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
auto Empty() const { return weights.empty(); }
};
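The new `Empty()` accessor lets callers branch between weighted and unweighted code paths while `operator[]` keeps the inner loop uniform. A small sketch, assuming the span constructor that exists alongside the float one:

void WeightSumDemo(xgboost::common::Span<float const> maybe_weights, std::size_t n) {
  xgboost::common::OptionalWeights w{maybe_weights};  // empty span => default weight
  double wsum = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    wsum += w[i];  // falls back to dft for every i when no weights were given
  }
  if (w.Empty()) {
    // e.g. cuda::Median below picks the plain quantile on this path
  }
}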

/**
@@ -276,22 +277,29 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
}

/**
* @brief A CRTP (curiously recurring template pattern) helper function.
* \brief A CRTP (curiously recurring template pattern) helper function.
*
* https://www.fluentcpp.com/2017/05/19/crtp-helper/
*
* Does two things:
* 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class.
* 2. Avoids having to `static_cast` in a lot of places.
*
* @tparam T The derived class in a CRTP hierarchy.
* \tparam T The derived class in a CRTP hierarchy.
*/
template <typename T>
struct Crtp {
T &Underlying() { return static_cast<T &>(*this); }
T const &Underlying() const { return static_cast<T const &>(*this); }
};
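A compact sketch of what the helper buys a CRTP hierarchy: the base class reaches the derived type through `Underlying()` instead of repeating `static_cast<T&>(*this)` at each call site. The `Writer`/`CsvWriter` names are illustrative:

template <typename Derived>
class Writer : public xgboost::common::Crtp<Derived> {
 public:
  void Write() { this->Underlying().WriteImpl(); }  // no static_cast needed
};

class CsvWriter : public Writer<CsvWriter> {
 public:
  void WriteImpl() { /* emit CSV */ }
};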

/**
* \brief C++17 std::as_const
*/
template <typename T>
typename std::add_const<T>::type &AsConst(T &v) noexcept { // NOLINT(runtime/references)
return v;
}
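A one-liner sketch of why the backport is handy: it selects a `const` overload on a mutable object without a hand-written cast, assuming (as in `linalg.h`) that the `const` overload of `HostView()` yields a read-only view:

void ConstViewDemo() {
  xgboost::linalg::Tensor<float, 1> t;
  auto v = xgboost::common::AsConst(t).HostView();  // TensorView<float const, 1>
  (void)v;
}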
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_COMMON_H_
26 changes: 26 additions & 0 deletions src/common/linalg_op.h
@@ -4,6 +4,7 @@
#ifndef XGBOOST_COMMON_LINALG_OP_H_
#define XGBOOST_COMMON_LINALG_OP_H_
#include <type_traits>
#include <cstdint> // std::int32_t

#include "common.h"
#include "threading_utils.h"
@@ -59,6 +60,31 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
ElementWiseKernelHost(t, ctx->Threads(), fn);
}
#endif // !defined(XGBOOST_USE_CUDA)

// The view is taken by reference: the returned iterator's lambda captures it,
// so a by-value parameter would dangle once this function returns.
template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
});
return it;
}

template <typename T, std::int32_t kDim>
auto cend(TensorView<T, kDim> const& v) { // NOLINT
return cbegin(v) + v.Size();
}

template <typename T, std::int32_t kDim>
auto begin(TensorView<T, kDim>& v) { // NOLINT
auto it = common::MakeIndexTransformIter(
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
return it;
}

template <typename T, std::int32_t kDim>
auto end(TensorView<T, kDim>& v) { // NOLINT
return begin(v) + v.Size();
}
} // namespace linalg
} // namespace xgboost
#endif // XGBOOST_COMMON_LINALG_OP_H_
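With these free functions a `TensorView` plugs straight into standard algorithms. A hedged sketch, relying on `MakeIndexTransformIter` producing a standard-conforming iterator as its other call sites in the codebase do:

#include <numeric>  // std::accumulate

// Walks every element of a 2-D view in row-major order via UnravelIndex.
double SumAll(xgboost::linalg::TensorView<float const, 2> v) {
  return std::accumulate(xgboost::linalg::cbegin(v), xgboost::linalg::cend(v), 0.0);
}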
47 changes: 47 additions & 0 deletions src/common/stats.cu
@@ -0,0 +1,47 @@
/*!
* Copyright 2022 by XGBoost Contributors
*/

#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator

#include "common.h" // common::OptionalWeights
#include "device_helpers.cuh" // dh::MakeTransformIterator, tcbegin, tcend
#include "stats.cuh" // common::SegmentedQuantile, common::SegmentedWeightedQuantile
#include "xgboost/generic_parameters.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // linalg::TensorView, UnravelIndex, Apply

namespace xgboost {
namespace common {
namespace cuda {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights) {
HostDeviceVector<size_t> segments{0, t.Size()};
segments.SetDevice(ctx->gpu_id);
auto d_segments = segments.ConstDeviceSpan();
auto val_it = dh::MakeTransformIterator<float>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
});

HostDeviceVector<float> quantile{0};
quantile.SetDevice(ctx->gpu_id);
if (weights.Empty()) {
common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,
val_it + t.Size(), &quantile);
} else {
CHECK_NE(t.Shape(1), 0);
auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(size_t i) {
auto sample_idx = i / t.Shape(1);
return weights[sample_idx];
});
common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),
val_it, val_it + t.Size(), w_it, w_it + t.Size(), &quantile);
}
CHECK_EQ(quantile.Size(), 1);
return quantile.HostVector().front();
}
} // namespace cuda
} // namespace common
} // namespace xgboost
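For contrast, a hedged host-side analogue of the unweighted branch above, built on the new `linalg` iterators: it materializes a copy and takes an `nth_element` median, whereas the CUDA path runs a segmented 0.5-quantile without the copy. This helper is illustrative, not part of the PR:

#include <algorithm>  // std::nth_element
#include <vector>

float MedianHost(xgboost::linalg::TensorView<float const, 2> t) {
  std::vector<float> vals(xgboost::linalg::cbegin(t), xgboost::linalg::cend(t));
  auto mid = vals.begin() + vals.size() / 2;
  std::nth_element(vals.begin(), mid, vals.end());
  return *mid;  // upper median; a true 0.5-quantile interpolates for even sizes
}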