Remove omp_get_max_threads in gbm and linear. (#7537)
* Use ctx in gbm.

* Use ctx threads in gbm and linear.
trivialfis committed Jan 4, 2022
1 parent eea094e commit 28af6f9
Showing 12 changed files with 124 additions and 135 deletions.
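
The diff below repeatedly replaces loops that implicitly relied on `omp_get_max_threads()` with `common::ParallelFor` calls that receive the thread count from the booster's context (`ctx_->Threads()`). As a rough orientation, here is a self-contained sketch of that pattern; the `ParallelFor` template below is a simplified stand-in, not xgboost's actual helper:

```cpp
// Minimal sketch only: a simplified stand-in for xgboost's common::ParallelFor,
// showing how an explicit thread count replaces the implicit omp_get_max_threads().
#include <cstdint>
#include <iostream>
#include <vector>

template <typename Index, typename Fn>
void ParallelFor(Index n, std::int32_t n_threads, Fn&& fn) {
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < static_cast<std::int64_t>(n); ++i) {
    fn(static_cast<Index>(i));
  }
}

int main() {
  std::int32_t const n_threads = 4;  // in xgboost this value would come from ctx_->Threads()
  std::vector<double> out(1000, 0.0);
  ParallelFor(out.size(), n_threads, [&](std::size_t i) { out[i] = 0.5 * static_cast<double>(i); });
  std::cout << out.back() << "\n";  // prints 499.5
  return 0;
}
```

Compiled with `-fopenmp` the loop runs on the requested number of threads; without OpenMP the pragma is ignored and the code still runs serially.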
2 changes: 1 addition & 1 deletion include/xgboost/gbm.h
@@ -38,7 +38,7 @@ class PredictionContainer;
*/
class GradientBooster : public Model, public Configurable {
protected:
GenericParameter const* generic_param_;
GenericParameter const* ctx_;

public:
/*! \brief virtual destructor */
2 changes: 1 addition & 1 deletion include/xgboost/linear_updater.h
@@ -29,7 +29,7 @@ class GBLinearModel;
*/
class LinearUpdater : public Configurable {
protected:
GenericParameter const* learner_param_;
GenericParameter const* ctx_;

public:
/*! \brief virtual destructor */
4 changes: 2 additions & 2 deletions src/gbm/gblinear.cc
@@ -86,7 +86,7 @@ class GBLinear : public GradientBooster {
}
param_.UpdateAllowUnknown(cfg);
param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
updater_->Configure(cfg);
monitor_.Init("GBLinear");
}
@@ -120,7 +120,7 @@ class GBLinear : public GradientBooster {
CHECK_EQ(get<String>(in["name"]), "gblinear");
FromJson(in["gblinear_train_param"], &param_);
param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
this->updater_->LoadConfig(in["updater"]);
}
void SaveConfig(Json* p_out) const override {
2 changes: 1 addition & 1 deletion src/gbm/gbm.cc
@@ -26,7 +26,7 @@ GradientBooster* GradientBooster::Create(
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(learner_model_param);
p_bst->generic_param_ = generic_param;
p_bst->ctx_ = generic_param;
return p_bst;
}
} // namespace xgboost
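
The factory now wires the context into the booster (`p_bst->ctx_ = generic_param;`). For intuition only, here is a hedged sketch of what a context-owned thread count can look like; this `Context` type and its clamping logic are illustrative assumptions, not xgboost's `GenericParameter`:

```cpp
// Hedged sketch of the "context" idea behind ctx_: the booster-level configuration owns
// the thread count, so components ask the context instead of calling omp_get_max_threads().
// This Context type is illustrative only, not xgboost's GenericParameter.
#include <algorithm>
#include <cstdint>
#include <thread>

struct Context {
  std::int32_t nthread{0};  // 0 follows the common "use all available cores" convention
  std::int32_t Threads() const {
    auto hw = static_cast<std::int32_t>(std::thread::hardware_concurrency());
    hw = std::max(hw, std::int32_t{1});  // hardware_concurrency() may return 0
    return nthread > 0 ? std::min(nthread, hw) : hw;
  }
};

int main() {
  Context ctx;
  ctx.nthread = 8;
  return ctx.Threads() > 0 ? 0 : 1;
}
```

The point of the commit is that every component asks this one object for the thread count rather than each calling into the OpenMP runtime on its own.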
37 changes: 17 additions & 20 deletions src/gbm/gbtree.cc
@@ -49,14 +49,14 @@ void GBTree::Configure(const Args& cfg) {
// configure predictors
if (!cpu_predictor_) {
cpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", this->generic_param_));
Predictor::Create("cpu_predictor", this->ctx_));
}
cpu_predictor_->Configure(cfg);
#if defined(XGBOOST_USE_CUDA)
auto n_gpus = common::AllVisibleGPUs();
if (!gpu_predictor_ && n_gpus != 0) {
gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->generic_param_));
Predictor::Create("gpu_predictor", this->ctx_));
}
if (n_gpus != 0) {
gpu_predictor_->Configure(cfg);
@@ -201,16 +201,16 @@ void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
}
#endif

void CopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_threads,
bst_group_t n_groups, bst_group_t group_id,
HostDeviceVector<GradientPair> *out_gpair) {
HostDeviceVector<GradientPair>* out_gpair) {
if (in_gpair->DeviceIdx() != GenericParameter::kCpuId) {
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
} else {
std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
auto nsize = static_cast<bst_omp_uint>(out_gpair->Size());
const auto &gpair_h = in_gpair->ConstHostVector();
common::ParallelFor(nsize, [&](bst_omp_uint i) {
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
tmp_h[i] = gpair_h[i * n_groups + group_id];
});
}
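
`CopyGradient` gathers one group's gradients out of the interleaved multi-class layout (`[row0_g0, row0_g1, ..., row1_g0, ...]`), now with the thread count passed in explicitly. A standalone approximation with simplified types; the real code uses `HostDeviceVector` and `common::ParallelFor`:

```cpp
// Sketch of the strided gather done by CopyGradient: one group's gradient pairs are
// pulled out with a stride of n_groups, using an explicit thread count. Simplified
// types and a hypothetical helper name, not the xgboost API.
#include <cstddef>
#include <cstdint>
#include <vector>

struct GradientPair { float grad; float hess; };

void CopyGroupGradient(std::vector<GradientPair> const& in_gpair, std::int32_t n_threads,
                       std::uint32_t n_groups, std::uint32_t group_id,
                       std::vector<GradientPair>* out_gpair) {
  auto& tmp = *out_gpair;  // sized to the number of rows by the caller
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < static_cast<std::int64_t>(tmp.size()); ++i) {
    tmp[i] = in_gpair[static_cast<std::size_t>(i) * n_groups + group_id];
  }
}
```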
@@ -228,7 +228,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
// break a lot of existing code.
auto device = tparam_.tree_method != TreeMethod::kGPUHist
? GenericParameter::kCpuId
: generic_param_->gpu_id;
: ctx_->gpu_id;
auto out = linalg::TensorView<float, 2>{
device == GenericParameter::kCpuId ? predt->predictions.HostSpan()
: predt->predictions.DeviceSpan(),
@@ -255,7 +255,7 @@
in_gpair->DeviceIdx());
bool update_predict = true;
for (int gid = 0; gid < ngroup; ++gid) {
CopyGradient(in_gpair, ngroup, gid, &tmp);
CopyGradient(in_gpair, ctx_->Threads(), ngroup, gid, &tmp);
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(&tmp, p_fmat, gid, &ret);
const size_t num_new_trees = ret.size();
@@ -310,7 +310,7 @@ void GBTree::InitUpdater(Args const& cfg) {
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(pstr.c_str(), generic_param_, model_.learner_model_param->task));
TreeUpdater::Create(pstr.c_str(), ctx_, model_.learner_model_param->task));
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
@@ -396,7 +396,7 @@ void GBTree::LoadConfig(Json const& in) {
updaters_.clear();
for (auto const& kv : j_updaters) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(kv.first, generic_param_, model_.learner_model_param->task));
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
up->LoadConfig(kv.second);
updaters_.push_back(std::move(up));
}
@@ -562,7 +562,7 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
auto on_device = is_ellpack || is_from_device;

// Use GPU Predictor if data is already on device and gpu_id is set.
if (on_device && generic_param_->gpu_id >= 0) {
if (on_device && ctx_->gpu_id >= 0) {
#if defined(XGBOOST_USE_CUDA)
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
CHECK(gpu_predictor_);
@@ -728,8 +728,8 @@ class Dart : public GBTree {
auto n_groups = model_.learner_model_param->num_output_group;

PredictionCacheEntry predts; // temporary storage for prediction
if (generic_param_->gpu_id != GenericParameter::kCpuId) {
predts.predictions.SetDevice(generic_param_->gpu_id);
if (ctx_->gpu_id != GenericParameter::kCpuId) {
predts.predictions.SetDevice(ctx_->gpu_id);
}
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);

@@ -758,11 +758,10 @@ class Dart : public GBTree {
} else {
auto &h_out_predts = p_out_preds->predictions.HostVector();
auto &h_predts = predts.predictions.HostVector();
#pragma omp parallel for
for (omp_ulong ridx = 0; ridx < p_fmat->Info().num_row_; ++ridx) {
common::ParallelFor(p_fmat->Info().num_row_, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + group;
h_out_predts[offset] += (h_predts[offset] * w);
}
});
}
}
}
@@ -846,13 +845,11 @@ class Dart : public GBTree {
if (device == GenericParameter::kCpuId) {
auto &h_predts = predts.predictions.HostVector();
auto &h_out_predts = out_preds->predictions.HostVector();
#pragma omp parallel for
for (omp_ulong ridx = 0; ridx < n_rows; ++ridx) {
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + group;
// Need to remove the base margin from individual tree.
h_out_predts[offset] +=
(h_predts[offset] - model_.learner_model_param->base_score) * w;
}
h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
});
} else {
out_preds->predictions.SetDevice(device);
predts.predictions.SetDevice(device);
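
The two DART loops above blend a weighted tree-group prediction into the running output, removing the per-tree base margin in the second case. A hedged, standalone sketch of that blend; the vector types and the function name are simplified assumptions:

```cpp
// Sketch of the DART blending loops rewritten above: one group's predictions are scaled
// by the tree weight w, with base_score subtracted when the base margin must be removed
// from an individual tree. Standalone approximation, not the real Dart code.
#include <cstddef>
#include <cstdint>
#include <vector>

void BlendTreePredictions(std::vector<float> const& h_predts, float w, float base_score,
                          std::uint32_t n_groups, std::uint32_t group, std::int32_t n_threads,
                          std::vector<float>* h_out_predts) {
  auto const n_rows = static_cast<std::int64_t>(h_out_predts->size() / n_groups);
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t ridx = 0; ridx < n_rows; ++ridx) {
    std::size_t const offset = static_cast<std::size_t>(ridx) * n_groups + group;
    (*h_out_predts)[offset] += (h_predts[offset] - base_score) * w;
  }
}
```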
5 changes: 2 additions & 3 deletions src/gbm/gbtree.h
@@ -413,10 +413,9 @@ class GBTree : public GradientBooster {
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
}

std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const override {
return model_.DumpModel(fmap, with_stats, format);
return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format);
}

protected:
7 changes: 3 additions & 4 deletions src/gbm/gbtree_model.h
@@ -109,12 +109,11 @@ struct GBTreeModel : public Model {
void SaveModel(Json* p_out) const override;
void LoadModel(Json const& p_out) override;

std::vector<std::string> DumpModel(const FeatureMap &fmap, bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats, int32_t n_threads,
std::string format) const {
std::vector<std::string> dump(trees.size());
common::ParallelFor(static_cast<omp_ulong>(trees.size()), [&](size_t i) {
dump[i] = trees[i]->DumpModel(fmap, with_stats, format);
});
common::ParallelFor(trees.size(), n_threads,
[&](size_t i) { dump[i] = trees[i]->DumpModel(fmap, with_stats, format); });
return dump;
}
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
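
`DumpModel` now takes the thread count explicitly; each tree's dump is independent, so the strings land in a pre-sized vector with no further synchronization. A minimal sketch under that assumption, with `DumpOneTree` standing in for the real per-tree dump:

```cpp
// Sketch of the DumpModel change: every tree's text dump is independent, so the results
// are written into a pre-sized vector in parallel with an explicit thread count.
// DumpOneTree is a placeholder, not xgboost's RegTree::DumpModel.
#include <cstdint>
#include <string>
#include <vector>

std::string DumpOneTree(std::size_t tree_idx) {  // placeholder for the real per-tree dump
  return "tree " + std::to_string(tree_idx);
}

std::vector<std::string> DumpAllTrees(std::size_t n_trees, std::int32_t n_threads) {
  std::vector<std::string> dump(n_trees);  // pre-sized, so each slot is written by exactly one thread
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < static_cast<std::int64_t>(n_trees); ++i) {
    dump[static_cast<std::size_t>(i)] = DumpOneTree(static_cast<std::size_t>(i));
  }
  return dump;
}
```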
91 changes: 45 additions & 46 deletions src/linear/coordinate_common.h
@@ -149,21 +149,21 @@ GetGradientParallel(GenericParameter const *ctx, int group_idx, int num_group,
*/
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
DMatrix *p_fmat, int32_t n_threads) {
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
dmlc::OMPException exc;
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint i = 0; i < ndata; ++i) {
exc.Run([&]() {
auto &p = gpair[i * num_group + group_idx];
if (p.GetHess() >= 0.0f) {
sum_grad += p.GetGrad();
sum_hess += p.GetHess();
}
});
}
exc.Rethrow();
std::vector<double> sum_grad_tloc(n_threads, 0);
std::vector<double> sum_hess_tloc(n_threads, 0);

common::ParallelFor(ndata, n_threads, [&](auto i) {
auto tid = omp_get_thread_num();
auto &p = gpair[i * num_group + group_idx];
if (p.GetHess() >= 0.0f) {
sum_grad_tloc[tid] += p.GetGrad();
sum_hess_tloc[tid] += p.GetHess();
}
});
double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
return std::make_pair(sum_grad, sum_hess);
}
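
The rewritten `GetBiasGradientParallel` replaces the OpenMP `reduction(+ : sum_grad, sum_hess)` clause with per-thread accumulators that are summed afterwards. A self-contained sketch of the same idea, using a simplified `GradientPair` and plain OpenMP instead of `common::ParallelFor`:

```cpp
// Sketch of the rewritten reduction: each thread accumulates into its own slot
// (indexed by omp_get_thread_num(), which stays below n_threads here), and the
// slots are summed once the parallel loop finishes. Requires OpenMP.
#include <cstdint>
#include <numeric>
#include <omp.h>
#include <utility>
#include <vector>

struct GradientPair { float grad; float hess; };

std::pair<double, double> BiasGradient(std::vector<GradientPair> const& gpair, int group_idx,
                                       int num_group, std::int32_t n_threads) {
  std::vector<double> sum_grad_tloc(n_threads, 0.0);
  std::vector<double> sum_hess_tloc(n_threads, 0.0);
  auto const n_rows = static_cast<std::int64_t>(gpair.size()) / num_group;
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < n_rows; ++i) {
    auto const tid = omp_get_thread_num();  // each thread owns its own slot
    auto const& p = gpair[static_cast<std::size_t>(i) * num_group + group_idx];
    if (p.hess >= 0.0f) {
      sum_grad_tloc[tid] += p.grad;
      sum_hess_tloc[tid] += p.hess;
    }
  }
  double const sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
  double const sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
  return {sum_grad, sum_hess};
}
```

Adjacent slots of the per-thread vectors may share a cache line; that false sharing is usually harmless at this scale, and padding the slots is an option if it ever shows up in profiles.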

@@ -179,23 +179,18 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
*/
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
DMatrix *p_fmat, int32_t n_threads) {
if (dw == 0.0f) return;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
auto col = page[fidx];
// update grad value
const auto num_row = static_cast<bst_omp_uint>(col.size());
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < num_row; ++j) {
exc.Run([&]() {
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) return;
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
});
}
exc.Rethrow();
common::ParallelFor(num_row, n_threads, [&](auto j) {
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) return;
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
});
}
}

@@ -209,30 +204,29 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
* \param p_fmat The input feature matrix.
*/
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
int32_t n_threads) {
if (dbias == 0.0f) return;
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
exc.Run([&]() {
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
if (g.GetHess() < 0.0f) return;
g += GradientPair(g.GetHess() * dbias, 0);
});
}
exc.Rethrow();
common::ParallelFor(ndata, n_threads, [&](auto i) {
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
if (g.GetHess() < 0.0f) return;
g += GradientPair(g.GetHess() * dbias, 0);
});
}

/**
* \brief Abstract class for stateful feature selection or ordering
* in coordinate descent algorithms.
*/
class FeatureSelector {
protected:
int32_t n_threads_{-1};

public:
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
/*! \brief factory method */
static FeatureSelector *Create(int choice);
static FeatureSelector *Create(int choice, int32_t n_threads);
/*! \brief virtual destructor */
virtual ~FeatureSelector() = default;
/**
@@ -274,6 +268,7 @@ class FeatureSelector {
*/
class CyclicFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int , const std::vector<GradientPair> &,
DMatrix *, float, float) override {
@@ -287,6 +282,7 @@ class CyclicFeatureSelector : public FeatureSelector {
*/
class ShuffleFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair>&,
DMatrix *, float, float, int) override {
@@ -313,6 +309,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
*/
class RandomFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int, const gbm::GBLinearModel &model,
int, const std::vector<GradientPair> &,
DMatrix *, float, float) override {
@@ -331,6 +328,7 @@ class RandomFeatureSelector : public FeatureSelector {
*/
class GreedyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &,
DMatrix *, float, float, int param) override {
@@ -360,7 +358,7 @@ class GreedyFeatureSelector : public FeatureSelector {
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
const auto col = page[i];
const bst_uint ndata = col.size();
auto &sums = gpair_sums_[group_idx * nfeat + i];
@@ -407,6 +405,7 @@ class GreedyFeatureSelector : public FeatureSelector {
*/
class ThriftyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
@@ -426,7 +425,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
// column-parallel is usually faster than row-parallel
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
const auto col = page[i];
const bst_uint ndata = col.size();
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
@@ -483,18 +482,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
std::vector<std::pair<double, double>> gpair_sums_;
};

inline FeatureSelector *FeatureSelector::Create(int choice) {
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
switch (choice) {
case kCyclic:
return new CyclicFeatureSelector();
return new CyclicFeatureSelector(n_threads);
case kShuffle:
return new ShuffleFeatureSelector();
return new ShuffleFeatureSelector(n_threads);
case kThrifty:
return new ThriftyFeatureSelector();
return new ThriftyFeatureSelector(n_threads);
case kGreedy:
return new GreedyFeatureSelector();
return new GreedyFeatureSelector(n_threads);
case kRandom:
return new RandomFeatureSelector();
return new RandomFeatureSelector(n_threads);
default:
LOG(FATAL) << "unknown coordinate selector: " << choice;
}
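
The selector hierarchy now carries the thread count: the base class stores `n_threads_`, the derived selectors inherit its constructor via `using FeatureSelector::FeatureSelector;`, and `Create` forwards `n_threads`. A compressed sketch of that structure with illustrative names; only the shape mirrors the diff:

```cpp
// Sketch of the selector plumbing: the base class stores n_threads_, a derived class
// pulls in its constructor with a using-declaration, and the factory forwards the
// thread count to whichever selector it builds.
#include <cstdint>
#include <memory>
#include <stdexcept>

class Selector {
 public:
  explicit Selector(std::int32_t n_threads) : n_threads_{n_threads} {}
  virtual ~Selector() = default;
  static std::unique_ptr<Selector> Create(int choice, std::int32_t n_threads);

 protected:
  std::int32_t n_threads_{-1};
};

class CyclicSelector : public Selector {
 public:
  using Selector::Selector;  // inherit the (n_threads) constructor
};

std::unique_ptr<Selector> Selector::Create(int choice, std::int32_t n_threads) {
  switch (choice) {
    case 0:
      return std::make_unique<CyclicSelector>(n_threads);
    default:
      throw std::invalid_argument{"unknown coordinate selector"};
  }
}
```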
2 changes: 1 addition & 1 deletion src/linear/linear_updater.cc
@@ -17,7 +17,7 @@ LinearUpdater* LinearUpdater::Create(const std::string& name, GenericParameter c
LOG(FATAL) << "Unknown linear updater " << name;
}
auto p_linear = (e->body)();
p_linear->learner_param_ = lparam;
p_linear->ctx_ = lparam;
return p_linear;
}

