Using column_sampler for optimization of ColWiseBuildHist #8319

Open: wants to merge 3 commits into master
48 changes: 35 additions & 13 deletions src/common/hist_util.cc
@@ -137,37 +137,50 @@ constexpr size_t Prefetch::kNoPrefetchSize;
struct RuntimeFlags {
const bool first_page;
const bool read_by_column;
const bool column_sampling;
const BinTypeSize bin_type_size;
};

template <bool _any_missing,
bool _first_page = false,
bool _read_by_column = false,
bool _column_sampling = false,
typename BinIdxTypeName = uint8_t>
class GHistBuildingManager {
public:
constexpr static bool kAnyMissing = _any_missing;
constexpr static bool kFirstPage = _first_page;
constexpr static bool kReadByColumn = _read_by_column;
constexpr static bool kColumnSampling = _column_sampling;
using BinIdxType = BinIdxTypeName;

private:
template <bool new_first_page>
struct SetFirstPage {
using Type = GHistBuildingManager<kAnyMissing, new_first_page, kReadByColumn, BinIdxType>;
using Type = GHistBuildingManager<kAnyMissing, new_first_page, kReadByColumn,
kColumnSampling, BinIdxType>;
};

template <bool new_read_by_column>
struct SetReadByColumn {
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, new_read_by_column, BinIdxType>;
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, new_read_by_column,
kColumnSampling, BinIdxType>;
};

template <bool new_column_sampling>
struct SetColumnSampling {
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn,
new_column_sampling, BinIdxType>;
};

template <typename NewBinIdxType>
struct SetBinIdxType {
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn, NewBinIdxType>;
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn,
kColumnSampling, NewBinIdxType>;
};

using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn, BinIdxType>;
using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn,
kColumnSampling, BinIdxType>;

public:
/* Entry point to dispatcher
@@ -181,6 +194,8 @@ class GHistBuildingManager {
SetFirstPage<true>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));
} else if (flags.read_by_column != kReadByColumn) {
SetReadByColumn<true>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));
} else if (flags.column_sampling != kColumnSampling) {
SetColumnSampling<true>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));
} else if (flags.bin_type_size != sizeof(BinIdxType)) {
DispatchBinType(flags.bin_type_size, [&](auto t) {
using NewBinIdxType = decltype(t);
@@ -264,9 +279,10 @@ void RowsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
template <class BuildingManager>
void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
GHistRow hist, const std::vector<int>& fids) {
constexpr bool kAnyMissing = BuildingManager::kAnyMissing;
constexpr bool kFirstPage = BuildingManager::kFirstPage;
constexpr bool kColumnSampling = BuildingManager::kColumnSampling;
using BinIdxType = typename BuildingManager::BinIdxType;
const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
@@ -284,13 +300,14 @@ void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
};

const size_t n_features = gmat.cut.Ptrs().size() - 1;
const size_t n_columns = n_features;
const size_t n_columns = kColumnSampling ? fids.size() : n_features;
Member: Is fids empty if there's no sampling?

Contributor Author: fids is empty when the condition here is false.
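For illustration only (this sketch is not part of the diff, and VisitColumns is a hypothetical helper), the column-selection logic the question above refers to looks like this: an empty fids means no sampling, so the kernel walks every feature, while a non-empty fids restricts the walk to the sampled feature ids.

#include <cstddef>
#include <vector>

// Sketch of the loop structure used in ColsWiseBuildHistKernel: the column count and
// the column id both depend on whether compile-time column sampling is enabled.
template <bool kColumnSampling, typename Visit>
void VisitColumns(std::size_t n_features, const std::vector<int> &fids, Visit &&visit) {
  const std::size_t n_columns = kColumnSampling ? fids.size() : n_features;
  for (std::size_t j = 0; j < n_columns; ++j) {
    const int cid = kColumnSampling ? fids[j] : static_cast<int>(j);
    visit(cid);  // the real kernel accumulates gradient/hessian pairs for column cid here
  }
}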

auto hist_data = reinterpret_cast<double *>(hist.data());
const uint32_t two{2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a single row FP array
for (size_t cid = 0; cid < n_columns; ++cid) {
for (size_t j = 0; j < n_columns; ++j) {
int cid = kColumnSampling ? fids[j] : j;
const uint32_t offset = kAnyMissing ? 0 : offsets[cid];
for (size_t i = 0; i < size; ++i) {
const size_t row_id = rid[i];
@@ -317,9 +334,9 @@ void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
template <class BuildingManager>
void BuildHistDispatch(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
GHistRow hist, const std::vector<int>& fids) {
if (BuildingManager::kReadByColumn) {
ColsWiseBuildHistKernel<BuildingManager>(gpair, row_indices, gmat, hist);
ColsWiseBuildHistKernel<BuildingManager>(gpair, row_indices, gmat, hist, fids);
} else {
const size_t nrows = row_indices.Size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
@@ -347,32 +364,37 @@ template <bool any_missing>
void GHistBuilder::BuildHist(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
GHistRow hist, bool force_read_by_column) const {
GHistRow hist, const std::vector<int>& fids,
bool force_read_by_column) const {
/* force_read_by_column is used for testing the columnwise building of histograms.
* default force_read_by_column = false
*/
constexpr double kAdhocL2Size = 1024 * 1024 * 0.8;
const bool hist_fit_to_l2 = kAdhocL2Size > 2*sizeof(float)*gmat.cut.Ptrs().back();
bool first_page = gmat.base_rowid == 0;
bool read_by_column = !hist_fit_to_l2 && !any_missing;
bool column_sampling = fids.size() > 0;
bool read_by_column = column_sampling ||
(!hist_fit_to_l2 && !any_missing);
auto bin_type_size = gmat.index.GetBinTypeSize();

GHistBuildingManager<any_missing>::DispatchAndExecute(
{first_page, read_by_column || force_read_by_column, bin_type_size},
{first_page, read_by_column || force_read_by_column, column_sampling, bin_type_size},
[&](auto t) {
using BuildingManager = decltype(t);
BuildHistDispatch<BuildingManager>(gpair, row_indices, gmat, hist);
BuildHistDispatch<BuildingManager>(gpair, row_indices, gmat, hist, fids);
});
}

template void GHistBuilder::BuildHist<true>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
const std::vector<int>& fids,
bool force_read_by_column) const;

template void GHistBuilder::BuildHist<false>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
const std::vector<int>& fids,
bool force_read_by_column) const;
} // namespace common
} // namespace xgboost
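The dispatch chain above converts the runtime flags in RuntimeFlags into compile-time template parameters, so the new kColumnSampling flag costs nothing inside the hot loop. Below is a minimal sketch of that pattern with hypothetical names (Flags, Manager), reduced to two flags; it is not the PR's code, only the idea behind GHistBuildingManager::DispatchAndExecute.

#include <utility>

struct Flags {
  bool read_by_column;
  bool column_sampling;
};

// Each flag that differs from the current template parameter re-enters the dispatcher
// with that flag baked in; the final callback sees every flag as a constexpr constant.
template <bool kReadByColumn = false, bool kColumnSampling = false>
struct Manager {
  static constexpr bool read_by_column = kReadByColumn;
  static constexpr bool column_sampling = kColumnSampling;

  template <typename Fn>
  static void DispatchAndExecute(const Flags &flags, Fn &&fn) {
    if (flags.read_by_column != kReadByColumn) {
      Manager<true, kColumnSampling>::DispatchAndExecute(flags, std::forward<Fn>(fn));
    } else if (flags.column_sampling != kColumnSampling) {
      Manager<kReadByColumn, true>::DispatchAndExecute(flags, std::forward<Fn>(fn));
    } else {
      fn(Manager{});  // all runtime flags now match the template parameters
    }
  }
};

// Usage: Manager<>::DispatchAndExecute({false, true}, [](auto m) {
//   // decltype(m)::column_sampling is a compile-time constant here.
// });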
2 changes: 1 addition & 1 deletion src/common/hist_util.h
@@ -636,7 +636,7 @@ class GHistBuilder {
// construct a histogram via histogram aggregation
template <bool any_missing>
void BuildHist(const std::vector<GradientPair>& gpair, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, GHistRow hist,
const GHistIndexMatrix& gmat, GHistRow hist, const std::vector<int>& fids,
bool force_read_by_column = false) const;
uint32_t GetNumBins() const {
return nbins_;
40 changes: 31 additions & 9 deletions src/tree/hist/histogram.h
@@ -7,7 +7,9 @@
#include <algorithm>
#include <limits>
#include <vector>
#include <memory>

#include "../../common/random.h"
#include "../../collective/communicator-inl.h"
#include "../../common/hist_util.h"
#include "../../data/gradient_index.h"
@@ -25,7 +27,10 @@ class HistogramBuilder {
common::GHistBuilder builder_;
common::ParallelGHistBuilder buffer_;
BatchParam param_;
TrainParam train_param_;
int32_t n_threads_{-1};
std::shared_ptr<common::ColumnSampler> column_sampler_;
std::vector<int> fids_;
size_t n_batches_{0};
// Whether XGBoost is running in distributed environment.
bool is_distributed_{false};
@@ -39,12 +44,15 @@
* \param is_distributed Mostly used for testing to allow injecting parameters instead
* of using global rabit variable.
*/
void Reset(uint32_t total_bins, BatchParam p, int32_t n_threads, size_t n_batches,
bool is_distributed) {
void Reset(uint32_t total_bins, BatchParam p, const TrainParam& train_param,
std::shared_ptr<common::ColumnSampler> column_sampler,
int32_t n_threads, size_t n_batches, bool is_distributed) {
CHECK_GE(n_threads, 1);
n_threads_ = n_threads;
column_sampler_ = column_sampler;
n_batches_ = n_batches;
param_ = p;
train_param_ = train_param;
hist_.Init(total_bins);
hist_local_worker_.Init(total_bins);
buffer_.Init(total_bins);
@@ -59,7 +67,7 @@
GHistIndexMatrix const &gidx,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
common::RowSetCollection const &row_set_collection,
const std::vector<GradientPair> &gpair_h,
const std::vector<GradientPair> &gpair_h, int depth,
bool force_read_by_column) {
const size_t n_nodes = nodes_for_explicit_hist_build.size();
CHECK_GT(n_nodes, 0);
@@ -76,6 +84,20 @@
buffer_.Reset(this->n_threads_, n_nodes, space, target_hists);
}

constexpr float kColsampleTh = 0.1;
Member: Why 0.1?

Contributor Author: It is an ad-hoc threshold value.

bool column_sampling = (column_sampler_ != nullptr) &&
Member: When is the column sampler nullptr?

Contributor Author: I set it to nullptr in tests of the histogram builder that do not use this optimization.

(train_param_.colsample_bytree < kColsampleTh ||
train_param_.colsample_bylevel < kColsampleTh);
Member: How about colsample_bynode? Is it used?

Contributor Author: Not for now; one could investigate these options more deeply later.

if (column_sampling) {
const size_t n_sampled_features = column_sampler_->GetFeatureSet(depth)->Size();
fids_.resize(n_sampled_features);
for (size_t i = 0; i < n_sampled_features; ++i) {
fids_[i] = column_sampler_->GetFeatureSet(depth)->ConstHostVector()[i];
}
} else {
fids_.resize(0);
}
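A compact sketch of the gating discussed in the comments above, with hypothetical names (CollectSampledFeatures and its arguments are not part of the PR): the explicit feature list is only built when a sampler is present and the sampled fraction falls below the ad-hoc 0.1 threshold; otherwise an empty fids is returned and the kernel iterates all features as before.

#include <vector>

std::vector<int> CollectSampledFeatures(bool has_sampler, float colsample_bytree,
                                        float colsample_bylevel,
                                        const std::vector<int> &sampled_feature_set) {
  constexpr float kColsampleTh = 0.1f;  // ad-hoc threshold, per the author's reply above
  const bool use_sampling =
      has_sampler &&
      (colsample_bytree < kColsampleTh || colsample_bylevel < kColsampleTh);
  if (!use_sampling) {
    return {};  // empty fids: the histogram kernel falls back to all features
  }
  return sampled_feature_set;  // sampled feature ids for the current tree depth
}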

// Parallel processing by nodes and data in each node
common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) {
const auto tid = static_cast<unsigned>(omp_get_thread_num());
Expand All @@ -87,7 +109,7 @@ class HistogramBuilder {
elem.begin + end_of_row_set, nid);
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
if (rid_set.Size() != 0) {
builder_.template BuildHist<any_missing>(gpair_h, rid_set, gidx, hist,
builder_.template BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, fids_,
force_read_by_column);
}
});
@@ -114,7 +136,7 @@
RegTree *p_tree, common::RowSetCollection const &row_set_collection,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
std::vector<GradientPair> const &gpair,
std::vector<GradientPair> const &gpair, int depth,
bool force_read_by_column = false) {
int starting_index = std::numeric_limits<int>::max();
int sync_count = 0;
@@ -126,12 +148,12 @@
if (gidx.IsDense()) {
this->BuildLocalHistograms<false>(page_id, space, gidx,
nodes_for_explicit_hist_build,
row_set_collection, gpair,
row_set_collection, gpair, depth,
force_read_by_column);
} else {
this->BuildLocalHistograms<true>(page_id, space, gidx,
nodes_for_explicit_hist_build,
row_set_collection, gpair,
row_set_collection, gpair, depth,
force_read_by_column);
}

@@ -153,7 +175,7 @@
common::RowSetCollection const &row_set_collection,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
std::vector<GradientPair> const &gpair,
std::vector<GradientPair> const &gpair, int depth,
bool force_read_by_column = false) {
const size_t n_nodes = nodes_for_explicit_hist_build.size();
// create space of size (# rows in each node)
@@ -166,7 +188,7 @@
256);
this->BuildHist(page_id, space, gidx, p_tree, row_set_collection,
nodes_for_explicit_hist_build, nodes_for_subtraction_trick,
gpair, force_read_by_column);
gpair, depth, force_read_by_column);
}

void SyncHistogramDistributed(
15 changes: 9 additions & 6 deletions src/tree/updater_approx.cc
@@ -73,8 +73,8 @@ class GloablApproxBuilder {
n_batches_++;
}

histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), ctx_->Threads(), n_batches_,
collective::IsDistributed());
histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), param_, col_sampler_,
ctx_->Threads(), n_batches_, collective::IsDistributed());
monitor_->Stop(__func__);
}

@@ -91,10 +91,11 @@
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
std::vector<CPUExpandEntry> nodes{best};
size_t i = 0;
const int depth = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
{}, gpair);
{}, gpair, depth);
i++;
}

@@ -122,7 +123,8 @@

void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
std::vector<CPUExpandEntry> const &valid_candidates,
std::vector<GradientPair> const &gpair, common::Span<float> hess) {
std::vector<GradientPair> const &gpair, common::Span<float> hess,
int depth) {
monitor_->Start(__func__);
std::vector<CPUExpandEntry> nodes_to_build;
std::vector<CPUExpandEntry> nodes_to_sub;
@@ -145,7 +147,7 @@
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes_to_build, nodes_to_sub, gpair);
nodes_to_build, nodes_to_sub, gpair, depth);
i++;
}
monitor_->Stop(__func__);
@@ -195,6 +197,7 @@
*/

while (!expand_set.empty()) {
const int depth = expand_set.front().depth + 1;
// candidates that can be further split.
std::vector<CPUExpandEntry> valid_candidates;
// candidates that can be applied.
@@ -217,7 +220,7 @@

std::vector<CPUExpandEntry> best_splits;
if (!valid_candidates.empty()) {
this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair, hess);
this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair, hess, depth);
for (auto const &candidate : valid_candidates) {
int left_child_nidx = tree[candidate.nid].LeftChild();
int right_child_nidx = tree[candidate.nid].RightChild();
13 changes: 7 additions & 6 deletions src/tree/updater_quantile_hist.cc
@@ -64,13 +64,14 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0), 0.0f);

size_t page_id = 0;
const int depth = 0;
auto space = ConstructHistSpace(partitioner_, {node});
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
std::vector<CPUExpandEntry> nodes_to_build{node};
std::vector<CPUExpandEntry> nodes_to_sub;
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair_h);
nodes_to_sub, gpair_h, depth);
++page_id;
}

@@ -120,7 +121,7 @@

void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
std::vector<CPUExpandEntry> const &valid_candidates,
std::vector<GradientPair> const &gpair) {
std::vector<GradientPair> const &gpair, int depth) {
std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());

@@ -145,7 +146,7 @@ void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair);
nodes_to_sub, gpair, depth);
++page_id;
}
}
@@ -197,7 +198,7 @@ void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,

std::vector<CPUExpandEntry> best_splits;
if (!valid_candidates.empty()) {
this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair_h);
this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair_h, depth);
for (auto const &candidate : valid_candidates) {
int left_child_nidx = tree[candidate.nid].LeftChild();
int right_child_nidx = tree[candidate.nid].RightChild();
@@ -312,8 +313,8 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
partitioner_.emplace_back(page.Size(), page.base_rowid, this->ctx_->Threads());
++page_id;
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed());
histogram_builder_->Reset(n_total_bins, HistBatch(param_), param_, column_sampler_,
ctx_->Threads(), page_id, collective::IsDistributed());

if (param_.subsample < 1.0f) {
CHECK_EQ(param_.sampling_method, TrainParam::kUniform)
2 changes: 1 addition & 1 deletion src/tree/updater_quantile_hist.h
@@ -297,7 +297,7 @@ class QuantileHistMaker: public TreeUpdater {

void BuildHistogram(DMatrix* p_fmat, RegTree* p_tree,
std::vector<CPUExpandEntry> const& valid_candidates,
std::vector<GradientPair> const& gpair);
std::vector<GradientPair> const& gpair, int depth);

void LeafPartition(RegTree const& tree, common::Span<GradientPair const> gpair,
std::vector<bst_node_t>* p_out_position);