Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization/buildhist/colwisebuildhist #8233

Merged
2 changes: 2 additions & 0 deletions src/common/column_matrix.cc
Expand Up @@ -23,10 +23,12 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres
gmat.GetFeatureCounts(feature_counts.data());

// classify features
any_sparse_column_ = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any_sparse_column_ = !all_dense_column

for (bst_feature_t fid = 0; fid < nfeature; ++fid) {
if (static_cast<double>(feature_counts[fid]) < sparse_threshold * nrow) {
type_[fid] = kSparseColumn;
all_dense_column = false;
any_sparse_column_ = true;
} else {
type_[fid] = kDenseColumn;
}
Expand Down
5 changes: 5 additions & 0 deletions src/common/column_matrix.h
Expand Up @@ -221,6 +221,10 @@ class ColumnMatrix {
}
}

/*! \brief Accessor for the any_sparse_column_ flag. */
bool AnySparseColumn() const { return any_sparse_column_; }

/* Set the number of bytes based on numeric limit of maximum number of bins provided by user */
void SetTypeSize(size_t max_bin_per_feat) {
if ((max_bin_per_feat - 1) <= static_cast<int>(std::numeric_limits<uint8_t>::max())) {
Expand Down Expand Up @@ -430,6 +434,7 @@ class ColumnMatrix {
std::vector<bool> missing_flags_;
BinTypeSize bins_type_size_;
bool any_missing_;
bool any_sparse_column_ = false;
};
} // namespace common
} // namespace xgboost
Expand Down
189 changes: 146 additions & 43 deletions src/common/hist_util.cc
Expand Up @@ -139,10 +139,27 @@ struct Prefetch {

constexpr size_t Prefetch::kNoPrefetchSize;

/*!
 * \brief Compile-time switches, plus the sampled feature list, consumed by the
 *        histogram building kernels.
 *
 * \tparam column_sampling Exposed as kColumnSampling; when set, kernels iterate
 *                         over `fids` instead of over every feature.
 * \tparam read_by_column  Exposed as kReadByColumn; selects the column-wise kernel.
 */
template <bool column_sampling, bool read_by_column>
struct GHistBuildingManager {
  /*!
   * \brief Gather the feature ids sampled for `depth`.
   *
   * `fids` is filled only when both template flags are set; in every other
   * configuration it stays empty and `column_sampler` is not touched.
   *
   * \param column_sampler Source of the sampled feature set.
   * \param depth          Tree depth passed to GetFeatureSet().
   */
  GHistBuildingManager(std::shared_ptr<common::ColumnSampler> column_sampler, int depth) {
    if (column_sampling && read_by_column) {
      // Query the sampler once instead of once per loop iteration.
      const auto feature_set = column_sampler->GetFeatureSet(depth);
      const auto &sampled = feature_set->ConstHostVector();
      const size_t n_sampled_features = feature_set->Size();
      fids.resize(n_sampled_features);
      for (size_t i = 0; i < n_sampled_features; ++i) {
        fids[i] = sampled[i];
      }
    }
  }

  /*! \brief Sampled feature ids; empty unless both template flags are true. */
  std::vector<int> fids;
  constexpr static bool kColumnSampling = column_sampling;
  constexpr static bool kReadByColumn = read_by_column;
};

template <bool do_prefetch, typename BinIdxType, bool first_page, bool any_missing = true>
void BuildHistKernel(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void RowsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
auto const *pgh = reinterpret_cast<const float *>(gpair.data());
Expand Down Expand Up @@ -204,75 +221,161 @@ void BuildHistKernel(const std::vector<GradientPair> &gpair,
}
}

template <bool do_prefetch, bool any_missing>
/*!
 * \brief Accumulate a histogram by walking the gradient index column by column.
 *
 * For every processed column, each row listed in `row_indices` adds its
 * gradient and hessian to the histogram bin selected by that row's bin index
 * for the column.
 *
 * \tparam BinIdxType  Element type used to read gmat.index.
 * \tparam first_page  When false, row ids are shifted by gmat.base_rowid before
 *                     the gradient index is addressed.
 * \tparam any_missing When true, a row's entries start at gmat.row_ptr[row];
 *                     otherwise they start at row * n_features and the value
 *                     from gmat.index.Offset() is added to the stored bin index.
 * \tparam GHistBuildingManager Provides kColumnSampling and the `fids` list.
 *
 * \param gpair        Gradient pairs, read as a flat float array.
 * \param row_indices  Rows to accumulate.
 * \param gmat         Gradient index matrix.
 * \param hist         Histogram, updated in place as a flat double array.
 * \param hbm          Building manager; `fids` is read only when
 *                     kColumnSampling is true.
 */
template <typename BinIdxType, bool first_page, bool any_missing, class GHistBuildingManager>
void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
                             const RowSetCollection::Elem row_indices,
                             const GHistIndexMatrix &gmat,
                             GHistRow hist, const GHistBuildingManager& hbm) {
  const size_t n_rows = row_indices.Size();
  const size_t *row_ids = row_indices.begin;
  auto const *grad_flat = reinterpret_cast<const float *>(gpair.data());
  const BinIdxType *bin_index = gmat.index.data<BinIdxType>();

  auto const row_offsets = gmat.row_ptr.data();
  auto const page_base = gmat.base_rowid;
  const uint32_t *feature_offsets = gmat.index.Offset();

  const size_t n_features = gmat.cut.Ptrs().size() - 1;
  const size_t n_columns = GHistBuildingManager::kColumnSampling ? hbm.fids.size() : n_features;
  auto *hist_flat = reinterpret_cast<double *>(hist.data());
  // Every element of 'gpair' and 'hist' carries two FP values (gradient and
  // hessian), so row and bin indices are scaled by 2 to address them as a
  // single flat FP array.
  constexpr uint32_t kPairWidth = 2;

  for (size_t col = 0; col < n_columns; ++col) {
    const size_t feature = GHistBuildingManager::kColumnSampling ? hbm.fids[col] : col;
    for (size_t i = 0; i < n_rows; ++i) {
      const size_t row_id = row_ids[i];
      // Row position relative to this page.
      const auto page_row = first_page ? row_id : (row_id - page_base);
      const size_t row_begin = any_missing ? row_offsets[page_row] : page_row * n_features;

      // NOTE(review): with any_missing == true the row's segment is indexed
      // directly by the feature id -- confirm callers guarantee an entry at
      // that position for every row.
      const uint32_t bin =
          kPairWidth * (static_cast<uint32_t>(bin_index[row_begin + feature]) +
                        (any_missing ? 0 : feature_offsets[feature]));
      double *bin_slot = hist_flat + bin;

      const size_t gh = kPairWidth * row_id;
      // Staging the pair in a small local buffer helps the compiler to
      // generate faster binary.
      const float gh_pair[] = {grad_flat[gh], grad_flat[gh + 1]};
      bin_slot[0] += gh_pair[0];
      bin_slot[1] += gh_pair[1];
    }
  }
}

/*!
 * \brief Forward to the column-wise or the row-wise kernel, chosen by
 *        GHistBuildingManager::kReadByColumn.
 *
 * `do_prefetch` is passed on to the row-wise kernel only, and `hbm` to the
 * column-wise kernel only.
 */
template <bool do_prefetch, typename BinIdxType, bool first_page,
          bool any_missing, class GHistBuildingManager>
void BuildHistKernel(const std::vector<GradientPair> &gpair,
                     const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
                     GHistRow hist, const GHistBuildingManager& hbm) {
  if (!GHistBuildingManager::kReadByColumn) {
    RowsWiseBuildHistKernel<do_prefetch, BinIdxType, first_page, any_missing>(
        gpair, row_indices, gmat, hist);
    return;
  }
  ColsWiseBuildHistKernel<BinIdxType, first_page, any_missing>(
      gpair, row_indices, gmat, hist, hbm);
}

// Turns the run-time bin storage width reported by gmat.index.GetBinTypeSize()
// into the BinIdxType template argument of BuildHistKernel (uint8_t, uint16_t
// or uint32_t). Any other width fails the CHECK in the default branch.
template <bool do_prefetch, bool first_page, bool any_missing, class GHistBuildingManager>
void BuildHistDispatch(const std::vector<GradientPair> &gpair,
trivialfis marked this conversation as resolved.
Show resolved Hide resolved
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist, const GHistBuildingManager& hbm) {
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
BuildHistKernel<do_prefetch, uint8_t, first_page, any_missing>
(gpair, row_indices, gmat, hist, hbm);
break;
case kUint16BinsTypeSize:
BuildHistKernel<do_prefetch, uint16_t, first_page, any_missing>
(gpair, row_indices, gmat, hist, hbm);
break;
case kUint32BinsTypeSize:
BuildHistKernel<do_prefetch, uint32_t, first_page, any_missing>
(gpair, row_indices, gmat, hist, hbm);
break;
default:
CHECK(false); // no default behavior
}
}

// Turns the run-time condition `gmat.base_rowid == 0` into the first_page
// template argument used by the next dispatch level.
template <bool do_prefetch, bool any_missing, class GHistBuildingManager>
void BuildHistDispatch(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
GHistRow hist, const GHistBuildingManager& hbm) {
auto first_page = gmat.base_rowid == 0;
if (first_page) {
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
BuildHistKernel<do_prefetch, uint8_t, true, any_missing>(gpair, row_indices, gmat, hist);
break;
case kUint16BinsTypeSize:
BuildHistKernel<do_prefetch, uint16_t, true, any_missing>(gpair, row_indices, gmat, hist);
break;
case kUint32BinsTypeSize:
BuildHistKernel<do_prefetch, uint32_t, true, any_missing>(gpair, row_indices, gmat, hist);
break;
default:
CHECK(false); // no default behavior
}
BuildHistDispatch<do_prefetch, true, any_missing>(gpair, row_indices, gmat, hist, hbm);
} else {
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
BuildHistKernel<do_prefetch, uint8_t, false, any_missing>(gpair, row_indices, gmat, hist);
break;
case kUint16BinsTypeSize:
BuildHistKernel<do_prefetch, uint16_t, false, any_missing>(gpair, row_indices, gmat, hist);
break;
case kUint32BinsTypeSize:
BuildHistKernel<do_prefetch, uint32_t, false, any_missing>(gpair, row_indices, gmat, hist);
break;
default:
CHECK(false); // no default behavior
}
BuildHistDispatch<do_prefetch, false, any_missing>(gpair, row_indices, gmat, hist, hbm);
}
}

template <bool any_missing>
void GHistBuilder::BuildHist(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) const {
// Chooses the do_prefetch template argument for the next dispatch level.
// When the row ids form one contiguous block, everything is processed with
// prefetching off. Otherwise the rows are split in two: all but the last
// `no_prefetch_size` rows are processed with prefetching on, and the trailing
// `no_prefetch_size` rows with prefetching off.
template <bool any_missing, class GHistBuildingManager>
void BuildHistDispatch(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist, const GHistBuildingManager& hbm) {
const size_t nrows = row_indices.Size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);

// if need to work with all rows from bin-matrix (e.g. root node)
// NOTE(review): begin[nrows - 1] is read unconditionally -- presumably callers
// never pass an empty row set; confirm.
const bool contiguousBlock =
(row_indices.begin[nrows - 1] - row_indices.begin[0]) == (nrows - 1);

if (contiguousBlock) {
// contiguous memory access, built-in HW prefetching is enough
BuildHistDispatch<false, any_missing>(gpair, row_indices,
gmat, hist);
BuildHistDispatch<false, any_missing>(gpair, row_indices, gmat, hist, hbm);
} else {
const RowSetCollection::Elem span1(row_indices.begin,
row_indices.end - no_prefetch_size);
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size,
row_indices.end);

BuildHistDispatch<true, any_missing>(gpair, span1, gmat, hist);
BuildHistDispatch<true, any_missing>(gpair, span1, gmat, hist, hbm);
// no prefetching to avoid loading extra memory
BuildHistDispatch<false, any_missing>(gpair, span2, gmat, hist);
BuildHistDispatch<false, any_missing>(gpair, span2, gmat, hist, hbm);
}
}

/*!
 * \brief Entry point of histogram construction: picks row-wise or column-wise
 *        traversal and hands over to the dispatch chain.
 *
 * Column-wise traversal is used whenever `column_sampling` is requested, and
 * otherwise only when there are no missing values and the histogram size
 * estimate does not fit the ad-hoc budget below.
 *
 * \tparam any_missing   Forwarded to the dispatch chain; also feeds the
 *                       traversal choice.
 * \param gpair          Gradient pairs.
 * \param row_indices    Rows to accumulate.
 * \param gmat           Gradient index matrix.
 * \param hist           Histogram to update.
 * \param column_sampler Sampler handed to the building manager.
 * \param depth          Depth handed to the building manager.
 * \param column_sampling Whether only the sampled features are processed.
 */
template <bool any_missing>
void GHistBuilder::BuildHist(const std::vector<GradientPair> &gpair,
                             const RowSetCollection::Elem row_indices,
                             const GHistIndexMatrix &gmat,
                             GHistRow hist, std::shared_ptr<ColumnSampler> column_sampler,
                             int depth, bool column_sampling) const {
  // Ad-hoc byte budget (80% of 1 MiB); presumably a stand-in for the L2 cache size.
  constexpr double kAdhocL2Size = 1024 * 1024 * 0.8;
  const bool hist_fit_to_l2 = kAdhocL2Size > 2*sizeof(float)*gmat.cut.Ptrs().back();
  const bool read_by_column = column_sampling || (!hist_fit_to_l2 && !any_missing);

  if (!read_by_column) {
    // Row-wise traversal: the column_sampling flag is irrelevant here.
    GHistBuildingManager<false, false> hbm(column_sampler, depth);
    BuildHistDispatch<any_missing>(gpair, row_indices, gmat, hist, hbm);
    return;
  }

  if (column_sampling) {
    GHistBuildingManager<true, true> hbm(column_sampler, depth);
    BuildHistDispatch<any_missing>(gpair, row_indices, gmat, hist, hbm);
  } else {
    GHistBuildingManager<false, true> hbm(column_sampler, depth);
    BuildHistDispatch<any_missing>(gpair, row_indices, gmat, hist, hbm);
  }
}

template void GHistBuilder::BuildHist<true>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) const;
const GHistIndexMatrix &gmat, GHistRow hist,
std::shared_ptr<ColumnSampler> column_sampler,
int depth, bool column_sampling) const;

template void GHistBuilder::BuildHist<false>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) const;
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
std::shared_ptr<ColumnSampler> column_sampler,
int depth, bool column_sampling) const;
} // namespace common
} // namespace xgboost
10 changes: 8 additions & 2 deletions src/common/hist_util.h
Expand Up @@ -16,6 +16,7 @@
#include <utility>
#include <map>

#include "random.h"
#include "categorical.h"
#include "common.h"
#include "quantile.h"
Expand Down Expand Up @@ -622,8 +623,12 @@ class GHistBuilder {

// construct a histogram via histogram aggregation
template <bool any_missing>
void BuildHist(const std::vector<GradientPair>& gpair, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, GHistRow hist) const;
void BuildHist(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
std::shared_ptr<ColumnSampler> column_sampler,
int depth, bool column_sampling) const;

/*! \brief Accessor for nbins_, the number of all bins over all features. */
uint32_t GetNumBins() const { return nbins_; }
Expand All @@ -632,6 +637,7 @@ class GHistBuilder {
/*! \brief number of all bins over all features */
uint32_t nbins_ { 0 };
};

} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_HIST_UTIL_H_