Commit

partition optimization
ShvetsKS committed Oct 24, 2021
1 parent fd61c61 commit 8ed8a91
Showing 17 changed files with 1,312 additions and 851 deletions.
3 changes: 2 additions & 1 deletion demo/guide-python/feature_weights.py
@@ -31,7 +31,8 @@ def main(args):
     feature_map = bst.get_fscore()
     # feature zero has 0 weight
     assert feature_map.get('f0', None) is None
-    assert max(feature_map.values()) == feature_map.get('f9')
+    # max weight depends on the rng call during colsample_bynode
+    # assert max(feature_map.values()) == feature_map.get('f9')

     if args.plot:
         xgboost.plot_importance(bst)
@@ -115,7 +115,7 @@ class XGBoostGeneralSuite extends FunSuite with TmpFolderPerSuite with PerTest {
     val eval = new EvalError()
     val training = buildDataFrame(Classification.train)
     val testDM = new DMatrix(Classification.test.iterator)
-    val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0",
+    val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6",
       "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide",
       "max_leaves" -> "8", "num_round" -> 5,
       "num_workers" -> numWorkers)
@@ -128,7 +128,7 @@ class XGBoostGeneralSuite extends FunSuite with TmpFolderPerSuite with PerTest {
     val eval = new EvalError()
     val training = buildDataFrame(Classification.train)
     val testDM = new DMatrix(Classification.test.iterator)
-    val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0",
+    val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
       "grow_policy" -> "lossguide", "max_leaves" -> "8", "max_bin" -> "16",
       "eval_metric" -> "error", "num_round" -> 5, "num_workers" -> numWorkers)
11 changes: 8 additions & 3 deletions src/common/column_matrix.h
@@ -83,7 +83,7 @@ class SparseColumn: public Column<BinIdxType> {
       ++(*state);
     }
     if (((*state) < column_size) && GetRowIdx(*state) == rid) {
-      return this->GetGlobalBinIdx(*state);
+      return this->GetFeatureBinIdx(*state);
     } else {
       return this->kMissingId;
     }
@@ -120,9 +120,9 @@ class DenseColumn: public Column<BinIdxType> {

   int32_t GetBinIdx(size_t idx, size_t* state) const {
     if (any_missing) {
-      return IsMissing(idx) ? this->kMissingId : this->GetGlobalBinIdx(idx);
+      return IsMissing(idx) ? this->kMissingId : this->GetFeatureBinIdx(idx);
     } else {
-      return this->GetGlobalBinIdx(idx);
+      return this->GetFeatureBinIdx(idx);
     }
   }

@@ -145,6 +145,11 @@ class ColumnMatrix {
     return static_cast<bst_uint>(type_.size());
   }

+  // get index data ptr
+  const uint8_t* GetIndexData() const {
+    return index_.data();
+  }
+
   // construct column matrix from GHistIndexMatrix
   inline void Init(const GHistIndexMatrix& gmat,
                    double sparse_threshold) {
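The new ColumnMatrix::GetIndexData() accessor simply exposes the packed bin-index buffer. Below is a minimal sketch of how a caller might consume it, for instance to make a local copy of the index that can later be handed to the histogram kernel as numa_data. The helper name CopyIndexData, the explicit byte-size argument, and the include path are assumptions for illustration, not part of this commit.

// Sketch only; not code from this commit.
#include <cstdint>
#include <cstring>
#include <vector>
#include "column_matrix.h"  // this file

namespace xgboost {
namespace common {

// Copies the packed bin indices into a caller-owned buffer. The caller must
// know the total size in bytes, since the element width (uint8_t, uint16_t or
// uint32_t) depends on how many bins the quantile cuts produced.
inline std::vector<uint8_t> CopyIndexData(const ColumnMatrix& columns,
                                          size_t index_size_bytes) {
  const uint8_t* src = columns.GetIndexData();
  std::vector<uint8_t> local(index_size_bytes);
  std::memcpy(local.data(), src, index_size_bytes);
  return local;
}

}  // namespace common
}  // namespace xgboost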
156 changes: 156 additions & 0 deletions src/common/hist_builder.h
@@ -0,0 +1,156 @@
/*!
 * Copyright 2017-2021 by Contributors
 * \file hist_builder.h
 */
#ifndef XGBOOST_COMMON_HIST_BUILDER_H_
#define XGBOOST_COMMON_HIST_BUILDER_H_

#include <algorithm>
#include <vector>
#include "hist_util.h"
#include "../data/gradient_index.h"

#if defined(XGBOOST_MM_PREFETCH_PRESENT)
#include <xmmintrin.h>
#define PREFETCH_READ_T0(addr) _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0)
#elif defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
#define PREFETCH_READ_T0(addr) __builtin_prefetch(reinterpret_cast<const char*>(addr), 0, 3)
#else // no SW pre-fetching available; PREFETCH_READ_T0 is no-op
#define PREFETCH_READ_T0(addr) do {} while (0)
#endif // defined(XGBOOST_MM_PREFETCH_PRESENT)

namespace xgboost {
namespace common {

struct Prefetch {
 public:
  static constexpr size_t kCacheLineSize = 64;
  static constexpr size_t kPrefetchOffset = 10;

 private:
  static constexpr size_t kNoPrefetchSize =
      kPrefetchOffset + kCacheLineSize /
                        sizeof(decltype(GHistIndexMatrix::row_ptr)::value_type);

 public:
  static size_t NoPrefetchSize(size_t rows) {
    return std::min(rows, kNoPrefetchSize);
  }

  template <typename T>
  static constexpr size_t GetPrefetchStep() {
    return Prefetch::kCacheLineSize / sizeof(T);
  }
};

template <typename FPType, bool do_prefetch,
          typename BinIdxType, bool is_root,
          bool any_missing>
void BuildHistKernel(const std::vector<GradientPair>& gpair,
                     const uint32_t* rows,
                     const uint32_t row_begin,
                     const uint32_t row_end,
                     const GHistIndexMatrix& gmat,
                     const BinIdxType* numa_data,
                     uint16_t* nodes_ids,
                     std::vector<std::vector<FPType>>* p_hists,
                     const uint16_t* mapping_ids) {
  const size_t size = row_end - row_begin;
  const float* pgh = reinterpret_cast<const float*>(gpair.data());
  const BinIdxType* gradient_index = numa_data;
  const size_t* row_ptr = gmat.row_ptr.data();
  const uint32_t* offsets = gmat.index.Offset();
  const size_t n_features = row_ptr[1] - row_ptr[0];
  const uint32_t two {2};  // Each element from 'gpair' and 'hist' contains
                           // 2 FP values: gradient and hessian.
                           // So we need to multiply each row-index/bin-index by 2
                           // to work with gradient pairs as a single row FP array
  std::vector<std::vector<FPType>>& hists = *p_hists;

  for (size_t i = row_begin; i < row_end; ++i) {
    const size_t ri = is_root ? i : rows[i];
    const size_t icol_start = any_missing ? row_ptr[ri] : ri * n_features;
    const size_t icol_end = any_missing ? row_ptr[ri + 1] : icol_start + n_features;
    const size_t row_size = icol_end - icol_start;
    const size_t idx_gh = two * ri;
    const uint32_t nid = is_root ? 0 : mapping_ids[nodes_ids[ri]];

    if (do_prefetch) {
      const size_t icol_start_prefetch = any_missing ?
          row_ptr[rows[i + Prefetch::kPrefetchOffset]] :
          rows[i + Prefetch::kPrefetchOffset] * n_features;
      const size_t icol_end_prefetch = any_missing ?
          row_ptr[rows[i + Prefetch::kPrefetchOffset] + 1] :
          icol_start_prefetch + n_features;

      PREFETCH_READ_T0(pgh + two * rows[i + Prefetch::kPrefetchOffset]);
      for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
           j += Prefetch::GetPrefetchStep<uint32_t>()) {
        PREFETCH_READ_T0(gradient_index + j);
      }
    } else if (is_root) {
      nodes_ids[ri] = 0;
    }

    const BinIdxType* gr_index_local = gradient_index + icol_start;
    FPType* hist_data = hists[nid].data();

    for (size_t j = 0; j < row_size; ++j) {
      const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
                                      (any_missing ? 0 : offsets[j]));
      hist_data[idx_bin] += pgh[idx_gh];
      hist_data[idx_bin + 1] += pgh[idx_gh + 1];
    }
  }
}
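For readers unfamiliar with the layout the `two` multiplier in the kernel above relies on: both gpair and each per-node histogram are treated as flat float arrays in which element 2*k holds a gradient and element 2*k+1 the matching hessian. The following standalone sketch of that accumulation pattern uses made-up bin indices and gradient values; it is an illustration only, not code from this commit.

// Illustration of the 2-stride gradient/hessian accumulation used above.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const std::size_t n_bins = 4;
  std::vector<float> hist(2 * n_bins, 0.0f);     // [g0, h0, g1, h1, ...]
  const float grad = 0.5f, hess = 1.0f;          // one row's gradient pair
  const std::vector<std::size_t> bins = {1, 3};  // bins this row falls into

  for (std::size_t bin : bins) {
    hist[2 * bin] += grad;       // gradient slot
    hist[2 * bin + 1] += hess;   // hessian slot
  }
  std::cout << hist[2] << " " << hist[3] << "\n";  // prints 0.5 1
  return 0;
}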

/*!
 * \brief builder for histograms of gradient statistics
 */
template <typename GradientSumT>
class GHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT>;
  GHistBuilder() = default;
  GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}

  // construct a histogram via histogram aggregation
  template <typename BinIdxType, bool any_missing, bool is_root>
  void BuildHist(const std::vector<GradientPair>& gpair,
                 const uint32_t* rows,
                 const uint32_t row_begin,
                 const uint32_t row_end,
                 const GHistIndexMatrix& gmat,
                 const BinIdxType* numa_data,
                 uint16_t* nodes_ids,
                 std::vector<std::vector<GradientSumT>>* p_hists,
                 const uint16_t* mapping_ids) {
    const size_t nrows = row_end - row_begin;
    const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);

    if (is_root) {
      // contiguous memory access, built-in HW prefetching is enough
      BuildHistKernel<GradientSumT, false, BinIdxType, true, any_missing>(
          gpair, rows, row_begin, row_end, gmat, numa_data, nodes_ids, p_hists, mapping_ids);
    } else {
      BuildHistKernel<GradientSumT, true, BinIdxType, false, any_missing>(
          gpair, rows, row_begin, row_end - no_prefetch_size,
          gmat, numa_data, nodes_ids, p_hists, mapping_ids);
      BuildHistKernel<GradientSumT, false, BinIdxType, false, any_missing>(
          gpair, rows, row_end - no_prefetch_size, row_end,
          gmat, numa_data, nodes_ids, p_hists, mapping_ids);
    }
  }

  uint32_t GetNumBins() const {
    return nbins_;
  }

 private:
  /*! \brief number of threads for parallel computation */
  size_t nthread_ { 0 };
  /*! \brief number of all bins over all features */
  uint32_t nbins_ { 0 };
};

} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_HIST_BUILDER_H_
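Below is a hedged sketch of how the new BuildHist entry point might be wired up for a single non-root node. The calling code is not part of this excerpt, so the wrapper name BuildNodeHist, the way the per-node histograms and id arrays are sized, and the choice of uint8_t bin indices with no missing values are all assumptions for illustration.

// Illustrative wiring only; not code from this commit.
#include <cstdint>
#include <vector>
#include "hist_builder.h"  // this file

namespace xgboost {
namespace common {

// Builds the histogram of one non-root node. `rows` lists the row indices in
// the node, `node_ids[r]` is the node id the partitioner assigned to row r,
// `mapping_ids` maps node ids to histogram slots, and `hists` holds one
// 2 * n_bins buffer per slot (gradient/hessian interleaved).
inline void BuildNodeHist(const GHistIndexMatrix& gmat,
                          const std::vector<GradientPair>& gpair,
                          const std::vector<uint32_t>& rows,
                          uint32_t n_bins,
                          const uint8_t* index_data,  // e.g. ColumnMatrix::GetIndexData()
                          uint16_t* node_ids,
                          const uint16_t* mapping_ids,
                          std::vector<std::vector<double>>* hists) {
  GHistBuilder<double> builder(/*nthread=*/1, n_bins);
  builder.BuildHist<uint8_t, /*any_missing=*/false, /*is_root=*/false>(
      gpair, rows.data(), 0u, static_cast<uint32_t>(rows.size()),
      gmat, index_data, node_ids, hists, mapping_ids);
}

}  // namespace common
}  // namespace xgboost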
