Move GHistIndex into DMatrix. #7064

Merged
merged 4 commits on Jun 30, 2021
1 change: 1 addition & 0 deletions amalgamation/xgboost-all0.cc
@@ -38,6 +38,7 @@
#include "../src/data/sparse_page_raw_format.cc"
#include "../src/data/ellpack_page.cc"
#include "../src/data/ellpack_page_source.cc"
#include "../src/data/gradient_index.cc"

// prediction
#include "../src/predictor/predictor.cc"
8 changes: 8 additions & 0 deletions include/xgboost/data.h
@@ -385,6 +385,8 @@ class EllpackPage {
std::unique_ptr<EllpackPageImpl> impl_;
};

class GHistIndexMatrix;

template<typename T>
class BatchIteratorImpl {
public:
@@ -553,6 +555,7 @@ class DMatrix {
virtual BatchSet<CSCPage> GetColumnBatches() = 0;
virtual BatchSet<SortedCSCPage> GetSortedColumnBatches() = 0;
virtual BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) = 0;
virtual BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) = 0;

virtual bool EllpackExists() const = 0;
virtual bool SparsePageExists() const = 0;
@@ -587,6 +590,11 @@ template<>
inline BatchSet<EllpackPage> DMatrix::GetBatches(const BatchParam& param) {
return GetEllpackBatches(param);
}

template<>
inline BatchSet<GHistIndexMatrix> DMatrix::GetBatches(const BatchParam& param) {
return GetGradientIndex(param);
}
} // namespace xgboost

namespace dmlc {
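The data.h hunk above adds a GetGradientIndex() virtual to DMatrix and a GetBatches<GHistIndexMatrix> specialization that forwards to it, so the quantized gradient index becomes available through the same batch interface as SparsePage and EllpackPage. A minimal caller-side sketch, not part of this diff (the function name, the device and max_bin values, and the BatchParam constructor arguments are illustrative assumptions):

#include <xgboost/data.h>

// Sketch only: request the gradient-index (quantized) batches of a DMatrix
// through the new GetBatches<GHistIndexMatrix> specialization.
void VisitGradientIndex(xgboost::DMatrix* p_fmat) {
  // Assumed BatchParam shape: device id plus maximum number of bins.
  xgboost::BatchParam param{/*device=*/-1, /*max_bin=*/256};
  for (auto const& page : p_fmat->GetBatches<xgboost::GHistIndexMatrix>(param)) {
    // Each page holds the binned representation used by the hist tree method.
    (void)page;
  }
}

The specialization mirrors the existing EllpackPage overload directly above it, keeping the batch-fetching API uniform across page types.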
8 changes: 5 additions & 3 deletions src/common/column_matrix.h
@@ -12,6 +12,7 @@
#include <vector>
#include <memory>
#include "hist_util.h"
#include "../data/gradient_index.h"

namespace xgboost {
namespace common {
@@ -262,9 +263,10 @@ class ColumnMatrix {
return res;
}

template<typename T>
inline void SetIndexAllDense(T* index, const GHistIndexMatrix& gmat, const size_t nrow,
const size_t nfeature, const bool noMissingValues) {
template <typename T>
inline void SetIndexAllDense(T *index, const GHistIndexMatrix &gmat,
const size_t nrow, const size_t nfeature,
const bool noMissingValues) {
T* local_index = reinterpret_cast<T*>(&index_[0]);

/* missing values make sense only for column with type kDenseColumn,
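Both SetIndexAllDense above and the idx - offsets[j] lambdas in the hist_util.cc hunk below rely on the same trick: for a dense matrix, each bin index is stored relative to its feature's first cut, so the per-element index fits the narrow integer type (uint8_t or uint16_t) selected by ResizeIndex. A standalone sketch of that compression idea, with hypothetical names that are not part of the diff:

#include <cstddef>
#include <cstdint>
#include <vector>

// Compress global bin indices of a dense, row-major matrix into
// per-feature-relative indices. cut_ptrs[f] is the first global bin id of
// feature f, so each relative index stays below that feature's bin count and
// fits in uint8_t whenever no feature has more than 256 bins.
std::vector<uint8_t> CompressDenseIndex(const std::vector<uint32_t>& global_bins,
                                        const std::vector<uint32_t>& cut_ptrs,
                                        std::size_t n_features) {
  std::vector<uint8_t> compressed(global_bins.size());
  for (std::size_t i = 0; i < global_bins.size(); ++i) {
    std::size_t feature = i % n_features;  // dense layout: one entry per feature per row
    compressed[i] = static_cast<uint8_t>(global_bins[i] - cut_ptrs[feature]);
  }
  return compressed;
}

For sparse data the offsets cannot be applied, which is why the else branch in the removed Init below keeps the full uint32_t index.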
205 changes: 26 additions & 179 deletions src/common/hist_util.cc
@@ -16,6 +16,7 @@
#include "column_matrix.h"
#include "quantile.h"
#include "./../tree/updater_quantile_hist.h"
#include "../data/gradient_index.h"

#if defined(XGBOOST_MM_PREFETCH_PRESENT)
#include <xmmintrin.h>
@@ -29,164 +30,10 @@
namespace xgboost {
namespace common {

void GHistIndexMatrix::ResizeIndex(const size_t n_index,
const bool isDense) {
if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
index.SetBinTypeSize(kUint8BinsTypeSize);
index.Resize((sizeof(uint8_t)) * n_index);
} else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
index.SetBinTypeSize(kUint16BinsTypeSize);
index.Resize((sizeof(uint16_t)) * n_index);
} else {
index.SetBinTypeSize(kUint32BinsTypeSize);
index.Resize((sizeof(uint32_t)) * n_index);
}
}

HistogramCuts::HistogramCuts() {
cut_ptrs_.HostVector().emplace_back(0);
}

void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {
cut = SketchOnDMatrix(p_fmat, max_bins);

max_num_bins = max_bins;
const int32_t nthread = omp_get_max_threads();
const uint32_t nbins = cut.Ptrs().back();
hit_count.resize(nbins, 0);
hit_count_tloc_.resize(nthread * nbins, 0);

this->p_fmat = p_fmat;
size_t new_size = 1;
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
new_size += batch.Size();
}

row_ptr.resize(new_size);
row_ptr[0] = 0;

size_t rbegin = 0;
size_t prev_sum = 0;
const bool isDense = p_fmat->IsDense();
this->isDense_ = isDense;

for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// The number of threads is pegged to the batch size. If the OMP
// block is parallelized on anything other than the batch/block size,
// it should be reassigned
const size_t batch_threads = std::max(
size_t(1),
std::min(batch.Size(), static_cast<size_t>(omp_get_max_threads())));
auto page = batch.GetView();
MemStackAllocator<size_t, 128> partial_sums(batch_threads);
size_t* p_part = partial_sums.Get();

size_t block_size = batch.Size() / batch_threads;

dmlc::OMPException exc;
#pragma omp parallel num_threads(batch_threads)
{
#pragma omp for
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
exc.Run([&]() {
size_t ibegin = block_size * tid;
size_t iend = (tid == (batch_threads-1) ? batch.Size() : (block_size * (tid+1)));

size_t sum = 0;
for (size_t i = ibegin; i < iend; ++i) {
sum += page[i].size();
row_ptr[rbegin + 1 + i] = sum;
}
});
}

#pragma omp single
{
exc.Run([&]() {
p_part[0] = prev_sum;
for (size_t i = 1; i < batch_threads; ++i) {
p_part[i] = p_part[i - 1] + row_ptr[rbegin + i*block_size];
}
});
}

#pragma omp for
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
exc.Run([&]() {
size_t ibegin = block_size * tid;
size_t iend = (tid == (batch_threads-1) ? batch.Size() : (block_size * (tid+1)));

for (size_t i = ibegin; i < iend; ++i) {
row_ptr[rbegin + 1 + i] += p_part[tid];
}
});
}
}
exc.Rethrow();

const size_t n_offsets = cut.Ptrs().size() - 1;
const size_t n_index = row_ptr[rbegin + batch.Size()];
ResizeIndex(n_index, isDense);

CHECK_GT(cut.Values().size(), 0U);

uint32_t* offsets = nullptr;
if (isDense) {
index.ResizeOffset(n_offsets);
offsets = index.Offset();
for (size_t i = 0; i < n_offsets; ++i) {
offsets[i] = cut.Ptrs()[i];
}
}

if (isDense) {
BinTypeSize curent_bin_size = index.GetBinTypeSize();
if (curent_bin_size == kUint8BinsTypeSize) {
common::Span<uint8_t> index_data_span = {index.data<uint8_t>(),
n_index};
SetIndexData(index_data_span, batch_threads, batch, rbegin, nbins,
[offsets](auto idx, auto j) {
return static_cast<uint8_t>(idx - offsets[j]);
});

} else if (curent_bin_size == kUint16BinsTypeSize) {
common::Span<uint16_t> index_data_span = {index.data<uint16_t>(),
n_index};
SetIndexData(index_data_span, batch_threads, batch, rbegin, nbins,
[offsets](auto idx, auto j) {
return static_cast<uint16_t>(idx - offsets[j]);
});
} else {
CHECK_EQ(curent_bin_size, kUint32BinsTypeSize);
common::Span<uint32_t> index_data_span = {index.data<uint32_t>(),
n_index};
SetIndexData(index_data_span, batch_threads, batch, rbegin, nbins,
[offsets](auto idx, auto j) {
return static_cast<uint32_t>(idx - offsets[j]);
});
}

/* For sparse DMatrix we have to store index of feature for each bin
in index field to chose right offset. So offset is nullptr and index is not reduced */
} else {
common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
SetIndexData(index_data_span, batch_threads, batch, rbegin, nbins,
[](auto idx, auto) { return idx; });
}

ParallelFor(bst_omp_uint(nbins), nthread, [&](bst_omp_uint idx) {
for (int32_t tid = 0; tid < nthread; ++tid) {
hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
hit_count_tloc_[tid * nbins + idx] = 0; // reset for next batch
}
});

prev_sum = row_ptr[rbegin + batch.Size()];
rbegin += batch.Size();
}
}

/*!
* \brief fill a histogram by zeros in range [begin, end)
*/
@@ -289,9 +136,9 @@ constexpr size_t Prefetch::kNoPrefetchSize;

template<typename FPType, bool do_prefetch, typename BinIdxType, bool any_missing = true>
void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType> hist) {
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType> hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
@@ -337,8 +184,8 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,

template<typename FPType, bool do_prefetch, bool any_missing>
void BuildHistDispatch(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, GHistRow<FPType> hist) {
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, GHistRow<FPType> hist) {
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
BuildHistKernel<FPType, do_prefetch, uint8_t, any_missing>(gpair, row_indices,
@@ -382,26 +229,26 @@ void GHistBuilder<GradientSumT>::BuildHist(
BuildHistDispatch<GradientSumT, false, any_missing>(gpair, span2, gmat, hist);
}
}
template
void GHistBuilder<float>::BuildHist<true>(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<float> hist);
template
void GHistBuilder<float>::BuildHist<false>(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<float> hist);
template
void GHistBuilder<double>::BuildHist<true>(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<double> hist);
template
void GHistBuilder<double>::BuildHist<false>(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<double> hist);
template void
GHistBuilder<float>::BuildHist<true>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
GHistRow<float> hist);
template void
GHistBuilder<float>::BuildHist<false>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
GHistRow<float> hist);
template void
GHistBuilder<double>::BuildHist<true>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
GHistRow<double> hist);
template void
GHistBuilder<double>::BuildHist<false>(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
GHistRow<double> hist);

template<typename GradientSumT>
void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT self,