forked from dmlc/xgboost
/
gradient_index.h
250 lines (224 loc) · 9.38 KB
/
gradient_index.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/*!
* Copyright 2017-2022 by XGBoost Contributors
* \brief Data type for fast histogram aggregation.
*/
#ifndef XGBOOST_DATA_GRADIENT_INDEX_H_
#define XGBOOST_DATA_GRADIENT_INDEX_H_
#include <algorithm> // std::min
#include <memory>
#include <vector>
#include "../common/categorical.h"
#include "../common/hist_util.h"
#include "../common/numeric.h"
#include "../common/threading_utils.h"
#include "adapter.h"
#include "proxy_dmatrix.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
namespace xgboost {
namespace common {
class ColumnMatrix;
} // namespace common
/*!
* \brief preprocessed global index matrix, in CSR format
*
* Transform floating values to integer index in histogram This is a global histogram
* index for CPU histogram. On GPU ellpack page is used.
*/
class GHistIndexMatrix {
// Get the size of each row
template <typename AdapterBatchT>
auto GetRowCounts(AdapterBatchT const& batch, float missing, int32_t n_threads) {
std::vector<size_t> valid_counts(batch.Size(), 0);
common::ParallelFor(batch.Size(), n_threads, [&](size_t i) {
auto line = batch.GetLine(i);
for (size_t j = 0; j < line.Size(); ++j) {
data::COOTuple elem = line.GetElement(j);
if (data::IsValidFunctor {missing}(elem)) {
valid_counts[i]++;
}
}
});
return valid_counts;
}
/**
* \brief Push a page into index matrix, the function is only necessary because hist has
* partial support for external memory.
*/
void PushBatch(SparsePage const& batch, common::Span<FeatureType const> ft, int32_t n_threads);
template <typename Batch, typename BinIdxType, typename GetOffset, typename IsValid>
void SetIndexData(common::Span<BinIdxType> index_data_span, size_t rbegin,
common::Span<FeatureType const> ft, size_t batch_threads, Batch const& batch,
IsValid&& is_valid, size_t nbins, GetOffset&& get_offset) {
auto batch_size = batch.Size();
BinIdxType* index_data = index_data_span.data();
auto const& ptrs = cut.Ptrs();
auto const& values = cut.Values();
common::ParallelFor(batch_size, batch_threads, [&](size_t i) {
auto line = batch.GetLine(i);
size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block
size_t k = 0;
auto tid = omp_get_thread_num();
for (size_t j = 0; j < line.Size(); ++j) {
data::COOTuple elem = line.GetElement(j);
if (is_valid(elem)) {
bst_bin_t bin_idx{-1};
if (common::IsCat(ft, elem.column_idx)) {
bin_idx = cut.SearchCatBin(elem.value, elem.column_idx);
} else {
bin_idx = cut.SearchBin(elem.value, elem.column_idx, ptrs, values);
}
index_data[ibegin + k] = get_offset(bin_idx, j);
++hit_count_tloc_[tid * nbins + bin_idx];
++k;
}
}
});
}
template <typename Batch, typename IsValid>
void PushBatchImpl(int32_t n_threads, Batch const& batch, size_t rbegin, IsValid&& is_valid,
common::Span<FeatureType const> ft) {
// The number of threads is pegged to the batch size. If the OMP block is parallelized
// on anything other than the batch/block size, it should be reassigned
size_t batch_threads =
std::max(static_cast<size_t>(1), std::min(batch.Size(), static_cast<size_t>(n_threads)));
auto n_bins_total = cut.TotalBins();
const size_t n_index = row_ptr[rbegin + batch.Size()]; // number of entries in this page
ResizeIndex(n_index, isDense_);
if (isDense_) {
index.SetBinOffset(cut.Ptrs());
}
uint32_t const* offsets = index.Offset();
if (isDense_) {
// Inside the lambda functions, bin_idx is the index for cut value across all
// features. By subtracting it with starting pointer of each feature, we can reduce
// it to smaller value and compress it to smaller types.
common::DispatchBinType(index.GetBinTypeSize(), [&](auto dtype) {
using T = decltype(dtype);
common::Span<T> index_data_span = {index.data<T>(), index.Size()};
SetIndexData(
index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,
[offsets](auto bin_idx, auto fidx) { return static_cast<T>(bin_idx - offsets[fidx]); });
});
} else {
/* For sparse DMatrix we have to store index of feature for each bin
in index field to chose right offset. So offset is nullptr and index is
not reduced */
common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
SetIndexData(index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,
[](auto idx, auto) { return idx; });
}
common::ParallelFor(n_bins_total, n_threads, [&](bst_omp_uint idx) {
for (int32_t tid = 0; tid < n_threads; ++tid) {
hit_count[idx] += hit_count_tloc_[tid * n_bins_total + idx];
hit_count_tloc_[tid * n_bins_total + idx] = 0; // reset for next batch
}
});
}
public:
/*! \brief row pointer to rows by element position */
std::vector<size_t> row_ptr;
/*! \brief The index data */
common::Index index;
/*! \brief hit count of each index */
std::vector<size_t> hit_count;
/*! \brief The corresponding cuts */
common::HistogramCuts cut;
/*! \brief max_bin for each feature. */
size_t max_num_bins;
/*! \brief base row index for current page (used by external memory) */
size_t base_rowid{0};
~GHistIndexMatrix();
/**
* \brief Constrcutor for SimpleDMatrix.
*/
GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
/**
* \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
* for push batch.
*/
GHistIndexMatrix(MetaInfo const& info, common::HistogramCuts&& cuts, bst_bin_t max_bin_per_feat);
/**
* \brief Constructor for external memory.
*/
GHistIndexMatrix(SparsePage const& page, common::Span<FeatureType const> ft,
common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense,
double sparse_thresh, int32_t n_threads);
GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back.
template <typename Batch>
void PushAdapterBatch(Context const* ctx, size_t rbegin, size_t prev_sum, Batch const& batch,
float missing, common::Span<FeatureType const> ft, double sparse_thresh,
size_t n_samples_total) {
auto n_bins_total = cut.TotalBins();
hit_count_tloc_.clear();
hit_count_tloc_.resize(ctx->Threads() * n_bins_total, 0);
auto n_threads = ctx->Threads();
auto valid_counts = GetRowCounts(batch, missing, n_threads);
auto it = common::MakeIndexTransformIter([&](size_t ridx) { return valid_counts[ridx]; });
common::PartialSum(n_threads, it, it + batch.Size(), prev_sum, row_ptr.begin() + rbegin);
auto is_valid = data::IsValidFunctor{missing};
PushBatchImpl(ctx->Threads(), batch, rbegin, is_valid, ft);
if (rbegin + batch.Size() == n_samples_total) {
// finished
CHECK(!std::isnan(sparse_thresh));
this->columns_ = std::make_unique<common::ColumnMatrix>(*this, sparse_thresh);
}
}
// Call ColumnMatrix::PushBatch
template <typename Batch>
void PushAdapterBatchColumns(Context const* ctx, Batch const& batch, float missing,
size_t rbegin);
void ResizeIndex(const size_t n_index, const bool isDense);
void GetFeatureCounts(size_t* counts) const {
auto nfeature = cut.Ptrs().size() - 1;
for (unsigned fid = 0; fid < nfeature; ++fid) {
auto ibegin = cut.Ptrs()[fid];
auto iend = cut.Ptrs()[fid + 1];
for (auto i = ibegin; i < iend; ++i) {
counts[fid] += hit_count[i];
}
}
}
bool IsDense() const {
return isDense_;
}
void SetDense(bool is_dense) { isDense_ = is_dense; }
/**
* \brief Get the local row index.
*/
size_t RowIdx(size_t ridx) const { return row_ptr[ridx - base_rowid]; }
bst_row_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
bst_feature_t Features() const { return cut.Ptrs().size() - 1; }
bool ReadColumnPage(dmlc::SeekStream* fi);
size_t WriteColumnPage(dmlc::Stream* fo) const;
common::ColumnMatrix const& Transpose() const;
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
private:
std::unique_ptr<common::ColumnMatrix> columns_;
std::vector<size_t> hit_count_tloc_;
bool isDense_;
};
/**
* \brief Should we regenerate the gradient index?
*
* \param old Parameter stored in DMatrix.
* \param p New parameter passed in by caller.
*/
inline bool RegenGHist(BatchParam old, BatchParam p) {
// parameter is renewed or caller requests a regen
if (p == BatchParam{}) {
// empty parameter is passed in, don't regenerate so that we can use gindex in
// predictor, which doesn't have any training parameter.
return false;
}
// Avoid comparing nan values.
bool l_nan = std::isnan(old.sparse_thresh);
bool r_nan = std::isnan(p.sparse_thresh);
// regenerate if parameter is changed.
bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh));
bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin;
return p.regen || param_chg || st_chg;
}
} // namespace xgboost
#endif // XGBOOST_DATA_GRADIENT_INDEX_H_