/*!
* Copyright 2014-2020 by Contributors
* \file gblinear.cc
 * \brief Implementation of the linear booster with elastic net (L1/L2)
 *        regularization; the default update rule is parallel coordinate
 *        descent (shotgun).
* \author Tianqi Chen
*/
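//
// A sketch of the per-weight coordinate descent update performed by the
// LinearUpdater implementations (illustrative only; the exact logic lives in
// the updaters). For weight w_j, first/second order gradients g_i, h_i, and
// L2 penalty lambda, the unconstrained Newton step is
//
//   dw_j = -(sum_i g_i * x_ij + lambda * w_j) / (sum_i h_i * x_ij^2 + lambda)
//
// with the L1 penalty alpha applied by soft-thresholding the numerator.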
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <algorithm>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include "xgboost/gbm.h"
#include "xgboost/json.h"
#include "xgboost/predictor.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "gblinear_model.h"
#include "../common/timer.h"
#include "../common/common.h"
namespace xgboost {
namespace gbm {
DMLC_REGISTRY_FILE_TAG(gblinear);
// training parameters
struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
std::string updater;
float tolerance;
size_t max_row_perbatch;
DMLC_DECLARE_PARAMETER(GBLinearTrainParam) {
DMLC_DECLARE_FIELD(updater)
.set_default("shotgun")
.describe("Update algorithm for linear model. One of shotgun/coord_descent");
DMLC_DECLARE_FIELD(tolerance)
.set_lower_bound(0.0f)
.set_default(0.0f)
.describe("Stop if largest weight update is smaller than this number.");
DMLC_DECLARE_FIELD(max_row_perbatch)
.set_default(std::numeric_limits<size_t>::max())
.describe("Maximum rows per batch.");
}
};
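// Illustrative configuration of these parameters (`Args` is a list of string
// key/value pairs handed to Configure(); the values here are examples only):
//
//   booster->Configure({{"updater", "coord_descent"}, {"tolerance", "0.001"}});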
/*!
* \brief gradient boosted linear model
*/
class GBLinear : public GradientBooster {
public:
explicit GBLinear(LearnerModelParam const* learner_model_param)
: learner_model_param_{learner_model_param},
model_{learner_model_param},
previous_model_{learner_model_param},
sum_instance_weight_(0),
sum_weight_complete_(false),
is_converged_(false) {}
void Configure(const Args& cfg) override {
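    // Pass the configuration to the model only before any weights have been
    // allocated; once initialized, the model shape is fixed.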
if (model_.weight.size() == 0) {
model_.Configure(cfg);
}
param_.UpdateAllowUnknown(cfg);
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
updater_->Configure(cfg);
monitor_.Init("GBLinear");
if (param_.updater == "gpu_coord_descent") {
common::AssertGPUSupport();
}
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
}
void Save(dmlc::Stream* fo) const override {
model_.Save(fo);
}
void SaveModel(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String{"gblinear"};
out["model"] = Object();
auto& model = out["model"];
model_.SaveModel(&model);
}
void LoadModel(Json const& in) override {
CHECK_EQ(get<String>(in["name"]), "gblinear");
auto const& model = in["model"];
model_.LoadModel(model);
}
void LoadConfig(Json const& in) override {
CHECK_EQ(get<String>(in["name"]), "gblinear");
    FromJson(in["gblinear_train_param"], &param_);
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
this->updater_->LoadConfig(in["updater"]);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String{"gblinear"};
out["gblinear_train_param"] = ToJson(param_);
out["updater"] = Object();
auto& j_updater = out["updater"];
CHECK(this->updater_);
this->updater_->SaveConfig(&j_updater);
}
void DoBoost(DMatrix *p_fmat,
HostDeviceVector<GradientPair> *in_gpair,
PredictionCacheEntry* predt) override {
monitor_.Start("DoBoost");
model_.LazyInitModel();
this->LazySumWeights(p_fmat);
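    // Skip the update once the largest weight change has dropped below the
    // configured tolerance (see CheckConvergence below).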
if (!this->CheckConvergence()) {
updater_->Update(in_gpair, p_fmat, &model_, sum_instance_weight_);
}
monitor_.Stop("DoBoost");
}
void PredictBatch(DMatrix *p_fmat,
PredictionCacheEntry *predts,
bool training,
unsigned ntree_limit) override {
monitor_.Start("PredictBatch");
auto* out_preds = &predts->predictions;
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";
this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
monitor_.Stop("PredictBatch");
}
  // predict a single instance, adding the base margin to each output group
void PredictInstance(const SparsePage::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
const int ngroup = model_.learner_model_param->num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
learner_model_param_->base_score);
}
}
void PredictLeaf(DMatrix *p_fmat,
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
LOG(FATAL) << "gblinear does not support prediction of leaf index";
}
void PredictContribution(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
model_.LazyInitModel();
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
const int ngroup = model_.learner_model_param->num_output_group;
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
    // make sure the contribution buffer is zeroed; we may be reusing a
    // previously allocated vector
std::fill(contribs.begin(), contribs.end(), 0);
// start collecting the contributions
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto inst = batch[i];
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions
for (auto& ins : inst) {
if (ins.index >= model_.learner_model_param->num_feature) continue;
p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid];
}
// add base margin to BIAS
p_contribs[ncolumns - 1] = model_.Bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] :
learner_model_param_->base_score);
}
}
}
}
void PredictInteractionContributions(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = out_contribs->HostVector();
// linear models have no interaction effects
const size_t nelements = model_.learner_model_param->num_feature *
model_.learner_model_param->num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements *
model_.learner_model_param->num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
return model_.DumpModel(fmap, with_stats, format);
}
  bool UseGPU() const override {
    return param_.updater == "gpu_coord_descent";
  }
protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
// start collecting the prediction
const int ngroup = model_.learner_model_param->num_output_group;
preds.resize(p_fmat->Info().num_row_ * ngroup);
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
      // output convention: nrow * k, where nrow is the number of rows and
      // k is the number of output groups
// parallel over local batch
const auto nsize = static_cast<omp_ulong>(batch.Size());
if (base_margin.size() != 0) {
CHECK_EQ(base_margin.size(), nsize * ngroup);
}
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float margin =
(base_margin.size() != 0) ?
base_margin[ridx * ngroup + gid] : learner_model_param_->base_score;
this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
}
}
}
monitor_.Stop("PredictBatchInternal");
}
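
  // Convergence test used by DoBoost: true once the largest absolute weight
  // change between two consecutive rounds is at most `tolerance`. A tolerance
  // of zero disables the check.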
bool CheckConvergence() {
if (param_.tolerance == 0.0f) return false;
if (is_converged_) return true;
if (previous_model_.weight.size() != model_.weight.size()) {
previous_model_ = model_;
return false;
}
float largest_dw = 0.0;
for (size_t i = 0; i < model_.weight.size(); i++) {
largest_dw = std::max(
largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i]));
}
previous_model_ = model_;
is_converged_ = largest_dw <= param_.tolerance;
return is_converged_;
}
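
  // Lazily accumulate the total instance weight over the training data; the
  // updaters use it, e.g. to keep the regularization penalties on the same
  // scale as the (possibly weighted) data.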
void LazySumWeights(DMatrix *p_fmat) {
if (!sum_weight_complete_) {
auto &info = p_fmat->Info();
for (size_t i = 0; i < info.num_row_; i++) {
sum_instance_weight_ += info.GetWeight(i);
}
sum_weight_complete_ = true;
}
}
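
  // Linear prediction for one instance and one output group: the group bias
  // plus the supplied base margin plus the dot product of the instance's
  // feature values with the group's weights; feature indices beyond the
  // trained feature count are ignored.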
void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,
bst_float base) {
bst_float psum = model_.Bias()[gid] + base;
for (const auto& ins : inst) {
if (ins.index >= model_.learner_model_param->num_feature) continue;
psum += ins.fvalue * model_[ins.index][gid];
}
preds[gid] = psum;
}
  // base margin score
LearnerModelParam const* learner_model_param_;
// model field
GBLinearModel model_;
GBLinearModel previous_model_;
GBLinearTrainParam param_;
std::unique_ptr<LinearUpdater> updater_;
double sum_instance_weight_;
bool sum_weight_complete_;
common::Monitor monitor_;
bool is_converged_;
};
// register the training parameter and the linear booster
DMLC_REGISTER_PARAMETER(GBLinearTrainParam);
XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.describe("Linear booster, implement generalized linear model.")
.set_body([](LearnerModelParam const* booster_config) {
return new GBLinear(booster_config);
});
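// The booster is typically selected by setting the learner parameter
// "booster" to "gblinear"; the "updater" parameter then picks one of the
// registered LinearUpdater implementations (e.g. shotgun or coord_descent).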
} // namespace gbm
} // namespace xgboost