Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update GPUTreeshap #6163

Merged
Merged 5 commits on Sep 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion gputreeshap
4 changes: 2 additions & 2 deletions include/xgboost/gbm.h
Expand Up @@ -147,13 +147,13 @@ class GradientBooster : public Model, public Configurable {
* \param condition_feature feature to condition on (i.e. fix) during calculations
*/
/*
 * Feature-contribution (SHAP) prediction. Writes one value per
 * (feature + bias) per output group per row into *out_contribs.
 * NOTE: out_contribs is a HostDeviceVector so GPU implementations can
 * fill it on-device without a host round trip.
 */
virtual void PredictContribution(DMatrix* dmat,
                                 HostDeviceVector<bst_float>* out_contribs,
                                 unsigned ntree_limit = 0,
                                 bool approximate = false, int condition = 0,
                                 unsigned condition_feature = 0) = 0;

/*
 * Pairwise feature-interaction (SHAP interaction) prediction. Writes a
 * (#features + 1) x (#features + 1) matrix per output group per row into
 * *out_contribs; the extra row/column holds the bias term.
 */
virtual void PredictInteractionContributions(DMatrix* dmat,
                                             HostDeviceVector<bst_float>* out_contribs,
                                             unsigned ntree_limit, bool approximate) = 0;

/*!
Expand Down
4 changes: 2 additions & 2 deletions include/xgboost/predictor.h
Expand Up @@ -201,7 +201,7 @@ class Predictor {
*/

virtual void PredictContribution(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
std::vector<bst_float>* tree_weights = nullptr,
Expand All @@ -210,7 +210,7 @@ class Predictor {
unsigned condition_feature = 0) = 0;

virtual void PredictInteractionContributions(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
std::vector<bst_float>* tree_weights = nullptr,
Expand Down
8 changes: 4 additions & 4 deletions src/gbm/gblinear.cc
Expand Up @@ -155,7 +155,7 @@ class GBLinear : public GradientBooster {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
model_.LazyInitModel();
Expand All @@ -165,7 +165,7 @@ class GBLinear : public GradientBooster {
const int ngroup = model_.learner_model_param->num_output_group;
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
// make sure contributions is zeroed, we could be reusing a previously allocated one
std::fill(contribs.begin(), contribs.end(), 0);
Expand Down Expand Up @@ -195,9 +195,9 @@ class GBLinear : public GradientBooster {
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();

// linear models have no interaction effects
const size_t nelements = model_.learner_model_param->num_feature *
Expand Down
4 changes: 2 additions & 2 deletions src/gbm/gbtree.cc
Expand Up @@ -600,7 +600,7 @@ class Dart : public GBTree {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
CHECK(configured_);
Expand All @@ -609,7 +609,7 @@ class Dart : public GBTree {
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
CHECK(configured_);
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
Expand Down
6 changes: 3 additions & 3 deletions src/gbm/gbtree.h
Expand Up @@ -237,7 +237,7 @@ class GBTree : public GradientBooster {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate,
int condition, unsigned condition_feature) override {
CHECK(configured_);
Expand All @@ -246,10 +246,10 @@ class GBTree : public GradientBooster {
}

// Dispatch SHAP interaction prediction to the configured predictor
// (CPU or GPU), rather than hard-coding cpu_predictor_, so the GPU
// implementation is used when available.
void PredictInteractionContributions(DMatrix* p_fmat,
                                     HostDeviceVector<bst_float>* out_contribs,
                                     unsigned ntree_limit, bool approximate) override {
  CHECK(configured_);
  this->GetPredictor()->PredictInteractionContributions(
      p_fmat, out_contribs, model_, ntree_limit, nullptr, approximate);
}

Expand Down
4 changes: 2 additions & 2 deletions src/learner.cc
Expand Up @@ -1068,9 +1068,9 @@ class LearnerImpl : public LearnerIO {
this->Configure();
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
if (pred_contribs) {
gbm_->PredictContribution(data.get(), &out_preds->HostVector(), ntree_limit, approx_contribs);
gbm_->PredictContribution(data.get(), out_preds, ntree_limit, approx_contribs);
} else if (pred_interactions) {
gbm_->PredictInteractionContributions(data.get(), &out_preds->HostVector(), ntree_limit,
gbm_->PredictInteractionContributions(data.get(), out_preds, ntree_limit,
approx_contribs);
} else if (pred_leaf) {
gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
Expand Down
23 changes: 13 additions & 10 deletions src/predictor/cpu_predictor.cc
Expand Up @@ -352,7 +352,7 @@ class CPUPredictor : public Predictor {
}
}

void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
const gbm::GBTreeModel& model, uint32_t ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate, int condition,
Expand All @@ -370,7 +370,7 @@ class CPUPredictor : public Predictor {
size_t const ncolumns = model.learner_model_param->num_feature + 1;
CHECK_NE(ncolumns, 0);
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
// make sure contributions is zeroed, we could be reusing a previously
// allocated one
Expand Down Expand Up @@ -423,7 +423,7 @@ class CPUPredictor : public Predictor {
}
}

void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate) override {
Expand All @@ -435,21 +435,24 @@ class CPUPredictor : public Predictor {
const unsigned crow_chunk = ngroup * (ncolumns + 1);

// allocate space for (number of features^2) times the number of rows and tmp off/on contribs
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
std::vector<bst_float> contribs_off(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_on(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_diag(info.num_row_ * ngroup * (ncolumns + 1));
HostDeviceVector<bst_float> contribs_off_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_off = contribs_off_hdv.HostVector();
HostDeviceVector<bst_float> contribs_on_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_on = contribs_on_hdv.HostVector();
HostDeviceVector<bst_float> contribs_diag_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_diag = contribs_diag_hdv.HostVector();

// Compute the difference in effects when conditioning on each of the features on and off
// see: Axiomatic characterizations of probabilistic and
// cardinal-probabilistic interaction indices
PredictContribution(p_fmat, &contribs_diag, model, ntree_limit,
PredictContribution(p_fmat, &contribs_diag_hdv, model, ntree_limit,
tree_weights, approximate, 0, 0);
for (size_t i = 0; i < ncolumns + 1; ++i) {
PredictContribution(p_fmat, &contribs_off, model, ntree_limit,
PredictContribution(p_fmat, &contribs_off_hdv, model, ntree_limit,
tree_weights, approximate, -1, i);
PredictContribution(p_fmat, &contribs_on, model, ntree_limit,
PredictContribution(p_fmat, &contribs_on_hdv, model, ntree_limit,
tree_weights, approximate, 1, i);

for (size_t j = 0; j < info.num_row_; ++j) {
Expand Down
86 changes: 66 additions & 20 deletions src/predictor/gpu_predictor.cu
Expand Up @@ -553,7 +553,7 @@ class GPUPredictor : public xgboost::Predictor {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate, int condition,
Expand All @@ -564,6 +564,7 @@ class GPUPredictor : public xgboost::Predictor {
}

dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
out_contribs->SetDevice(generic_param_->gpu_id);
uint32_t real_ntree_limit =
ntree_limit * model.learner_model_param->num_output_group;
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
Expand All @@ -573,22 +574,21 @@ class GPUPredictor : public xgboost::Predictor {
const int ngroup = model.learner_model_param->num_output_group;
CHECK_NE(ngroup, 0);
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
size_t contributions_columns =
model.learner_model_param->num_feature + 1; // +1 for bias
contribs.resize(p_fmat->Info().num_row_ * contributions_columns *
out_contribs->Resize(p_fmat->Info().num_row_ * contributions_columns *
model.learner_model_param->num_output_group);
dh::TemporaryArray<float> phis(contribs.size(), 0.0);
out_contribs->Fill(0.0f);
auto phis = out_contribs->DeviceSpan();
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
float base_score = model.learner_model_param->base_score;
auto d_phis = phis.data().get();
// Add the base margin term to last column
dh::LaunchN(
generic_param_->gpu_id,
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
[=] __device__(size_t idx) {
d_phis[(idx + 1) * contributions_columns - 1] =
phis[(idx + 1) * contributions_columns - 1] =
margin.empty() ? base_score : margin[idx];
});

Expand All @@ -602,11 +602,67 @@ class GPUPredictor : public xgboost::Predictor {
model.learner_model_param->num_feature);
gpu_treeshap::GPUTreeShap(
X, device_paths.begin(), device_paths.end(), ngroup,
phis.data().get() + batch.base_rowid * contributions_columns);
phis.data() + batch.base_rowid * contributions_columns, phis.size());
}
}

// Compute SHAP interaction values on the GPU via GPUTreeShap.
// Output layout: per row, per output group, a
// (#features + 1) x (#features + 1) matrix; the +1 row/column is the bias.
void PredictInteractionContributions(DMatrix* p_fmat,
                                     HostDeviceVector<bst_float>* out_contribs,
                                     const gbm::GBTreeModel& model,
                                     unsigned ntree_limit,
                                     std::vector<bst_float>* tree_weights,
                                     bool approximate) override {
  // Fail loudly instead of silently falling back to the CPU path: the user
  // asked for the GPU predictor and should be told the approximate
  // algorithm is not implemented here.
  if (approximate) {
    LOG(FATAL) << "[Internal error]: " << __func__
               << " approximate is not implemented in GPU Predictor.";
  }

  dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
  out_contribs->SetDevice(generic_param_->gpu_id);
  // ntree_limit == 0 (or out of range) means "use every tree".
  uint32_t real_ntree_limit =
      ntree_limit * model.learner_model_param->num_output_group;
  if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
    real_ntree_limit = static_cast<uint32_t>(model.trees.size());
  }

  const int ngroup = model.learner_model_param->num_output_group;
  CHECK_NE(ngroup, 0);
  // Allocate (#features + 1)^2 values per row per group; +1 for bias.
  size_t contributions_columns =
      model.learner_model_param->num_feature + 1;  // +1 for bias
  out_contribs->Resize(p_fmat->Info().num_row_ * contributions_columns *
                       contributions_columns *
                       model.learner_model_param->num_output_group);
  out_contribs->Fill(0.0f);
  auto phis = out_contribs->DeviceSpan();
  p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
  const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
  float base_score = model.learner_model_param->base_score;
  // Write the base margin (or global base score when no margin was
  // supplied) into the bias/bias cell of every row's interaction matrix.
  size_t n_features = model.learner_model_param->num_feature;
  dh::LaunchN(
      generic_param_->gpu_id,
      p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
      [=] __device__(size_t idx) {
        size_t group = idx % ngroup;
        size_t row_idx = idx / ngroup;
        phis[gpu_treeshap::IndexPhiInteractions(
            row_idx, ngroup, group, n_features, n_features, n_features)] =
            margin.empty() ? base_score : margin[idx];
      });

  dh::device_vector<gpu_treeshap::PathElement> device_paths;
  ExtractPaths(&device_paths, model, real_ntree_limit,
               generic_param_->gpu_id);
  for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
    batch.data.SetDevice(generic_param_->gpu_id);
    batch.offset.SetDevice(generic_param_->gpu_id);
    SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                     model.learner_model_param->num_feature);
    gpu_treeshap::GPUTreeShapInteractions(
        X, device_paths.begin(), device_paths.end(), ngroup,
        phis.data() + batch.base_rowid * contributions_columns, phis.size());
  }
  // No explicit device-to-host copy: out_contribs is a HostDeviceVector,
  // so callers that read HostVector() trigger a lazy sync from the device.
}

protected:
Expand Down Expand Up @@ -640,16 +696,6 @@ class GPUPredictor : public xgboost::Predictor {
<< " is not implemented in GPU Predictor.";
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate) override {
LOG(FATAL) << "[Internal error]: " << __func__
<< " is not implemented in GPU Predictor.";
}

void Configure(const std::vector<std::pair<std::string, std::string>>& cfg) override {
Predictor::Configure(cfg);
}
Expand Down
1 change: 1 addition & 0 deletions tests/ci_build/conda_env/cpu_test.yml
Expand Up @@ -29,6 +29,7 @@ dependencies:
- boto3
- awscli
- pip:
- shap
- guzzle_sphinx_theme
- datatable
- modin[all]
29 changes: 18 additions & 11 deletions tests/cpp/predictor/test_cpu_predictor.cc
Expand Up @@ -53,24 +53,28 @@ TEST(CpuPredictor, Basic) {
}

// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model,
0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
Expand Down Expand Up @@ -112,8 +116,9 @@ TEST(CpuPredictor, ExternalMemory) {
}

// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
Expand All @@ -126,8 +131,10 @@ TEST(CpuPredictor, ExternalMemory) {
}

// Test predict contribution (approximate method)
std::vector<float> out_contribution_approximate;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
HostDeviceVector<float> out_contribution_approximate_hdv;
auto& out_contribution_approximate = out_contribution_approximate_hdv.HostVector();
cpu_predictor->PredictContribution(
dmat.get(), &out_contribution_approximate_hdv, model, 0, nullptr, true);
ASSERT_EQ(out_contribution_approximate.size(),
dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
Expand Down