Skip to content

Commit

Permalink
Update GPUTreeShap (#6163)
Browse files Browse the repository at this point in the history
* Reduce shap test duration

* Test interoperability with shap package

* Add feature interactions

* Update GPUTreeShap
  • Loading branch information
RAMitchell committed Sep 27, 2020
1 parent 434a3f3 commit dda9e1e
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 87 deletions.
2 changes: 1 addition & 1 deletion gputreeshap
4 changes: 2 additions & 2 deletions include/xgboost/gbm.h
Expand Up @@ -147,13 +147,13 @@ class GradientBooster : public Model, public Configurable {
* \param condition_feature feature to condition on (i.e. fix) during calculations
*/
virtual void PredictContribution(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit = 0,
bool approximate = false, int condition = 0,
unsigned condition_feature = 0) = 0;

virtual void PredictInteractionContributions(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) = 0;

/*!
Expand Down
4 changes: 2 additions & 2 deletions include/xgboost/predictor.h
Expand Up @@ -201,7 +201,7 @@ class Predictor {
*/

virtual void PredictContribution(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
std::vector<bst_float>* tree_weights = nullptr,
Expand All @@ -210,7 +210,7 @@ class Predictor {
unsigned condition_feature = 0) = 0;

virtual void PredictInteractionContributions(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
std::vector<bst_float>* tree_weights = nullptr,
Expand Down
8 changes: 4 additions & 4 deletions src/gbm/gblinear.cc
Expand Up @@ -155,7 +155,7 @@ class GBLinear : public GradientBooster {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
model_.LazyInitModel();
Expand All @@ -165,7 +165,7 @@ class GBLinear : public GradientBooster {
const int ngroup = model_.learner_model_param->num_output_group;
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
// make sure contributions is zeroed, we could be reusing a previously allocated one
std::fill(contribs.begin(), contribs.end(), 0);
Expand Down Expand Up @@ -195,9 +195,9 @@ class GBLinear : public GradientBooster {
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();

// linear models have no interaction effects
const size_t nelements = model_.learner_model_param->num_feature *
Expand Down
4 changes: 2 additions & 2 deletions src/gbm/gbtree.cc
Expand Up @@ -600,7 +600,7 @@ class Dart : public GBTree {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
CHECK(configured_);
Expand All @@ -609,7 +609,7 @@ class Dart : public GBTree {
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
CHECK(configured_);
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
Expand Down
6 changes: 3 additions & 3 deletions src/gbm/gbtree.h
Expand Up @@ -237,7 +237,7 @@ class GBTree : public GradientBooster {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate,
int condition, unsigned condition_feature) override {
CHECK(configured_);
Expand All @@ -246,10 +246,10 @@ class GBTree : public GradientBooster {
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
CHECK(configured_);
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
this->GetPredictor()->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, nullptr, approximate);
}

Expand Down
4 changes: 2 additions & 2 deletions src/learner.cc
Expand Up @@ -1068,9 +1068,9 @@ class LearnerImpl : public LearnerIO {
this->Configure();
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
if (pred_contribs) {
gbm_->PredictContribution(data.get(), &out_preds->HostVector(), ntree_limit, approx_contribs);
gbm_->PredictContribution(data.get(), out_preds, ntree_limit, approx_contribs);
} else if (pred_interactions) {
gbm_->PredictInteractionContributions(data.get(), &out_preds->HostVector(), ntree_limit,
gbm_->PredictInteractionContributions(data.get(), out_preds, ntree_limit,
approx_contribs);
} else if (pred_leaf) {
gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
Expand Down
23 changes: 13 additions & 10 deletions src/predictor/cpu_predictor.cc
Expand Up @@ -352,7 +352,7 @@ class CPUPredictor : public Predictor {
}
}

void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
const gbm::GBTreeModel& model, uint32_t ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate, int condition,
Expand All @@ -370,7 +370,7 @@ class CPUPredictor : public Predictor {
size_t const ncolumns = model.learner_model_param->num_feature + 1;
CHECK_NE(ncolumns, 0);
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
// make sure contributions is zeroed, we could be reusing a previously
// allocated one
Expand Down Expand Up @@ -423,7 +423,7 @@ class CPUPredictor : public Predictor {
}
}

void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate) override {
Expand All @@ -435,21 +435,24 @@ class CPUPredictor : public Predictor {
const unsigned crow_chunk = ngroup * (ncolumns + 1);

// allocate space for (number of features^2) times the number of rows and tmp off/on contribs
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
std::vector<bst_float> contribs_off(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_on(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_diag(info.num_row_ * ngroup * (ncolumns + 1));
HostDeviceVector<bst_float> contribs_off_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_off = contribs_off_hdv.HostVector();
HostDeviceVector<bst_float> contribs_on_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_on = contribs_on_hdv.HostVector();
HostDeviceVector<bst_float> contribs_diag_hdv(info.num_row_ * ngroup * (ncolumns + 1));
auto &contribs_diag = contribs_diag_hdv.HostVector();

// Compute the difference in effects when conditioning on each of the features on and off
// see: Axiomatic characterizations of probabilistic and
// cardinal-probabilistic interaction indices
PredictContribution(p_fmat, &contribs_diag, model, ntree_limit,
PredictContribution(p_fmat, &contribs_diag_hdv, model, ntree_limit,
tree_weights, approximate, 0, 0);
for (size_t i = 0; i < ncolumns + 1; ++i) {
PredictContribution(p_fmat, &contribs_off, model, ntree_limit,
PredictContribution(p_fmat, &contribs_off_hdv, model, ntree_limit,
tree_weights, approximate, -1, i);
PredictContribution(p_fmat, &contribs_on, model, ntree_limit,
PredictContribution(p_fmat, &contribs_on_hdv, model, ntree_limit,
tree_weights, approximate, 1, i);

for (size_t j = 0; j < info.num_row_; ++j) {
Expand Down
86 changes: 66 additions & 20 deletions src/predictor/gpu_predictor.cu
Expand Up @@ -553,7 +553,7 @@ class GPUPredictor : public xgboost::Predictor {
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate, int condition,
Expand All @@ -564,6 +564,7 @@ class GPUPredictor : public xgboost::Predictor {
}

dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
out_contribs->SetDevice(generic_param_->gpu_id);
uint32_t real_ntree_limit =
ntree_limit * model.learner_model_param->num_output_group;
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
Expand All @@ -573,22 +574,21 @@ class GPUPredictor : public xgboost::Predictor {
const int ngroup = model.learner_model_param->num_output_group;
CHECK_NE(ngroup, 0);
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
size_t contributions_columns =
model.learner_model_param->num_feature + 1; // +1 for bias
contribs.resize(p_fmat->Info().num_row_ * contributions_columns *
out_contribs->Resize(p_fmat->Info().num_row_ * contributions_columns *
model.learner_model_param->num_output_group);
dh::TemporaryArray<float> phis(contribs.size(), 0.0);
out_contribs->Fill(0.0f);
auto phis = out_contribs->DeviceSpan();
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
float base_score = model.learner_model_param->base_score;
auto d_phis = phis.data().get();
// Add the base margin term to last column
dh::LaunchN(
generic_param_->gpu_id,
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
[=] __device__(size_t idx) {
d_phis[(idx + 1) * contributions_columns - 1] =
phis[(idx + 1) * contributions_columns - 1] =
margin.empty() ? base_score : margin[idx];
});

Expand All @@ -602,11 +602,67 @@ class GPUPredictor : public xgboost::Predictor {
model.learner_model_param->num_feature);
gpu_treeshap::GPUTreeShap(
X, device_paths.begin(), device_paths.end(), ngroup,
phis.data().get() + batch.base_rowid * contributions_columns);
phis.data() + batch.base_rowid * contributions_columns, phis.size());
}
}

void PredictInteractionContributions(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate) override {
if (approximate) {
LOG(FATAL) << "[Internal error]: " << __func__
<< " approximate is not implemented in GPU Predictor.";
}

dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
out_contribs->SetDevice(generic_param_->gpu_id);
uint32_t real_ntree_limit =
ntree_limit * model.learner_model_param->num_output_group;
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
}

const int ngroup = model.learner_model_param->num_output_group;
CHECK_NE(ngroup, 0);
// allocate space for (number of features + bias) times the number of rows
size_t contributions_columns =
model.learner_model_param->num_feature + 1; // +1 for bias
out_contribs->Resize(p_fmat->Info().num_row_ * contributions_columns *
contributions_columns *
model.learner_model_param->num_output_group);
out_contribs->Fill(0.0f);
auto phis = out_contribs->DeviceSpan();
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
float base_score = model.learner_model_param->base_score;
// Add the base margin term to last column
size_t n_features = model.learner_model_param->num_feature;
dh::LaunchN(
generic_param_->gpu_id,
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
[=] __device__(size_t idx) {
size_t group = idx % ngroup;
size_t row_idx = idx / ngroup;
phis[gpu_treeshap::IndexPhiInteractions(
row_idx, ngroup, group, n_features, n_features, n_features)] =
margin.empty() ? base_score : margin[idx];
});

dh::device_vector<gpu_treeshap::PathElement> device_paths;
ExtractPaths(&device_paths, model, real_ntree_limit,
generic_param_->gpu_id);
for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(generic_param_->gpu_id);
batch.offset.SetDevice(generic_param_->gpu_id);
SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
model.learner_model_param->num_feature);
gpu_treeshap::GPUTreeShapInteractions(
X, device_paths.begin(), device_paths.end(), ngroup,
phis.data() + batch.base_rowid * contributions_columns, phis.size());
}
dh::safe_cuda(cudaMemcpy(contribs.data(), phis.data().get(),
sizeof(float) * phis.size(),
cudaMemcpyDefault));
}

protected:
Expand Down Expand Up @@ -640,16 +696,6 @@ class GPUPredictor : public xgboost::Predictor {
<< " is not implemented in GPU Predictor.";
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit,
std::vector<bst_float>* tree_weights,
bool approximate) override {
LOG(FATAL) << "[Internal error]: " << __func__
<< " is not implemented in GPU Predictor.";
}

void Configure(const std::vector<std::pair<std::string, std::string>>& cfg) override {
Predictor::Configure(cfg);
}
Expand Down
1 change: 1 addition & 0 deletions tests/ci_build/conda_env/cpu_test.yml
Expand Up @@ -29,6 +29,7 @@ dependencies:
- boto3
- awscli
- pip:
- shap
- guzzle_sphinx_theme
- datatable
- modin[all]
29 changes: 18 additions & 11 deletions tests/cpp/predictor/test_cpu_predictor.cc
Expand Up @@ -53,24 +53,28 @@ TEST(CpuPredictor, Basic) {
}

// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model,
0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
Expand Down Expand Up @@ -112,8 +116,9 @@ TEST(CpuPredictor, ExternalMemory) {
}

// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
Expand All @@ -126,8 +131,10 @@ TEST(CpuPredictor, ExternalMemory) {
}

// Test predict contribution (approximate method)
std::vector<float> out_contribution_approximate;
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
HostDeviceVector<float> out_contribution_approximate_hdv;
auto& out_contribution_approximate = out_contribution_approximate_hdv.HostVector();
cpu_predictor->PredictContribution(
dmat.get(), &out_contribution_approximate_hdv, model, 0, nullptr, true);
ASSERT_EQ(out_contribution_approximate.size(),
dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
Expand Down

0 comments on commit dda9e1e

Please sign in to comment.