Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/dmlc/xgboost into optimiz…
Browse files Browse the repository at this point in the history
…ation_part_applysplit
  • Loading branch information
ShvetsKS committed May 16, 2022
2 parents 3b08089 + 4fcfd9c commit efb4f50
Show file tree
Hide file tree
Showing 9 changed files with 354 additions and 10 deletions.
18 changes: 17 additions & 1 deletion doc/jvm/xgboost4j_spark_tutorial.rst
Expand Up @@ -349,7 +349,23 @@ With regards to ML pipeline save and load, please refer the next section.

Interact with Other Bindings of XGBoost
---------------------------------------
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving in single machine or integrate it with other single node libraries for further processing. XGBoost4j-Spark supports export model to local by:
After we train a model with XGBoost4j-Spark on a massive dataset, sometimes we want to do model serving
on a single machine or integrate it with other single-node libraries for further processing.

After saving the model, we can load this model with single node Python XGBoost directly from ``version 2.0.0+``.

.. code-block:: scala
val xgbClassificationModelPath = "/tmp/xgbClassificationModel"
xgbClassificationModel.write.overwrite().save(xgbClassificationModelPath)
.. code-block:: python
import xgboost as xgb
bst = xgb.Booster({'nthread': 4})
bst.load_model("/tmp/xgbClassificationModel/data/XGBoostClassificationModel")
Before ``version 2.0.0``, XGBoost4j-Spark needs to export the model to a local path manually by:

.. code-block:: scala
Expand Down
2 changes: 1 addition & 1 deletion doc/python/python_intro.rst
Expand Up @@ -147,7 +147,7 @@ XGBoost can use either a list of pairs or a dictionary to set :doc:`parameters <

.. code-block:: python
evallist = [(dtest, 'eval'), (dtrain, 'train')]
evallist = [(dtrain, 'train'), (dtest, 'eval')]
Training
--------
Expand Down
332 changes: 332 additions & 0 deletions src/common/partition_builder.h
@@ -0,0 +1,332 @@
/*!
* Copyright 2021-2022 by Contributors
* \file partition_builder.h
* \brief Quick Utility to compute subset of rows
* \author Philip Cho, Tianqi Chen
*/
#ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_
#define XGBOOST_COMMON_PARTITION_BUILDER_H_

#include <xgboost/data.h>

#include <algorithm>
#include <memory>
#include <utility>
#include <limits>
#include <vector>

#include "categorical.h"
#include "column_matrix.h"
#include "xgboost/generic_parameters.h"
#include "xgboost/tree_model.h"

namespace xgboost {
namespace common {

// The builder partitions the samples of a set of nodes into left and right children.
// Responsible for:
// 1) Efficient memory allocation for intermediate results of multi-threaded work
// 2) Merging partial results produced by threads into the original row set (row_set_collection_)
// BlockSize is a template parameter so that memory alignment can easily be enabled with the
// C++11 'alignas()' feature
template<size_t BlockSize>
class PartitionBuilder {
public:
// Prepares the builder for a batch of `n_nodes` nodes processed by `n_tasks` tasks.
// The values returned by `funcNTask(i)` are accumulated into blocks_offsets_, so that
// blocks_offsets_[i] is the index of the first task of node `i` (see GetTaskIdx) and
// blocks_offsets_[n_nodes] is the sum over all nodes.
// The vector of per-task block slots only ever grows; existing slots are kept between calls.
template<typename Func>
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) {
  left_right_nodes_sizes_.resize(n_nodes);
  blocks_offsets_.resize(n_nodes + 1);

  // Exclusive prefix sum of the per-node task counts.
  size_t running_total = 0;
  blocks_offsets_[0] = running_total;
  for (size_t node = 0; node < n_nodes; ++node) {
    running_total += funcNTask(node);
    blocks_offsets_[node + 1] = running_total;
  }

  // Enough slots are already available, nothing to grow.
  if (n_tasks <= max_n_tasks_) {
    return;
  }
  mem_blocks_.resize(n_tasks);
  max_n_tasks_ = n_tasks;
}

// split row indexes (rid_span) to 2 parts (left_part, right_part) depending
// on comparison of indexes values (idx_span) and split point (split_cond)
// Handle dense columns
// Analog of std::stable_partition, but in no-inplace manner
template <bool default_left, bool any_missing, typename ColumnType, typename Predicate>
inline std::pair<size_t, size_t> PartitionKernel(ColumnType* p_column,
common::Span<const size_t> row_indices,
common::Span<size_t> left_part,
common::Span<size_t> right_part,
size_t base_rowid, Predicate&& pred) {
auto& column = *p_column;
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;

auto p_row_indices = row_indices.data();
auto n_samples = row_indices.size();

for (size_t i = 0; i < n_samples; ++i) {
auto rid = p_row_indices[i];
const int32_t bin_id = column[rid - base_rowid];
if (any_missing && bin_id == ColumnType::kMissingId) {
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
} else {
if (pred(rid, bin_id)) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}

return {nleft_elems, nright_elems};
}

// Order-preserving split of the row indices in `ridx` into two output buffers.
// A row is appended to `left_part` when `pred(row_id)` is true and to `right_part` otherwise.
// Returns {number of rows written to left_part, number of rows written to right_part}.
// The output spans are written through their raw data pointers, without bounds checks.
template <typename Pred>
inline std::pair<size_t, size_t> PartitionRangeKernel(common::Span<const size_t> ridx,
                                                      common::Span<size_t> left_part,
                                                      common::Span<size_t> right_part,
                                                      Pred pred) {
  size_t* const left_out = left_part.data();
  size_t* const right_out = right_part.data();
  size_t left_count = 0;
  size_t right_count = 0;

  const auto rows = ridx.data();
  const size_t n_rows = ridx.size();
  for (size_t i = 0; i < n_rows; ++i) {
    auto row_id = rows[i];
    if (pred(row_id)) {
      left_out[left_count] = row_id;
      ++left_count;
    } else {
      right_out[right_count] = row_id;
      ++right_count;
    }
  }

  return std::make_pair(left_count, right_count);
}

// Partitions the rows rid[range.begin() .. range.end()) according to the split stored in
// tree node `nid` and records the result in the task block that belongs to
// (node_in_set, range.begin()).
//   node_in_set   - position of the node in the current batch (used to locate the block)
//   nid           - node index in `tree`; supplies split feature, default direction and,
//                   for categorical splits, the category set
//   split_cond    - bin threshold; for non-categorical splits a row goes left iff its
//                   bin index is <= split_cond
//   gmat          - quantized matrix; provides base_rowid and, for categorical splits,
//                   the cut values / row index
//   column_matrix - column-wise bins of feature `fid`, dense or sparse
// Rows whose bin is missing follow the node's default direction (see PartitionKernel).
template <typename BinIdxType, bool any_missing, bool any_cat>
void Partition(const size_t node_in_set, const size_t nid, const common::Range1d range,
               const bst_bin_t split_cond, GHistIndexMatrix const& gmat,
               const ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid) {
  const auto first = range.begin();
  const auto last = range.end();

  common::Span<const size_t> rid_span(rid + first, rid + last);
  common::Span<size_t> left = GetLeftBuffer(node_in_set, first, last);
  common::Span<size_t> right = GetRightBuffer(node_in_set, first, last);
  const bst_uint fid = tree[nid].SplitIndex();
  const bool default_left = tree[nid].DefaultLeft();
  bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
  auto node_cats = tree.NodeCats(nid);

  auto const& index = gmat.index;
  auto const& cut_values = gmat.cut.Values();
  auto const& cut_ptrs = gmat.cut.Ptrs();

  // Returns true when the row has to go to the left child.
  auto pred = [&](auto ridx, auto bin_id) {
    // Numerical split: a plain comparison of the bin index against the threshold.
    if (!(any_cat && is_cat)) {
      return bin_id <= split_cond;
    }
    // Categorical split: bypass the column matrix, the cut value is needed instead of the
    // bin index.
    auto begin = gmat.RowIdx(ridx);
    auto end = gmat.RowIdx(ridx + 1);
    auto f_begin = cut_ptrs[fid];
    auto f_end = cut_ptrs[fid + 1];
    auto gidx = BinarySearchBin(begin, end, index, f_begin, f_end);
    // -1 means the search found no bin of this feature in the row: use the default direction.
    const bool go_left =
        (gidx == -1) ? default_left : Decision(node_cats, cut_values[gidx], default_left);
    return go_left;
  };

  std::pair<size_t, size_t> child_nodes_sizes;
  if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {
    auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);
    child_nodes_sizes =
        default_left ? PartitionKernel<true, any_missing>(&column, rid_span, left, right,
                                                          gmat.base_rowid, pred)
                     : PartitionKernel<false, any_missing>(&column, rid_span, left, right,
                                                           gmat.base_rowid, pred);
  } else {
    CHECK_EQ(any_missing, true);
    auto column = column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.base_rowid);
    child_nodes_sizes =
        default_left ? PartitionKernel<true, any_missing>(&column, rid_span, left, right,
                                                          gmat.base_rowid, pred)
                     : PartitionKernel<false, any_missing>(&column, rid_span, left, right,
                                                           gmat.base_rowid, pred);
  }

  // Publish the per-block counts; CalculateRowOffsets() turns them into write offsets.
  SetNLeftElems(node_in_set, first, last, child_nodes_sizes.first);
  SetNRightElems(node_in_set, first, last, child_nodes_sizes.second);
}

/**
* \brief Partition tree nodes with specific range of row indices.
*
* \tparam Pred Predicate for whether a row should be partitioned to the left node.
*
* \param node_in_set The index of node in current batch of nodes.
* \param nid The cannonical node index (node index in the tree).
* \param range The range of input row index.
* \param fidx Feature index.
* \param p_row_set_collection Pointer to rows that are being partitioned.
* \param pred A callback function that returns whether current row should be
* partitioned to the left node, it should accept the row index as
* input and returns a boolean value.
*/
template <typename Pred>
void PartitionRange(const size_t node_in_set, const size_t nid, common::Range1d range,
bst_feature_t fidx, common::RowSetCollection* p_row_set_collection,
Pred pred) {
auto& row_set_collection = *p_row_set_collection;
const size_t* p_ridx = row_set_collection[nid].begin;
common::Span<const size_t> ridx(p_ridx + range.begin(), p_ridx + range.end());
common::Span<size_t> left = this->GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = this->GetRightBuffer(node_in_set, range.begin(), range.end());
std::pair<size_t, size_t> child_nodes_sizes = PartitionRangeKernel(ridx, left, right, pred);

const size_t n_left = child_nodes_sizes.first;
const size_t n_right = child_nodes_sizes.second;

this->SetNLeftElems(node_in_set, range.begin(), range.end(), n_left);
this->SetNRightElems(node_in_set, range.begin(), range.end(), n_right);
}

// Lazily allocates the BlockInfo buffer for task `id`; should be called for each specific
// task. A slot that already holds a block is left untouched, so buffers are reused.
// `id` must be a valid index into mem_blocks_ (it is indexed without a bounds check).
void AllocateForTask(size_t id) {
  if (!mem_blocks_[id]) {
    // `new` reports failure by throwing std::bad_alloc and never yields nullptr, so no null
    // check is needed. Plain `new BlockInfo` (rather than std::make_shared<BlockInfo>(),
    // which value-initializes) avoids zero-filling the large index buffers.
    mem_blocks_[id].reset(new BlockInfo);
  }
}

// Returns a view of `end - begin` elements over the left buffer of the task block that
// belongs to (nid, begin); `nid` is the position of the node in the current batch.
// std::vector::at throws std::out_of_range if the task index is outside mem_blocks_.
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
  auto& block = *mem_blocks_.at(GetTaskIdx(nid, begin));
  return common::Span<size_t>{block.Left(), end - begin};
}

// Returns a view of `end - begin` elements over the right buffer of the task block that
// belongs to (nid, begin); `nid` is the position of the node in the current batch.
// std::vector::at throws std::out_of_range if the task index is outside mem_blocks_.
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
  auto& block = *mem_blocks_.at(GetTaskIdx(nid, begin));
  return common::Span<size_t>{block.Right(), end - begin};
}

// Stores the number of rows that went left in the task block that belongs to (nid, begin).
// `end` is accepted for symmetry with the buffer getters but is not used.
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
  auto& block = *mem_blocks_.at(GetTaskIdx(nid, begin));
  block.n_left = n_left;
}

// Stores the number of rows that went right in the task block that belongs to (nid, begin).
// `end` is accepted for symmetry with the buffer getters but is not used.
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
  auto& block = *mem_blocks_.at(GetTaskIdx(nid, begin));
  block.n_right = n_right;
}


// Total number of rows assigned to the left child of the node at batch position `nid`,
// as stored by CalculateRowOffsets().
size_t GetNLeftElems(int nid) const {
  const auto& sizes = left_right_nodes_sizes_[nid];
  return sizes.first;
}

// Total number of rows assigned to the right child of the node at batch position `nid`,
// as stored by CalculateRowOffsets().
size_t GetNRightElems(int nid) const {
  const auto& sizes = left_right_nodes_sizes_[nid];
  return sizes.second;
}

// Each thread has partial results for some set of tree-nodes.
// The function decides the order of merging partial results into the final row set:
// for every node `i` of the batch, the blocks [blocks_offsets_[i], blocks_offsets_[i+1]) are
// visited in order; the left parts of all blocks are laid out first, back to back, and the
// right parts follow, starting right after the last left element. The per-node totals are
// stored in left_right_nodes_sizes_[i] (see GetNLeftElems / GetNRightElems).
// Every block in the visited ranges must have been allocated (AllocateForTask) and must have
// had its counts set (SetNLeftElems / SetNRightElems).
void CalculateRowOffsets() {
  // `i + 1 < size()` rather than `i < size() - 1`: the subtraction is unsigned and wraps
  // around when blocks_offsets_ is empty (Init() not called yet), which would run the loop
  // out of bounds.
  for (size_t i = 0; i + 1 < blocks_offsets_.size(); ++i) {
    const size_t first_block = blocks_offsets_[i];
    const size_t last_block = blocks_offsets_[i + 1];

    size_t n_left = 0;
    for (size_t j = first_block; j < last_block; ++j) {
      mem_blocks_[j]->n_offset_left = n_left;
      n_left += mem_blocks_[j]->n_left;
    }
    size_t n_right = 0;
    for (size_t j = first_block; j < last_block; ++j) {
      // Right parts are placed after all left parts of the node.
      mem_blocks_[j]->n_offset_right = n_left + n_right;
      n_right += mem_blocks_[j]->n_right;
    }
    left_right_nodes_sizes_[i] = {n_left, n_right};
  }
}

// Copies the partial result of the task block that belongs to (nid, begin) into
// `rows_indexes`: the left part goes to the block's n_offset_left, the right part to its
// n_offset_right (both offsets are computed by CalculateRowOffsets()).
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
  auto& block = *mem_blocks_[GetTaskIdx(nid, begin)];

  std::copy_n(block.Left(), block.n_left, rows_indexes + block.n_offset_left);
  std::copy_n(block.Right(), block.n_right, rows_indexes + block.n_offset_right);
}

// Maps (node position in the batch, offset of the first row of a block within the node) to
// the global task index: the node's first task plus the number of whole BlockSize chunks
// that precede `begin`.
size_t GetTaskIdx(int nid, size_t begin) {
  const size_t block_within_node = begin / BlockSize;
  return blocks_offsets_[nid] + block_within_node;
}

// Copy row partitions into global cache for reuse in objective.
// `*p_position` is resized to the number of rows in `row_set`; elements added by the resize
// are set to the maximum bst_node_t value, elements that already existed keep their value
// unless overwritten below. For every entry of `row_set` with a non-negative node id (which
// is CHECKed to be a leaf of `tree`), each row of the node gets the node id as its position,
// or the bitwise complement of the node id when `sampledp(row)` returns true.
template <typename Sampledp>
void LeafPartition(Context const* ctx, RegTree const& tree, RowSetCollection const& row_set,
                   std::vector<bst_node_t>* p_position, Sampledp sampledp) const {
  auto& h_pos = *p_position;
  h_pos.resize(row_set.Data()->size(), std::numeric_limits<bst_node_t>::max());

  auto p_begin = row_set.Data()->data();
  ParallelFor(row_set.Size(), ctx->Threads(), [&](size_t i) {
    auto const& node = row_set[i];
    if (node.node_id < 0) {
      return;
    }
    CHECK(tree[node.node_id].IsLeaf());
    if (!node.begin) {  // guard for empty node.
      return;
    }
    size_t ptr_offset = node.end - p_begin;
    CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
    for (auto it = node.begin; it != node.end; ++it) {
      auto const ridx = *it;
      h_pos[ridx] = sampledp(ridx) ? ~node.node_id : node.node_id;
    }
  });
}

protected:
// Scratch storage of a single task: up to BlockSize row indices for each child plus the
// bookkeeping needed to merge them back into the row set.
struct BlockInfo{
  // Number of valid entries in the left / right buffer. Zero-initialized so that a block
  // that was allocated but not partitioned yet contributes nothing instead of garbage.
  size_t n_left{0};
  size_t n_right{0};

  // Positions in the merged array at which the left / right part of this block starts
  // (filled in by CalculateRowOffsets()).
  size_t n_offset_left{0};
  size_t n_offset_right{0};

  // Pointer to the first element of the left buffer.
  size_t* Left() {
    return left_data_;
  }

  // Pointer to the first element of the right buffer.
  size_t* Right() {
    return right_data_;
  }
 private:
  // Intentionally left uninitialized: only the first n_left / n_right entries are ever read,
  // and they are written by the partition kernels beforehand.
  size_t left_data_[BlockSize];
  size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};

} // namespace common
} // namespace xgboost

#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_
2 changes: 1 addition & 1 deletion src/objective/adaptive.cc
Expand Up @@ -28,7 +28,7 @@ void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& posi
sorted_pos[i] = position[ridx[i]];
}
// find the first non-sampled row
auto begin_pos =
size_t begin_pos =
std::distance(sorted_pos.cbegin(), std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
[](bst_node_t nidx) { return nidx >= 0; }));
CHECK_LE(begin_pos, sorted_pos.size());
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_approx.cc
Expand Up @@ -387,7 +387,7 @@ class GlobalApproxUpdater : public TreeUpdater {

public:
explicit GlobalApproxUpdater(GenericParameter const *ctx, ObjInfo task)
: task_{task}, TreeUpdater(ctx) {
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

Expand Down
3 changes: 0 additions & 3 deletions src/tree/updater_quantile_hist.cc
Expand Up @@ -533,9 +533,6 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
monitor_->Stop(__func__);
}

// template struct QuantileHistMaker::Builder<float>;
// template struct QuantileHistMaker::Builder<double>;

XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](GenericParameter const *ctx, ObjInfo task) {
Expand Down

0 comments on commit efb4f50

Please sign in to comment.