Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/dmlc/xgboost into optimization_part_applysplit

Browse files Browse the repository at this point in the history
  • Loading branch information
ShvetsKS committed May 16, 2022
2 parents 3b08089 + 4fcfd9c commit 5307902
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 13 deletions.
18 changes: 17 additions & 1 deletion doc/jvm/xgboost4j_spark_tutorial.rst
Expand Up @@ -349,7 +349,23 @@ With regards to ML pipeline save and load, please refer the next section.

Interact with Other Bindings of XGBoost
---------------------------------------
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving in single machine or integrate it with other single node libraries for further processing. XGBoost4j-Spark supports export model to local by:
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving
in single machine or integrate it with other single node libraries for further processing.

After saving the model, we can load this model with single node Python XGBoost directly from ``version 2.0.0+``.

.. code-block:: scala

  val xgbClassificationModelPath = "/tmp/xgbClassificationModel"
  xgbClassificationModel.write.overwrite().save(xgbClassificationModelPath)
.. code-block:: python

  import xgboost as xgb
  bst = xgb.Booster({'nthread': 4})
  bst.load_model("/tmp/xgbClassificationModel/data/XGBoostClassificationModel")

Before ``version 2.0.0``, XGBoost4j-Spark needs to export the model to the local filesystem manually by:

.. code-block:: scala
Expand Down
2 changes: 1 addition & 1 deletion doc/python/python_intro.rst
Expand Up @@ -147,7 +147,7 @@ XGBoost can use either a list of pairs or a dictionary to set :doc:`parameters <

.. code-block:: python
evallist = [(dtest, 'eval'), (dtrain, 'train')]
evallist = [(dtrain, 'train'), (dtest, 'eval')]
Training
--------
Expand Down
24 changes: 21 additions & 3 deletions src/common/hist_util.h
Expand Up @@ -113,7 +113,7 @@ class HistogramCuts {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
bst_bin_t idx = it - values.cbegin();
auto idx = it - values.cbegin();
idx -= !!(idx == end);
return idx;
}
Expand Down Expand Up @@ -189,8 +189,8 @@ inline HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_thr
return out;
}

enum BinTypeSize : unsigned int {
kUint8BinsTypeSize = 1,
// Width in bytes of a single stored bin index in the (optionally compressed)
// gradient index; the uint8_t underlying type keeps the tag itself compact.
// Used by DispatchBinType below to pick the matching unsigned scalar type.
enum BinTypeSize : uint8_t {
kUint8BinsTypeSize = 1,   // bin indices fit in 1 byte
kUint16BinsTypeSize = 2,  // bin indices fit in 2 bytes
kUint32BinsTypeSize = 4   // bin indices need 4 bytes
};
Expand All @@ -215,6 +215,24 @@ using BinTypeSizeSequence = std::integer_sequence<uint32_t,
BinTypeSize::kUint32BinsTypeSize>;
using BoolSequence = std::integer_sequence<bool, true, false>;

/**
 * \brief Dispatch on the runtime bin width, calling `fn` with a scalar of the
 *        matching unsigned integer type.
 *
 * \param type Runtime tag selecting 1-, 2- or 4-byte bin storage.
 * \param fn   Callable invoked with a value-initialized `uint8_t`, `uint16_t`
 *             or `uint32_t`; its result is forwarded to the caller.
 * \return Whatever `fn` returns for the selected type.
 */
template <typename Fn>
auto DispatchBinType(BinTypeSize type, Fn&& fn) {
  switch (type) {
    case kUint8BinsTypeSize: {
      return fn(uint8_t{});
    }
    case kUint16BinsTypeSize: {
      return fn(uint16_t{});
    }
    case kUint32BinsTypeSize: {
      return fn(uint32_t{});
    }
  }
  // All valid enumerators are handled above, but an enum with a fixed
  // underlying type may hold other values; without this tail, control could
  // flow off the end of a non-void function (undefined behavior, and
  // -Wreturn-type warns).  Fall back to the widest bin type.
  return fn(uint32_t{});
}

/**
* \brief Optionally compressed gradient index. The compression works only with dense
* data.
Expand Down
2 changes: 1 addition & 1 deletion src/objective/adaptive.cc
Expand Up @@ -28,7 +28,7 @@ void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& posi
sorted_pos[i] = position[ridx[i]];
}
// find the first non-sampled row
auto begin_pos =
size_t begin_pos =
std::distance(sorted_pos.cbegin(), std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
[](bst_node_t nidx) { return nidx >= 0; }));
CHECK_LE(begin_pos, sorted_pos.size());
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_approx.cc
Expand Up @@ -387,7 +387,7 @@ class GlobalApproxUpdater : public TreeUpdater {

public:
explicit GlobalApproxUpdater(GenericParameter const *ctx, ObjInfo task)
: task_{task}, TreeUpdater(ctx) {
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

Expand Down
3 changes: 0 additions & 3 deletions src/tree/updater_quantile_hist.cc
Expand Up @@ -533,9 +533,6 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
monitor_->Stop(__func__);
}

// template struct QuantileHistMaker::Builder<float>;
// template struct QuantileHistMaker::Builder<double>;

XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](GenericParameter const *ctx, ObjInfo task) {
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_quantile_hist.h
Expand Up @@ -220,7 +220,7 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo task_;
// Context for number of threads
GenericParameter const* ctx_;
Context const* ctx_;

std::unique_ptr<common::Monitor> monitor_;
// common::Monitor builder_monitor_;
Expand Down
1 change: 0 additions & 1 deletion tests/cpp/tree/hist/test_evaluate_splits.cc
Expand Up @@ -140,7 +140,6 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
namespace {
auto CompareOneHotAndPartition(bool onehot) {
int static constexpr kRows = 128, kCols = 1;
using GradientSumT = double;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);

TrainParam param;
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/tree/test_quantile_hist.cc
Expand Up @@ -30,7 +30,7 @@ TEST(QuantileHist, Partitioner) {

for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat;
gmat.Init(page, {}, cuts, 64, false, 0.5, ctx.Threads());
gmat.Init(page, {}, cuts, 64, true, 0.5, ctx.Threads());
bst_feature_t const split_ind = 0;
{
auto min_value = gmat.cut.MinValues()[split_ind];
Expand Down

0 comments on commit 5307902

Please sign in to comment.