Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/dmlc/xgboost into optimization_part_applysplit

Browse files Browse the repository at this point in the history
  • Loading branch information
ShvetsKS committed May 16, 2022
2 parents 3b08089 + 4fcfd9c commit 5307902
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 13 deletions.
18 changes: 17 additions & 1 deletion doc/jvm/xgboost4j_spark_tutorial.rst
Expand Up @@ -349,7 +349,23 @@ With regards to ML pipeline save and load, please refer the next section.

Interact with Other Bindings of XGBoost
---------------------------------------
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving in single machine or integrate it with other single node libraries for further processing. XGBoost4j-Spark supports export model to local by:
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving
in single machine or integrate it with other single node libraries for further processing.

After saving the model, we can load this model with single node Python XGBoost directly from ``version 2.0.0+``.

.. code-block:: scala

  val xgbClassificationModelPath = "/tmp/xgbClassificationModel"
  xgbClassificationModel.write.overwrite().save(xgbClassificationModelPath)
.. code-block:: python

  import xgboost as xgb
  bst = xgb.Booster({'nthread': 4})
  bst.load_model("/tmp/xgbClassificationModel/data/XGBoostClassificationModel")

Before ``version 2.0.0``, XGBoost4j-Spark needs to export the model to the local filesystem manually by:

.. code-block:: scala
Expand Down
2 changes: 1 addition & 1 deletion doc/python/python_intro.rst
Expand Up @@ -147,7 +147,7 @@ XGBoost can use either a list of pairs or a dictionary to set :doc:`parameters <

.. code-block:: python
evallist = [(dtest, 'eval'), (dtrain, 'train')]
evallist = [(dtrain, 'train'), (dtest, 'eval')]
Training
--------
Expand Down
24 changes: 21 additions & 3 deletions src/common/hist_util.h
Expand Up @@ -113,7 +113,7 @@ class HistogramCuts {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
bst_bin_t idx = it - values.cbegin();
auto idx = it - values.cbegin();
idx -= !!(idx == end);
return idx;
}
Expand Down Expand Up @@ -189,8 +189,8 @@ inline HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_thr
return out;
}

enum BinTypeSize : unsigned int {
kUint8BinsTypeSize = 1,
// Width in bytes of a single stored bin index in the (optionally compressed)
// gradient index; the uint8_t underlying type keeps the tag itself compact.
// Used by DispatchBinType below to pick the matching unsigned scalar type.
enum BinTypeSize : uint8_t {
kUint8BinsTypeSize = 1,   // bin indices fit in 1 byte
kUint16BinsTypeSize = 2,  // bin indices fit in 2 bytes
kUint32BinsTypeSize = 4   // bin indices need 4 bytes
};
Expand All @@ -215,6 +215,24 @@ using BinTypeSizeSequence = std::integer_sequence<uint32_t,
BinTypeSize::kUint32BinsTypeSize>;
using BoolSequence = std::integer_sequence<bool, true, false>;

/**
 * \brief Dispatch on the runtime bin width, calling `fn` with a scalar of the
 *        matching unsigned integer type.
 *
 * \param type Runtime tag selecting 1-, 2- or 4-byte bin storage.
 * \param fn   Callable invoked with a value-initialized `uint8_t`, `uint16_t`
 *             or `uint32_t`; its result is forwarded to the caller.
 * \return Whatever `fn` returns for the selected type.
 */
template <typename Fn>
auto DispatchBinType(BinTypeSize type, Fn&& fn) {
  switch (type) {
    case kUint8BinsTypeSize: {
      return fn(uint8_t{});
    }
    case kUint16BinsTypeSize: {
      return fn(uint16_t{});
    }
    case kUint32BinsTypeSize: {
      return fn(uint32_t{});
    }
  }
  // All valid enumerators are handled above, but an enum with a fixed
  // underlying type may hold other values; without this tail, control could
  // flow off the end of a non-void function (undefined behavior, and
  // -Wreturn-type warns).  Fall back to the widest bin type.
  return fn(uint32_t{});
}

/**
* \brief Optionally compressed gradient index. The compression works only with dense
* data.
Expand Down
2 changes: 1 addition & 1 deletion src/objective/adaptive.cc
Expand Up @@ -28,7 +28,7 @@ void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& posi
sorted_pos[i] = position[ridx[i]];
}
// find the first non-sampled row
auto begin_pos =
size_t begin_pos =
std::distance(sorted_pos.cbegin(), std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
[](bst_node_t nidx) { return nidx >= 0; }));
CHECK_LE(begin_pos, sorted_pos.size());
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_approx.cc
Expand Up @@ -387,7 +387,7 @@ class GlobalApproxUpdater : public TreeUpdater {

public:
explicit GlobalApproxUpdater(GenericParameter const *ctx, ObjInfo task)
: task_{task}, TreeUpdater(ctx) {
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

Expand Down
3 changes: 0 additions & 3 deletions src/tree/updater_quantile_hist.cc
Expand Up @@ -533,9 +533,6 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
monitor_->Stop(__func__);
}

// template struct QuantileHistMaker::Builder<float>;
// template struct QuantileHistMaker::Builder<double>;

XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](GenericParameter const *ctx, ObjInfo task) {
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_quantile_hist.h
Expand Up @@ -220,7 +220,7 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo task_;
// Context for number of threads
GenericParameter const* ctx_;
Context const* ctx_;

std::unique_ptr<common::Monitor> monitor_;
// common::Monitor builder_monitor_;
Expand Down
1 change: 0 additions & 1 deletion tests/cpp/tree/hist/test_evaluate_splits.cc
Expand Up @@ -140,7 +140,6 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
namespace {
auto CompareOneHotAndPartition(bool onehot) {
int static constexpr kRows = 128, kCols = 1;
using GradientSumT = double;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);

TrainParam param;
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/tree/test_quantile_hist.cc
Expand Up @@ -30,7 +30,7 @@ TEST(QuantileHist, Partitioner) {

for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat;
gmat.Init(page, {}, cuts, 64, false, 0.5, ctx.Threads());
gmat.Init(page, {}, cuts, 64, true, 0.5, ctx.Threads());
bst_feature_t const split_ind = 0;
{
auto min_value = gmat.cut.MinValues()[split_ind];
Expand Down

0 comments on commit 5307902

Please sign in to comment.