From 6671efb14ece86219784ec86426cb9cc3da5055a Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Fri, 22 Jul 2022 11:06:22 +0800
Subject: [PATCH 01/34] Calculate `base_score` based on input labels.

---
 include/xgboost/learner.h             |  5 +-
 include/xgboost/linalg.h              |  1 +
 include/xgboost/objective.h           |  8 +++
 src/common/common.h                   |  1 +
 src/common/linalg_op.h                |  1 +
 src/common/stats.cu                   | 44 +++++++++++++++++
 src/common/stats.h                    | 32 ++++++++++++
 src/data/array_interface.h            |  4 +-
 src/learner.cc                        | 33 ++++++++-----
 src/objective/adaptive.h              |  1 +
 src/objective/objective.cc            |  5 +-
 src/objective/regression_obj.cu       | 12 +++++
 tests/python-gpu/test_gpu_updaters.py |  5 ++
 tests/python/test_updaters.py         | 71 ++++++++++++++++++++++++++-
 14 files changed, 204 insertions(+), 19 deletions(-)
 create mode 100644 src/common/stats.cu
diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index 51fefac1365f..3125c431a19e 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -17,6 +17,7 @@
 #include <xgboost/predictor.h>
 #include <xgboost/task.h>
 
+#include <limits>  // std::numeric_limit
 #include <map>
 #include <memory>
 #include <string>
@@ -299,7 +300,7 @@ struct LearnerModelParamLegacy;
  */
 struct LearnerModelParam {
   /* \brief global bias */
-  bst_float base_score { 0.5f };
+  bst_float base_score { std::numeric_limits<float>::quiet_NaN() };
   /* \brief number of features  */
   uint32_t num_feature { 0 };
   /* \brief number of classes, if it is multi-class classification  */
@@ -312,7 +313,7 @@ struct LearnerModelParam {
   // this one as an immutable copy.
   LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, ObjInfo t);
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
-  bool Initialized() const { return num_feature != 0; }
+  bool Initialized() const { return num_feature != 0 && !std::isnan(base_score); }
 };
 
 }  // namespace xgboost
diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index 944903ac83e5..4fec61bb5b6d 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <limits>
 #include <string>
+#include <tuple>
 #include <type_traits>
 #include <utility>
 #include <vector>
diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
index d30f81379f5a..8d68fcc74461 100644
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -75,6 +75,14 @@ class ObjFunction : public Configurable {
   virtual bst_float ProbToMargin(bst_float base_score) const {
     return base_score;
   }
+  /**
+   * \brief Make initialize estimation of prediction.
+   *
+   * \param info MetaInfo that contains label.
+   *
+   * \return NaN if there's no initial estimation.
+   */
+  virtual float InitEstimation(MetaInfo const& info) const;
   /*!
    * \brief Return task of this objective.
    */
diff --git a/src/common/common.h b/src/common/common.h
index 0f21739876b2..8c260225b5d8 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -265,6 +265,7 @@ struct OptionalWeights {
   explicit OptionalWeights(float w) : dft{w} {}
 
   XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
+  auto Empty() const { return weights.empty(); }
 };
 
 /**
diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h
index 05f050772ccc..4aedfbc29561 100644
--- a/src/common/linalg_op.h
+++ b/src/common/linalg_op.h
@@ -59,6 +59,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
   ElementWiseKernelHost(t, ctx->Threads(), fn);
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
+
 }  // namespace linalg
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_LINALG_OP_H_
diff --git a/src/common/stats.cu b/src/common/stats.cu
new file mode 100644
index 000000000000..956c812c9346
--- /dev/null
+++ b/src/common/stats.cu
@@ -0,0 +1,44 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+
+#include "common.h"
+#include "stats.cuh"
+#include "xgboost/generic_parameters.h"
+#include "xgboost/host_device_vector.h"
+#include "xgboost/linalg.h"
+
+namespace xgboost {
+namespace common {
+namespace cuda {
+float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
+             common::OptionalWeights weights) {
+  HostDeviceVector<size_t> segments{0, t.Size()};
+  segments.SetDevice(ctx->gpu_id);
+  auto d_segments = segments.ConstDeviceSpan();
+  auto val_it = dh::MakeTransformIterator<float>(
+      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
+        return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
+      });
+
+  HostDeviceVector<float> quantile{0};
+  quantile.SetDevice(ctx->gpu_id);
+  if (weights.Empty()) {
+    common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,
+                              val_it + t.Size(), &quantile);
+  } else {
+    CHECK_NE(t.Shape(1), 0);
+    auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
+                                                 [=] XGBOOST_DEVICE(size_t i) {
+                                                   auto sample_idx = i / t.Shape(1);
+                                                   return weights[sample_idx];
+                                                 });
+    common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),
+                                      val_it, val_it + t.Size(), w_it, w_it + t.Size(), &quantile);
+  }
+  CHECK_EQ(quantile.Size(), 1);
+  return quantile.HostVector().front();
+}
+}  // namespace cuda
+}  // namespace common
+}  // namespace xgboost
diff --git a/src/common/stats.h b/src/common/stats.h
index 4ad9e4aa770a..8165cb75ae1a 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "common.h"
+#include "xgboost/generic_parameters.h"
 #include "xgboost/linalg.h"
 
 namespace xgboost {
@@ -90,6 +91,37 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
   idx = std::min(idx, static_cast<size_t>(n - 1));
   return val(idx);
 }
+
+namespace cuda {
+float Median(Context const* ctx, linalg::TensorView<float const, 2> t, common::OptionalWeights weights);
+#if !defined(XGBOOST_USE_CUDA)
+inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
+  common::AssertGPUSupport();
+  return 0;
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
+}
+
+inline float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
+                    common::OptionalWeights weights) {
+  if (!ctx->IsCPU()) {
+    return cuda::Median(ctx, t, weights);
+  }
+  auto iter = common::MakeIndexTransformIter(
+      [&](size_t i) { return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); });
+  float q{0};
+  if (weights.weights.empty()) {
+    q = common::Quantile(0.5, iter, iter + t.Size());
+  } else {
+    CHECK_NE(t.Shape(1), 0);
+    auto w_it = common::MakeIndexTransformIter([&](size_t i) {
+      auto sample_idx = i / t.Shape(1);
+      return weights[sample_idx];
+    });
+    q = common::WeightedQuantile(0.5, iter, iter + t.Size(), w_it);
+  }
+  return q;
+}
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_STATS_H_
diff --git a/src/data/array_interface.h b/src/data/array_interface.h
index c646654bef3f..e90473458e1c 100644
--- a/src/data/array_interface.h
+++ b/src/data/array_interface.h
@@ -345,8 +345,8 @@ struct ToDType<int64_t> {
 };
 
 #if !defined(XGBOOST_USE_CUDA)
-inline void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { common::AssertGPUSupport(); }
-inline bool ArrayInterfaceHandler::IsCudaPtr(void const *ptr) { return false; }
+inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
+inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
 /**
diff --git a/src/learner.cc b/src/learner.cc
index 0dbf3631a499..c02fe16fea81 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -90,7 +90,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
-    base_score = 0.5f;
+    base_score = std::numeric_limits<float>::quiet_NaN();
     num_target = 1;
     major_version = std::get<0>(Version::Self());
     minor_version = std::get<1>(Version::Self());
@@ -156,7 +156,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
-        .set_default(0.5f)
+        .set_default(std::numeric_limits<float>::quiet_NaN())
         .describe("Global bias of the model.");
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
@@ -338,8 +338,6 @@ class LearnerConfiguration : public Learner {
     Args args = {cfg_.cbegin(), cfg_.cend()};
 
     tparam_.UpdateAllowUnknown(args);
-    auto mparam_backup = mparam_;
-
     mparam_.UpdateAllowUnknown(args);
 
     auto initialized = generic_parameters_.GetInitialised();
@@ -364,7 +362,7 @@ class LearnerConfiguration : public Learner {
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);
 
-    auto task = this->ConfigureTargets();
+    this->ConfigureTargets();
 
     // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
     // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
@@ -378,9 +376,8 @@ class LearnerConfiguration : public Learner {
     // - model loaded from new binary or JSON.
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
-    if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) {
-      learner_model_param_ =
-          LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task);
+    if (!learner_model_param_.Initialized()) {
+      learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score, obj_->Task());
     }
 
     this->ConfigureGBM(old_tparam, args);
@@ -396,6 +393,17 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }
 
+  void ConfigureBaseScore(DMatrix const* p_fmat) {
+    CHECK(obj_);
+    if (std::isnan(mparam_.base_score)) {
+      mparam_.base_score = obj_->InitEstimation(p_fmat->Info());
+      CHECK(!learner_model_param_.Initialized());
+    }
+    auto task = obj_->Task();
+    learner_model_param_ = LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task);
+    CHECK(learner_model_param_.Initialized());
+  }
+
   virtual PredictionContainer* GetPredictionCache() const {
     return &((*ThreadLocalPredictionCache::Get())[this]);
   }
@@ -703,7 +711,7 @@ class LearnerConfiguration : public Learner {
   /**
    * Get number of targets from objective function.
    */
-  ObjInfo ConfigureTargets() {
+  void ConfigureTargets() {
     CHECK(this->obj_);
     auto const& cache = this->GetPredictionCache()->Container();
     size_t n_targets = 1;
@@ -722,7 +730,6 @@ class LearnerConfiguration : public Learner {
     } else {
       mparam_.num_target = n_targets;
     }
-    return this->obj_->Task();
   }
 };
 
@@ -1161,6 +1168,7 @@ class LearnerImpl : public LearnerIO {
     }
 
     this->CheckDataSplitMode();
+    this->ConfigureBaseScore(train.get());
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
@@ -1189,7 +1197,9 @@ class LearnerImpl : public LearnerIO {
     }
 
     this->CheckDataSplitMode();
+    this->ConfigureBaseScore(train.get());
     this->ValidateDMatrix(train.get(), true);
+
     auto local_cache = this->GetPredictionCache();
     local_cache->Cache(train, generic_parameters_.gpu_id);
 
@@ -1324,8 +1334,7 @@ class LearnerImpl : public LearnerIO {
     info.Validate(generic_parameters_.gpu_id);
 
     auto const row_based_split = [this]() {
-      return tparam_.dsplit == DataSplitMode::kRow ||
-             tparam_.dsplit == DataSplitMode::kAuto;
+      return tparam_.dsplit == DataSplitMode::kRow || tparam_.dsplit == DataSplitMode::kAuto;
     };
     if (row_based_split()) {
       if (is_training) {
diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h
index 85c041347cb9..00d27a57afef 100644
--- a/src/objective/adaptive.h
+++ b/src/objective/adaptive.h
@@ -7,6 +7,7 @@
 #include <limits>
 #include <vector>
 
+#include "../common/common.h"
 #include "rabit/rabit.h"
 #include "xgboost/generic_parameters.h"
 #include "xgboost/host_device_vector.h"
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index 5991e918d315..b27405149c6c 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -1,10 +1,10 @@
 /*!
- * Copyright 2015 by Contributors
+ * Copyright 2015-2022 by Contributors
  * \file objective.cc
  * \brief Registry of all objective functions.
  */
-#include <xgboost/objective.h>
 #include <dmlc/registry.h>
+#include <xgboost/objective.h>
 
 #include <sstream>
 
@@ -31,6 +31,7 @@ ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const
   return pobj;
 }
 
+float ObjFunction::InitEstimation(MetaInfo const&) const { return 0.5; }
 }  // namespace xgboost
 
 namespace xgboost {
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index ecd906f699a4..4a33366683f1 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -16,6 +16,7 @@
 #include "../common/common.h"
 #include "../common/linalg_op.h"
 #include "../common/pseudo_huber.h"
+#include "../common/stats.h"
 #include "../common/threading_utils.h"
 #include "../common/transform.h"
 #include "./regression_loss.h"
@@ -698,6 +699,17 @@ class MeanAbsoluteError : public ObjFunction {
     });
   }
 
+  float InitEstimation(MetaInfo const& info) const override {
+    if (ctx_->IsCPU()) {
+      return common::Median(ctx_, info.labels.HostView(),
+                            common::OptionalWeights{info.weights_.ConstHostSpan()});
+    } else {
+      info.weights_.SetDevice(ctx_->gpu_id);
+      return common::Median(ctx_, info.labels.View(ctx_->gpu_id),
+                            common::OptionalWeights{info.weights_.DeviceSpan()});
+    }
+  }
+
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                       HostDeviceVector<float> const& prediction, RegTree* p_tree) const override {
     if (ctx_->IsCPU()) {
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index d0e7c5bc883d..7f29a92e6101 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -208,3 +208,8 @@ def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
         param = dataset.set_params(param)
         result = train_result(param, dataset.get_dmat(), 10)
         assert tm.non_increasing(result['train'][dataset.metric])
+
+    @pytest.mark.skipif(**tm.no_sklearn())
+    @pytest.mark.parametrize("weighted", [True, False])
+    def test_adaptive(self, weighted) -> None:
+        self.cputest.run_adaptive("gpu_hist", weighted)
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index 3e43b98ff113..e28f173860e7 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,4 +1,4 @@
-from random import choice
+import json
 from string import ascii_lowercase
 from typing import Dict, Any
 import testing as tm
@@ -397,3 +397,72 @@ def test_categorical_ames_housing(
     def test_categorical_missing(self, rows, cols, cats):
         self.run_categorical_missing(rows, cols, cats, "approx")
         self.run_categorical_missing(rows, cols, cats, "hist")
+
+    def run_adaptive(self, tree_method, weighted) -> None:
+        rng = np.random.RandomState(1994)
+        from sklearn.datasets import make_regression
+        from sklearn.utils import stats
+
+        n_samples = 256
+        X, y = make_regression(n_samples, 16, random_state=rng)
+        if weighted:
+            w = rng.normal(size=n_samples)
+            w -= w.min()
+            Xy = xgb.DMatrix(X, y, weight=w)
+            base_score = stats._weighted_percentile(y, w, percentile=50)
+        else:
+            Xy = xgb.DMatrix(X, y)
+            base_score = np.median(y)
+
+        booster_0 = xgb.train(
+            {
+                "tree_method": tree_method,
+                "base_score": base_score,
+                "objective": "reg:absoluteerror",
+            },
+            Xy,
+            num_boost_round=1,
+        )
+        booster_1 = xgb.train(
+            {"tree_method": tree_method, "objective": "reg:absoluteerror"},
+            Xy,
+            num_boost_round=1,
+        )
+        config_0 = json.loads(booster_0.save_config())
+        config_1 = json.loads(booster_1.save_config())
+
+        def get_score(config: Dict) -> float:
+            return float(config["learner"]["learner_model_param"]["base_score"])
+
+        assert get_score(config_0) == get_score(config_1)
+
+        raw_booster = booster_1.save_raw(raw_format="deprecated")
+        booster_2 = xgb.Booster(model_file=raw_booster)
+        config_2 = json.loads(booster_2.save_config())
+        assert get_score(config_1) == get_score(config_2)
+
+        raw_booster = booster_1.save_raw(raw_format="ubj")
+        booster_2 = xgb.Booster(model_file=raw_booster)
+        config_2 = json.loads(booster_2.save_config())
+        assert get_score(config_1) == get_score(config_2)
+
+        booster_0 = xgb.train(
+            {
+                "tree_method": tree_method,
+                "base_score": base_score + 1.0,
+                "objective": "reg:absoluteerror",
+            },
+            Xy,
+            num_boost_round=1,
+        )
+        config_0 = json.loads(booster_0.save_config())
+        np.testing.assert_allclose(get_score(config_0), get_score(config_1) + 1)
+
+    @pytest.mark.skipif(**tm.no_sklearn())
+    @pytest.mark.parametrize(
+        "tree_method,weighted", [
+            ("approx", False), ("hist", False), ("approx", True), ("hist", True)
+        ]
+    )
+    def test_adaptive(self, tree_method, weighted) -> None:
+        self.run_adaptive(tree_method, weighted)

From d041498bd6b69192d3d17ee8b4d810d5a2416b38 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 27 Jul 2022 13:55:48 +0800
Subject: [PATCH 02/34] Custom objective: configure `base_score` during prediction.

---
 include/xgboost/objective.h     |  3 ++-
 src/learner.cc                  | 13 ++++++++++++-
 src/objective/objective.cc      |  2 +-
 src/objective/regression_obj.cu |  9 +++++++--
 src/predictor/predictor.cc      |  4 +++-
 5 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
index 8d68fcc74461..5c75fad16822 100644
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -27,7 +27,8 @@ class RegTree;
 /*! \brief interface of objective function */
 class ObjFunction : public Configurable {
  protected:
-  GenericParameter const* ctx_;
+  Context const* ctx_;
+  static constexpr float DefaultBaseScore() { return 0.5; };
 
  public:
   /*! \brief virtual destructor */
diff --git a/src/learner.cc b/src/learner.cc
index c02fe16fea81..e1522bc297a7 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -90,6 +90,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
+    // use nan to flag this is uninitialized.
     base_score = std::numeric_limits<float>::quiet_NaN();
     num_target = 1;
     major_version = std::get<0>(Version::Self());
@@ -393,6 +394,9 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }
 
+  /**
+   * \brief Calculate the `base_score` based on input data.
+   */
   void ConfigureBaseScore(DMatrix const* p_fmat) {
     CHECK(obj_);
     if (std::isnan(mparam_.base_score)) {
@@ -1197,7 +1201,7 @@ class LearnerImpl : public LearnerIO {
     }
 
     this->CheckDataSplitMode();
-    this->ConfigureBaseScore(train.get());
+    CHECK(!std::isnan(learner_model_param_.base_score));
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
@@ -1251,6 +1255,13 @@ class LearnerImpl : public LearnerIO {
                                static_cast<int>(pred_interactions) +
                                static_cast<int>(pred_contribs);
     this->Configure();
+    // This is only needed when custom objective is used.
+    if (gpair_.Empty()) {
+      this->ConfigureBaseScore(data.get());
+    } else {
+      CHECK(!std::isnan(learner_model_param_.base_score));
+    }
+
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
       gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs);
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index b27405149c6c..84c28197c9ed 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -31,7 +31,7 @@ ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const
   return pobj;
 }
 
-float ObjFunction::InitEstimation(MetaInfo const&) const { return 0.5; }
+float ObjFunction::InitEstimation(MetaInfo const&) const { return DefaultBaseScore(); }
 }  // namespace xgboost
 
 namespace xgboost {
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 4a33366683f1..89d189778430 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -38,14 +38,18 @@
 namespace xgboost {
 namespace obj {
 namespace {
-void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& preds) {
+void CheckInitInputs(MetaInfo const& info) {
   CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels.";
-  CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
   if (!info.weights_.Empty()) {
     CHECK_EQ(info.weights_.Size(), info.num_row_)
         << "Number of weights should be equal to number of data points.";
   }
 }
+
+void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& preds) {
+  CheckInitInputs(info);
+  CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
+}
 }  // anonymous namespace
 
 #if defined(XGBOOST_USE_CUDA)
@@ -700,6 +704,7 @@ class MeanAbsoluteError : public ObjFunction {
   }
 
   float InitEstimation(MetaInfo const& info) const override {
+    CheckInitInputs(info);
     if (ctx_->IsCPU()) {
       return common::Median(ctx_, info.labels.HostView(),
                             common::OptionalWeights{info.weights_.ConstHostSpan()});
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index 10d006a832d0..0cf57db94c85 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -87,7 +87,9 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
   } else {
     out_preds->Resize(n);
     // cannot rely on the Resize to fill as it might skip if the size is already correct.
-    out_preds->Fill(model.learner_model_param->base_score);
+    auto base_score = model.learner_model_param->base_score;
+    CHECK(!std::isnan(base_score));
+    out_preds->Fill(base_score);
   }
 }
 }  // namespace xgboost

From 28739ccc6fd84e0e290bd0b9e73afa50049221c9 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 27 Jul 2022 16:49:01 +0800
Subject: [PATCH 03/34] Fixes: store `base_score` as `HostDeviceVector<float>`.

---
 include/xgboost/learner.h                 |  18 +++-
 include/xgboost/objective.h               |   9 +-
 include/xgboost/predictor.h               |   7 +-
 src/gbm/gblinear.cc                       |  16 ++--
 src/gbm/gbtree.cc                         |  14 +--
 src/gbm/gbtree.cu                         |  12 +--
 src/learner.cc                            | 103 +++++++++++++++++-----
 src/objective/objective.cc                |   6 +-
 src/objective/regression_obj.cu           |  12 +--
 src/predictor/cpu_predictor.cc            |  11 ++-
 src/predictor/gpu_predictor.cu            |  39 ++++----
 src/predictor/predictor.cc                |  11 +--
 tests/cpp/gbm/test_gbtree.cc              |   8 +-
 tests/cpp/linear/test_linear.cc           |  10 +--
 tests/cpp/linear/test_linear.cu           |  12 +--
 tests/cpp/predictor/test_cpu_predictor.cc |  21 ++---
 tests/cpp/predictor/test_gpu_predictor.cu |  51 +++--------
 tests/cpp/predictor/test_predictor.cc     |  16 ++--
 tests/cpp/predictor/test_predictor.h      |   6 +-
 tests/cpp/test_learner.cc                 |   3 +-
 20 files changed, 208 insertions(+), 177 deletions(-)

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index 3125c431a19e..16976a98aa76 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -300,7 +300,7 @@ struct LearnerModelParamLegacy;
  */
 struct LearnerModelParam {
   /* \brief global bias */
-  bst_float base_score { std::numeric_limits<float>::quiet_NaN() };
+  HostDeviceVector<float> base_score;
   /* \brief number of features  */
   uint32_t num_feature { 0 };
   /* \brief number of classes, if it is multi-class classification  */
@@ -311,9 +311,21 @@ struct LearnerModelParam {
   LearnerModelParam() = default;
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
-  LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, ObjInfo t);
+  LearnerModelParam(LearnerModelParamLegacy const& user_param, HostDeviceVector<float> base_margin,
+                    ObjInfo t);
+  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
+  LearnerModelParam(bst_feature_t n_features, HostDeviceVector<float> base_margin,
+                    uint32_t n_groups)
+      : base_score{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+
+  void Copy(LearnerModelParam const& that) {
+    base_score.Resize(that.base_score.Size());
+    base_score.Copy(that.base_score);
+    num_feature = that.num_feature;
+    num_output_group = that.num_output_group, task = that.task;
+  }
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
-  bool Initialized() const { return num_feature != 0 && !std::isnan(base_score); }
+  bool Initialized() const { return num_feature != 0; }
 };
 
 }  // namespace xgboost
diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
index 5c75fad16822..b31e3e233140 100644
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -28,7 +28,9 @@ class RegTree;
 class ObjFunction : public Configurable {
  protected:
   Context const* ctx_;
-  static constexpr float DefaultBaseScore() { return 0.5; };
+
+ public:
+  static constexpr float DefaultBaseScore() { return 0.5f; };
 
  public:
   /*! \brief virtual destructor */
@@ -80,10 +82,9 @@ class ObjFunction : public Configurable {
    * \brief Make initialize estimation of prediction.
    *
    * \param info MetaInfo that contains label.
-   *
-   * \return NaN if there's no initial estimation.
+   * \param base_score Output estimation.
    */
-  virtual float InitEstimation(MetaInfo const& info) const;
+  virtual void InitEstimation(MetaInfo const& info, HostDeviceVector<float>* base_score) const;
   /*!
    * \brief Return task of this objective.
    */
diff --git a/include/xgboost/predictor.h b/include/xgboost/predictor.h
index 33c695bc19bf..877ff462bf24 100644
--- a/include/xgboost/predictor.h
+++ b/include/xgboost/predictor.h
@@ -102,13 +102,10 @@ class PredictionContainer {
  */
 class Predictor {
  protected:
-  /*
-   * \brief Runtime parameters.
-   */
-  GenericParameter const* ctx_;
+  Context const* ctx_;
 
  public:
-  explicit Predictor(GenericParameter const* ctx) : ctx_{ctx} {}
+  explicit Predictor(Context const* ctx) : ctx_{ctx} {}
 
   virtual ~Predictor() = default;
 
diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc
index 35de4c70d604..5b78848a993a 100644
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -161,9 +161,10 @@ class GBLinear : public GradientBooster {
                        uint32_t layer_begin, uint32_t) override {
     LinearCheckLayer(layer_begin);
     const int ngroup = model_.learner_model_param->num_output_group;
+    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
+    auto base_score = learner_model_param_->base_score.HostVector().front();
     for (int gid = 0; gid < ngroup; ++gid) {
-      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
-                 learner_model_param_->base_score);
+      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score);
     }
   }
 
@@ -184,6 +185,8 @@ class GBLinear : public GradientBooster {
     contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
     // make sure contributions is zeroed, we could be reusing a previously allocated one
     std::fill(contribs.begin(), contribs.end(), 0);
+    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
+    auto base_score = learner_model_param_->base_score.HostVector().front();
     // start collecting the contributions
     for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
       // parallel over local batch
@@ -202,8 +205,8 @@ class GBLinear : public GradientBooster {
           }
           // add base margin to BIAS
           p_contribs[ncolumns - 1] =
-              model_.Bias()[gid] + ((base_margin.Size() != 0) ? base_margin(row_idx, gid)
-                                                              : learner_model_param_->base_score);
+              model_.Bias()[gid] +
+              ((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score);
         }
       });
     }
@@ -272,6 +275,8 @@ class GBLinear : public GradientBooster {
     // start collecting the prediction
     const int ngroup = model_.learner_model_param->num_output_group;
     preds.resize(p_fmat->Info().num_row_ * ngroup);
+    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
+    auto base_score = learner_model_param_->base_score.HostVector().front();
     for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
       auto const& batch = page.GetView();
       // output convention: nrow * k, where nrow is number of rows
@@ -285,8 +290,7 @@ class GBLinear : public GradientBooster {
         const size_t ridx = page.base_rowid + i;
         // loop over output groups
         for (int gid = 0; gid < ngroup; ++gid) {
-          float margin =
-              (base_margin.Size() != 0) ? base_margin(ridx, gid) : learner_model_param_->base_score;
+          float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score;
           this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
         }
       });
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 9d1d5404409e..4e9235921a33 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -638,13 +638,12 @@ void GPUDartPredictInc(common::Span<float> out_predts,
 }
 #endif
 
-void GPUDartInplacePredictInc(common::Span<float> out_predts,
-                              common::Span<float> predts, float tree_w,
-                              size_t n_rows, float base_score,
-                              bst_group_t n_groups,
+void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
+                              float tree_w, size_t n_rows,
+                              HostDeviceVector<float> const& base_score, bst_group_t n_groups,
                               bst_group_t group)
 #if defined(XGBOOST_USE_CUDA)
-;  // NOLINT
+    ;  // NOLINT
 #else
 {
   common::AssertGPUSupport();
@@ -850,15 +849,18 @@ class Dart : public GBTree {
       size_t n_rows = p_fmat->Info().num_row_;
       if (predts.predictions.DeviceIdx() != Context::kCpuId) {
         p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
+        model_.learner_model_param->base_score.SetDevice(predts.predictions.DeviceIdx());
         GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
                                  predts.predictions.DeviceSpan(), w, n_rows,
                                  model_.learner_model_param->base_score, n_groups, group);
       } else {
+        CHECK_EQ(model_.learner_model_param->base_score.Size(), 1);
+        auto base_score = model_.learner_model_param->base_score.HostVector().front();
         auto& h_predts = predts.predictions.HostVector();
         auto& h_out_predts = p_out_preds->predictions.HostVector();
         common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
           const size_t offset = ridx * n_groups + group;
-          h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
+          h_out_predts[offset] += (h_predts[offset] - base_score) * w;
         });
       }
     }
diff --git a/src/gbm/gbtree.cu b/src/gbm/gbtree.cu
index 0b81fff23e5c..51beff49fbd4 100644
--- a/src/gbm/gbtree.cu
+++ b/src/gbm/gbtree.cu
@@ -31,13 +31,15 @@ void GPUDartPredictInc(common::Span<float> out_predts,
   });
 }
 
-void GPUDartInplacePredictInc(common::Span<float> out_predts,
-                              common::Span<float> predts, float tree_w,
-                              size_t n_rows, float base_score,
-                              bst_group_t n_groups, bst_group_t group) {
+void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
+                              float tree_w, size_t n_rows,
+                              HostDeviceVector<float> const &base_score, bst_group_t n_groups,
+                              bst_group_t group) {
+  auto const* d_score = base_score.ConstDevicePointer();
+  CHECK_EQ(base_score.Size(), 1);
   dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
     const size_t offset = ridx * n_groups + group;
-    out_predts[offset] += (predts[offset] - base_score) * tree_w;
+    out_predts[offset] += (predts[offset] - *d_score) * tree_w;
   });
 }
 }  // namespace gbm
diff --git a/src/learner.cc b/src/learner.cc
index e1522bc297a7..3661c6585111 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -174,9 +174,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   }
 };
 
-LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin,
-                                     ObjInfo t)
-    : base_score{base_margin}, num_feature{user_param.num_feature}, task{t} {
+LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
+    : num_feature{user_param.num_feature}, task{t} {
   auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
   auto n_targets = user_param.num_target;
   num_output_group = std::max(n_classes, n_targets);
@@ -186,6 +185,12 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
       << ", n_targets:" << n_targets;
 }
 
+LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
+                                     HostDeviceVector<float> base_margin, ObjInfo t)
+    : LearnerModelParam{user_param, t} {
+  std::swap(base_score, base_margin);
+}
+
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   // data split mode, can be row, col, or none.
   DataSplitMode dsplit {DataSplitMode::kAuto};
@@ -377,9 +382,6 @@ class LearnerConfiguration : public Learner {
     // - model loaded from new binary or JSON.
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
-    if (!learner_model_param_.Initialized()) {
-      learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score, obj_->Task());
-    }
 
     this->ConfigureGBM(old_tparam, args);
     generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
@@ -397,15 +399,39 @@ class LearnerConfiguration : public Learner {
   /**
    * \brief Calculate the `base_score` based on input data.
    */
-  void ConfigureBaseScore(DMatrix const* p_fmat) {
+  void ConfigureLearnerParam(DMatrix const* p_fmat) {
     CHECK(obj_);
-    if (std::isnan(mparam_.base_score)) {
-      mparam_.base_score = obj_->InitEstimation(p_fmat->Info());
-      CHECK(!learner_model_param_.Initialized());
+    if (!learner_model_param_.base_score.Empty()) {
+      auto task = obj_->Task();
+      learner_model_param_ =
+          LearnerModelParam(mparam_, std::move(learner_model_param_.base_score), task);
+      CHECK(learner_model_param_.Initialized());
+      CHECK(!learner_model_param_.base_score.Empty());
+      return;
+    }
+
+    HostDeviceVector<float> base_score;
+    if (!std::isnan(mparam_.base_score)) {
+      // if base_score is set by user, use it.
+      base_score.Resize(1);
+      base_score.Fill(obj_->ProbToMargin(mparam_.base_score));
+    } else if (p_fmat) {
+      // otherwise, we estimate it from input data.
+      obj_->InitEstimation(p_fmat->Info(), &base_score);
+      auto& h_base_score = base_score.HostVector();
+      rabit::Allreduce<rabit::op::Sum>(h_base_score.data(), h_base_score.size());
+      float world = rabit::GetWorldSize();
+      std::transform(h_base_score.begin(), h_base_score.end(), h_base_score.begin(),
+                     [&](float v) { return obj_->ProbToMargin(v / world); });
+    } else {
+      // lastly, if data is not available (prediction for custom objective), use default.
+      base_score.Resize(1);
+      base_score.Fill(obj_->ProbToMargin(ObjFunction::DefaultBaseScore()));
     }
     auto task = obj_->Task();
-    learner_model_param_ = LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task);
+    learner_model_param_ = LearnerModelParam(mparam_, std::move(base_score), task);
     CHECK(learner_model_param_.Initialized());
+    CHECK(!learner_model_param_.base_score.Empty());
   }
 
   virtual PredictionContainer* GetPredictionCache() const {
@@ -796,7 +822,19 @@ class LearnerIO : public LearnerConfiguration {
       std::transform(feature_types.cbegin(), feature_types.cend(), feature_types_.begin(),
                      [](Json const& fn) { return get<String const>(fn); });
     }
-
+    it = learner.find("base_score");
+    if (it != learner.cend()) {
+      if (IsA<F32Array>(it->second)) {
+        auto& base_score = get<F32Array const>(it->second);
+        learner_model_param_.base_score.HostVector() = base_score;
+      } else {
+        auto& base_score = get<Array const>(it->second);
+        auto& h_result = learner_model_param_.base_score.HostVector();
+        for (auto v : base_score) {
+          h_result.push_back(get<Number const>(v));
+        }
+      }
+    }
     this->need_configuration_ = true;
   }
 
@@ -823,6 +861,10 @@ class LearnerIO : public LearnerConfiguration {
       learner["attributes"][kv.first] = String(kv.second);
     }
 
+    learner["base_score"] = F32Array();
+    auto& base_score = get<F32Array>(learner["base_score"]);
+    base_score = learner_model_param_.base_score.ConstHostVector();
+
     learner["feature_names"] = Array();
     auto& feature_names = get<Array>(learner["feature_names"]);
     for (auto const& name : feature_names_) {
@@ -936,7 +978,7 @@ class LearnerIO : public LearnerConfiguration {
     }
 
     learner_model_param_ =
-        LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), obj_->Task());
+        LearnerModelParam(mparam_, {obj_->ProbToMargin(mparam_.base_score)}, obj_->Task());
     if (attributes_.find("objective") != attributes_.cend()) {
       auto obj_str = attributes_.at("objective");
       auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
@@ -953,6 +995,17 @@ class LearnerIO : public LearnerConfiguration {
         this->SetParam(kEvalMetric, n);
       }
     }
+    auto it = attributes_.find("base_score");
+    if (it != attributes_.cend()) {
+      auto const& base_score_str = it->second;
+      auto loaded = Json::Load(StringView{base_score_str});
+      auto const& base_score = get<Array const>(loaded);
+      auto& h_result = learner_model_param_.base_score.HostVector();
+      h_result.clear();
+      for (auto const& v : base_score) {
+        h_result.push_back(get<Number const>(v));
+      }
+    }
 
     if (warn_old_model) {
       LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it "
@@ -1011,6 +1064,16 @@ class LearnerIO : public LearnerConfiguration {
       }
       extra_attr.emplace_back("metrics", os.str());
     }
+
+    {
+      // serialize base score
+      F32Array base_score(learner_model_param_.base_score.Size());
+      base_score.GetArray() = learner_model_param_.base_score.ConstHostVector();
+      std::string base_score_str;
+      Json::Dump(Json(std::move(base_score)), &base_score_str);
+      extra_attr.emplace_back("base_score", base_score_str);
+    }
+
     std::string header {"binf"};
     fo->Write(header.data(), 4);
     if (DMLC_IO_NO_ENDIAN_SWAP) {
@@ -1131,8 +1194,8 @@ class LearnerImpl : public LearnerIO {
     this->Configure();
     CHECK_NE(this->learner_model_param_.num_feature, 0);
     CHECK_GE(begin_layer, 0);
-    auto *out_impl = new LearnerImpl({});
-    out_impl->learner_model_param_ = this->learner_model_param_;
+    auto* out_impl = new LearnerImpl({});
+    out_impl->learner_model_param_.Copy(this->learner_model_param_);
     out_impl->generic_parameters_ = this->generic_parameters_;
     auto gbm = std::unique_ptr<GradientBooster>(GradientBooster::Create(
         this->tparam_.booster, &out_impl->generic_parameters_,
@@ -1172,7 +1235,7 @@ class LearnerImpl : public LearnerIO {
     }
 
     this->CheckDataSplitMode();
-    this->ConfigureBaseScore(train.get());
+    this->ConfigureLearnerParam(train.get());
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
@@ -1201,7 +1264,6 @@ class LearnerImpl : public LearnerIO {
     }
 
     this->CheckDataSplitMode();
-    CHECK(!std::isnan(learner_model_param_.base_score));
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
@@ -1255,12 +1317,7 @@ class LearnerImpl : public LearnerIO {
                                static_cast<int>(pred_interactions) +
                                static_cast<int>(pred_contribs);
     this->Configure();
-    // This is only needed when custom objective is used.
-    if (gpair_.Empty()) {
-      this->ConfigureBaseScore(data.get());
-    } else {
-      CHECK(!std::isnan(learner_model_param_.base_score));
-    }
+    this->ConfigureLearnerParam(nullptr);
 
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index 84c28197c9ed..9b1fda245d75 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -31,7 +31,11 @@ ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const
   return pobj;
 }
 
-float ObjFunction::InitEstimation(MetaInfo const&) const { return DefaultBaseScore(); }
+void ObjFunction::InitEstimation(MetaInfo const&, HostDeviceVector<float>* base_score) const {
+  CHECK(base_score);
+  base_score->Resize(1);
+  base_score->Fill(DefaultBaseScore());
+}
 }  // namespace xgboost
 
 namespace xgboost {
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 89d189778430..679d1b7b57b8 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -703,15 +703,17 @@ class MeanAbsoluteError : public ObjFunction {
     });
   }
 
-  float InitEstimation(MetaInfo const& info) const override {
+  void InitEstimation(MetaInfo const& info, HostDeviceVector<float>* base_margin) const override {
     CheckInitInputs(info);
+    auto& h_base_margin = base_margin->HostVector();
+    h_base_margin.resize(1);
     if (ctx_->IsCPU()) {
-      return common::Median(ctx_, info.labels.HostView(),
-                            common::OptionalWeights{info.weights_.ConstHostSpan()});
+      h_base_margin.front() = common::Median(
+          ctx_, info.labels.HostView(), common::OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
       info.weights_.SetDevice(ctx_->gpu_id);
-      return common::Median(ctx_, info.labels.View(ctx_->gpu_id),
-                            common::OptionalWeights{info.weights_.DeviceSpan()});
+      h_base_margin.front() = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
+                                             common::OptionalWeights{info.weights_.DeviceSpan()});
     }
   }
 
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 0e213b281231..5a3bad58f198 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -429,11 +429,12 @@ class CPUPredictor : public Predictor {
     }
     out_preds->resize(model.learner_model_param->num_output_group *
                       (model.param.size_leaf_vector + 1));
+    auto const& base_score = model.learner_model_param->base_score.HostVector().front();
     // loop over output groups
     for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
-      (*out_preds)[gid] = PredValue(inst, model.trees, model.tree_info, gid,
-                                    &feat_vecs[0], 0, ntree_limit) +
-                          model.learner_model_param->base_score;
+      (*out_preds)[gid] =
+          PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], 0, ntree_limit) +
+          base_score;
     }
   }
 
@@ -505,6 +506,8 @@ class CPUPredictor : public Predictor {
       FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
     });
     auto base_margin = info.base_margin_.View(GenericParameter::kCpuId);
+    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
+    auto base_score = model.learner_model_param->base_score.HostVector().front();
     // start collecting the contributions
     for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
       auto page = batch.GetView();
@@ -548,7 +551,7 @@ class CPUPredictor : public Predictor {
             CHECK_EQ(base_margin.Shape(1), ngroup);
             p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
           } else {
-            p_contribs[ncolumns - 1] += model.learner_model_param->base_score;
+            p_contribs[ncolumns - 1] += base_score;
           }
         }
       });
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index 163f7b40f368..e788f92253bf 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -511,7 +511,7 @@ void ExtractPaths(
           n = d_nodes[n.Parent() + tree_offset];
           path_length++;
         }
-        return PathInfo{int64_t(idx), path_length, tree_idx};
+        return PathInfo{static_cast<int64_t>(idx), path_length, tree_idx};
       });
   auto end = thrust::copy_if(
       thrust::cuda::par(alloc), nodes_transform,
@@ -859,13 +859,14 @@ class GPUPredictor : public xgboost::Predictor {
     // Add the base margin term to last column
     p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
-    float base_score = model.learner_model_param->base_score;
-    dh::LaunchN(
-        p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
-        [=] __device__(size_t idx) {
-          phis[(idx + 1) * contributions_columns - 1] +=
-              margin.empty() ? base_score : margin[idx];
-        });
+    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
+    model.learner_model_param->base_score.SetDevice(ctx_->gpu_id);
+    float const* base_score = model.learner_model_param->base_score.ConstDevicePointer();
+    dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
+                [=] __device__(size_t idx) {
+                  phis[(idx + 1) * contributions_columns - 1] +=
+                      margin.empty() ? *base_score : margin[idx];
+                });
   }
 
   void PredictInteractionContributions(DMatrix* p_fmat,
@@ -918,17 +919,19 @@ class GPUPredictor : public xgboost::Predictor {
     // Add the base margin term to last column
     p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
-    float base_score = model.learner_model_param->base_score;
+
+    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
+    model.learner_model_param->base_score.SetDevice(ctx_->gpu_id);
+    float const* base_score = model.learner_model_param->base_score.ConstDevicePointer();
     size_t n_features = model.learner_model_param->num_feature;
-    dh::LaunchN(
-        p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
-        [=] __device__(size_t idx) {
-          size_t group = idx % ngroup;
-          size_t row_idx = idx / ngroup;
-          phis[gpu_treeshap::IndexPhiInteractions(
-              row_idx, ngroup, group, n_features, n_features, n_features)] +=
-              margin.empty() ? base_score : margin[idx];
-        });
+    dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
+                [=] __device__(size_t idx) {
+                  size_t group = idx % ngroup;
+                  size_t row_idx = idx / ngroup;
+                  phis[gpu_treeshap::IndexPhiInteractions(row_idx, ngroup, group, n_features,
+                                                          n_features, n_features)] +=
+                      margin.empty() ? *base_score : margin[idx];
+                });
   }
 
   void PredictInstance(const SparsePage::Inst&,
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index 0cf57db94c85..af438af6556b 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -80,16 +80,17 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
   if (ctx_->gpu_id >= 0) {
     out_preds->SetDevice(ctx_->gpu_id);
   }
-  if (base_margin->Size() != 0) {
+  if (!base_margin->Empty()) {
     out_preds->Resize(n);
     ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes);
     out_preds->Copy(*base_margin);
   } else {
-    out_preds->Resize(n);
     // cannot rely on the Resize to fill as it might skip if the size is already correct.
-    auto base_score = model.learner_model_param->base_score;
-    CHECK(!std::isnan(base_score));
-    out_preds->Fill(base_score);
+    out_preds->Resize(n);
+    auto const& base_score = model.learner_model_param->base_score;
+    CHECK_EQ(base_score.Size(), 1);
+    // FIXME(jiamingy): Support multi-class
+    out_preds->Fill(base_score.HostVector().front());
   }
 }
 }  // namespace xgboost
diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc
index a5c16f7951d7..4c66911d11d5 100644
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -21,7 +21,7 @@ TEST(GBTree, SelectTreeMethod) {
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{});
   LearnerModelParam mparam;
-  mparam.base_score = 0.5;
+  mparam.base_score.Resize(1, 0.5);
   mparam.num_feature = kCols;
   mparam.num_output_group = 1;
 
@@ -58,7 +58,7 @@ TEST(GBTree, PredictionCache) {
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{});
   LearnerModelParam mparam;
-  mparam.base_score = 0.5;
+  mparam.base_score.Resize(1, 0.5);
   mparam.num_feature = kCols;
   mparam.num_output_group = 1;
 
@@ -179,7 +179,7 @@ TEST(GBTree, JsonIO) {
   LearnerModelParam mparam;
   mparam.num_feature = kCols;
   mparam.num_output_group = 1;
-  mparam.base_score = 0.5;
+  mparam.base_score.Resize(1, 0.5);
 
   GenericParameter gparam;
   gparam.Init(Args{});
@@ -217,7 +217,7 @@ TEST(Dart, JsonIO) {
 
   LearnerModelParam mparam;
   mparam.num_feature = kCols;
-  mparam.base_score = 0.5;
+  mparam.base_score.Resize(1, 0.5);
   mparam.num_output_group = 1;
 
   GenericParameter gparam;
diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc
index f021641a2b23..b179eba0ebb1 100644
--- a/tests/cpp/linear/test_linear.cc
+++ b/tests/cpp/linear/test_linear.cc
@@ -18,10 +18,7 @@ TEST(Linear, Shotgun) {
   auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
-  mparam.base_score = 0.5;
+  LearnerModelParam mparam(kCols, {.5}, 1);
 
   {
     auto updater = std::unique_ptr<xgboost::LinearUpdater>(
@@ -54,10 +51,7 @@ TEST(Linear, coordinate) {
   auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
-  mparam.base_score = 0.5;
+  LearnerModelParam mparam(kCols, {.5}, 1);
 
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent", &lparam));
diff --git a/tests/cpp/linear/test_linear.cu b/tests/cpp/linear/test_linear.cu
index c2eea45d166c..05330a02b85d 100644
--- a/tests/cpp/linear/test_linear.cu
+++ b/tests/cpp/linear/test_linear.cu
@@ -13,15 +13,11 @@ TEST(Linear, GPUCoordinate) {
   size_t constexpr kCols = 10;
 
   auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
-  auto lparam = CreateEmptyGenericParam(GPUIDX);
-
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
-  mparam.base_score = 0.5;
+  auto ctx = CreateEmptyGenericParam(GPUIDX);
 
+  LearnerModelParam mparam(kCols, {.5}, 1);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
-      xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
+      xgboost::LinearUpdater::Create("gpu_coord_descent", &ctx));
   updater->Configure({{"eta", "1."}});
   xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
       mat->Info().num_row_, xgboost::GradientPair(-5, 1.0));
@@ -36,4 +32,4 @@ TEST(Linear, GPUCoordinate) {
 TEST(GPUCoordinate, JsonIO) {
   TestUpdaterJsonIO("gpu_coord_descent");
 }
-}  // namespace xgboost
\ No newline at end of file
+}  // namespace xgboost
diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc
index 8ba270083c74..1a631d0098b0 100644
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -21,10 +21,7 @@ TEST(CpuPredictor, Basic) {
   size_t constexpr kRows = 5;
   size_t constexpr kCols = 5;
 
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.base_score = 0.0;
-  param.num_output_group = 1;
+  LearnerModelParam param{kCols, {0.0}, 1};
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
@@ -104,10 +101,7 @@ TEST(CpuPredictor, ExternalMemory) {
   std::unique_ptr<Predictor> cpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
 
-  LearnerModelParam param;
-  param.base_score = 0;
-  param.num_feature = dmat->Info().num_col_;
-  param.num_output_group = 1;
+  LearnerModelParam param(dmat->Info().num_col_, {0.0}, 1);
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
@@ -201,16 +195,11 @@ TEST(CpuPredictor, InplacePredict) {
 
 void TestUpdatePredictionCache(bool use_subsampling) {
   size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.num_output_group = kClasses;
-  mparam.base_score = 0;
-
-  GenericParameter gparam;
-  gparam.Init(Args{});
+  LearnerModelParam mparam{kCols, {0.0}, kClasses};
+  Context ctx;
 
   std::unique_ptr<gbm::GBTree> gbm;
-  gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &gparam, &mparam)));
+  gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &ctx, &mparam)));
   std::map<std::string, std::string> cfg;
   cfg["tree_method"] = "hist";
   cfg["predictor"]   = "cpu_predictor";
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 8dacadac5403..868abad13b63 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -34,13 +34,8 @@ TEST(GPUPredictor, Basic) {
     int n_row = i, n_col = i;
     auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
 
-    LearnerModelParam param;
-    param.num_feature = n_col;
-    param.num_output_group = 1;
-    param.base_score = 0.5;
-
+    LearnerModelParam param(n_col, {.5}, 1);
     GenericParameter ctx;
-    ctx.UpdateAllowUnknown(Args{});
     gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
 
     // Test predict batch
@@ -93,14 +88,10 @@ TEST(GPUPredictor, ExternalMemoryTest) {
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
   gpu_predictor->Configure({});
 
-  LearnerModelParam param;
-  param.num_feature = 5;
   const int n_classes = 3;
-  param.num_output_group = n_classes;
-  param.base_score = 0.5;
+  LearnerModelParam param(5, {.5}, n_classes);
 
-  GenericParameter ctx;
-  ctx.UpdateAllowUnknown(Args{});
+  Context ctx;
   gbm::GBTreeModel model = CreateTestModel(&param, &ctx, n_classes);
   std::vector<std::unique_ptr<DMatrix>> dmats;
 
@@ -171,14 +162,8 @@ TEST(GpuPredictor, LesserFeatures) {
 TEST(GPUPredictor, ShapStump) {
   cudaSetDevice(0);
 
-  LearnerModelParam param;
-  param.num_feature = 1;
-  param.num_output_group = 1;
-  param.base_score = 0.5;
-
-  GenericParameter ctx;
-  ctx.UpdateAllowUnknown(Args{});
-
+  LearnerModelParam param(1, {.5}, 1);
+  Context ctx;
   gbm::GBTreeModel model(&param, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;
@@ -193,23 +178,18 @@ TEST(GPUPredictor, ShapStump) {
   auto dmat = RandomDataGenerator(3, 1, 0).GenerateDMatrix();
   gpu_predictor->PredictContribution(dmat.get(), &predictions, model);
   auto& phis = predictions.HostVector();
+  auto base_score = param.base_score.HostVector().front();
   EXPECT_EQ(phis[0], 0.0);
-  EXPECT_EQ(phis[1], param.base_score);
+  EXPECT_EQ(phis[1], base_score);
   EXPECT_EQ(phis[2], 0.0);
-  EXPECT_EQ(phis[3], param.base_score);
+  EXPECT_EQ(phis[3], base_score);
   EXPECT_EQ(phis[4], 0.0);
-  EXPECT_EQ(phis[5], param.base_score);
+  EXPECT_EQ(phis[5], base_score);
 }
 
 TEST(GPUPredictor, Shap) {
-  LearnerModelParam param;
-  param.num_feature = 1;
-  param.num_output_group = 1;
-  param.base_score = 0.5;
-
-  GenericParameter ctx;
-  ctx.UpdateAllowUnknown(Args{});
-
+  LearnerModelParam param(1, {.5}, 1);
+  Context ctx;
   gbm::GBTreeModel model(&param, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;
@@ -258,13 +238,8 @@ TEST(GPUPredictor, PredictLeafBasic) {
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
   gpu_predictor->Configure({});
 
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.base_score = 0.0;
-  param.num_output_group = 1;
-
-  GenericParameter ctx;
-  ctx.UpdateAllowUnknown(Args{});
+  LearnerModelParam param(kCols, {.0}, 1);
+  Context ctx;
   gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
 
   HostDeviceVector<float> leaf_out_predictions;
diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc
index 34c4d48e6dc1..5ba1fc52b0d1 100644
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -210,10 +210,7 @@ void TestCategoricalPrediction(std::string name) {
   size_t constexpr kCols = 10;
   PredictionCacheEntry out_predictions;
 
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.num_output_group = 1;
-  param.base_score = 0.5;
+  LearnerModelParam param(kCols, {.5}, 1);
 
   uint32_t split_ind = 3;
   bst_cat_t split_cat = 4;
@@ -237,27 +234,24 @@ void TestCategoricalPrediction(std::string name) {
 
   predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
   predictor->PredictBatch(m.get(), &out_predictions, model, 0);
+  auto score = param.base_score.HostVector().front();
   ASSERT_EQ(out_predictions.predictions.Size(), 1ul);
   ASSERT_EQ(out_predictions.predictions.HostVector()[0],
-            right_weight + param.base_score);  // go to right for matching cat
+            right_weight + score);  // go to right for matching cat
 
   row[split_ind] = split_cat + 1;
   m = GetDMatrixFromData(row, 1, kCols);
   out_predictions.version = 0;
   predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
   predictor->PredictBatch(m.get(), &out_predictions, model, 0);
-  ASSERT_EQ(out_predictions.predictions.HostVector()[0],
-            left_weight + param.base_score);
+  ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
 }
 
 void TestCategoricalPredictLeaf(StringView name) {
   size_t constexpr kCols = 10;
   PredictionCacheEntry out_predictions;
 
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.num_output_group = 1;
-  param.base_score = 0.5;
+  LearnerModelParam param(kCols, {.5}, 1);
 
   uint32_t split_ind = 3;
   bst_cat_t split_cat = 4;
diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h
index 1ff96096c533..bf970ddb420d 100644
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -12,11 +12,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
                                      std::shared_ptr<DMatrix> p_hist) {
   constexpr size_t kClasses { 3 };
 
-  LearnerModelParam param;
-  param.num_feature = cols;
-  param.num_output_group = kClasses;
-  param.base_score = 0.5;
-
+  LearnerModelParam param(cols, {.5}, kClasses);
   auto lparam = CreateEmptyGenericParam(0);
 
   std::unique_ptr<Predictor> predictor =
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 4a8214e9c5cd..42f25fa39caa 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -206,8 +206,7 @@ TEST(Learner, MultiThreadedPredict) {
   p_dmat->Info().labels.Reshape(kRows);
   CHECK_NE(p_dmat->Info().num_col_, 0);
 
-  std::shared_ptr<DMatrix> p_data{
-      RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
+  std::shared_ptr<DMatrix> p_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
   CHECK_NE(p_data->Info().num_col_, 0);
 
   std::shared_ptr<Learner> learner{Learner::Create({p_dmat})};

From 117b17539947018e55a62607f780f1aa74576bdd Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 28 Jul 2022 16:56:40 +0800
Subject: [PATCH 04/34] Use a tensor in learner.

---
 include/xgboost/learner.h                 |  28 +--
 include/xgboost/linalg.h                  |  59 ++++-
 include/xgboost/objective.h               |   2 +-
 src/common/common.h                       |  11 +-
 src/common/linalg_op.h                    |  24 ++
 src/gbm/gblinear.cc                       |  17 +-
 src/gbm/gbtree.cc                         |  19 +-
 src/gbm/gbtree.cu                         |   5 +-
 src/learner.cc                            | 270 ++++++++++++----------
 src/objective/objective.cc                |   6 +-
 src/objective/regression_obj.cu           |  14 +-
 src/predictor/cpu_predictor.cc            |   5 +-
 src/predictor/gpu_predictor.cu            |  13 +-
 src/predictor/predictor.cc                |   6 +-
 tests/cpp/common/test_linalg.cc           |   8 +-
 tests/cpp/gbm/test_gblinear.cc            |  12 +-
 tests/cpp/gbm/test_gbtree.cc              |  46 ++--
 tests/cpp/helpers.h                       |   9 +
 tests/cpp/linear/test_linear.cc           |   4 +-
 tests/cpp/linear/test_linear.cu           |   2 +-
 tests/cpp/predictor/test_cpu_predictor.cc |  10 +-
 tests/cpp/predictor/test_gpu_predictor.cu |  24 +-
 tests/cpp/predictor/test_predictor.cc     |  11 +-
 tests/cpp/predictor/test_predictor.h      |   4 +-
 24 files changed, 350 insertions(+), 259 deletions(-)

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index 16976a98aa76..a6b46723b9ad 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -85,7 +85,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   /*!
    * \brief Configure Learner based on set parameters.
    */
-  virtual void Configure() = 0;
+  virtual void Configure(DMatrix const* p_fmat = nullptr) = 0;
   /*!
    * \brief update the model for one iteration
    *  With the specified objective function.
@@ -290,7 +290,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   /*! \brief The evaluation metrics used to evaluate the model. */
   std::vector<std::unique_ptr<Metric> > metrics_;
   /*! \brief Training parameter. */
-  GenericParameter generic_parameters_;
+  Context ctx_;
 };
 
 struct LearnerModelParamLegacy;
@@ -299,8 +299,11 @@ struct LearnerModelParamLegacy;
  * \brief Basic Model Parameters, used to describe the booster.
  */
 struct LearnerModelParam {
+ private:
   /* \brief global bias */
-  HostDeviceVector<float> base_score;
+  linalg::Tensor<float, 1> base_score_;
+
+ public:
   /* \brief number of features  */
   uint32_t num_feature { 0 };
   /* \brief number of classes, if it is multi-class classification  */
@@ -311,19 +314,18 @@ struct LearnerModelParam {
   LearnerModelParam() = default;
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
-  LearnerModelParam(LearnerModelParamLegacy const& user_param, HostDeviceVector<float> base_margin,
-                    ObjInfo t);
+  LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
+                    linalg::Tensor<float, 1> base_margin, ObjInfo t);
   LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
-  LearnerModelParam(bst_feature_t n_features, HostDeviceVector<float> base_margin,
+  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
                     uint32_t n_groups)
-      : base_score{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+      : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+
+  linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
+  linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
+
+  void Copy(LearnerModelParam const& that);
 
-  void Copy(LearnerModelParam const& that) {
-    base_score.Resize(that.base_score.Size());
-    base_score.Copy(that.base_score);
-    num_feature = that.num_feature;
-    num_output_group = that.num_output_group, task = that.task;
-  }
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
   bool Initialized() const { return num_feature != 0; }
 };
diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index 4fec61bb5b6d..6f914db0714c 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -8,6 +8,7 @@
 
 #include <dmlc/endian.h>
 #include <xgboost/base.h>
+#include <xgboost/generic_parameters.h>
 #include <xgboost/host_device_vector.h>
 #include <xgboost/json.h>
 #include <xgboost/span.h>
@@ -214,6 +215,22 @@ LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
   constexpr auto kSize = std::tuple_size<Tup>::value;
   return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
 }
+
+/**
+ * Backport of C++17 std::conjunction: derives from the first false Bn, else from the last Bn.
+ */
+template <class...>
+struct Conjunction : std::true_type {};  // empty pack
+template <class B1>
+struct Conjunction<B1> : B1 {};
+template <class B1, class... Bn>
+struct Conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), Conjunction<Bn...>, B1> {};
+// True iff every type in Index is integral once references are stripped.
+template <typename... Index>
+using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;
+// SFINAE helper: names `void` when IsAllIntegral holds, otherwise substitution fails.
+template <typename... Index>
+using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
 }  // namespace detail
 
 /**
@@ -407,7 +424,7 @@ class TensorView {
    *
    * \endcode
    */
-  template <typename... Index>
+  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
   LINALG_HD T &operator()(Index &&...index) {
     static_assert(sizeof...(index) <= kDim, "Invalid index.");
     size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
@@ -417,7 +434,7 @@ class TensorView {
   /**
    * \brief Index the tensor to obtain a scalar value.
    */
-  template <typename... Index>
+  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
   LINALG_HD T const &operator()(Index &&...index) const {
     static_assert(sizeof...(index) <= kDim, "Invalid index.");
     size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
@@ -425,6 +442,11 @@ class TensorView {
     return ptr_[offset];
   }
 
+  template <typename... S, std::enable_if_t<!detail::IsAllIntegral<S...>::value> * = nullptr>
+  LINALG_HD auto operator()(S &&...slices) const {  // taken when an argument is non-integral
+    return this->Slice(std::forward<S>(slices)...);
+  }
+
   /**
    * \brief Slice the tensor.  The returned tensor has inferred dim and shape.  Scalar
    *        result is not supported.
@@ -703,12 +725,29 @@ class Tensor {
   }
 
   template <typename I, int32_t D>
-  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
+  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
+                  int32_t device = Context::kCpuId) {
     auto &h_vec = data_.HostVector();
     h_vec = data;
     // shape
     this->Initialize(shape, device);
   }
+  /**
+   * \brief Index operator, forwards to a fresh HostView() on every call. Not thread safe,
+   *        should not be used in performance critical region; prefer indexing a view.
+   */
+  template <typename... Index>
+  T &operator()(Index &&...idx) {
+    return this->HostView()(std::forward<Index>(idx)...);
+  }
+  /**
+   * \brief Read-only index operator, forwards to a fresh HostView() on every call. Not thread
+   *        safe, should not be used in performance critical region; prefer indexing a view.
+   */
+  template <typename... Index>
+  T const &operator()(Index &&...idx) const {
+    return this->HostView()(std::forward<Index>(idx)...);
+  }
 
   /**
    * \brief Get a \ref TensorView for this tensor.
@@ -762,7 +801,8 @@ class Tensor {
    *
    *    If the total size is changed, then data in this tensor is no longer valid.
    */
-  template <typename... S>
+  template <typename... S, std::enable_if_t<detail::Conjunction<
+                               std::is_integral<std::remove_reference_t<S>>...>::value> * = nullptr>
   void Reshape(S &&...s) {
     static_assert(sizeof...(S) <= kDim, "Invalid shape.");
     detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
@@ -778,15 +818,20 @@ class Tensor {
    *
    *    If the total size is changed, then data in this tensor is no longer valid.
    */
-  template <int32_t D>
-  void Reshape(size_t (&shape)[D]) {
+  template <size_t D>
+  void Reshape(common::Span<size_t const, D> shape) {
     static_assert(D <= kDim, "Invalid shape.");
-    std::copy(shape, shape + D, this->shape_);
+    std::copy(shape.data(), shape.data() + D, this->shape_);
     std::fill(shape_ + D, shape_ + kDim, 1);
     auto n = detail::CalcSize(shape_);
     data_.Resize(n);
   }
 
+  template <size_t D>
+  void Reshape(size_t (&shape)[D]) {  // C-array shape: wrap it in a Span and forward
+    this->Reshape(common::Span<size_t const, D>{shape});
+  }
+
   /**
    * \brief Set device ordinal for this tensor.
    */
diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
index b31e3e233140..2cad8dc2a8c2 100644
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -84,7 +84,7 @@ class ObjFunction : public Configurable {
    * \param info MetaInfo that contains label.
    * \param base_score Output estimation.
    */
-  virtual void InitEstimation(MetaInfo const& info, HostDeviceVector<float>* base_score) const;
+  virtual void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const;
   /*!
    * \brief Return task of this objective.
    */
diff --git a/src/common/common.h b/src/common/common.h
index 8c260225b5d8..794d431b0b07 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -277,7 +277,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
 }
 
 /**
- * @brief A CRTP (curiously recurring template pattern) helper function.
+ * \brief A CRTP (curiously recurring template pattern) helper function.
  *
  * https://www.fluentcpp.com/2017/05/19/crtp-helper/
  *
@@ -285,7 +285,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
  * 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class.
  * 2. Avoids having to `static_cast` in a lot of places.
  *
- * @tparam T The derived class in a CRTP hierarchy.
+ * \tparam T The derived class in a CRTP hierarchy.
  */
 template <typename T>
 struct Crtp {
@@ -293,6 +293,13 @@ struct Crtp {
   T const &Underlying() const { return static_cast<T const &>(*this); }
 };
 
+/**
+ * \brief Backport of C++17 std::as_const: returns \p v as a reference to const.
+ */
+template <typename T>
+typename std::add_const<T>::type &AsConst(T &v) noexcept {
+  return v;
+}
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h
index 4aedfbc29561..52790e33d859 100644
--- a/src/common/linalg_op.h
+++ b/src/common/linalg_op.h
@@ -60,6 +60,49 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
+/**
+ * \brief Read-only iterator over the elements of a view; position i maps to the element
+ *        at `UnravelIndex(i, v.Shape())`.
+ *
+ *   The iterator keeps a reference to \p v, so the view must outlive the iterator.
+ */
+template <typename T, int32_t kDim>
+auto cbegin(TensorView<T, kDim> const& v) {  // NOLINT
+  // `v` is a reference to the caller's view. Capturing a by-value parameter by reference
+  // here would leave the returned iterator dangling as soon as this function returns.
+  auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
+    return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
+  });
+  return it;
+}
+
+/**
+ * \brief End of the range started by cbegin, `v.Size()` positions further.
+ */
+template <typename T, int32_t kDim>
+auto cend(TensorView<T, kDim> const& v) {  // NOLINT
+  return cbegin(v) + v.Size();
+}
+
+/**
+ * \brief Writable counterpart of cbegin, dereferencing yields `T&`.
+ *
+ *   The iterator keeps a reference to \p v, so the view must outlive the iterator.
+ */
+template <typename T, int32_t kDim>
+auto begin(TensorView<T, kDim>& v) {  // NOLINT
+  auto it = common::MakeIndexTransformIter(
+      [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
+  return it;
+}
+
+/**
+ * \brief End of the range started by begin, `v.Size()` positions further.
+ */
+template <typename T, int32_t kDim>
+auto end(TensorView<T, kDim>& v) {  // NOLINT
+  return begin(v) + v.Size();
+}
 }  // namespace linalg
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_LINALG_OP_H_
diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc
index 5b78848a993a..29065add9846 100644
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -161,10 +161,10 @@ class GBLinear : public GradientBooster {
                        uint32_t layer_begin, uint32_t) override {
     LinearCheckLayer(layer_begin);
     const int ngroup = model_.learner_model_param->num_output_group;
-    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
-    auto base_score = learner_model_param_->base_score.HostVector().front();
+
+    auto base_score = learner_model_param_->BaseScore(ctx_);
     for (int gid = 0; gid < ngroup; ++gid) {
-      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score);
+      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score(0));
     }
   }
 
@@ -185,8 +185,7 @@ class GBLinear : public GradientBooster {
     contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
     // make sure contributions is zeroed, we could be reusing a previously allocated one
     std::fill(contribs.begin(), contribs.end(), 0);
-    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
-    auto base_score = learner_model_param_->base_score.HostVector().front();
+    auto base_score = learner_model_param_->BaseScore(ctx_);
     // start collecting the contributions
     for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
       // parallel over local batch
@@ -206,7 +205,7 @@ class GBLinear : public GradientBooster {
           // add base margin to BIAS
           p_contribs[ncolumns - 1] =
               model_.Bias()[gid] +
-              ((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score);
+              ((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score(0));
         }
       });
     }
@@ -275,8 +274,8 @@ class GBLinear : public GradientBooster {
     // start collecting the prediction
     const int ngroup = model_.learner_model_param->num_output_group;
     preds.resize(p_fmat->Info().num_row_ * ngroup);
-    CHECK_EQ(learner_model_param_->base_score.Size(), 1);
-    auto base_score = learner_model_param_->base_score.HostVector().front();
+
+    auto base_score = learner_model_param_->BaseScore(ctx_);
     for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
       auto const& batch = page.GetView();
       // output convention: nrow * k, where nrow is number of rows
@@ -290,7 +289,7 @@ class GBLinear : public GradientBooster {
         const size_t ridx = page.base_rowid + i;
         // loop over output groups
         for (int gid = 0; gid < ngroup; ++gid) {
-          float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score;
+          float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score(0);
           this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
         }
       });
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 4e9235921a33..a4106888f240 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -638,10 +638,10 @@ void GPUDartPredictInc(common::Span<float> out_predts,
 }
 #endif
 
-void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
-                              float tree_w, size_t n_rows,
-                              HostDeviceVector<float> const& base_score, bst_group_t n_groups,
-                              bst_group_t group)
+void GPUDartInplacePredictInc(common::Span<float> /*out_predts*/, common::Span<float> /*predts*/,
+                              float /*tree_w*/, size_t /*n_rows*/,
+                              linalg::TensorView<float const, 1> /*base_score*/,
+                              bst_group_t /*n_groups*/, bst_group_t /*group*/)
 #if defined(XGBOOST_USE_CUDA)
     ;  // NOLINT
 #else
@@ -849,18 +849,17 @@ class Dart : public GBTree {
       size_t n_rows = p_fmat->Info().num_row_;
       if (predts.predictions.DeviceIdx() != Context::kCpuId) {
         p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
-        model_.learner_model_param->base_score.SetDevice(predts.predictions.DeviceIdx());
+        auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
         GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
-                                 predts.predictions.DeviceSpan(), w, n_rows,
-                                 model_.learner_model_param->base_score, n_groups, group);
+                                 predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
+                                 group);
       } else {
-        CHECK_EQ(model_.learner_model_param->base_score.Size(), 1);
-        auto base_score = model_.learner_model_param->base_score.HostVector().front();
+        auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
         auto& h_predts = predts.predictions.HostVector();
         auto& h_out_predts = p_out_preds->predictions.HostVector();
         common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
           const size_t offset = ridx * n_groups + group;
-          h_out_predts[offset] += (h_predts[offset] - base_score) * w;
+          h_out_predts[offset] += (h_predts[offset] - base_score(0)) * w;
         });
       }
     }
diff --git a/src/gbm/gbtree.cu b/src/gbm/gbtree.cu
index 51beff49fbd4..12109782d59b 100644
--- a/src/gbm/gbtree.cu
+++ b/src/gbm/gbtree.cu
@@ -33,13 +33,12 @@ void GPUDartPredictInc(common::Span<float> out_predts,
 
 void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
                               float tree_w, size_t n_rows,
-                              HostDeviceVector<float> const &base_score, bst_group_t n_groups,
+                              linalg::TensorView<float const, 1> base_score, bst_group_t n_groups,
                               bst_group_t group) {
-  auto const* d_score = base_score.ConstDevicePointer();
   CHECK_EQ(base_score.Size(), 1);
   dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
     const size_t offset = ridx * n_groups + group;
-    out_predts[offset] += (predts[offset] - *d_score) * tree_w;
+    out_predts[offset] += (predts[offset] - base_score(0)) * tree_w;
   });
 }
 }  // namespace gbm
diff --git a/src/learner.cc b/src/learner.cc
index 3661c6585111..b3c6224c1c0a 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -4,47 +4,48 @@
  * \brief Implementation of learning algorithm.
  * \author Tianqi Chen
  */
+#include "xgboost/learner.h"
+
 #include <dmlc/io.h>
 #include <dmlc/parameter.h>
 #include <dmlc/thread_local.h>
 
-#include <atomic>
-#include <mutex>
 #include <algorithm>
+#include <atomic>
 #include <iomanip>
 #include <limits>
 #include <memory>
+#include <mutex>
 #include <sstream>
-#include <string>
 #include <stack>
+#include <string>
 #include <utility>
 #include <vector>
 
+#include "common/charconv.h"
+#include "common/common.h"
+#include "common/io.h"
+#include "common/linalg_op.h"
+#include "common/observer.h"
+#include "common/random.h"
+#include "common/threading_utils.h"
+#include "common/timer.h"
+#include "common/version.h"
 #include "dmlc/any.h"
 #include "xgboost/base.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
-#include "xgboost/model.h"
-#include "xgboost/predictor.h"
 #include "xgboost/feature_map.h"
 #include "xgboost/gbm.h"
 #include "xgboost/generic_parameters.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/json.h"
-#include "xgboost/learner.h"
 #include "xgboost/logging.h"
 #include "xgboost/metric.h"
+#include "xgboost/model.h"
 #include "xgboost/objective.h"
 #include "xgboost/parameter.h"
-
-#include "common/common.h"
-#include "common/io.h"
-#include "common/observer.h"
-#include "common/random.h"
-#include "common/timer.h"
-#include "common/charconv.h"
-#include "common/version.h"
-#include "common/threading_utils.h"
+#include "xgboost/predictor.h"
 
 namespace {
 
@@ -99,13 +100,12 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
                   "Do not change the size of this struct, as it will break binary IO.");
   }
   // Skip other legacy fields.
-  Json ToJson() const {
+  Json ToJson(linalg::TensorView<float const, 1> base_score_new) const {
     Object obj;
     char floats[NumericLimits<float>::kToCharsSize];
-    auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
+    auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score_new(0));
     CHECK(ret.ec == std::errc());
-    obj["base_score"] =
-        std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
+    obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
 
     char integers[NumericLimits<int64_t>::kToCharsSize];
     ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
@@ -126,7 +126,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
 
     return Json(std::move(obj));
   }
-  void FromJson(Json const& obj) {
+  void FromJson(Json const& obj, linalg::Tensor<float, 1>* base_score_new) {
     auto const& j_param = get<Object const>(obj);
     std::map<std::string, std::string> m;
     m["num_feature"] = get<String const>(j_param.at("num_feature"));
@@ -137,8 +137,11 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     }
 
     this->Init(m);
+
     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
+    base_score_new->Reshape(1);
+    (*base_score_new)(0) = base_score;
   }
   inline LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
@@ -185,10 +188,67 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
       << ", n_targets:" << n_targets;
 }
 
-LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
-                                     HostDeviceVector<float> base_margin, ObjInfo t)
+/**
+ * \brief Build from the legacy parameter and take over \p base_margin as the base score.
+ *
+ *   Read-only views are then requested on host, and on `ctx->gpu_id` when \p ctx is not
+ *   CPU, which is the read access that `BaseScore` later checks for.
+ */
+LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
+                                     linalg::Tensor<float, 1> base_margin, ObjInfo t)
     : LearnerModelParam{user_param, t} {
-  std::swap(base_score, base_margin);
+  std::swap(base_score_, base_margin);
+  // Make sure read access everywhere for thread-safe prediction.
+  common::AsConst(base_score_).HostView();
+  if (!ctx->IsCPU()) {
+    common::AsConst(base_score_).View(ctx->gpu_id);
+  }
+}
+
+/**
+ * \brief Read-only view of the base score on the requested device.
+ *
+ * \param device Context::kCpuId for a host view, otherwise the device ordinal.
+ */
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
+  // multi-class is not supported yet.
+  CHECK_EQ(base_score_.Size(), 1);
+  if (device == Context::kCpuId) {
+    CHECK(base_score_.Data()->HostCanRead());
+    return base_score_.HostView();
+  }
+  CHECK(base_score_.Data()->DeviceCanRead());
+  auto v = base_score_.View(device);
+  CHECK(base_score_.Data()->HostCanRead());  // make sure read access is not removed.
+  return v;
+}
+
+/**
+ * \brief Read-only view of the base score on the device selected by \p ctx.
+ */
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(Context const* ctx) const {
+  return this->BaseScore(ctx->gpu_id);
+}
+
+/**
+ * \brief Copy the base score and the scalar fields of \p that into this object.
+ */
+void LearnerModelParam::Copy(LearnerModelParam const& that) {
+  base_score_.Reshape(that.base_score_.Shape());
+  base_score_.Data()->SetDevice(that.base_score_.DeviceIdx());
+  base_score_.Data()->Copy(*that.base_score_.Data());
+  common::AsConst(base_score_).HostView();
+  if (that.base_score_.DeviceIdx() != Context::kCpuId) {
+    common::AsConst(base_score_).View(that.base_score_.DeviceIdx());
+    // Device read access is only requested in this branch, so it is only required here; an
+    // unconditional check would also hit a host-only source that never asked for a device view.
+    CHECK(base_score_.Data()->DeviceCanRead());
+  }
+  CHECK(base_score_.Data()->HostCanRead());
+
+  num_feature = that.num_feature;
+  num_output_group = that.num_output_group;
+  task = that.task;
 }
 
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
@@ -314,6 +354,8 @@ class LearnerConfiguration : public Learner {
   LearnerModelParamLegacy mparam_;
   LearnerModelParam learner_model_param_;
   LearnerTrainParam tparam_;
+  // Initial prediction.
+  linalg::Tensor<float, 1> base_score_;
   std::vector<std::string> metric_names_;
 
  public:
@@ -333,11 +375,15 @@ class LearnerConfiguration : public Learner {
   }
 
   // Configuration before data is known.
-  void Configure() override {
+  void Configure(DMatrix const* p_fmat = nullptr) override {
     // Varient of double checked lock
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }
     std::lock_guard<std::mutex> guard(config_lock_);
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }
 
     monitor_.Start("Configure");
     auto old_tparam = tparam_;
@@ -346,9 +392,9 @@ class LearnerConfiguration : public Learner {
     tparam_.UpdateAllowUnknown(args);
     mparam_.UpdateAllowUnknown(args);
 
-    auto initialized = generic_parameters_.GetInitialised();
-    auto old_seed = generic_parameters_.seed;
-    generic_parameters_.UpdateAllowUnknown(args);
+    auto initialized = ctx_.GetInitialised();
+    auto old_seed = ctx_.seed;
+    ctx_.UpdateAllowUnknown(args);
 
     ConsoleLogger::Configure(args);
 
@@ -359,8 +405,8 @@ class LearnerConfiguration : public Learner {
     }
 
     // set seed only before the model is initialized
-    if (!initialized || generic_parameters_.seed != old_seed) {
-      common::GlobalRandom().seed(generic_parameters_.seed);
+    if (!initialized || ctx_.seed != old_seed) {
+      common::GlobalRandom().seed(ctx_.seed);
     }
 
     // must precede configure gbm since num_features is required for gbm
@@ -382,13 +428,14 @@ class LearnerConfiguration : public Learner {
     // - model loaded from new binary or JSON.
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
+    this->ConfigureLearnerParam(p_fmat);
 
     this->ConfigureGBM(old_tparam, args);
-    generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
+    ctx_.ConfigureGpuId(this->gbm_->UseGPU());
     this->ConfigureMetrics(args);
 
     this->need_configuration_ = false;
-    if (generic_parameters_.validate_parameters) {
+    if (ctx_.validate_parameters) {
       this->ValidateParameters();
     }
 
@@ -401,37 +448,41 @@ class LearnerConfiguration : public Learner {
    */
   void ConfigureLearnerParam(DMatrix const* p_fmat) {
     CHECK(obj_);
-    if (!learner_model_param_.base_score.Empty()) {
-      auto task = obj_->Task();
-      learner_model_param_ =
-          LearnerModelParam(mparam_, std::move(learner_model_param_.base_score), task);
-      CHECK(learner_model_param_.Initialized());
-      CHECK(!learner_model_param_.base_score.Empty());
-      return;
-    }
-
-    HostDeviceVector<float> base_score;
-    if (!std::isnan(mparam_.base_score)) {
+    float world = rabit::GetWorldSize();
+    if (base_score_.Size() != 0) {
+      // already populated (e.g. by FromJson or an earlier call), keep it.
+    } else if (!std::isnan(mparam_.base_score)) {
       // if base_score is set by user, use it.
-      base_score.Resize(1);
-      base_score.Fill(obj_->ProbToMargin(mparam_.base_score));
+      base_score_.Reshape(1);
+      base_score_(0) = mparam_.base_score;
     } else if (p_fmat) {
       // otherwise, we estimate it from input data.
-      obj_->InitEstimation(p_fmat->Info(), &base_score);
-      auto& h_base_score = base_score.HostVector();
-      rabit::Allreduce<rabit::op::Sum>(h_base_score.data(), h_base_score.size());
-      float world = rabit::GetWorldSize();
-      std::transform(h_base_score.begin(), h_base_score.end(), h_base_score.begin(),
-                     [&](float v) { return obj_->ProbToMargin(v / world); });
+      obj_->InitEstimation(p_fmat->Info(), &base_score_);
     } else {
       // lastly, if data is not available (prediction for custom objective), use default.
-      base_score.Resize(1);
-      base_score.Fill(obj_->ProbToMargin(ObjFunction::DefaultBaseScore()));
+      base_score_.Reshape(1);
+      // Kept untransformed like the other branches, `ProbToMargin` is applied once below.
+      base_score_(0) = ObjFunction::DefaultBaseScore();
     }
+
     auto task = obj_->Task();
-    learner_model_param_ = LearnerModelParam(mparam_, std::move(base_score), task);
+    // Average over workers in place. Leaving the raw sum in `base_score_` would scale it by
+    // the world size again on every re-configuration, and it is the value that gets saved.
+    auto& h_base_score = base_score_.Data()->HostVector();
+    rabit::Allreduce<rabit::op::Sum>(h_base_score.data(), h_base_score.size());
+    std::transform(h_base_score.begin(), h_base_score.end(), h_base_score.begin(),
+                   [world](float v) { return v / world; });
+    linalg::Tensor<float, 1> copy{base_score_.Shape(), ctx_.gpu_id};
+
+    auto in = base_score_.HostView();
+    auto out = copy.HostView();
+    // Only the copy handed to the model parameter is converted to margin.
+    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(out),
+                   [&](float v) { return obj_->ProbToMargin(v); });
+
+    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
     CHECK(learner_model_param_.Initialized());
-    CHECK(!learner_model_param_.base_score.Empty());
+    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
   }
 
   virtual PredictionContainer* GetPredictionCache() const {
@@ -455,7 +500,7 @@ class LearnerConfiguration : public Learner {
 
     auto const& objective_fn = learner_parameters.at("objective");
     if (!obj_) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     obj_->LoadConfig(objective_fn);
     learner_model_param_.task = obj_->Task();
@@ -463,7 +508,7 @@ class LearnerConfiguration : public Learner {
     tparam_.booster = get<String>(gradient_booster["name"]);
     if (!gbm_) {
       gbm_.reset(GradientBooster::Create(tparam_.booster,
-                                         &generic_parameters_, &learner_model_param_));
+                                         &ctx_, &learner_model_param_));
     }
     gbm_->LoadConfig(gradient_booster);
 
@@ -479,15 +524,15 @@ class LearnerConfiguration : public Learner {
       } else {
         metric_names_[i] = get<String>(j_metrics[i]["name"]);
       }
-      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &generic_parameters_));
+      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &ctx_));
       if (!old_serialization) {
         metrics_[i]->LoadConfig(j_metrics[i]);
       }
     }
 
-    FromJson(learner_parameters.at("generic_param"), &generic_parameters_);
+    FromJson(learner_parameters.at("generic_param"), &ctx_);
     // make sure the GPU ID is valid in new environment before start running configure.
-    generic_parameters_.ConfigureGpuId(false);
+    ctx_.ConfigureGpuId(false);
 
     this->need_configuration_ = true;
   }
@@ -501,7 +546,7 @@ class LearnerConfiguration : public Learner {
     auto& learner_parameters = out["learner"];
 
     learner_parameters["learner_train_param"] = ToJson(tparam_);
-    learner_parameters["learner_model_param"] = mparam_.ToJson();
+    learner_parameters["learner_model_param"] = mparam_.ToJson(base_score_.HostView());
     learner_parameters["gradient_booster"] = Object();
     auto& gradient_booster = learner_parameters["gradient_booster"];
     gbm_->SaveConfig(&gradient_booster);
@@ -516,7 +561,7 @@ class LearnerConfiguration : public Learner {
     }
     learner_parameters["metrics"] = Array(std::move(metrics));
 
-    learner_parameters["generic_param"] = ToJson(generic_parameters_);
+    learner_parameters["generic_param"] = ToJson(ctx_);
   }
 
   void SetParam(const std::string& key, const std::string& value) override {
@@ -589,7 +634,7 @@ class LearnerConfiguration : public Learner {
     return cfg_;
   }
 
-  GenericParameter const* Ctx() const override { return &generic_parameters_; }
+  GenericParameter const* Ctx() const override { return &ctx_; }
 
  private:
   void ValidateParameters() {
@@ -692,7 +737,7 @@ class LearnerConfiguration : public Learner {
 
   void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
     if (gbm_ == nullptr || old.booster != tparam_.booster) {
-      gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+      gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                          &learner_model_param_));
     }
     gbm_->Configure(args);
@@ -716,7 +761,7 @@ class LearnerConfiguration : public Learner {
       cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
     }
     if (obj_ == nullptr || tparam_.objective != old.objective) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     auto& args = *p_args;
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
@@ -729,7 +774,7 @@ class LearnerConfiguration : public Learner {
                         return m->Name() != name;
                       };
       if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
-        metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_parameters_)));
+        metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &ctx_)));
         mparam_.contain_eval_metrics = 1;
       }
     }
@@ -785,20 +830,20 @@ class LearnerIO : public LearnerConfiguration {
     }
 
     auto const& learner = get<Object>(in["learner"]);
-    mparam_.FromJson(learner.at("learner_model_param"));
+    mparam_.FromJson(learner.at("learner_model_param"), &base_score_);
 
     auto const& objective_fn = learner.at("objective");
 
     std::string name = get<String>(objective_fn["name"]);
     tparam_.UpdateAllowUnknown(Args{{"objective", name}});
-    obj_.reset(ObjFunction::Create(name, &generic_parameters_));
+    obj_.reset(ObjFunction::Create(name, &ctx_));
     obj_->LoadConfig(objective_fn);
 
     auto const& gradient_booster = learner.at("gradient_booster");
     name = get<String>(gradient_booster["name"]);
     tparam_.UpdateAllowUnknown(Args{{"booster", name}});
     gbm_.reset(
-        GradientBooster::Create(tparam_.booster, &generic_parameters_, &learner_model_param_));
+        GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
     gbm_->LoadModel(gradient_booster);
 
     auto const& j_attributes = get<Object const>(learner.at("attributes"));
@@ -822,19 +867,7 @@ class LearnerIO : public LearnerConfiguration {
       std::transform(feature_types.cbegin(), feature_types.cend(), feature_types_.begin(),
                      [](Json const& fn) { return get<String const>(fn); });
     }
-    it = learner.find("base_score");
-    if (it != learner.cend()) {
-      if (IsA<F32Array>(it->second)) {
-        auto& base_score = get<F32Array const>(it->second);
-        learner_model_param_.base_score.HostVector() = base_score;
-      } else {
-        auto& base_score = get<Array const>(it->second);
-        auto& h_result = learner_model_param_.base_score.HostVector();
-        for (auto v : base_score) {
-          h_result.push_back(get<Number const>(v));
-        }
-      }
-    }
+
     this->need_configuration_ = true;
   }
 
@@ -847,7 +880,7 @@ class LearnerIO : public LearnerConfiguration {
     out["learner"] = Object();
     auto& learner = out["learner"];
 
-    learner["learner_model_param"] = mparam_.ToJson();
+    learner["learner_model_param"] = mparam_.ToJson(base_score_.HostView());
     learner["gradient_booster"] = Object();
     auto& gradient_booster = learner["gradient_booster"];
     gbm_->SaveModel(&gradient_booster);
@@ -861,10 +894,6 @@ class LearnerIO : public LearnerConfiguration {
       learner["attributes"][kv.first] = String(kv.second);
     }
 
-    learner["base_score"] = F32Array();
-    auto& base_score = get<F32Array>(learner["base_score"]);
-    base_score = learner_model_param_.base_score.ConstHostVector();
-
     learner["feature_names"] = Array();
     auto& feature_names = get<Array>(learner["feature_names"]);
     for (auto const& name : feature_names_) {
@@ -879,7 +908,7 @@ class LearnerIO : public LearnerConfiguration {
 
   // About to be deprecated by JSON format
   void LoadModel(dmlc::Stream* fi) override {
-    generic_parameters_.UpdateAllowUnknown(Args{});
+    ctx_.UpdateAllowUnknown(Args{});
     tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
     // TODO(tqchen) mark deprecation of old format.
     common::PeekableInStream fp(fi);
@@ -934,8 +963,8 @@ class LearnerIO : public LearnerConfiguration {
     CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
 
-    obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
-    gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+    obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
+    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                        &learner_model_param_));
     gbm_->Load(fi);
     if (mparam_.contain_extra_attrs != 0) {
@@ -978,7 +1007,14 @@ class LearnerIO : public LearnerConfiguration {
     }
 
     learner_model_param_ =
-        LearnerModelParam(mparam_, {obj_->ProbToMargin(mparam_.base_score)}, obj_->Task());
+        LearnerModelParam(&ctx_, mparam_,
+                          linalg::Tensor<float, 1>{{std::isnan(mparam_.base_score)
+                                                        ? std::numeric_limits<float>::quiet_NaN()
+                                                        : obj_->ProbToMargin(mparam_.base_score)},
+                                                   {1},
+                                                   Context::kCpuId},
+                          obj_->Task());
+
     if (attributes_.find("objective") != attributes_.cend()) {
       auto obj_str = attributes_.at("objective");
       auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
@@ -1000,7 +1036,8 @@ class LearnerIO : public LearnerConfiguration {
       auto const& base_score_str = it->second;
       auto loaded = Json::Load(StringView{base_score_str});
       auto const& base_score = get<Array const>(loaded);
-      auto& h_result = learner_model_param_.base_score.HostVector();
+      base_score_.Reshape(base_score.size());
+      auto& h_result = base_score_.Data()->HostVector();
       h_result.clear();
       for (auto const& v : base_score) {
         h_result.push_back(get<Number const>(v));
@@ -1067,8 +1104,10 @@ class LearnerIO : public LearnerConfiguration {
 
     {
       // serialize base score
-      F32Array base_score(learner_model_param_.base_score.Size());
-      base_score.GetArray() = learner_model_param_.base_score.ConstHostVector();
+      std::vector<Json> base_score;
+      for (auto v : base_score_.Data()->HostVector()) {
+        base_score.emplace_back(Number(v));
+      }
       std::string base_score_str;
       Json::Dump(Json(std::move(base_score)), &base_score_str);
       extra_attr.emplace_back("base_score", base_score_str);
@@ -1189,21 +1228,20 @@ class LearnerImpl : public LearnerIO {
     return gbm_->DumpModel(fmap, with_stats, format);
   }
 
-  Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
-                 bool *out_of_bound) override {
+  Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
+                 bool* out_of_bound) override {
     this->Configure();
     CHECK_NE(this->learner_model_param_.num_feature, 0);
     CHECK_GE(begin_layer, 0);
     auto* out_impl = new LearnerImpl({});
     out_impl->learner_model_param_.Copy(this->learner_model_param_);
-    out_impl->generic_parameters_ = this->generic_parameters_;
+    out_impl->ctx_ = this->ctx_;
     auto gbm = std::unique_ptr<GradientBooster>(GradientBooster::Create(
-        this->tparam_.booster, &out_impl->generic_parameters_,
-        &out_impl->learner_model_param_));
+        this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_));
     this->gbm_->Slice(begin_layer, end_layer, step, gbm.get(), out_of_bound);
     out_impl->gbm_ = std::move(gbm);
 
-    Json config { Object() };
+    Json config{Object()};
     this->SaveConfig(&config);
     out_impl->mparam_ = this->mparam_;
     out_impl->attributes_ = this->attributes_;
@@ -1229,17 +1267,16 @@ class LearnerImpl : public LearnerIO {
   void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
-    this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    this->Configure(train.get());
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
 
     this->CheckDataSplitMode();
-    this->ConfigureLearnerParam(train.get());
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
-    auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id);
+    auto& predt = local_cache->Cache(train, ctx_.gpu_id);
 
     monitor_.Start("PredictRaw");
     this->PredictRaw(train.get(), &predt, true, 0, 0);
@@ -1258,16 +1295,16 @@ class LearnerImpl : public LearnerIO {
   void BoostOneIter(int iter, std::shared_ptr<DMatrix> train,
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
-    this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    this->Configure(train.get());
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
 
     this->CheckDataSplitMode();
     this->ValidateDMatrix(train.get(), true);
 
     auto local_cache = this->GetPredictionCache();
-    local_cache->Cache(train, generic_parameters_.gpu_id);
+    local_cache->Cache(train, ctx_.gpu_id);
 
     gbm_->DoBoost(train.get(), in_gpair, &local_cache->Entry(train.get()), obj_.get());
     monitor_.Stop("BoostOneIter");
@@ -1283,18 +1320,18 @@ class LearnerImpl : public LearnerIO {
     os.precision(std::numeric_limits<double>::max_digits10);
     os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
     if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
-      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_));
+      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
       metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
     }
 
     auto local_cache = this->GetPredictionCache();
     for (size_t i = 0; i < data_sets.size(); ++i) {
       std::shared_ptr<DMatrix> m = data_sets[i];
-      auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id);
+      auto &predt = local_cache->Cache(m, ctx_.gpu_id);
       this->ValidateDMatrix(m.get(), false);
       this->PredictRaw(m.get(), &predt, false, 0, 0);
 
-      auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
+      auto &out = output_predictions_.Cache(m, ctx_.gpu_id).predictions;
       out.Resize(predt.predictions.Size());
       out.Copy(predt.predictions);
 
@@ -1317,7 +1354,6 @@ class LearnerImpl : public LearnerIO {
                                static_cast<int>(pred_interactions) +
                                static_cast<int>(pred_contribs);
     this->Configure();
-    this->ConfigureLearnerParam(nullptr);
 
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
@@ -1329,10 +1365,10 @@ class LearnerImpl : public LearnerIO {
       gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);
     } else {
       auto local_cache = this->GetPredictionCache();
-      auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id);
+      auto& prediction = local_cache->Cache(data, ctx_.gpu_id);
       this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end);
       // Copy the prediction cache to output prediction. out_preds comes from C API
-      out_preds->SetDevice(generic_parameters_.gpu_id);
+      out_preds->SetDevice(ctx_.gpu_id);
       out_preds->Resize(prediction.predictions.Size());
       out_preds->Copy(prediction.predictions);
       if (!output_margin) {
@@ -1399,7 +1435,7 @@ class LearnerImpl : public LearnerIO {
 
   void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const {
     MetaInfo const& info = p_fmat->Info();
-    info.Validate(generic_parameters_.gpu_id);
+    info.Validate(ctx_.gpu_id);
 
     auto const row_based_split = [this]() {
       return tparam_.dsplit == DataSplitMode::kRow || tparam_.dsplit == DataSplitMode::kAuto;
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index 9b1fda245d75..5ba5f87fb8c5 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -31,10 +31,10 @@ ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const
   return pobj;
 }
 
-void ObjFunction::InitEstimation(MetaInfo const&, HostDeviceVector<float>* base_score) const {
+void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor<float, 1>* base_score) const {
   CHECK(base_score);
-  base_score->Resize(1);
-  base_score->Fill(DefaultBaseScore());
+  base_score->Reshape(1);
+  (*base_score)(0) = DefaultBaseScore();
 }
 }  // namespace xgboost
 
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 679d1b7b57b8..de4c32304d5b 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -703,17 +703,17 @@ class MeanAbsoluteError : public ObjFunction {
     });
   }
 
-  void InitEstimation(MetaInfo const& info, HostDeviceVector<float>* base_margin) const override {
+  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {
     CheckInitInputs(info);
-    auto& h_base_margin = base_margin->HostVector();
-    h_base_margin.resize(1);
+    base_margin->Reshape(1);
+    auto h_base_margin = base_margin->HostView();
     if (ctx_->IsCPU()) {
-      h_base_margin.front() = common::Median(
-          ctx_, info.labels.HostView(), common::OptionalWeights{info.weights_.ConstHostSpan()});
+      h_base_margin(0) = common::Median(ctx_, info.labels.HostView(),
+                                        common::OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
       info.weights_.SetDevice(ctx_->gpu_id);
-      h_base_margin.front() = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
-                                             common::OptionalWeights{info.weights_.DeviceSpan()});
+      h_base_margin(0) = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
+                                        common::OptionalWeights{info.weights_.DeviceSpan()});
     }
   }
 
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 5a3bad58f198..79feb59bc408 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -429,7 +429,7 @@ class CPUPredictor : public Predictor {
     }
     out_preds->resize(model.learner_model_param->num_output_group *
                       (model.param.size_leaf_vector + 1));
-    auto const& base_score = model.learner_model_param->base_score.HostVector().front();
+    auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
     // loop over output groups
     for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
       (*out_preds)[gid] =
@@ -506,8 +506,7 @@ class CPUPredictor : public Predictor {
       FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
     });
     auto base_margin = info.base_margin_.View(GenericParameter::kCpuId);
-    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
-    auto base_score = model.learner_model_param->base_score.HostVector().front();
+    auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
     // start collecting the contributions
     for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
       auto page = batch.GetView();
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index e788f92253bf..2716883303b5 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -859,13 +859,12 @@ class GPUPredictor : public xgboost::Predictor {
     // Add the base margin term to last column
     p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
-    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
-    model.learner_model_param->base_score.SetDevice(ctx_->gpu_id);
-    float const* base_score = model.learner_model_param->base_score.ConstDevicePointer();
+
+    auto base_score = model.learner_model_param->BaseScore(ctx_);
     dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
                 [=] __device__(size_t idx) {
                   phis[(idx + 1) * contributions_columns - 1] +=
-                      margin.empty() ? *base_score : margin[idx];
+                      margin.empty() ? base_score(0) : margin[idx];
                 });
   }
 
@@ -920,9 +919,7 @@ class GPUPredictor : public xgboost::Predictor {
     p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
 
-    CHECK_EQ(model.learner_model_param->base_score.Size(), 1);
-    model.learner_model_param->base_score.SetDevice(ctx_->gpu_id);
-    float const* base_score = model.learner_model_param->base_score.ConstDevicePointer();
+    auto base_score = model.learner_model_param->BaseScore(ctx_);
     size_t n_features = model.learner_model_param->num_feature;
     dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
                 [=] __device__(size_t idx) {
@@ -930,7 +927,7 @@ class GPUPredictor : public xgboost::Predictor {
                   size_t row_idx = idx / ngroup;
                   phis[gpu_treeshap::IndexPhiInteractions(row_idx, ngroup, group, n_features,
                                                           n_features, n_features)] +=
-                      margin.empty() ? *base_score : margin[idx];
+                      margin.empty() ? base_score(0) : margin[idx];
                 });
   }
 
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index af438af6556b..5701ed892f23 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -87,10 +87,8 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
   } else {
     // cannot rely on the Resize to fill as it might skip if the size is already correct.
     out_preds->Resize(n);
-    auto const& base_score = model.learner_model_param->base_score;
-    CHECK_EQ(base_score.Size(), 1);
-    // FIXME(jiamingy): Support multi-class
-    out_preds->Fill(base_score.HostVector().front());
+    auto base_score = model.learner_model_param->BaseScore(Context::kCpuId)(0);
+    out_preds->Fill(base_score);
   }
 }
 }  // namespace xgboost
diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc
index 8f4ecb7c8f59..5252a75dcd0a 100644
--- a/tests/cpp/common/test_linalg.cc
+++ b/tests/cpp/common/test_linalg.cc
@@ -59,7 +59,7 @@ TEST(Linalg, TensorView) {
   float v = t(0, 1, 2);
   ASSERT_EQ(v, 6);
 
-  auto s = t.Slice(1, All(), All());
+  auto s = t(1, All(), All());
   ASSERT_EQ(s.Shape().size(), 2);
   ASSERT_EQ(s.Shape()[0], 3);
   ASSERT_EQ(s.Shape()[1], 4);
@@ -86,9 +86,9 @@ TEST(Linalg, TensorView) {
   {
     // as matrix
     TensorView<double, 2> mat(data, {6, 4}, -1);
-    auto s = mat.Slice(2, All());
+    auto s = mat(2, All());
     ASSERT_EQ(s.Shape().size(), 1);
-    s = mat.Slice(All(), 1);
+    s = mat(All(), 1);
     ASSERT_EQ(s.Shape().size(), 1);
   }
 
@@ -107,7 +107,7 @@ TEST(Linalg, TensorView) {
     // Don't assign the initial dimension, tensor should be able to deduce the correct dim
     // for Slice.
     auto t = MakeTensorView(data, {2, 3, 4}, 0);
-    auto s = t.Slice(1, 2, All());
+    auto s = t(1, 2, All());
     static_assert(decltype(s)::kDimension == 1, "");
   }
   {
diff --git a/tests/cpp/gbm/test_gblinear.cc b/tests/cpp/gbm/test_gblinear.cc
index 61d22f5ea1ff..c53bb08f68ef 100644
--- a/tests/cpp/gbm/test_gblinear.cc
+++ b/tests/cpp/gbm/test_gblinear.cc
@@ -19,15 +19,11 @@ namespace gbm {
 TEST(GBLinear, JsonIO) {
   size_t constexpr kRows = 16, kCols = 16;
 
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.num_output_group = 1;
+  Context ctx;
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
-  GenericParameter gparam;
-  gparam.Init(Args{});
-
-  std::unique_ptr<GradientBooster> gbm {
-    CreateTrainedGBM("gblinear", Args{}, kRows, kCols, &param, &gparam) };
+  std::unique_ptr<GradientBooster> gbm{
+      CreateTrainedGBM("gblinear", Args{}, kRows, kCols, &mparam, &ctx)};
   Json model { Object() };
   gbm->SaveModel(&model);
   ASSERT_TRUE(IsA<Object>(model));
diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc
index 4c66911d11d5..13ec23c14906 100644
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -18,15 +18,11 @@ namespace xgboost {
 TEST(GBTree, SelectTreeMethod) {
   size_t constexpr kCols = 10;
 
-  GenericParameter generic_param;
-  generic_param.UpdateAllowUnknown(Args{});
-  LearnerModelParam mparam;
-  mparam.base_score.Resize(1, 0.5);
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
+  Context ctx;
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   std::unique_ptr<GradientBooster> p_gbm {
-    GradientBooster::Create("gbtree", &generic_param, &mparam)};
+    GradientBooster::Create("gbtree", &ctx, &mparam)};
   auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
 
   // Test if `tree_method` can be set
@@ -45,7 +41,7 @@ TEST(GBTree, SelectTreeMethod) {
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
 
 #ifdef XGBOOST_USE_CUDA
-  generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
   gbtree.Configure({{"tree_method", "gpu_hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
   gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}});
@@ -55,15 +51,11 @@ TEST(GBTree, SelectTreeMethod) {
 
 TEST(GBTree, PredictionCache) {
   size_t constexpr kRows = 100, kCols = 10;
-  GenericParameter generic_param;
-  generic_param.UpdateAllowUnknown(Args{});
-  LearnerModelParam mparam;
-  mparam.base_score.Resize(1, 0.5);
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
+  Context ctx;
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   std::unique_ptr<GradientBooster> p_gbm {
-    GradientBooster::Create("gbtree", &generic_param, &mparam)};
+    GradientBooster::Create("gbtree", &ctx, &mparam)};
   auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
 
   gbtree.Configure({{"tree_method", "hist"}});
@@ -176,16 +168,11 @@ TEST(GBTree, ChoosePredictor) {
 TEST(GBTree, JsonIO) {
   size_t constexpr kRows = 16, kCols = 16;
 
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.num_output_group = 1;
-  mparam.base_score.Resize(1, 0.5);
-
-  GenericParameter gparam;
-  gparam.Init(Args{});
+  Context ctx;
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   std::unique_ptr<GradientBooster> gbm {
-    CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &gparam) };
+    CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) };
 
   Json model {Object()};
   model["model"] = Object();
@@ -215,16 +202,11 @@ TEST(GBTree, JsonIO) {
 TEST(Dart, JsonIO) {
   size_t constexpr kRows = 16, kCols = 16;
 
-  LearnerModelParam mparam;
-  mparam.num_feature = kCols;
-  mparam.base_score.Resize(1, 0.5);
-  mparam.num_output_group = 1;
-
-  GenericParameter gparam;
-  gparam.Init(Args{});
+  Context ctx;
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
-  std::unique_ptr<GradientBooster> gbm {
-    CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &gparam) };
+  std::unique_ptr<GradientBooster> gbm{
+      CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &ctx)};
 
   Json model {Object()};
   model["model"] = Object();
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index b79ea27187f5..7a8127bdfc1a 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -451,5 +451,14 @@ class RMMAllocator;
 using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
 RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
 
+/*
+ * \brief Make learner model param
+ */
+inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups) {
+  size_t shape[1]{1};
+  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape}, n_groups);
+  return mparam;
+}
+
 }  // namespace xgboost
 #endif
diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc
index b179eba0ebb1..779c20940598 100644
--- a/tests/cpp/linear/test_linear.cc
+++ b/tests/cpp/linear/test_linear.cc
@@ -18,7 +18,7 @@ TEST(Linear, Shotgun) {
   auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
-  LearnerModelParam mparam(kCols, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   {
     auto updater = std::unique_ptr<xgboost::LinearUpdater>(
@@ -51,7 +51,7 @@ TEST(Linear, coordinate) {
   auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
-  LearnerModelParam mparam(kCols, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent", &lparam));
diff --git a/tests/cpp/linear/test_linear.cu b/tests/cpp/linear/test_linear.cu
index 05330a02b85d..193e9b4b21eb 100644
--- a/tests/cpp/linear/test_linear.cu
+++ b/tests/cpp/linear/test_linear.cu
@@ -15,7 +15,7 @@ TEST(Linear, GPUCoordinate) {
   auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
   auto ctx = CreateEmptyGenericParam(GPUIDX);
 
-  LearnerModelParam mparam(kCols, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("gpu_coord_descent", &ctx));
   updater->Configure({{"eta", "1."}});
diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc
index 1a631d0098b0..8db605be3bcc 100644
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -21,11 +21,11 @@ TEST(CpuPredictor, Basic) {
   size_t constexpr kRows = 5;
   size_t constexpr kCols = 5;
 
-  LearnerModelParam param{kCols, {0.0}, 1};
+  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
+  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
 
   auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
@@ -101,11 +101,11 @@ TEST(CpuPredictor, ExternalMemory) {
   std::unique_ptr<Predictor> cpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
 
-  LearnerModelParam param(dmat->Info().num_col_, {0.0}, 1);
+  LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)};
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
+  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
 
   // Test predict batch
   PredictionCacheEntry out_predictions;
@@ -195,7 +195,7 @@ TEST(CpuPredictor, InplacePredict) {
 
 void TestUpdatePredictionCache(bool use_subsampling) {
   size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
-  LearnerModelParam mparam{kCols, {0.0}, kClasses};
+  LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)};
   Context ctx;
 
   std::unique_ptr<gbm::GBTree> gbm;
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 868abad13b63..5e945e78e485 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017-2020 XGBoost contributors
+ * Copyright 2017-2022 XGBoost contributors
  */
 #include <gtest/gtest.h>
 #include <xgboost/c_api.h>
@@ -34,9 +34,9 @@ TEST(GPUPredictor, Basic) {
     int n_row = i, n_col = i;
     auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
 
-    LearnerModelParam param(n_col, {.5}, 1);
+    LearnerModelParam mparam{MakeMP(n_col, .5, 1)};
     GenericParameter ctx;
-    gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
+    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
 
     // Test predict batch
     PredictionCacheEntry gpu_out_predictions;
@@ -89,10 +89,10 @@ TEST(GPUPredictor, ExternalMemoryTest) {
   gpu_predictor->Configure({});
 
   const int n_classes = 3;
-  LearnerModelParam param(5, {.5}, n_classes);
+  LearnerModelParam mparam{MakeMP(5, .5, n_classes)};
 
   Context ctx;
-  gbm::GBTreeModel model = CreateTestModel(&param, &ctx, n_classes);
+  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
   std::vector<std::unique_ptr<DMatrix>> dmats;
 
   dmats.push_back(CreateSparsePageDMatrix(400));
@@ -162,9 +162,9 @@ TEST(GpuPredictor, LesserFeatures) {
 TEST(GPUPredictor, ShapStump) {
   cudaSetDevice(0);
 
-  LearnerModelParam param(1, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(1, .5, 1)};
   Context ctx;
-  gbm::GBTreeModel model(&param, &ctx);
+  gbm::GBTreeModel model(&mparam, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
@@ -178,7 +178,7 @@ TEST(GPUPredictor, ShapStump) {
   auto dmat = RandomDataGenerator(3, 1, 0).GenerateDMatrix();
   gpu_predictor->PredictContribution(dmat.get(), &predictions, model);
   auto& phis = predictions.HostVector();
-  auto base_score = param.base_score.HostVector().front();
+  auto base_score = mparam.BaseScore(Context::kCpuId)(0);
   EXPECT_EQ(phis[0], 0.0);
   EXPECT_EQ(phis[1], base_score);
   EXPECT_EQ(phis[2], 0.0);
@@ -188,9 +188,9 @@ TEST(GPUPredictor, ShapStump) {
 }
 
 TEST(GPUPredictor, Shap) {
-  LearnerModelParam param(1, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(1, .5, 1)};
   Context ctx;
-  gbm::GBTreeModel model(&param, &ctx);
+  gbm::GBTreeModel model(&mparam, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
@@ -238,9 +238,9 @@ TEST(GPUPredictor, PredictLeafBasic) {
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
   gpu_predictor->Configure({});
 
-  LearnerModelParam param(kCols, {.0}, 1);
+  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
   Context ctx;
-  gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
+  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
 
   HostDeviceVector<float> leaf_out_predictions;
   gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc
index 5ba1fc52b0d1..64d2b9a81ea2 100644
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -210,8 +210,7 @@ void TestCategoricalPrediction(std::string name) {
   size_t constexpr kCols = 10;
   PredictionCacheEntry out_predictions;
 
-  LearnerModelParam param(kCols, {.5}, 1);
-
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
   uint32_t split_ind = 3;
   bst_cat_t split_cat = 4;
   float left_weight = 1.3f;
@@ -219,7 +218,7 @@ void TestCategoricalPrediction(std::string name) {
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model(&param, &ctx);
+  gbm::GBTreeModel model(&mparam, &ctx);
   GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
 
   ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
@@ -234,7 +233,7 @@ void TestCategoricalPrediction(std::string name) {
 
   predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
   predictor->PredictBatch(m.get(), &out_predictions, model, 0);
-  auto score = param.base_score.HostVector().front();
+  auto score = mparam.BaseScore(Context::kCpuId)(0);
   ASSERT_EQ(out_predictions.predictions.Size(), 1ul);
   ASSERT_EQ(out_predictions.predictions.HostVector()[0],
             right_weight + score);  // go to right for matching cat
@@ -251,7 +250,7 @@ void TestCategoricalPredictLeaf(StringView name) {
   size_t constexpr kCols = 10;
   PredictionCacheEntry out_predictions;
 
-  LearnerModelParam param(kCols, {.5}, 1);
+  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
 
   uint32_t split_ind = 3;
   bst_cat_t split_cat = 4;
@@ -261,7 +260,7 @@ void TestCategoricalPredictLeaf(StringView name) {
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
 
-  gbm::GBTreeModel model(&param, &ctx);
+  gbm::GBTreeModel model(&mparam, &ctx);
   GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
 
   ctx.gpu_id = 0;
diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h
index bf970ddb420d..81ee249e250e 100644
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -12,7 +12,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
                                      std::shared_ptr<DMatrix> p_hist) {
   constexpr size_t kClasses { 3 };
 
-  LearnerModelParam param(cols, {.5}, kClasses);
+  LearnerModelParam mparam{MakeMP(cols, .5, kClasses)};
   auto lparam = CreateEmptyGenericParam(0);
 
   std::unique_ptr<Predictor> predictor =
@@ -21,7 +21,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
 
   GenericParameter ctx;
   ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&param, &ctx, kClasses);
+  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, kClasses);
 
   {
     auto p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();

From fefde604a86fda09c754e51f3e945fcae227a64d Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 28 Jul 2022 17:22:41 +0800
Subject: [PATCH 05/34] Fix gblinear base score access and default base score transform.

---
 src/gbm/gblinear.cc | 4 ++--
 src/learner.cc      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc
index 29065add9846..c8cdfeb476b1 100644
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -270,12 +270,12 @@ class GBLinear : public GradientBooster {
     monitor_.Start("PredictBatchInternal");
     model_.LazyInitModel();
     std::vector<bst_float> &preds = *out_preds;
-    auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId);
+    auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
     // start collecting the prediction
     const int ngroup = model_.learner_model_param->num_output_group;
     preds.resize(p_fmat->Info().num_row_ * ngroup);
 
-    auto base_score = learner_model_param_->BaseScore(ctx_);
+    auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
     for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
       auto const& batch = page.GetView();
       // output convention: nrow * k, where nrow is number of rows
diff --git a/src/learner.cc b/src/learner.cc
index b3c6224c1c0a..7e80ba253527 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -224,7 +224,7 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
   if (that.base_score_.DeviceIdx() != Context::kCpuId) {
     common::AsConst(base_score_).View(that.base_score_.DeviceIdx());
   }
-  CHECK(base_score_.Data()->DeviceCanRead());
+  CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead());
   CHECK(base_score_.Data()->HostCanRead());
 
   num_feature = that.num_feature;
@@ -461,7 +461,7 @@ class LearnerConfiguration : public Learner {
     } else {
       // lastly, if data is not available (prediction for custom objective), use default.
       base_score_.Reshape(1);
-      base_score_(0) = obj_->ProbToMargin(ObjFunction::DefaultBaseScore());
+      base_score_(0) = ObjFunction::DefaultBaseScore();
     }
 
     auto task = obj_->Task();

From 697fd12f95f3b60f4a274facf391c8e26428eaaf Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 28 Jul 2022 18:32:22 +0800
Subject: [PATCH 06/34] Configure base score after GBM; pass device to MakeMP in GPU tests.

---
 include/xgboost/linalg.h                  |  2 +-
 src/learner.cc                            | 29 ++++++++++++-----------
 tests/cpp/helpers.h                       |  6 +++--
 tests/cpp/predictor/test_gpu_predictor.cu | 16 ++++++++-----
 4 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index 6f914db0714c..b45a9dba2671 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -679,7 +679,7 @@ class Tensor {
     }
     if (device >= 0) {
       data_.SetDevice(device);
-      data_.DevicePointer();  // Pull to device;
+      data_.ConstDevicePointer();  // Pull to device;
     }
     CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
   }
diff --git a/src/learner.cc b/src/learner.cc
index 7e80ba253527..0b12aeefc4ee 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -416,22 +416,11 @@ class LearnerConfiguration : public Learner {
 
     this->ConfigureTargets();
 
-    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
-    // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
-    // keep the stability, we initialize it in binary LoadModel instead of configuration.
-    // Under what condition should we omit the transformation:
-    //
-    // - base_score is loaded from old binary model.
-    //
-    // What are the other possible conditions:
-    //
-    // - model loaded from new binary or JSON.
-    // - model is created from scratch.
-    // - model is configured second time due to change of parameter
-    this->ConfigureLearnerParam(p_fmat);
-
+    learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
+    this->ConfigureLearnerParam(p_fmat);
+
     this->ConfigureMetrics(args);
 
     this->need_configuration_ = false;
@@ -447,6 +436,18 @@ class LearnerConfiguration : public Learner {
    * \brief Calculate the `base_score` based on input data.
    */
   void ConfigureLearnerParam(DMatrix const* p_fmat) {
+    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
+    // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
+    // keep the stability, we initialize it in binary LoadModel instead of configuration.
+    // Under what condition should we omit the transformation:
+    //
+    // - base_score is loaded from old binary model.
+    //
+    // What are the other possible conditions:
+    //
+    // - model loaded from new binary or JSON.
+    // - model is created from scratch.
+    // - model is configured second time due to change of parameter
     CHECK(obj_);
     float world = rabit::GetWorldSize();
     if (base_score_.Size() != 0) {
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index 7a8127bdfc1a..c7f73495c49f 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -454,9 +454,11 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
 /*
  * \brief Make learner model param
  */
-inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups) {
+inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
+                                int32_t device = Context::kCpuId) {
   size_t shape[1]{1};
-  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape}, n_groups);
+  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
+                           n_groups);
   return mparam;
 }
 
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 5e945e78e485..2a0b69cbd629 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -34,8 +34,9 @@ TEST(GPUPredictor, Basic) {
     int n_row = i, n_col = i;
     auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
 
-    LearnerModelParam mparam{MakeMP(n_col, .5, 1)};
-    GenericParameter ctx;
+    Context ctx;
+    ctx.gpu_id = 0;
+    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
     gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
 
     // Test predict batch
@@ -89,9 +90,10 @@ TEST(GPUPredictor, ExternalMemoryTest) {
   gpu_predictor->Configure({});
 
   const int n_classes = 3;
-  LearnerModelParam mparam{MakeMP(5, .5, n_classes)};
-
   Context ctx;
+  ctx.gpu_id = 0;
+  LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
+
   gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
   std::vector<std::unique_ptr<DMatrix>> dmats;
 
@@ -162,8 +164,9 @@ TEST(GpuPredictor, LesserFeatures) {
 TEST(GPUPredictor, ShapStump) {
   cudaSetDevice(0);
 
-  LearnerModelParam mparam{MakeMP(1, .5, 1)};
   Context ctx;
+  ctx.gpu_id = 0;
+  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
   gbm::GBTreeModel model(&mparam, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;
@@ -188,8 +191,9 @@ TEST(GPUPredictor, ShapStump) {
 }
 
 TEST(GPUPredictor, Shap) {
-  LearnerModelParam mparam{MakeMP(1, .5, 1)};
   Context ctx;
+  ctx.gpu_id = 0;
+  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
   gbm::GBTreeModel model(&mparam, &ctx);
 
   std::vector<std::unique_ptr<RegTree>> trees;

From 47cfa11b4f26e880e2b33edf80bf465a37d39c15 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 28 Jul 2022 18:35:39 +0800
Subject: [PATCH 07/34] Lint.

---
 include/xgboost/objective.h | 2 +-
 src/common/common.h         | 2 +-
 src/common/stats.h          | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
index 2cad8dc2a8c2..0c0d502bdbfb 100644
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -30,7 +30,7 @@ class ObjFunction : public Configurable {
   Context const* ctx_;
 
  public:
-  static constexpr float DefaultBaseScore() { return 0.5f; };
+  static constexpr float DefaultBaseScore() { return 0.5f; }
 
  public:
   /*! \brief virtual destructor */
diff --git a/src/common/common.h b/src/common/common.h
index 794d431b0b07..1eaf9ae7f4a0 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -297,7 +297,7 @@ struct Crtp {
  * \brief C++17 std::as_const
  */
 template <typename T>
-typename std::add_const<T>::type &AsConst(T &v) noexcept {
+typename std::add_const<T>::type &AsConst(T &v) noexcept {  // NOLINT(runtime/references)
   return v;
 }
 }  // namespace common
diff --git a/src/common/stats.h b/src/common/stats.h
index 8165cb75ae1a..547063c43a56 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -93,14 +93,15 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
 }
 
 namespace cuda {
-float Median(Context const* ctx, linalg::TensorView<float const, 2> t, common::OptionalWeights weights);
+float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
+             common::OptionalWeights weights);
 #if !defined(XGBOOST_USE_CUDA)
 inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
   common::AssertGPUSupport();
   return 0;
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
-}
+}  // namespace cuda
 
 inline float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
                     common::OptionalWeights weights) {

From de46dc239b0c8bd82204047b02e0e8974457c897 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Fri, 29 Jul 2022 14:28:24 +0800
Subject: [PATCH 08/34] Remove the separate base_score_ tensor from the learner.

---
 src/learner.cc | 136 ++++++++++++++++++++-----------------------------
 1 file changed, 56 insertions(+), 80 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index 0b12aeefc4ee..f7de8dd266c0 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -100,10 +100,10 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
                   "Do not change the size of this struct, as it will break binary IO.");
   }
   // Skip other legacy fields.
-  Json ToJson(linalg::TensorView<float const, 1> base_score_new) const {
+  Json ToJson() const {
     Object obj;
     char floats[NumericLimits<float>::kToCharsSize];
-    auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score_new(0));
+    auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
     CHECK(ret.ec == std::errc());
     obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
 
@@ -126,7 +126,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
 
     return Json(std::move(obj));
   }
-  void FromJson(Json const& obj, linalg::Tensor<float, 1>* base_score_new) {
+  void FromJson(Json const& obj) {
     auto const& j_param = get<Object const>(obj);
     std::map<std::string, std::string> m;
     m["num_feature"] = get<String const>(j_param.at("num_feature"));
@@ -140,9 +140,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
 
     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
-    base_score_new->Reshape(1);
-    (*base_score_new)(0) = base_score;
   }
+
   inline LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
     dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
@@ -355,9 +354,57 @@ class LearnerConfiguration : public Learner {
   LearnerModelParam learner_model_param_;
   LearnerTrainParam tparam_;
   // Initial prediction.
-  linalg::Tensor<float, 1> base_score_;
   std::vector<std::string> metric_names_;
 
+  /**
+   * \brief Calculate the `base_score` based on input data.
+   */
+  void ConfigureLearnerParam(DMatrix const* p_fmat) {
+    linalg::Tensor<float, 1> base_score;
+    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
+    // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
+    // keep the stability, we initialize it in binary LoadModel instead of configuration.
+    // Under what condition should we omit the transformation:
+    //
+    // - base_score is loaded from old binary model.
+    //
+    // What are the other possible conditions:
+    //
+    // - model loaded from new binary or JSON.
+    // - model is created from scratch.
+    // - model is configured second time due to change of parameter
+    CHECK(obj_);
+    float world = rabit::GetWorldSize();
+    if (!std::isnan(mparam_.base_score)) {
+      // if base_score is set by user, use it.
+      base_score.Reshape(1);
+      base_score(0) = mparam_.base_score;
+    } else if (p_fmat) {
+      // otherwise, we estimate it from input data.
+      obj_->InitEstimation(p_fmat->Info(), &base_score);
+    } else {
+      // lastly, if data is not available (prediction for custom objective), use default.
+      base_score.Reshape(1);
+      base_score(0) = ObjFunction::DefaultBaseScore();
+    }
+
+    auto task = obj_->Task();
+    auto in = base_score.HostView();
+    rabit::Allreduce<rabit::op::Sum>(in.Values().data(), in.Values().size());
+    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(in),
+                   [world](float v) { return v / world; });
+    mparam_.base_score = base_score(0);
+
+    linalg::Tensor<float, 1> copy(base_score.Shape(), ctx_.gpu_id);
+    auto out = copy.HostView();
+    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(out),
+                   [&](float v) { return obj_->ProbToMargin(v); });
+
+    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
+    CHECK(learner_model_param_.Initialized());
+    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
+  }
+
  public:
   explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
       : need_configuration_{true} {
@@ -432,54 +479,6 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }
 
-  /**
-   * \brief Calculate the `base_score` based on input data.
-   */
-  void ConfigureLearnerParam(DMatrix const* p_fmat) {
-    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
-    // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
-    // keep the stability, we initialize it in binary LoadModel instead of configuration.
-    // Under what condition should we omit the transformation:
-    //
-    // - base_score is loaded from old binary model.
-    //
-    // What are the other possible conditions:
-    //
-    // - model loaded from new binary or JSON.
-    // - model is created from scratch.
-    // - model is configured second time due to change of parameter
-    CHECK(obj_);
-    float world = rabit::GetWorldSize();
-    if (base_score_.Size() != 0) {
-      // do nothing
-    } else if (!std::isnan(mparam_.base_score)) {
-      // if base_score is set by user, use it.
-      base_score_.Reshape(1);
-      base_score_(0) = mparam_.base_score;
-    } else if (p_fmat) {
-      // otherwise, we estimate it from input data.
-      obj_->InitEstimation(p_fmat->Info(), &base_score_);
-    } else {
-      // lastly, if data is not available (prediction for custom objective), use default.
-      base_score_.Reshape(1);
-      base_score_(0) = ObjFunction::DefaultBaseScore();
-    }
-
-    auto task = obj_->Task();
-    rabit::Allreduce<rabit::op::Sum>(base_score_.Data()->HostVector().data(),
-                                     base_score_.Data()->Size());
-    linalg::Tensor<float, 1> copy{base_score_.Shape(), ctx_.gpu_id};
-
-    auto in = base_score_.HostView();
-    auto out = copy.HostView();
-    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(out),
-                   [&](float v) { return obj_->ProbToMargin(v / world); });
-
-    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
-    CHECK(learner_model_param_.Initialized());
-    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
-  }
-
   virtual PredictionContainer* GetPredictionCache() const {
     return &((*ThreadLocalPredictionCache::Get())[this]);
   }
@@ -547,7 +546,7 @@ class LearnerConfiguration : public Learner {
     auto& learner_parameters = out["learner"];
 
     learner_parameters["learner_train_param"] = ToJson(tparam_);
-    learner_parameters["learner_model_param"] = mparam_.ToJson(base_score_.HostView());
+    learner_parameters["learner_model_param"] = mparam_.ToJson();
     learner_parameters["gradient_booster"] = Object();
     auto& gradient_booster = learner_parameters["gradient_booster"];
     gbm_->SaveConfig(&gradient_booster);
@@ -831,7 +830,7 @@ class LearnerIO : public LearnerConfiguration {
     }
 
     auto const& learner = get<Object>(in["learner"]);
-    mparam_.FromJson(learner.at("learner_model_param"), &base_score_);
+    mparam_.FromJson(learner.at("learner_model_param"));
 
     auto const& objective_fn = learner.at("objective");
 
@@ -881,7 +880,7 @@ class LearnerIO : public LearnerConfiguration {
     out["learner"] = Object();
     auto& learner = out["learner"];
 
-    learner["learner_model_param"] = mparam_.ToJson(base_score_.HostView());
+    learner["learner_model_param"] = mparam_.ToJson();
     learner["gradient_booster"] = Object();
     auto& gradient_booster = learner["gradient_booster"];
     gbm_->SaveModel(&gradient_booster);
@@ -1032,18 +1031,6 @@ class LearnerIO : public LearnerConfiguration {
         this->SetParam(kEvalMetric, n);
       }
     }
-    auto it = attributes_.find("base_score");
-    if (it != attributes_.cend()) {
-      auto const& base_score_str = it->second;
-      auto loaded = Json::Load(StringView{base_score_str});
-      auto const& base_score = get<Array const>(loaded);
-      base_score_.Reshape(base_score.size());
-      auto& h_result = base_score_.Data()->HostVector();
-      h_result.clear();
-      for (auto const& v : base_score) {
-        h_result.push_back(get<Number const>(v));
-      }
-    }
 
     if (warn_old_model) {
       LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it "
@@ -1103,17 +1090,6 @@ class LearnerIO : public LearnerConfiguration {
       extra_attr.emplace_back("metrics", os.str());
     }
 
-    {
-      // serialize base score
-      std::vector<Json> base_score;
-      for (auto v : base_score_.Data()->HostVector()) {
-        base_score.emplace_back(Number(v));
-      }
-      std::string base_score_str;
-      Json::Dump(Json(std::move(base_score)), &base_score_str);
-      extra_attr.emplace_back("base_score", base_score_str);
-    }
-
     std::string header {"binf"};
     fo->Write(header.data(), 4);
     if (DMLC_IO_NO_ENDIAN_SWAP) {

From 0b0616a57816e39f8746281552c3956ef231b0b2 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Fri, 29 Jul 2022 15:13:43 +0800
Subject: [PATCH 09/34] Cache the model.

---
 tests/python/test_model_compatibility.py | 26 ++++++++++++++----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py
index 6f9a184922ab..88549e1f2acb 100644
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -102,34 +102,38 @@ def run_scikit_model_check(name, path):
 
 @pytest.mark.skipif(**tm.no_sklearn())
 def test_model_compatibility():
-    '''Test model compatibility, can only be run on CI as others don't
+    """Test model compatibility, can only be run on CI as others don't
     have the credentials.
 
-    '''
+    """
     path = os.path.dirname(os.path.abspath(__file__))
-    path = os.path.join(path, 'models')
+    path = os.path.join(path, "models")
 
-    zip_path, _ = urllib.request.urlretrieve('https://xgboost-ci-jenkins-artifacts.s3-us-west-2' +
-                                             '.amazonaws.com/xgboost_model_compatibility_test.zip')
-    with zipfile.ZipFile(zip_path, 'r') as z:
-        z.extractall(path)
+    if not os.path.exists(path):
+        zip_path, _ = urllib.request.urlretrieve(
+            "https://xgboost-ci-jenkins-artifacts.s3-us-west-2"
+            + ".amazonaws.com/xgboost_model_compatibility_test.zip"
+        )
+        with zipfile.ZipFile(zip_path, "r") as z:
+            z.extractall(path)
 
     models = [
-        os.path.join(root, f) for root, subdir, files in os.walk(path)
+        os.path.join(root, f)
+        for root, subdir, files in os.walk(path)
         for f in files
-        if f != 'version'
+        if f != "version"
     ]
     assert models
 
     for path in models:
         name = os.path.basename(path)
-        if name.startswith('xgboost-'):
+        if name.startswith("xgboost-"):
             booster = xgboost.Booster(model_file=path)
             run_booster_check(booster, name)
             # Do full serialization.
             booster = copy.copy(booster)
             run_booster_check(booster, name)
-        elif name.startswith('xgboost_scikit'):
+        elif name.startswith("xgboost_scikit"):
             run_scikit_model_check(name, path)
         else:
             assert False

From c243e6681412d01b33246ff00543555034daad9f Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 23 Aug 2022 17:00:46 +0800
Subject: [PATCH 10/34] Use the default base score for an empty DMatrix in MAE.

---
 src/objective/regression_obj.cu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index de4c32304d5b..a19ab7a649fc 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -707,6 +707,11 @@ class MeanAbsoluteError : public ObjFunction {
     CheckInitInputs(info);
     base_margin->Reshape(1);
     auto h_base_margin = base_margin->HostView();
+    if (info.num_row_ == 0) {
+      h_base_margin(0) = DefaultBaseScore();
+      return;
+    }
+
     if (ctx_->IsCPU()) {
       h_base_margin(0) = common::Median(ctx_, info.labels.HostView(),
                                         common::OptionalWeights{info.weights_.ConstHostSpan()});

From 7dfa87a0434d969108aba1cfbcdfdb62d738cea8 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 23 Aug 2022 17:32:15 +0800
Subject: [PATCH 11/34] Revert unnecessary changes.

---
 include/xgboost/linalg.h        | 8 +-------
 tests/cpp/common/test_linalg.cc | 8 ++++----
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index b45a9dba2671..3897e89ea1ce 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -442,11 +442,6 @@ class TensorView {
     return ptr_[offset];
   }
 
-  template <typename... S, std::enable_if_t<!detail::IsAllIntegral<S...>::value> * = nullptr>
-  LINALG_HD auto operator()(S &&...slices) const {
-    return this->Slice(std::forward<S>(slices)...);
-  }
-
   /**
    * \brief Slice the tensor.  The returned tensor has inferred dim and shape.  Scalar
    *        result is not supported.
@@ -801,8 +796,7 @@ class Tensor {
    *
    *    If the total size is changed, then data in this tensor is no longer valid.
    */
-  template <typename... S, std::enable_if_t<detail::Conjunction<
-                               std::is_integral<std::remove_reference_t<S>>...>::value> * = nullptr>
+  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
   void Reshape(S &&...s) {
     static_assert(sizeof...(S) <= kDim, "Invalid shape.");
     detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc
index 5252a75dcd0a..8f4ecb7c8f59 100644
--- a/tests/cpp/common/test_linalg.cc
+++ b/tests/cpp/common/test_linalg.cc
@@ -59,7 +59,7 @@ TEST(Linalg, TensorView) {
   float v = t(0, 1, 2);
   ASSERT_EQ(v, 6);
 
-  auto s = t(1, All(), All());
+  auto s = t.Slice(1, All(), All());
   ASSERT_EQ(s.Shape().size(), 2);
   ASSERT_EQ(s.Shape()[0], 3);
   ASSERT_EQ(s.Shape()[1], 4);
@@ -86,9 +86,9 @@ TEST(Linalg, TensorView) {
   {
     // as matrix
     TensorView<double, 2> mat(data, {6, 4}, -1);
-    auto s = mat(2, All());
+    auto s = mat.Slice(2, All());
     ASSERT_EQ(s.Shape().size(), 1);
-    s = mat(All(), 1);
+    s = mat.Slice(All(), 1);
     ASSERT_EQ(s.Shape().size(), 1);
   }
 
@@ -107,7 +107,7 @@ TEST(Linalg, TensorView) {
     // Don't assign the initial dimension, tensor should be able to deduce the correct dim
     // for Slice.
     auto t = MakeTensorView(data, {2, 3, 4}, 0);
-    auto s = t(1, 2, All());
+    auto s = t.Slice(1, 2, All());
     static_assert(decltype(s)::kDimension == 1, "");
   }
   {

From 8db5676a2fa8be99306c9b6e806f21a39acf83e6 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 23 Aug 2022 21:29:05 +0800
Subject: [PATCH 12/34] Read base score from host in CPU predictor.

---
 src/predictor/cpu_predictor.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 79feb59bc408..444d1b089d21 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -505,8 +505,8 @@ class CPUPredictor : public Predictor {
     common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) {
       FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
     });
-    auto base_margin = info.base_margin_.View(GenericParameter::kCpuId);
-    auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
+    auto base_margin = info.base_margin_.View(Context::kCpuId);
+    auto base_score = model.learner_model_param->BaseScore(Context::kCpuId)(0);
     // start collecting the contributions
     for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
       auto page = batch.GetView();

From f260759885c29147a83222b7deb095889fffb551 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 24 Aug 2022 01:57:33 +0800
Subject: [PATCH 13/34] Add serialization test.

---
 tests/cpp/test_learner.cc       | 13 ++++++++++++
 tests/cpp/test_serialization.cc | 37 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 42f25fa39caa..1c292bce2983 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -447,4 +447,17 @@ TEST(Learner, MultiTarget) {
     EXPECT_THROW({ learner->Configure(); }, dmlc::Error);
   }
 }
+
+TEST(Learner, BaseScore) {
+  size_t constexpr kRows{1024}, kCols{16};
+  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
+  std::unique_ptr<Learner> learner{Learner::Create({m})};
+  learner->SetParam("objective", "reg:absoluteerror");
+  for (size_t i = 0; i < 4; ++i) {
+    learner->UpdateOneIter(i, m);
+  }
+  Json config{Object{}};
+  learner->SaveConfig(&config);
+  std::cout << config << std::endl;
+}
 }  // namespace xgboost
diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index d80a7442202e..e1cf8d6a3d1f 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -418,6 +418,43 @@ TEST_F(SerializationTest, GPUCoordDescent) {
 }
 #endif  // defined(XGBOOST_USE_CUDA)
 
+class L1SerializationTest : public SerializationTest {};
+
+TEST_F(L1SerializationTest, Exact) {
+  TestLearnerSerialization({{"booster", "gbtree"},
+                            {"objective", "reg:absoluteerror"},
+                            {"seed", "0"},
+                            {"max_depth", "2"},
+                            {"tree_method", "exact"}},
+                           fmap_, p_dmat_);
+}
+
+TEST_F(L1SerializationTest, Approx) {
+  TestLearnerSerialization({{"booster", "gbtree"},
+                            {"objective", "reg:absoluteerror"},
+                            {"seed", "0"},
+                            {"max_depth", "2"},
+                            {"tree_method", "approx"}},
+                           fmap_, p_dmat_);
+}
+
+TEST_F(L1SerializationTest, Hist) {
+  TestLearnerSerialization({{"booster", "gbtree"},
+                            {"objective", "reg:absoluteerror"},
+                            {"seed", "0"},
+                            {"max_depth", "2"},
+                            {"tree_method", "hist"}},
+                           fmap_, p_dmat_);
+}
+
+TEST_F(L1SerializationTest, GpuHist) {
+  TestLearnerSerialization({{"booster", "gbtree"},
+                            {"objective", "reg:absoluteerror"},
+                            {"seed", "0"},
+                            {"max_depth", "2"},
+                            {"tree_method", "gpu_hist"}},
+                           fmap_, p_dmat_);
+}
 
 class LogitSerializationTest : public SerializationTest {
  protected:

From fba72456ec3aea4b8c6d91f986ceb8c5ba69cc31 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 24 Aug 2022 02:17:43 +0800
Subject: [PATCH 14/34] Guard the GpuHist L1 serialization test for CPU-only builds.

---
 tests/cpp/test_serialization.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index e1cf8d6a3d1f..15765f09f29d 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -447,6 +447,7 @@ TEST_F(L1SerializationTest, Hist) {
                            fmap_, p_dmat_);
 }
 
+#if defined(XGBOOST_USE_CUDA)
 TEST_F(L1SerializationTest, GpuHist) {
   TestLearnerSerialization({{"booster", "gbtree"},
                             {"objective", "reg:absoluteerror"},
@@ -455,6 +456,7 @@ TEST_F(L1SerializationTest, GpuHist) {
                             {"tree_method", "gpu_hist"}},
                            fmap_, p_dmat_);
 }
+#endif  //  defined(XGBOOST_USE_CUDA)
 
 class LogitSerializationTest : public SerializationTest {
  protected:

From 2fc0e604afb7944adcb95cd6bfc4f77338a5d919 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 24 Aug 2022 13:15:19 +0800
Subject: [PATCH 15/34] Revert the Learner.BaseScore test added in patch 13.

---
 tests/cpp/test_learner.cc | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 1c292bce2983..42f25fa39caa 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -447,17 +447,4 @@ TEST(Learner, MultiTarget) {
     EXPECT_THROW({ learner->Configure(); }, dmlc::Error);
   }
 }
-
-TEST(Learner, BaseScore) {
-  size_t constexpr kRows{1024}, kCols{16};
-  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
-  std::unique_ptr<Learner> learner{Learner::Create({m})};
-  learner->SetParam("objective", "reg:absoluteerror");
-  for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, m);
-  }
-  Json config{Object{}};
-  learner->SaveConfig(&config);
-  std::cout << config << std::endl;
-}
 }  // namespace xgboost

From fa9d4999c47c314d6c1a261269ab7d6fd748bde2 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 13 Sep 2022 17:42:48 +0800
Subject: [PATCH 16/34] Better average.

---
 include/xgboost/learner.h       |  5 ++++-
 src/learner.cc                  |  8 +++-----
 src/objective/regression_obj.cu | 28 ++++++++++++++++++----------
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index a6b46723b9ad..e42cd1cab8c5 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -300,7 +300,10 @@ struct LearnerModelParamLegacy;
  */
 struct LearnerModelParam {
  private:
-  /* \brief global bias */
+  /**
+   * \brief Global bias, this is just a scalar value but can be extended to vector when we
+   *        support multi-class and multi-target.
+   */
   linalg::Tensor<float, 1> base_score_;
 
  public:
diff --git a/src/learner.cc b/src/learner.cc
index f7de8dd266c0..069b0b1e21c3 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -374,7 +374,6 @@ class LearnerConfiguration : public Learner {
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
     CHECK(obj_);
-    float world = rabit::GetWorldSize();
     if (!std::isnan(mparam_.base_score)) {
       // if base_score is set by user, use it.
       base_score.Reshape(1);
@@ -389,17 +388,16 @@ class LearnerConfiguration : public Learner {
     }
 
     auto task = obj_->Task();
-    auto in = base_score.HostView();
-    rabit::Allreduce<rabit::op::Sum>(in.Values().data(), in.Values().size());
-    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(in),
-                   [world](float v) { return v / world; });
     mparam_.base_score = base_score(0);
 
+    // transform to margin
     linalg::Tensor<float, 1> copy(base_score.Shape(), ctx_.gpu_id);
+    auto in = base_score.HostView();
     auto out = copy.HostView();
     std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(out),
                    [&](float v) { return obj_->ProbToMargin(v); });
 
+    // move it to model param, which is shared with all other components.
     learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
     CHECK(learner_model_param_.Initialized());
     CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index a19ab7a649fc..f0087a88d469 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -706,20 +706,28 @@ class MeanAbsoluteError : public ObjFunction {
   void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {
     CheckInitInputs(info);
     base_margin->Reshape(1);
-    auto h_base_margin = base_margin->HostView();
+    auto out = base_margin->HostView();
+    std::int32_t invalid{0};
     if (info.num_row_ == 0) {
-      h_base_margin(0) = DefaultBaseScore();
-      return;
-    }
-
-    if (ctx_->IsCPU()) {
-      h_base_margin(0) = common::Median(ctx_, info.labels.HostView(),
-                                        common::OptionalWeights{info.weights_.ConstHostSpan()});
+      out(0) = 0;
+      invalid++;
+    } else if (ctx_->IsCPU()) {
+      out(0) = common::Median(ctx_, info.labels.HostView(),
+                              common::OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
       info.weights_.SetDevice(ctx_->gpu_id);
-      h_base_margin(0) = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
-                                        common::OptionalWeights{info.weights_.DeviceSpan()});
+      out(0) = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
+                              common::OptionalWeights{info.weights_.DeviceSpan()});
     }
+
+    auto world = static_cast<float>(rabit::GetWorldSize());
+    rabit::Allreduce<rabit::op::Sum>(&invalid, 1);  // number of empty workers
+    world -= static_cast<float>(invalid);           // number of non-empty workers
+
+    // average base score across all valid workers
+    rabit::Allreduce<rabit::op::Sum>(out.Values().data(), out.Values().size());
+    std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
+                   [world](float v) { return v / world; });
   }
 
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,

From 7bc63d14284bb9fc7368262aad964d117b5ffddf Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 13 Sep 2022 18:21:23 +0800
Subject: [PATCH 17/34] Move configuration.

---
 include/xgboost/learner.h |  2 +-
 src/learner.cc            | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index e42cd1cab8c5..32f98c68ea15 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -85,7 +85,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   /*!
    * \brief Configure Learner based on set parameters.
    */
-  virtual void Configure(DMatrix const* p_fmat = nullptr) = 0;
+  virtual void Configure() = 0;
   /*!
    * \brief update the model for one iteration
    *  With the specified objective function.
diff --git a/src/learner.cc b/src/learner.cc
index 069b0b1e21c3..058660a15551 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -401,6 +401,7 @@ class LearnerConfiguration : public Learner {
     learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
     CHECK(learner_model_param_.Initialized());
     CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
+    CHECK(!std::isnan(mparam_.base_score));
   }
 
  public:
@@ -420,7 +421,7 @@ class LearnerConfiguration : public Learner {
   }
 
   // Configuration before data is known.
-  void Configure(DMatrix const* p_fmat = nullptr) override {
+  void Configure() override {
     // Varient of double checked lock
     if (!this->need_configuration_) {
       return;
@@ -464,7 +465,6 @@ class LearnerConfiguration : public Learner {
     learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
-    this->ConfigureLearnerParam(p_fmat);
 
     this->ConfigureMetrics(args);
 
@@ -1242,7 +1242,9 @@ class LearnerImpl : public LearnerIO {
   void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
-    this->Configure(train.get());
+    this->Configure();
+    this->ConfigureLearnerParam(train.get());
+
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
@@ -1270,7 +1272,8 @@ class LearnerImpl : public LearnerIO {
   void BoostOneIter(int iter, std::shared_ptr<DMatrix> train,
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
-    this->Configure(train.get());
+    this->Configure();
+    this->ConfigureLearnerParam(train.get());
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }

From 7c457ad18101bf78f9642b0103b53b37fe772a32 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 13 Sep 2022 18:32:30 +0800
Subject: [PATCH 18/34] Check for model initialized.

---
 src/learner.cc | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index 058660a15551..dc3ef9f4fe05 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -477,6 +477,10 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }
 
+  void CheckModelInitialized() const {
+    CHECK(learner_model_param_.Initialized()) << "Model not fit.";
+  }
+
   virtual PredictionContainer* GetPredictionCache() const {
     return &((*ThreadLocalPredictionCache::Get())[this]);
   }
@@ -871,6 +875,7 @@ class LearnerIO : public LearnerConfiguration {
 
   void SaveModel(Json* p_out) const override {
     CHECK(!this->need_configuration_) << "Call Configure before saving model.";
+    this->CheckModelInitialized();
 
     Version::Save(p_out);
     Json& out { *p_out };
@@ -1056,6 +1061,8 @@ class LearnerIO : public LearnerConfiguration {
   // Save model into binary format.  The code is about to be deprecated by more robust
   // JSON serialization format.
   void SaveModel(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
     std::vector<std::pair<std::string, std::string> > extra_attr;
     mparam.contain_extra_attrs = 1;
@@ -1110,6 +1117,8 @@ class LearnerIO : public LearnerConfiguration {
   }
 
   void Save(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     Json memory_snapshot{Object()};
     memory_snapshot["Model"] = Object();
     auto& model = memory_snapshot["Model"];
@@ -1196,16 +1205,19 @@ class LearnerImpl : public LearnerIO {
     }
   }
 
-  std::vector<std::string> DumpModel(const FeatureMap& fmap,
-                                     bool with_stats,
+  std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
                                      std::string format) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     return gbm_->DumpModel(fmap, with_stats, format);
   }
 
   Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
                  bool* out_of_bound) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     CHECK_NE(this->learner_model_param_.num_feature, 0);
     CHECK_GE(begin_layer, 0);
     auto* out_impl = new LearnerImpl({});
@@ -1274,6 +1286,7 @@ class LearnerImpl : public LearnerIO {
     monitor_.Start("BoostOneIter");
     this->Configure();
     this->ConfigureLearnerParam(train.get());
+
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
@@ -1293,6 +1306,7 @@ class LearnerImpl : public LearnerIO {
                           const std::vector<std::string>& data_names) override {
     monitor_.Start("EvalOneIter");
     this->Configure();
+    this->CheckModelInitialized();
 
     std::ostringstream os;
     os.precision(std::numeric_limits<double>::max_digits10);
@@ -1332,6 +1346,7 @@ class LearnerImpl : public LearnerIO {
                                static_cast<int>(pred_interactions) +
                                static_cast<int>(pred_contribs);
     this->Configure();
+    this->CheckModelInitialized();
 
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
@@ -1360,8 +1375,10 @@ class LearnerImpl : public LearnerIO {
     CHECK(!this->need_configuration_);
     return this->gbm_->BoostedRounds();
   }
+
   uint32_t Groups() const override {
     CHECK(!this->need_configuration_);
+    this->CheckModelInitialized();
     return this->learner_model_param_.num_output_group;
   }
 
@@ -1373,6 +1390,8 @@ class LearnerImpl : public LearnerIO {
                       HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                       uint32_t iteration_end) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     auto& out_predictions = this->GetThreadLocal().prediction_entry;
     this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);
     if (type == PredictionType::kValue) {
@@ -1388,6 +1407,8 @@ class LearnerImpl : public LearnerIO {
   void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
                         std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     gbm_->FeatureScore(importance_type, trees, features, scores);
   }
 
@@ -1407,6 +1428,7 @@ class LearnerImpl : public LearnerIO {
   void PredictRaw(DMatrix *data, PredictionCacheEntry *out_preds, bool training,
                   unsigned layer_begin, unsigned layer_end) const {
     CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
+    this->CheckModelInitialized();
     this->ValidateDMatrix(data, false);
     gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end);
   }

From 052fff038189d7ae39bc99ccd123096363bcd1a6 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 13 Sep 2022 18:41:18 +0800
Subject: [PATCH 19/34] Merge dispatching into median.

---
 src/common/stats.cu             | 13 ++++++++-----
 src/common/stats.h              | 26 ++++++++++++++++----------
 src/objective/regression_obj.cu |  7 +------
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/common/stats.cu b/src/common/stats.cu
index 956c812c9346..dcb04ac4b5de 100644
--- a/src/common/stats.cu
+++ b/src/common/stats.cu
@@ -2,11 +2,14 @@
  * Copyright 2022 by XGBoost Contributors
  */
 
-#include "common.h"
-#include "stats.cuh"
-#include "xgboost/generic_parameters.h"
-#include "xgboost/host_device_vector.h"
-#include "xgboost/linalg.h"
+#include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator
+
+#include "common.h"            // common::OptionalWeights
+#include "device_helpers.cuh"  // dh::MakeTransformIterator, tcbegin, tcend
+#include "stats.cuh"           // common::SegmentedQuantile, common::SegmentedWeightedQuantile
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+#include "xgboost/linalg.h"              // linalg::TensorView, UnravelIndex, Apply
 
 namespace xgboost {
 namespace common {
diff --git a/src/common/stats.h b/src/common/stats.h
index 547063c43a56..f191deb21575 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -103,23 +103,29 @@ inline float Median(Context const*, linalg::TensorView<float const, 2>, common::
 #endif  // !defined(XGBOOST_USE_CUDA)
 }  // namespace cuda
 
-inline float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
-                    common::OptionalWeights weights) {
+inline float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
+                    HostDeviceVector<float> const& weights) {
   if (!ctx->IsCPU()) {
-    return cuda::Median(ctx, t, weights);
+    weights.SetDevice(ctx->gpu_id);
+    auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
+    auto t_v = t.View(ctx->gpu_id);
+    return cuda::Median(ctx, t_v, opt_weights);
   }
+
+  auto opt_weights = OptionalWeights(weights.ConstHostSpan());
+  auto t_v = t.HostView();
   auto iter = common::MakeIndexTransformIter(
-      [&](size_t i) { return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); });
+      [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
   float q{0};
-  if (weights.weights.empty()) {
-    q = common::Quantile(0.5, iter, iter + t.Size());
+  if (opt_weights.Empty()) {
+    q = common::Quantile(0.5, iter, iter + t_v.Size());
   } else {
-    CHECK_NE(t.Shape(1), 0);
+    CHECK_NE(t_v.Shape(1), 0);
     auto w_it = common::MakeIndexTransformIter([&](size_t i) {
-      auto sample_idx = i / t.Shape(1);
-      return weights[sample_idx];
+      auto sample_idx = i / t_v.Shape(1);
+      return opt_weights[sample_idx];
     });
-    q = common::WeightedQuantile(0.5, iter, iter + t.Size(), w_it);
+    q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
   }
   return q;
 }
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index f0087a88d469..fe7e6c84251f 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -711,13 +711,8 @@ class MeanAbsoluteError : public ObjFunction {
     if (info.num_row_ == 0) {
       out(0) = 0;
       invalid++;
-    } else if (ctx_->IsCPU()) {
-      out(0) = common::Median(ctx_, info.labels.HostView(),
-                              common::OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
-      info.weights_.SetDevice(ctx_->gpu_id);
-      out(0) = common::Median(ctx_, info.labels.View(ctx_->gpu_id),
-                              common::OptionalWeights{info.weights_.DeviceSpan()});
+      out(0) = common::Median(ctx_, info.labels, info.weights_);
     }
 
     auto world = static_cast<float>(rabit::GetWorldSize());

From 0c3c3a673a2c30f072c07441d8812a0a7264a978 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Tue, 13 Sep 2022 19:50:40 +0800
Subject: [PATCH 20/34] Split up the configuration.

---
 src/learner.cc | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index dc3ef9f4fe05..d1d347bf3e11 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -202,9 +202,11 @@ linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device)
   // multi-class is not supported yet.
   CHECK_EQ(base_score_.Size(), 1);
   if (device == Context::kCpuId) {
+    // Make sure that we won't run it race condition.
     CHECK(base_score_.Data()->HostCanRead());
     return base_score_.HostView();
   }
+  // Make sure that we won't run it race condition.
   CHECK(base_score_.Data()->DeviceCanRead());
   auto v = base_score_.View(device);
   CHECK(base_score_.Data()->HostCanRead());  // make sure read access is not removed.
@@ -358,8 +360,10 @@ class LearnerConfiguration : public Learner {
 
   /**
    * \brief Calculate the `base_score` based on input data.
+   *
+   * \param p_fmat The training DMatrix used to estimate the base score.
    */
-  void ConfigureLearnerParam(DMatrix const* p_fmat) {
+  void InitBaseScore(DMatrix const* p_fmat) {
     linalg::Tensor<float, 1> base_score;
     // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
     // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
@@ -387,21 +391,31 @@ class LearnerConfiguration : public Learner {
       base_score(0) = ObjFunction::DefaultBaseScore();
     }
 
-    auto task = obj_->Task();
     mparam_.base_score = base_score(0);
+    CHECK(!std::isnan(mparam_.base_score));
+    // Update the shared model parameter
+    this->ConfigureModelParam();
+  }
 
-    // transform to margin
-    linalg::Tensor<float, 1> copy(base_score.Shape(), ctx_.gpu_id);
-    auto in = base_score.HostView();
+  // Convert mparam to learner_model_param
+  void ConfigureModelParam() {
+    CHECK(obj_);
+    auto task = obj_->Task();
+    linalg::Tensor<float, 1> copy({1}, ctx_.gpu_id);
     auto out = copy.HostView();
-    std::transform(linalg::cbegin(in), linalg::cend(in), linalg::begin(out),
-                   [&](float v) { return obj_->ProbToMargin(v); });
 
-    // move it to model param, which is shared with all other components.
-    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
-    CHECK(learner_model_param_.Initialized());
-    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
-    CHECK(!std::isnan(mparam_.base_score));
+    if (!std::isnan(mparam_.base_score)) {
+      // transform to margin
+      out(0) = obj_->ProbToMargin(mparam_.base_score);
+      // move it to model param, which is shared with all other components.
+      learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
+      CHECK(learner_model_param_.Initialized());
+      CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
+    } else {
+      // Model is not yet fitted, use default base score.
+      out(0) = ObjFunction::DefaultBaseScore();
+      learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
+    }
   }
 
  public:
@@ -465,6 +479,7 @@ class LearnerConfiguration : public Learner {
     learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
+    this->ConfigureModelParam();
 
     this->ConfigureMetrics(args);
 
@@ -479,6 +494,7 @@ class LearnerConfiguration : public Learner {
 
   void CheckModelInitialized() const {
     CHECK(learner_model_param_.Initialized()) << "Model not fit.";
+    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0);
   }
 
   virtual PredictionContainer* GetPredictionCache() const {
@@ -1255,7 +1271,7 @@ class LearnerImpl : public LearnerIO {
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
     this->Configure();
-    this->ConfigureLearnerParam(train.get());
+    this->InitBaseScore(train.get());
 
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
@@ -1285,7 +1301,7 @@ class LearnerImpl : public LearnerIO {
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
     this->Configure();
-    this->ConfigureLearnerParam(train.get());
+    this->InitBaseScore(train.get());
 
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);

From bbb30a0794f74062afe21574f1259d2ec8f73b0f Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 04:04:47 +0800
Subject: [PATCH 21/34] Add a quick test.

---
 src/learner.cc            |  5 +++--
 tests/cpp/test_learner.cc | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index d1d347bf3e11..c590a1e07e3a 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -99,12 +99,13 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     static_assert(sizeof(LearnerModelParamLegacy) == 136,
                   "Do not change the size of this struct, as it will break binary IO.");
   }
+
   // Skip other legacy fields.
   Json ToJson() const {
     Object obj;
     char floats[NumericLimits<float>::kToCharsSize];
     auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
-    CHECK(ret.ec == std::errc());
+    CHECK(ret.ec == std::errc{});
     obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
 
     char integers[NumericLimits<int64_t>::kToCharsSize];
@@ -493,7 +494,7 @@ class LearnerConfiguration : public Learner {
   }
 
   void CheckModelInitialized() const {
-    CHECK(learner_model_param_.Initialized()) << "Model not fit.";
+    CHECK(learner_model_param_.Initialized()) << "Model not yet initialized.";
     CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0);
   }
 
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 42f25fa39caa..1d7291657833 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -3,8 +3,10 @@
  */
 #include <gtest/gtest.h>
 #include <xgboost/learner.h>
+#include <xgboost/objective.h>  // ObjFunction
 #include <xgboost/version_config.h>
 
+#include <string>  // std::stof, std::string
 #include <thread>
 #include <vector>
 
@@ -447,4 +449,40 @@ TEST(Learner, MultiTarget) {
     EXPECT_THROW({ learner->Configure(); }, dmlc::Error);
   }
 }
+
+/**
+ * Test the model initialization sequence is correctly performed.
+ */
+TEST(Learner, InitEstimation) {
+  size_t constexpr kCols = 10;
+  auto Xy = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);
+  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+  learner->SetParam("objective", "reg:absoluteerror");
+  learner->Configure();
+  HostDeviceVector<float> predt;
+  learner->Predict(Xy, false, &predt, 0, 0);
+
+  auto h_predt = predt.ConstHostSpan();
+  for (auto v : h_predt) {
+    ASSERT_EQ(v, ObjFunction::DefaultBaseScore());
+  }
+  Json config{Object{}};
+  learner->SaveConfig(&config);
+  auto base_score =
+      std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
+  // No base score is estimated yet.
+  ASSERT_TRUE(std::isnan(base_score));
+
+  learner->UpdateOneIter(0, Xy);
+  learner->Predict(Xy, false, &predt, 0, 0);
+  h_predt = predt.ConstHostSpan();
+  for (auto v : h_predt) {
+    ASSERT_NE(v, ObjFunction::DefaultBaseScore());
+  }
+
+  learner->SaveConfig(&config);
+  base_score = std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
+  ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());
+  std::cout << base_score << std::endl;
+}
 }  // namespace xgboost

From e78c6083bdba709c8a024d6d9e6d1056a702d4d6 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 04:21:30 +0800
Subject: [PATCH 22/34] Check `base_score` in `BoostOneIter` instead of estimating it.

---
 src/learner.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/learner.cc b/src/learner.cc
index c590a1e07e3a..c139b735e489 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1302,7 +1302,8 @@ class LearnerImpl : public LearnerIO {
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
     this->Configure();
-    this->InitBaseScore(train.get());
+    // Should have been set to default in the first prediction.
+    CHECK(!std::isnan(mparam_.base_score));
 
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);

From 964fc0597f91383bdc02347fc673dddb3f8ee55c Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 04:50:57 +0800
Subject: [PATCH 23/34] test.

---
 src/learner.cc            | 80 +++++++++++++++++++++------------------
 tests/cpp/test_learner.cc | 80 +++++++++++++++++++++++++++------------
 2 files changed, 99 insertions(+), 61 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index c139b735e489..a0b4020aacee 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -86,16 +86,18 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   uint32_t minor_version;
 
   uint32_t num_target{1};
+
+  int32_t base_score_estimated{0};
   /*! \brief reserved field */
-  int reserved[26];
+  int reserved[25];
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
-    // use nan to flag this is uninitialized.
-    base_score = std::numeric_limits<float>::quiet_NaN();
+    base_score = ObjFunction::DefaultBaseScore();
     num_target = 1;
     major_version = std::get<0>(Version::Self());
     minor_version = std::get<1>(Version::Self());
+    base_score_estimated = 0;
     static_assert(sizeof(LearnerModelParamLegacy) == 136,
                   "Do not change the size of this struct, as it will break binary IO.");
   }
@@ -141,9 +143,11 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
 
     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
+    // It can only be estimated during the first training, we consider it estimated afterward
+    base_score_estimated = 1;
   }
 
-  inline LearnerModelParamLegacy ByteSwap() const {
+  LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
     dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
     dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@@ -153,10 +157,26 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
     dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
     dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
+    dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1);
     dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
     return x;
   }
 
+  template <typename Container>
+  Args UpdateAllowUnknown(Container const& kwargs) {
+    // Detect whether user has made their own base score.
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(),
+                     [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) {
+      base_score_estimated = true;
+    }
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) {
+          return kv.first == "base_score_estimated";
+        }) != kwargs.cend()) {
+      LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter.";
+    }
+    return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);
+  }
+
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
@@ -174,6 +194,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
         .set_default(1)
         .set_lower_bound(1)
         .describe("Number of target for multi-target regression.");
+    DMLC_DECLARE_FIELD(base_score_estimated).set_default(0);
   }
 };
 
@@ -379,44 +400,32 @@ class LearnerConfiguration : public Learner {
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
     CHECK(obj_);
-    if (!std::isnan(mparam_.base_score)) {
-      // if base_score is set by user, use it.
-      base_score.Reshape(1);
-      base_score(0) = mparam_.base_score;
-    } else if (p_fmat) {
-      // otherwise, we estimate it from input data.
+    if (!mparam_.base_score_estimated) {
+      // We estimate it from input data.
       obj_->InitEstimation(p_fmat->Info(), &base_score);
-    } else {
-      // lastly, if data is not available (prediction for custom objective), use default.
-      base_score.Reshape(1);
-      base_score(0) = ObjFunction::DefaultBaseScore();
+      mparam_.base_score_estimated = true;
+      mparam_.base_score = base_score(0);
+      CHECK(!std::isnan(mparam_.base_score));
+      // Update the shared model parameter
+      this->ConfigureModelParam();
     }
-
-    mparam_.base_score = base_score(0);
-    CHECK(!std::isnan(mparam_.base_score));
-    // Update the shared model parameter
-    this->ConfigureModelParam();
   }
 
   // Convert mparam to learner_model_param
   void ConfigureModelParam() {
+    this->ConfigureTargets();
+
     CHECK(obj_);
     auto task = obj_->Task();
-    linalg::Tensor<float, 1> copy({1}, ctx_.gpu_id);
-    auto out = copy.HostView();
-
-    if (!std::isnan(mparam_.base_score)) {
-      // transform to margin
-      out(0) = obj_->ProbToMargin(mparam_.base_score);
-      // move it to model param, which is shared with all other components.
-      learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
-      CHECK(learner_model_param_.Initialized());
-      CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
-    } else {
-      // Model is not yet fitted, use default base score.
-      out(0) = ObjFunction::DefaultBaseScore();
-      learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(copy), task);
-    }
+    linalg::Tensor<float, 1> base_score({1}, ctx_.gpu_id);
+    auto h_base_score = base_score.HostView();
+
+    // transform to margin
+    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
+    // move it to model param, which is shared with all other components.
+    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(base_score), task);
+    CHECK(learner_model_param_.Initialized());
+    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
   }
 
  public:
@@ -475,12 +484,11 @@ class LearnerConfiguration : public Learner {
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);
 
-    this->ConfigureTargets();
+    this->ConfigureModelParam();
 
     learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
-    this->ConfigureModelParam();
 
     this->ConfigureMetrics(args);
 
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 1d7291657833..81a0700f6cf3 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -456,33 +456,63 @@ TEST(Learner, MultiTarget) {
 TEST(Learner, InitEstimation) {
   size_t constexpr kCols = 10;
   auto Xy = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);
-  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
-  learner->SetParam("objective", "reg:absoluteerror");
-  learner->Configure();
-  HostDeviceVector<float> predt;
-  learner->Predict(Xy, false, &predt, 0, 0);
 
-  auto h_predt = predt.ConstHostSpan();
-  for (auto v : h_predt) {
-    ASSERT_EQ(v, ObjFunction::DefaultBaseScore());
-  }
-  Json config{Object{}};
-  learner->SaveConfig(&config);
-  auto base_score =
-      std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
-  // No base score is estimated yet.
-  ASSERT_TRUE(std::isnan(base_score));
-
-  learner->UpdateOneIter(0, Xy);
-  learner->Predict(Xy, false, &predt, 0, 0);
-  h_predt = predt.ConstHostSpan();
-  for (auto v : h_predt) {
-    ASSERT_NE(v, ObjFunction::DefaultBaseScore());
+  {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+    learner->SetParam("objective", "reg:absoluteerror");
+    learner->Configure();
+    HostDeviceVector<float> predt;
+    learner->Predict(Xy, false, &predt, 0, 0);
+
+    auto h_predt = predt.ConstHostSpan();
+    for (auto v : h_predt) {
+      ASSERT_EQ(v, ObjFunction::DefaultBaseScore());
+    }
+    Json config{Object{}};
+    learner->SaveConfig(&config);
+    auto base_score =
+        std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
+    // No base score is estimated yet.
+    ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore());
+
+    learner->UpdateOneIter(0, Xy);
+    learner->Predict(Xy, false, &predt, 0, 0);
+    h_predt = predt.ConstHostSpan();
+    for (auto v : h_predt) {
+      ASSERT_NE(v, ObjFunction::DefaultBaseScore());
+    }
+
+    learner->SaveConfig(&config);
+    base_score =
+        std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
+    ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());
+
+    ASSERT_THROW(
+        {
+          learner->SetParam("base_score_estimated", "1");
+          learner->Configure();
+        },
+        dmlc::Error);
   }
 
-  learner->SaveConfig(&config);
-  base_score = std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
-  ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());
-  std::cout << base_score << std::endl;
+  {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+    learner->SetParam("objective", "reg:absoluteerror");
+    learner->SetParam("base_score", "1.3");
+    learner->Configure();
+    HostDeviceVector<float> predt;
+    learner->Predict(Xy, false, &predt, 0, 0);
+    auto h_predt = predt.ConstHostSpan();
+    for (auto v : h_predt) {
+      ASSERT_FLOAT_EQ(v, 1.3);
+    }
+    learner->UpdateOneIter(0, Xy);
+    Json config{Object{}};
+    learner->SaveConfig(&config);
+    auto base_score =
+        std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
+    // no change
+    ASSERT_FLOAT_EQ(base_score, 1.3);
+  }
 }
 }  // namespace xgboost

From 6c67acb9f0410e33ee764b7d35a8eb36b19a677f Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 05:01:47 +0800
Subject: [PATCH 24/34] Don't change.

---
 src/learner.cc            | 14 ++++++++++----
 tests/cpp/test_learner.cc | 11 +++++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index a0b4020aacee..557ebba7e25b 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -401,11 +401,15 @@ class LearnerConfiguration : public Learner {
     // - model is configured second time due to change of parameter
     CHECK(obj_);
     if (!mparam_.base_score_estimated) {
-      // We estimate it from input data.
-      obj_->InitEstimation(p_fmat->Info(), &base_score);
+      if (p_fmat) {
+        // We estimate it from input data.
+        obj_->InitEstimation(p_fmat->Info(), &base_score);
+        mparam_.base_score = base_score(0);
+        CHECK(!std::isnan(mparam_.base_score));
+      } else {
+        mparam_.base_score = ObjFunction::DefaultBaseScore();
+      }
       mparam_.base_score_estimated = true;
-      mparam_.base_score = base_score(0);
-      CHECK(!std::isnan(mparam_.base_score));
       // Update the shared model parameter
       this->ConfigureModelParam();
     }
@@ -1372,6 +1376,7 @@ class LearnerImpl : public LearnerIO {
                                static_cast<int>(pred_interactions) +
                                static_cast<int>(pred_contribs);
     this->Configure();
+    this->InitBaseScore(nullptr);
     this->CheckModelInitialized();
 
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
@@ -1416,6 +1421,7 @@ class LearnerImpl : public LearnerIO {
                       HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                       uint32_t iteration_end) override {
     this->Configure();
+    this->InitBaseScore(nullptr);
     this->CheckModelInitialized();
 
     auto& out_predictions = this->GetThreadLocal().prediction_entry;
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 81a0700f6cf3..49c1d9537426 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -474,16 +474,23 @@ TEST(Learner, InitEstimation) {
         std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
     // No base score is estimated yet.
     ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore());
+  }
 
+  {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+    learner->SetParam("objective", "reg:absoluteerror");
     learner->UpdateOneIter(0, Xy);
+
+    HostDeviceVector<float> predt;
     learner->Predict(Xy, false, &predt, 0, 0);
-    h_predt = predt.ConstHostSpan();
+    auto h_predt = predt.ConstHostSpan();
     for (auto v : h_predt) {
       ASSERT_NE(v, ObjFunction::DefaultBaseScore());
     }
 
+    Json config{Object{}};
     learner->SaveConfig(&config);
-    base_score =
+    auto base_score =
         std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
     ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());
 

From 6c07f9822df51b0b84265295e96ccec9e44b801b Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 17:29:53 +0800
Subject: [PATCH 25/34] check.

---
 src/learner.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index 557ebba7e25b..b94ad2df1ecf 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -218,6 +218,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
   if (!ctx->IsCPU()) {
     common::AsConst(base_score_).View(ctx->gpu_id);
   }
+  CHECK(common::AsConst(base_score_).Data()->HostCanRead());
 }
 
 linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
@@ -386,7 +387,6 @@ class LearnerConfiguration : public Learner {
    * \param p_fmat The training DMatrix used to estimate the base score.
    */
   void InitBaseScore(DMatrix const* p_fmat) {
-    linalg::Tensor<float, 1> base_score;
     // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
     // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
     // keep the stability, we initialize it in binary LoadModel instead of configuration.
@@ -403,6 +403,7 @@ class LearnerConfiguration : public Learner {
     if (!mparam_.base_score_estimated) {
       if (p_fmat) {
         // We estimate it from input data.
+        linalg::Tensor<float, 1> base_score;
         obj_->InitEstimation(p_fmat->Info(), &base_score);
         mparam_.base_score = base_score(0);
         CHECK(!std::isnan(mparam_.base_score));
@@ -488,11 +489,10 @@ class LearnerConfiguration : public Learner {
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);
 
-    this->ConfigureModelParam();
-
     learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
+    this->ConfigureModelParam();
 
     this->ConfigureMetrics(args);
 

From bb1fc887ee0e5ce574f5f6279cf0faa495b16cdf Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 17:35:59 +0800
Subject: [PATCH 26/34] check.

---
 src/learner.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learner.cc b/src/learner.cc
index b94ad2df1ecf..06d2cf9ec51c 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1315,7 +1315,7 @@ class LearnerImpl : public LearnerIO {
     monitor_.Start("BoostOneIter");
     this->Configure();
     // Should have been set to default in the first prediction.
-    CHECK(!std::isnan(mparam_.base_score));
+    CHECK(mparam_.base_score_estimated);
 
     if (ctx_.seed_per_iteration) {
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);

From 8890a2a7ed8223cc2d0956048233163ac09e8063 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 14 Sep 2022 17:42:57 +0800
Subject: [PATCH 27/34] cleanup.

---
 include/xgboost/learner.h |  6 ++----
 src/common/linalg_op.h    |  9 +++++----
 src/learner.cc            | 17 +++++++++--------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index 32f98c68ea15..34ae5a4d53bb 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -8,16 +8,14 @@
 #ifndef XGBOOST_LEARNER_H_
 #define XGBOOST_LEARNER_H_
 
-#include <dmlc/any.h>
 #include <xgboost/base.h>
 #include <xgboost/feature_map.h>
-#include <xgboost/generic_parameters.h>
+#include <xgboost/generic_parameters.h>  // Context
 #include <xgboost/host_device_vector.h>
 #include <xgboost/model.h>
 #include <xgboost/predictor.h>
 #include <xgboost/task.h>
 
-#include <limits>  // std::numeric_limit
 #include <map>
 #include <memory>
 #include <string>
@@ -275,7 +273,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   /**
    * \brief Return the context object of this Booster.
    */
-  virtual GenericParameter const* Ctx() const = 0;
+  virtual Context const* Ctx() const = 0;
   /*!
    * \brief Get configuration arguments currently stored by the learner
    * \return Key-value pairs representing configuration arguments
diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h
index 52790e33d859..0de173c8e73f 100644
--- a/src/common/linalg_op.h
+++ b/src/common/linalg_op.h
@@ -4,6 +4,7 @@
 #ifndef XGBOOST_COMMON_LINALG_OP_H_
 #define XGBOOST_COMMON_LINALG_OP_H_
 #include <type_traits>
+#include <cstdint>  // std::int32_t
 
 #include "common.h"
 #include "threading_utils.h"
@@ -60,7 +61,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
-template <typename T, int32_t kDim>
+template <typename T, std::int32_t kDim>
 auto cbegin(TensorView<T, kDim> v) {  // NOLINT
   auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
     return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
@@ -68,19 +69,19 @@ auto cbegin(TensorView<T, kDim> v) {  // NOLINT
   return it;
 }
 
-template <typename T, int32_t kDim>
+template <typename T, std::int32_t kDim>
 auto cend(TensorView<T, kDim> v) {  // NOLINT
   return cbegin(v) + v.Size();
 }
 
-template <typename T, int32_t kDim>
+template <typename T, std::int32_t kDim>
 auto begin(TensorView<T, kDim> v) {  // NOLINT
   auto it = common::MakeIndexTransformIter(
       [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
   return it;
 }
 
-template <typename T, int32_t kDim>
+template <typename T, std::int32_t kDim>
 auto end(TensorView<T, kDim> v) {  // NOLINT
   return begin(v) + v.Size();
 }
diff --git a/src/learner.cc b/src/learner.cc
index 06d2cf9ec51c..d0ecfcb345b0 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -6,6 +6,7 @@
  */
 #include "xgboost/learner.h"
 
+#include <dmlc/any.h>
 #include <dmlc/io.h>
 #include <dmlc/parameter.h>
 #include <dmlc/thread_local.h>
@@ -13,7 +14,7 @@
 #include <algorithm>
 #include <atomic>
 #include <iomanip>
-#include <limits>
+#include <limits>  // std::numeric_limits
 #include <memory>
 #include <mutex>
 #include <sstream>
@@ -31,7 +32,6 @@
 #include "common/threading_utils.h"
 #include "common/timer.h"
 #include "common/version.h"
-#include "dmlc/any.h"
 #include "xgboost/base.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
@@ -180,7 +180,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
-        .set_default(std::numeric_limits<float>::quiet_NaN())
+        .set_default(ObjFunction::DefaultBaseScore())
         .describe("Global bias of the model.");
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
@@ -252,7 +252,8 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
   CHECK(base_score_.Data()->HostCanRead());
 
   num_feature = that.num_feature;
-  num_output_group = that.num_output_group, task = that.task;
+  num_output_group = that.num_output_group;
+  task = that.task;
 }
 
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
@@ -422,15 +423,15 @@ class LearnerConfiguration : public Learner {
 
     CHECK(obj_);
     auto task = obj_->Task();
-    linalg::Tensor<float, 1> base_score({1}, ctx_.gpu_id);
+    linalg::Tensor<float, 1> base_score({1}, Ctx()->gpu_id);
     auto h_base_score = base_score.HostView();
 
     // transform to margin
     h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
     // move it to model param, which is shared with all other components.
-    learner_model_param_ = LearnerModelParam(&ctx_, mparam_, std::move(base_score), task);
+    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
     CHECK(learner_model_param_.Initialized());
-    CHECK_NE(learner_model_param_.BaseScore(&ctx_).Size(), 0);
+    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
   }
 
  public:
@@ -665,7 +666,7 @@ class LearnerConfiguration : public Learner {
     return cfg_;
   }
 
-  GenericParameter const* Ctx() const override { return &ctx_; }
+  Context const* Ctx() const override { return &ctx_; }
 
  private:
   void ValidateParameters() {

From bba1cd92ad07c3e4a74713c5b4089995c89f3a26 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Sat, 17 Sep 2022 00:17:59 +0800
Subject: [PATCH 28/34] typo.

---
 src/learner.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/learner.cc b/src/learner.cc
index d0ecfcb345b0..2ee83fb71cbf 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -222,14 +222,14 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
 }
 
 linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
-  // multi-class is not supported yet.
+  // multi-class is not yet supported.
   CHECK_EQ(base_score_.Size(), 1);
   if (device == Context::kCpuId) {
-    // Make sure that we won't run it race condition.
+    // Make sure that we won't run into race condition.
     CHECK(base_score_.Data()->HostCanRead());
     return base_score_.HostView();
   }
-  // Make sure that we won't run it race condition.
+  // Make sure that we won't run into race condition.
   CHECK(base_score_.Data()->DeviceCanRead());
   auto v = base_score_.View(device);
   CHECK(base_score_.Data()->HostCanRead());  // make sure read access is not removed.

From 644bbe2d2655e9ebfa4aff98bedac7fcb9c41538 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 18:45:54 +0800
Subject: [PATCH 29/34] Weighted average.

---
 amalgamation/xgboost-all0.cc    | 13 +++++++------
 src/common/algorithm.cc         | 27 +++++++++++++++++++++++++++
 src/common/algorithm.cu         | 24 ++++++++++++++++++++++++
 src/common/algorithm.h          | 16 +++++++++++++++-
 src/common/threading_utils.h    |  7 +++++++
 src/objective/regression_obj.cu | 26 ++++++++++++++++----------
 6 files changed, 96 insertions(+), 17 deletions(-)
 create mode 100644 src/common/algorithm.cc
 create mode 100644 src/common/algorithm.cu

diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc
index ded96bcbab4c..7cf6ff5f68a9 100644
--- a/amalgamation/xgboost-all0.cc
+++ b/amalgamation/xgboost-all0.cc
@@ -75,19 +75,20 @@
 #include "../src/collective/communicator.cc"
 
 // common
-#include "../src/common/common.cc"
-#include "../src/common/column_matrix.cc"
-#include "../src/common/random.cc"
+#include "../src/common/algorithm.cc"
 #include "../src/common/charconv.cc"
-#include "../src/common/timer.cc"
-#include "../src/common/quantile.cc"
-#include "../src/common/host_device_vector.cc"
+#include "../src/common/column_matrix.cc"
+#include "../src/common/common.cc"
 #include "../src/common/hist_util.cc"
+#include "../src/common/host_device_vector.cc"
 #include "../src/common/io.cc"
 #include "../src/common/json.cc"
 #include "../src/common/pseudo_huber.cc"
+#include "../src/common/quantile.cc"
+#include "../src/common/random.cc"
 #include "../src/common/survival_util.cc"
 #include "../src/common/threading_utils.cc"
+#include "../src/common/timer.cc"
 #include "../src/common/version.cc"
 
 // c_api
diff --git a/src/common/algorithm.cc b/src/common/algorithm.cc
new file mode 100644
index 000000000000..13ae3992ab68
--- /dev/null
+++ b/src/common/algorithm.cc
@@ -0,0 +1,27 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+#include "algorithm.h"
+
+#include <numeric>
+
+#include "threading_utils.h"
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+
+namespace xgboost {
+namespace common {
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
+  if (ctx->IsCPU()) {
+    auto const& h_values = values.ConstHostVector();
+    MemStackAllocator<double, 128> result_tloc(ctx->Threads(), 0);
+    ParallelFor(h_values.size(), ctx->Threads(),
+                [&](auto i) { result_tloc[omp_get_thread_num()] = values[i]; });
+    auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
+    static_assert(std::is_same<decltype(result), double>::value, "");
+    return result;
+  }
+  return cuda::Reduce(ctx, values);
+}
+}  // namespace common
+}  // namespace xgboost
diff --git a/src/common/algorithm.cu b/src/common/algorithm.cu
new file mode 100644
index 000000000000..4a943d56224a
--- /dev/null
+++ b/src/common/algorithm.cu
@@ -0,0 +1,24 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+#include <thrust/execution_policy.h>
+#include <thrust/functional.h>  // thrust:plus
+
+#include "algorithm.h"
+#include "device_helpers.cuh"            // dh::Reduce
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+
+namespace xgboost {
+namespace common {
+namespace cuda {
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
+  auto const d_values = values.ConstDeviceSpan();
+  dh::XGBCachingDeviceAllocator<char> alloc;
+  auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
+                        d_values.data() + d_values.size(), 0.0, thrust::plus<double>{});
+  return res;
+}
+}  // namespace cuda
+}  // namespace common
+}  // namespace xgboost
diff --git a/src/common/algorithm.h b/src/common/algorithm.h
index addcd95cfa24..f59e664ba253 100644
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -1,10 +1,14 @@
 /*!
  * Copyright 2022 by XGBoost Contributors
  */
-#pragma once
+#ifndef XGBOOST_COMMON_ALGORITHM_H_
+#define XGBOOST_COMMON_ALGORITHM_H_
 #include <algorithm>  // std::upper_bound
 #include <cinttypes>  // std::size_t
 
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+
 namespace xgboost {
 namespace common {
 template <typename It, typename Idx>
@@ -12,5 +16,15 @@ auto SegmentId(It first, It last, Idx idx) {
   std::size_t segment_id = std::upper_bound(first, last, idx) - 1 - first;
   return segment_id;
 }
+
+namespace cuda {
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
+}
+
+/**
+ * \brief Reduction with summation.
+ */
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
 }  // namespace common
 }  // namespace xgboost
+#endif  // XGBOOST_COMMON_ALGORITHM_H_
diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h
index 9d4149d7d07b..b1fbe2210b6c 100644
--- a/src/common/threading_utils.h
+++ b/src/common/threading_utils.h
@@ -278,6 +278,13 @@ class MemStackAllocator {
   T& operator[](size_t i) { return ptr_[i]; }
   T const& operator[](size_t i) const { return ptr_[i]; }
 
+  auto data() const { return ptr_; }                   // NOLINT
+  auto data() { return ptr_; }                         // NOLINT
+  std::size_t size() const { return required_size_; }  // NOLINT
+
+  auto cbegin() const { return data(); }         // NOLINT
+  auto cend() const { return data() + size(); }  // NOLINT
+
  private:
   T* ptr_ = nullptr;
   size_t required_size_;
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index fe7e6c84251f..6410c144b813 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -13,6 +13,7 @@
 #include <memory>
 #include <vector>
 
+#include "../common/algorithm.h"  // reduce
 #include "../common/common.h"
 #include "../common/linalg_op.h"
 #include "../common/pseudo_huber.h"
@@ -707,22 +708,27 @@ class MeanAbsoluteError : public ObjFunction {
     CheckInitInputs(info);
     base_margin->Reshape(1);
     auto out = base_margin->HostView();
-    std::int32_t invalid{0};
-    if (info.num_row_ == 0) {
-      out(0) = 0;
-      invalid++;
+
+    double w{0.0};
+    if (info.weights_.Empty()) {
+      w = static_cast<double>(info.num_row_);
     } else {
-      out(0) = common::Median(ctx_, info.labels, info.weights_);
+      w = common::Reduce(ctx_, info.weights_);
     }
 
-    auto world = static_cast<float>(rabit::GetWorldSize());
-    rabit::Allreduce<rabit::op::Sum>(&invalid, 1);  // number of empty workers
-    world -= static_cast<float>(invalid);           // number of non-empty workers
+    if (info.num_row_) {
+      out(0) = 0;
+    } else {
+      // weighted avg
+      out(0) = common::Median(ctx_, info.labels, info.weights_) * w;
+    }
 
-    // average base score across all valid workers
+    // Weighted average base score across all workers
     rabit::Allreduce<rabit::op::Sum>(out.Values().data(), out.Values().size());
+    rabit::Allreduce<rabit::op::Sum>(&w, 1);
+
     std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
-                   [world](float v) { return v / world; });
+                   [w](float v) { return v / w; });
   }
 
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,

From dad7a37791e0e406140d2d77b5bcf15544b396b8 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 19:11:32 +0800
Subject: [PATCH 30/34] Change name.

---
 amalgamation/xgboost-all0.cc            |  2 +-
 src/common/algorithm.h                  | 12 ------------
 src/common/{algorithm.cc => numeric.cc} | 11 ++++++-----
 src/common/{algorithm.cu => numeric.cu} |  7 +++++--
 src/common/numeric.h                    | 19 ++++++++++++++-----
 src/common/threading_utils.h            |  8 +++++++-
 src/objective/regression_obj.cu         |  2 +-
 7 files changed, 34 insertions(+), 27 deletions(-)
 rename src/common/{algorithm.cc => numeric.cc} (70%)
 rename src/common/{algorithm.cu => numeric.cu} (78%)

diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc
index 7cf6ff5f68a9..3bc15d05388d 100644
--- a/amalgamation/xgboost-all0.cc
+++ b/amalgamation/xgboost-all0.cc
@@ -75,7 +75,6 @@
 #include "../src/collective/communicator.cc"
 
 // common
-#include "../src/common/algorithm.cc"
 #include "../src/common/charconv.cc"
 #include "../src/common/column_matrix.cc"
 #include "../src/common/common.cc"
@@ -83,6 +82,7 @@
 #include "../src/common/host_device_vector.cc"
 #include "../src/common/io.cc"
 #include "../src/common/json.cc"
+#include "../src/common/numeric.cc"
 #include "../src/common/pseudo_huber.cc"
 #include "../src/common/quantile.cc"
 #include "../src/common/random.cc"
diff --git a/src/common/algorithm.h b/src/common/algorithm.h
index f59e664ba253..a5d2d1974eff 100644
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -6,9 +6,6 @@
 #include <algorithm>  // std::upper_bound
 #include <cinttypes>  // std::size_t
 
-#include "xgboost/generic_parameters.h"  // Context
-#include "xgboost/host_device_vector.h"  // HostDeviceVector
-
 namespace xgboost {
 namespace common {
 template <typename It, typename Idx>
@@ -16,15 +13,6 @@ auto SegmentId(It first, It last, Idx idx) {
   std::size_t segment_id = std::upper_bound(first, last, idx) - 1 - first;
   return segment_id;
 }
-
-namespace cuda {
-double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
-}
-
-/**
- * \brief Reduction with summation.
- */
-double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_ALGORITHM_H_
diff --git a/src/common/algorithm.cc b/src/common/numeric.cc
similarity index 70%
rename from src/common/algorithm.cc
rename to src/common/numeric.cc
index 13ae3992ab68..b1a37892cf0c 100644
--- a/src/common/algorithm.cc
+++ b/src/common/numeric.cc
@@ -1,11 +1,12 @@
 /*!
  * Copyright 2022 by XGBoost Contributors
  */
-#include "algorithm.h"
+#include "numeric.h"
 
-#include <numeric>
+#include <numeric>      // std::accumulate
+#include <type_traits>  // std::is_same
 
-#include "threading_utils.h"
+#include "threading_utils.h"             // MemStackAllocator, ParallelFor, DefaultMaxThreads
 #include "xgboost/generic_parameters.h"  // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector
 
@@ -14,9 +15,9 @@ namespace common {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
   if (ctx->IsCPU()) {
     auto const& h_values = values.ConstHostVector();
-    MemStackAllocator<double, 128> result_tloc(ctx->Threads(), 0);
+    MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
     ParallelFor(h_values.size(), ctx->Threads(),
-                [&](auto i) { result_tloc[omp_get_thread_num()] = values[i]; });
+                [&](auto i) { result_tloc[omp_get_thread_num()] = h_values[i]; });
     auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
     static_assert(std::is_same<decltype(result), double>::value, "");
     return result;
diff --git a/src/common/algorithm.cu b/src/common/numeric.cu
similarity index 78%
rename from src/common/algorithm.cu
rename to src/common/numeric.cu
index 4a943d56224a..59d306badc35 100644
--- a/src/common/algorithm.cu
+++ b/src/common/numeric.cu
@@ -4,15 +4,18 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>  // thrust:plus
 
-#include "algorithm.h"
-#include "device_helpers.cuh"            // dh::Reduce
+#include "device_helpers.cuh"  // dh::Reduce
+#include "numeric.h"
 #include "xgboost/generic_parameters.h"  // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector
+#include "xgboost/logging.h"             // CHECK_GE
 
 namespace xgboost {
 namespace common {
 namespace cuda {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
+  CHECK_GE(ctx->gpu_id, 0);
+  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
   auto const d_values = values.ConstDeviceSpan();
   dh::XGBCachingDeviceAllocator<char> alloc;
   auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
diff --git a/src/common/numeric.h b/src/common/numeric.h
index ff5ac2242033..c02bdf6d057b 100644
--- a/src/common/numeric.h
+++ b/src/common/numeric.h
@@ -8,8 +8,9 @@
 #include <iterator>   // std::iterator_traits
 #include <vector>
 
-#include "threading_utils.h"
-#include "xgboost/generic_parameters.h"
+#include "threading_utils.h"             // MemStackAllocator, DefaultMaxThreads
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
 
 namespace xgboost {
 namespace common {
@@ -18,8 +19,8 @@ namespace common {
  * \brief Run length encode on CPU, input must be sorted.
  */
 template <typename Iter, typename Idx>
-void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
-  auto &out = *p_out;
+void RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {
+  auto& out = *p_out;
   out = std::vector<Idx>{0};
   size_t n = std::distance(begin, end);
   for (size_t i = 1; i < n; ++i) {
@@ -45,7 +46,7 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
   auto n = static_cast<size_t>(std::distance(begin, end));
   const size_t batch_threads =
       std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));
-  common::MemStackAllocator<T, 128> partial_sums(batch_threads);
+  MemStackAllocator<T, DefaultMaxThreads()> partial_sums(batch_threads);
 
   size_t block_size = n / batch_threads;
 
@@ -90,6 +91,14 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
   }
   exc.Rethrow();
 }
+
+namespace cuda {
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
+}
+/**
+ * \brief Reduction with summation.
+ */
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
 }  // namespace common
 }  // namespace xgboost
 
diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h
index b1fbe2210b6c..656e570ae812 100644
--- a/src/common/threading_utils.h
+++ b/src/common/threading_utils.h
@@ -8,6 +8,7 @@
 #include <dmlc/omp.h>
 
 #include <algorithm>
+#include <cstdint>  // std::int32_t
 #include <limits>
 #include <type_traits>  // std::is_signed
 #include <vector>
@@ -253,7 +254,7 @@ inline int32_t OmpGetNumThreads(int32_t n_threads) {
  * MaxStackSize, it will be allocated inside the stack. Otherwise, it will be
  * heap-allocated.
  */
-template <typename T, size_t MaxStackSize>
+template <typename T, std::size_t MaxStackSize>
 class MemStackAllocator {
  public:
   explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {
@@ -290,6 +291,11 @@ class MemStackAllocator {
   size_t required_size_;
   T stack_mem_[MaxStackSize];
 };
+
+/**
+ * \brief Constant that can be used for initializing static thread local memory.
+ */
+std::int32_t constexpr DefaultMaxThreads() { return 128; }
 }  // namespace common
 }  // namespace xgboost
 
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 6410c144b813..99f7765d208b 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -13,9 +13,9 @@
 #include <memory>
 #include <vector>
 
-#include "../common/algorithm.h"  // reduce
 #include "../common/common.h"
 #include "../common/linalg_op.h"
+#include "../common/numeric.h"  // Reduce
 #include "../common/pseudo_huber.h"
 #include "../common/stats.h"
 #include "../common/threading_utils.h"

From 9c2bdac6bf89250220c74ebaa508b02fcd81997d Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 21:43:42 +0800
Subject: [PATCH 31/34] Add tests.

---
 src/common/numeric.cc            |  2 +-
 src/common/numeric.cu            |  6 ++----
 tests/cpp/common/test_numeric.cc | 10 ++++++++++
 tests/cpp/common/test_stats.cc   | 15 +++++++++++++++
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/common/numeric.cc b/src/common/numeric.cc
index b1a37892cf0c..9740d6af1f8d 100644
--- a/src/common/numeric.cc
+++ b/src/common/numeric.cc
@@ -17,7 +17,7 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
     auto const& h_values = values.ConstHostVector();
     MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
     ParallelFor(h_values.size(), ctx->Threads(),
-                [&](auto i) { result_tloc[omp_get_thread_num()] = h_values[i]; });
+                [&](auto i) { result_tloc[omp_get_thread_num()] += h_values[i]; });
     auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
     static_assert(std::is_same<decltype(result), double>::value, "");
     return result;
diff --git a/src/common/numeric.cu b/src/common/numeric.cu
index 59d306badc35..faac6ddb56da 100644
--- a/src/common/numeric.cu
+++ b/src/common/numeric.cu
@@ -4,18 +4,16 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>  // thrust:plus
 
-#include "device_helpers.cuh"  // dh::Reduce
+#include "device_helpers.cuh"  // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator
 #include "numeric.h"
 #include "xgboost/generic_parameters.h"  // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector
-#include "xgboost/logging.h"             // CHECK_GE
 
 namespace xgboost {
 namespace common {
 namespace cuda {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
-  CHECK_GE(ctx->gpu_id, 0);
-  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  values.SetDevice(ctx->gpu_id);
   auto const d_values = values.ConstDeviceSpan();
   dh::XGBCachingDeviceAllocator<char> alloc;
   auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
diff --git a/tests/cpp/common/test_numeric.cc b/tests/cpp/common/test_numeric.cc
index 5b672585031b..2a91d2d72a78 100644
--- a/tests/cpp/common/test_numeric.cc
+++ b/tests/cpp/common/test_numeric.cc
@@ -29,5 +29,15 @@ TEST(Numeric, PartialSum) {
     ASSERT_EQ(sol, result);
   }
 }
+
+TEST(Numeric, Reduce) {
+  Context cpu_ctx;
+  ASSERT_TRUE(cpu_ctx.IsCPU());
+  HostDeviceVector<float> inputs(20);
+  auto& h_inputs = inputs.HostVector();
+  std::iota(h_inputs.begin(), h_inputs.end(), 0.0f);  // 0, 1, ..., n - 1
+  auto const n = inputs.Size();
+  ASSERT_EQ(Reduce(&cpu_ctx, inputs), (n - 1) * n / 2);  // sum of 0..n-1
+}
 }  // namespace common
 }  // namespace xgboost
diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc
index 2a1e375c0f20..79f38ae6a984 100644
--- a/tests/cpp/common/test_stats.cc
+++ b/tests/cpp/common/test_stats.cc
@@ -54,5 +54,20 @@ TEST(Stats, WeightedQuantile) {
   q = WeightedQuantile(1.0, beg, end, w);
   ASSERT_EQ(q, 5);
 }
+
+TEST(Stats, Median) {
+  Context ctx;
+  HostDeviceVector<float> weights;  // default-constructed, never filled in
+  linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
+  auto median = Median(&ctx, values, weights);
+  ASSERT_EQ(median, .5f);
+
+#if defined(XGBOOST_USE_CUDA)
+  ctx.gpu_id = 0;  // repeat the same call with a non-CPU context
+  ASSERT_FALSE(ctx.IsCPU());
+  median = Median(&ctx, values, weights);
+  ASSERT_EQ(median, .5f);
+#endif  // defined(XGBOOST_USE_CUDA)
+}
 }  // namespace common
 }  // namespace xgboost

From 79fab2bbb2efe838129a39295abcb5d0a63d4075 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 22:46:32 +0800
Subject: [PATCH 32/34] Fix the CPU-only build by adding a stub for cuda::Reduce.

---
 src/common/numeric.h | 7 +++++++
 src/common/stats.h   | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/common/numeric.h b/src/common/numeric.h
index c02bdf6d057b..cff3e8a12121 100644
--- a/src/common/numeric.h
+++ b/src/common/numeric.h
@@ -8,6 +8,7 @@
 #include <iterator>   // std::iterator_traits
 #include <vector>
 
+#include "common.h"                      // AssertGPUSupport
 #include "threading_utils.h"             // MemStackAllocator, DefaultMaxThreads
 #include "xgboost/generic_parameters.h"  // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector
@@ -94,7 +95,13 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
 
 namespace cuda {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
+#if !defined(XGBOOST_USE_CUDA)
+inline double Reduce(Context const*, HostDeviceVector<float> const&) {
+  AssertGPUSupport();  // NOTE(review): presumably reports missing GPU support -- confirm
+  return 0;  // placeholder result for builds without XGBOOST_USE_CUDA
 }
+#endif  // !defined(XGBOOST_USE_CUDA)
+}  // namespace cuda
 /**
  * \brief Reduction with summation.
  */
diff --git a/src/common/stats.h b/src/common/stats.h
index f191deb21575..c6347c421a9f 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -8,7 +8,7 @@
 #include <limits>
 #include <vector>
 
-#include "common.h"
+#include "common.h"  // AssertGPUSupport
 #include "xgboost/generic_parameters.h"
 #include "xgboost/linalg.h"
 
@@ -97,7 +97,7 @@ float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
              common::OptionalWeights weights);
 #if !defined(XGBOOST_USE_CUDA)
 inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
-  common::AssertGPUSupport();
+  AssertGPUSupport();
   return 0;
 }
 #endif  // !defined(XGBOOST_USE_CUDA)

From 5099c3c01ffddd051632f0925e83fc1beee537b9 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 23:29:26 +0800
Subject: [PATCH 33/34] Fix inverted empty-data check in MeanAbsoluteError.

---
 src/objective/regression_obj.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 99f7765d208b..6b2ce6371a6d 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -716,7 +716,7 @@ class MeanAbsoluteError : public ObjFunction {
       w = common::Reduce(ctx_, info.weights_);
     }
 
-    if (info.num_row_) {
+    if (info.num_row_ == 0) {
       out(0) = 0;
     } else {
       // weighted avg

From 103c722169173fb8a0a4439f608129e799e542d7 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Mon, 19 Sep 2022 23:41:39 +0800
Subject: [PATCH 34/34] Add a test for distributed training.

---
 tests/python/test_with_dask.py | 45 +++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index 7695538923d4..d6eb4f32b9f7 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -1537,13 +1537,56 @@ def test_quantile(self) -> None:
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.gtest
     def test_quantile_same_on_all_workers(self) -> None:
-        self.run_quantile('SameOnAllWorkers')
+        self.run_quantile("SameOnAllWorkers")
+
+    def test_adaptive(self) -> None:
+        # Train `reg:absoluteerror` on two differently-populated workers and check
+        # the `base_score` recorded in the saved booster config.
+        def get_score(config: Dict) -> float:
+            model_param = config["learner"]["learner_model_param"]
+            return float(model_param["base_score"])
+
+        def local_test(rabit_args: List[bytes], worker_id: int) -> bool:
+            # Worker 0 gets three rows labelled 0.0; any other worker gets a single
+            # row labelled 1000.0.  The only feature is 0.0 everywhere.
+            n_rows, label = (3, 0.0) if worker_id == 0 else (1, 1000.0)
+            with xgb.dask.RabitContext(rabit_args):
+                x = np.array([[0.0]] * n_rows)
+                y = np.array([label] * n_rows)
+                Xy = xgb.DMatrix(x, y)
+
+                params = {"tree_method": "hist", "objective": "reg:absoluteerror"}
+                booster = xgb.train(params, Xy, num_boost_round=1)
+
+                base_score = get_score(json.loads(booster.save_config()))
+                # NOTE(review): 250.0 == (3 * 0.0 + 1 * 1000.0) / 4; presumably the
+                # per-worker estimates are averaged by row count -- confirm.
+                assert base_score == 250.0
+                return True
+
+        with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
+            with Client(cluster) as client:
+                workers = _get_client_workers(client)
+                rabit_args = client.sync(
+                    xgb.dask._get_rabit_args, len(workers), None, client
+                )
+
+                # One task per entry in `workers`; the index doubles as `worker_id`.
+                futures = [
+                    client.submit(local_test, rabit_args, idx)
+                    for idx in range(len(workers))
+                ]
+
+                # `local_test` returns True once its own assertion has passed.
+                results = client.gather(futures)
+                assert all(results)
 
     def test_n_workers(self) -> None:
         with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
             with Client(cluster) as client:
                 workers = _get_client_workers(client)
                 from sklearn.datasets import load_breast_cancer
+
                 X, y = load_breast_cancer(return_X_y=True)
                 dX = client.submit(da.from_array, X, workers=[workers[0]]).result()
                 dy = client.submit(da.from_array, y, workers=[workers[0]]).result()