Merge remote-tracking branch 'upstream/master' into fix-repartition

dmlc · Sep 14, 2022 · 19f7602 · 19f7602
2 parents 5f89620 + 70df36c
commit 19f7602
Show file tree

Hide file tree

Showing 18 changed files with 111 additions and 877 deletions.
diff --git a/Jenkinsfile b/Jenkinsfile
diff --git a/Jenkinsfile-win64 b/Jenkinsfile-win64
diff --git a/doc/contrib/donate.rst b/doc/contrib/donate.rst
@@ -13,9 +13,9 @@ DMLC/XGBoost has grown from a research project incubated in academia to one of t
 
 A robust and efficient **continuous integration (CI)** infrastructure is one of the most critical solutions to address the above challenge. A CI service will monitor an open-source repository and run a suite of integration tests for every incoming contribution. This way, the CI ensures that every proposed change in the codebase is compatible with existing functionalities. Furthermore, XGBoost can enable more thorough tests with a powerful CI infrastructure to cover cases which are closer to the production environment.
 
-There are several CI services available free to open source projects, such as Travis CI and AppVeyor. The XGBoost project already utilizes Travis and AppVeyor. However, the XGBoost project has needs that these free services do not adequately address. In particular, the limited usage quota of resources such as CPU and memory leaves XGBoost developers unable to bring "too-intensive" tests. In addition, they do not offer test machines with GPUs for testing XGBoost-GPU code base which has been attracting more and more interest across many organizations. Consequently, the XGBoost project self-hosts a cloud server with Jenkins software installed: https://xgboost-ci.net/.
+There are several CI services available free to open source projects, such as Travis CI and AppVeyor. The XGBoost project already utilizes GitHub Actions. However, the XGBoost project has needs that these free services do not adequately address. In particular, the limited usage quota of resources such as CPU and memory leaves XGBoost developers unable to bring "too-intensive" tests. In addition, they do not offer test machines with GPUs for testing XGBoost-GPU code base which has been attracting more and more interest across many organizations. Consequently, the XGBoost project uses a cloud-hosted test farm. We use `BuildKite <https://buildkite.com/xgboost>`_ to organize CI pipelines.
 
-The self-hosted Jenkins CI server has recurring operating expenses. It utilizes a leading cloud provider (AWS) to accommodate variable workload. The master node serving the web interface is available 24/7, to accommodate contributions from people around the globe. In addition, the master node launches slave nodes on demand, to run the test suite on incoming contributions. To save cost, the slave nodes are terminated when they are no longer needed.
+The cloud-hosted test farm has recurring operating expenses. It utilizes a leading cloud provider (AWS) to accommodate variable workload. BuildKite launches worker machines on AWS on demand, to run the test suite on incoming contributions. To save cost, the worker machines are terminated when they are no longer needed.
 
 To help defray the hosting cost, the XGBoost project seeks donations from third parties.
 
@@ -29,14 +29,14 @@ The Project Management Committee (PMC) of the XGBoost project appointed `Open So
 
 All expenses incurred for hosting CI will be submitted to the fiscal host with receipts. Only the expenses in the following categories will be approved for reimbursement:
 
-* Cloud exprenses for the Jenkins CI server (https://xgboost-ci.net)
+* Cloud exprenses for the cloud test farm (https://buildkite.com/xgboost)
 * Cost of domain https://xgboost-ci.net
-* Meetup.com account for XGBoost project
+* Monthly cost of using BuildKite
 * Hosting cost of the User Forum (https://discuss.xgboost.ai)
 
-Administration of Jenkins CI server
------------------------------------
-The PMC shall appoint committer(s) to administer the Jenkins CI server on their behalf. The current administrators are as follows:
+Administration of cloud CI infrastructure
+-----------------------------------------
+The PMC shall appoint committer(s) to administer the cloud CI infrastructure on their behalf. The current administrators are as follows:
 
 * Primary administrator: `Hyunsu Cho <https://github.com/hcho3>`_
 * Secondary administrator: `Jiaming Yuan <https://github.com/trivialfis>`_

diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
@@ -726,10 +726,9 @@ def _create_quantile_dmatrix(
     if parts is None:
         msg = f"worker {worker.address} has an empty DMatrix."
         LOGGER.warning(msg)
-        import cupy
 
         d = QuantileDMatrix(
-            cupy.zeros((0, 0)),
+            numpy.empty((0, 0)),
             feature_names=feature_names,
             feature_types=feature_types,
             max_bin=max_bin,
@@ -1544,15 +1543,21 @@ def inplace_predict(  # pylint: disable=unused-argument
 
 
 async def _async_wrap_evaluation_matrices(
-    client: Optional["distributed.Client"], **kwargs: Any
+    client: Optional["distributed.Client"],
+    tree_method: Optional[str],
+    max_bin: Optional[int],
+    **kwargs: Any,
 ) -> Tuple[DaskDMatrix, Optional[List[Tuple[DaskDMatrix, str]]]]:
     """A switch function for async environment."""
 
-    def _inner(**kwargs: Any) -> DaskDMatrix:
-        m = DaskDMatrix(client=client, **kwargs)
-        return m
+    def _dispatch(ref: Optional[DaskDMatrix], **kwargs: Any) -> DaskDMatrix:
+        if tree_method in ("hist", "gpu_hist"):
+            return DaskQuantileDMatrix(
+                client=client, ref=ref, max_bin=max_bin, **kwargs
+            )
+        return DaskDMatrix(client=client, **kwargs)
 
-    train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_inner, **kwargs)
+    train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_dispatch, **kwargs)
     train_dmatrix = await train_dmatrix
     if evals is None:
         return train_dmatrix, evals
@@ -1756,6 +1761,8 @@ async def _fit_async(
         params = self.get_xgb_params()
         dtrain, evals = await _async_wrap_evaluation_matrices(
             client=self.client,
+            tree_method=self.tree_method,
+            max_bin=self.max_bin,
             X=X,
             y=y,
             group=None,
@@ -1851,6 +1858,8 @@ async def _fit_async(
         params = self.get_xgb_params()
         dtrain, evals = await _async_wrap_evaluation_matrices(
             self.client,
+            tree_method=self.tree_method,
+            max_bin=self.max_bin,
             X=X,
             y=y,
             group=None,
@@ -2057,6 +2066,8 @@ async def _fit_async(
         params = self.get_xgb_params()
         dtrain, evals = await _async_wrap_evaluation_matrices(
             self.client,
+            tree_method=self.tree_method,
+            max_bin=self.max_bin,
             X=X,
             y=y,
             group=None,

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
@@ -38,6 +38,7 @@
     Booster,
     DMatrix,
     Metric,
+    QuantileDMatrix,
     XGBoostError,
     _convert_ntree_limit,
     _deprecate_positional_args,
@@ -430,7 +431,8 @@ def _wrap_evaluation_matrices(
     enable_categorical: bool,
     feature_types: Optional[FeatureTypes],
 ) -> Tuple[Any, List[Tuple[Any, str]]]:
-    """Convert array_like evaluation matrices into DMatrix.  Perform validation on the way."""
+    """Convert array_like evaluation matrices into DMatrix.  Perform validation on the
+    way."""
     train_dmatrix = create_dmatrix(
         data=X,
         label=y,
@@ -442,6 +444,7 @@ def _wrap_evaluation_matrices(
         missing=missing,
         enable_categorical=enable_categorical,
         feature_types=feature_types,
+        ref=None,
     )
 
     n_validation = 0 if eval_set is None else len(eval_set)
@@ -491,6 +494,7 @@ def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:
                     missing=missing,
                     enable_categorical=enable_categorical,
                     feature_types=feature_types,
+                    ref=train_dmatrix,
                 )
                 evals.append(m)
         nevals = len(evals)
@@ -904,6 +908,17 @@ def _duplicated(parameter: str) -> None:
 
         return model, metric, params, early_stopping_rounds, callbacks
 
+    def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
+        # Use `QuantileDMatrix` to save memory.
+        if self.tree_method in ("hist", "gpu_hist"):
+            try:
+                return QuantileDMatrix(
+                    **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
+                )
+            except TypeError:  # `QuantileDMatrix` supports lesser types than DMatrix
+                pass
+        return DMatrix(**kwargs, nthread=self.n_jobs)
+
     def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None:
         if evals_result:
             self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result)
@@ -996,7 +1011,7 @@ def fit(
                 base_margin_eval_set=base_margin_eval_set,
                 eval_group=None,
                 eval_qid=None,
-                create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
+                create_dmatrix=self._create_dmatrix,
                 enable_categorical=self.enable_categorical,
                 feature_types=self.feature_types,
             )
@@ -1479,7 +1494,7 @@ def fit(
                 base_margin_eval_set=base_margin_eval_set,
                 eval_group=None,
                 eval_qid=None,
-                create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
+                create_dmatrix=self._create_dmatrix,
                 enable_categorical=self.enable_categorical,
                 feature_types=self.feature_types,
             )
@@ -1930,7 +1945,7 @@ def fit(
                 base_margin_eval_set=base_margin_eval_set,
                 eval_group=eval_group,
                 eval_qid=eval_qid,
-                create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
+                create_dmatrix=self._create_dmatrix,
                 enable_categorical=self.enable_categorical,
                 feature_types=self.feature_types,
             )