From f8a448a57e1c3dfd02192756fe5336307fc5cd8c Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 29 Sep 2022 11:45:48 +0800
Subject: [PATCH 1/3] Update parameter for categorical feature.

---
 doc/parameter.rst                 |  2 +-
 doc/tutorials/categorical.rst     |  2 +-
 python-package/xgboost/sklearn.py | 18 ++++++++++++++++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/doc/parameter.rst b/doc/parameter.rst
index c633d0835d4d..a9e48e04cf18 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -235,7 +235,7 @@ These parameters are only used for training with categorical data. See
 
 * ``max_cat_to_onehot``
 
-  .. versionadded:: 1.6
+  .. versionadded:: 1.6.0
 
   .. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
 
diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst
index 76b88e67dece..1c090801fae4 100644
--- a/doc/tutorials/categorical.rst
+++ b/doc/tutorials/categorical.rst
@@ -84,7 +84,7 @@ values are categories, and the measure is the output leaf value. Intuitively, w
 group the categories that output similar leaf values. During split finding, we first sort
 the gradient histogram to prepare the contiguous partitions then enumerate the splits
 according to these sorted values. One of the related parameters for XGBoost is
-``max_cat_to_one_hot``, which controls whether one-hot encoding or partitioning should be
+``max_cat_to_onehot``, which controls whether one-hot encoding or partitioning should be
 used for each feature, see :ref:`cat-param` for details.
 
 
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 8e7dcfa5ee0d..ea7fb05858c4 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -249,8 +249,20 @@ def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
         A threshold for deciding whether XGBoost should use one-hot encoding based split
         for categorical data.  When the number of categories is less than the threshold
         then one-hot encoding is chosen, otherwise the categories will be partitioned
-        into children nodes. Only relevant for regression and binary classification.
-        See :doc:`Categorical Data </tutorials/categorical>` for details.
+        into children nodes. Also, `enable_categorical` needs to be set to have
+        categorical feature support. See :doc:`Categorical Data
+        </tutorials/categorical>` and :ref:`cat-param` for details.
+
+    max_cat_threshold : Optional[int]
+
+        .. versionadded:: 1.7.0
+
+        .. note:: This parameter is experimental
+
+        Maximum number of categories considered for each split. Used only by
+        partition-based splits for preventing over-fitting. Also, `enable_categorical`
+        needs to be set to have categorical feature support. See :doc:`Categorical Data
+        </tutorials/categorical>` and :ref:`cat-param` for details.
 
     eval_metric : Optional[Union[str, List[str], Callable]]
 
@@ -562,6 +574,7 @@ def __init__(
         enable_categorical: bool = False,
         feature_types: FeatureTypes = None,
         max_cat_to_onehot: Optional[int] = None,
+        max_cat_threshold: Optional[int] = None,
         eval_metric: Optional[Union[str, List[str], Callable]] = None,
         early_stopping_rounds: Optional[int] = None,
         callbacks: Optional[List[TrainingCallback]] = None,
@@ -607,6 +620,7 @@ def __init__(
         self.enable_categorical = enable_categorical
         self.feature_types = feature_types
         self.max_cat_to_onehot = max_cat_to_onehot
+        self.max_cat_threshold = max_cat_threshold
         self.eval_metric = eval_metric
         self.early_stopping_rounds = early_stopping_rounds
         self.callbacks = callbacks
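For context on how the two categorical parameters documented above fit together, here is
a minimal sketch using the scikit-learn interface. It assumes xgboost >= 1.7 (where
`max_cat_threshold` first appears); the synthetic data and the particular threshold
values are illustrative only, not taken from the patch.

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    # Synthetic frame with one categorical column. `enable_categorical` must be
    # set, otherwise the two categorical parameters below have no effect.
    rng = np.random.default_rng(0)
    X = pd.DataFrame(
        {
            "cat": pd.Categorical(rng.integers(0, 16, size=256)),
            "num": rng.normal(size=256),
        }
    )
    y = rng.integers(0, 2, size=256)

    clf = xgb.XGBClassifier(
        tree_method="hist",
        enable_categorical=True,
        # One-hot splits apply only to features with fewer than 4 categories.
        max_cat_to_onehot=4,
        # Partition-based splits consider at most 32 categories per split.
        max_cat_threshold=32,
    )
    clf.fit(X, y)

With 16 categories in "cat", the one-hot threshold of 4 is not met, so the column is
split by partitioning, capped by `max_cat_threshold`.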
From 8b5da727d5874eb9953757f0403e3525b0222744 Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 29 Sep 2022 19:37:06 +0800
Subject: [PATCH 2/3] Workaround.

---
 doc/parameter.rst                                          | 2 +-
 tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/doc/parameter.rst b/doc/parameter.rst
index a9e48e04cf18..8833c6eb7150 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -349,7 +349,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - ``reg:squaredlogerror``: regression with squared log loss :math:`\frac{1}{2}[log(pred + 1) - log(label + 1)]^2`. All input labels are required to be greater than -1. Also, see metric ``rmsle`` for possible issue with this objective.
   - ``reg:logistic``: logistic regression.
   - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
-  - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction.
+  - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
   - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
diff --git a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
index 3cb110bd6c6e..9bcaf23a8a9c 100644
--- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
@@ -203,12 +203,9 @@ def run_gpu_hist(
 
         # See note on `ObjFunction::UpdateTreeLeaf`.
         update_leaf = dataset.name.endswith("-l1")
-        if update_leaf and len(history) == 2:
+        if update_leaf:
             assert history[0] + 1e-2 >= history[-1]
             return
-        if update_leaf and len(history) > 2:
-            assert history[0] >= history[-1]
-            return
         else:
             assert tm.non_increasing(history)
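The relaxed assertion above mirrors the documentation change: because
`reg:absoluteerror` refreshes leaf values after tree construction, and the refreshed
value is averaged across workers in distributed training, the evaluation history is not
guaranteed to decrease at every iteration, so the test only compares the first and last
entries. A standalone, single-process sketch of that kind of check (assumes xgboost >=
1.7 for `reg:absoluteerror`; synthetic data, round count, and tolerance are
illustrative):

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(512, 8))
    y = rng.normal(size=512)
    dtrain = xgb.DMatrix(X, label=y)

    evals_result = {}
    xgb.train(
        {"objective": "reg:absoluteerror", "eval_metric": "mae", "tree_method": "hist"},
        dtrain,
        num_boost_round=8,
        evals=[(dtrain, "train")],
        evals_result=evals_result,
    )

    # Rather than demanding a monotonically non-increasing history, compare
    # only the endpoints with a small tolerance, as the patched test does.
    history = evals_result["train"]["mae"]
    assert history[0] + 1e-2 >= history[-1]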
From 7f949caee7252a62bb0730cc68a0fa563bfb24f1 Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 29 Sep 2022 19:39:40 +0800
Subject: [PATCH 3/3] Revert "Workaround."

This reverts commit 8b5da727d5874eb9953757f0403e3525b0222744.

---
 doc/parameter.rst                                          | 2 +-
 tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/parameter.rst b/doc/parameter.rst
index 8833c6eb7150..a9e48e04cf18 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -349,7 +349,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - ``reg:squaredlogerror``: regression with squared log loss :math:`\frac{1}{2}[log(pred + 1) - log(label + 1)]^2`. All input labels are required to be greater than -1. Also, see metric ``rmsle`` for possible issue with this objective.
   - ``reg:logistic``: logistic regression.
   - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
-  - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.
+  - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction.
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
   - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
diff --git a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
index 9bcaf23a8a9c..3cb110bd6c6e 100644
--- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
@@ -203,9 +203,12 @@ def run_gpu_hist(
 
         # See note on `ObjFunction::UpdateTreeLeaf`.
         update_leaf = dataset.name.endswith("-l1")
-        if update_leaf:
+        if update_leaf and len(history) == 2:
             assert history[0] + 1e-2 >= history[-1]
             return
+        if update_leaf and len(history) > 2:
+            assert history[0] >= history[-1]
+            return
         else:
             assert tm.non_increasing(history)