Initial support for multi-label classification. #7521

Merged
merged 4 commits on Jan 4, 2022
Changes from 3 commits
1 change: 1 addition & 0 deletions doc/tutorials/index.rst
@@ -27,3 +27,4 @@ See `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for mo
external_memory
custom_metric_obj
categorical
multioutput
32 changes: 32 additions & 0 deletions doc/tutorials/multioutput.rst
@@ -0,0 +1,32 @@
################
Multiple Outputs
################

.. versionadded:: 1.6

Starting from version 1.6, XGBoost has experimental support for multi-output regression
and multi-label classification. For the terminology, please refer to the `scikit-learn
user guide <https://scikit-learn.org/stable/modules/multiclass.html>`_.

Internally, XGBoost builds one model for each target, similar to the sklearn meta
estimators, with the added benefit of reusing data and supporting custom objectives. For
a worked example of regression, see
:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label
classification, the binary relevance strategy is used. Input ``y`` should be of shape
``(n_samples, n_classes)``, with each column holding a value of 0 or 1 to specify
whether the sample is labeled as positive for that class. At the moment, XGBoost
supports only dense matrices for labels.

.. code-block:: python

  from sklearn.datasets import make_multilabel_classification
  import numpy as np
  import xgboost as xgb

  X, y = make_multilabel_classification(
      n_samples=32, n_classes=5, n_labels=3, random_state=0
  )
  clf = xgb.XGBClassifier(tree_method="hist")
  clf.fit(X, y)
  np.testing.assert_allclose(clf.predict(X), y)


The feature is still under development and might contain unknown issues.
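
To complement the classification example above, here is a minimal sketch of the
multi-output regression counterpart (not part of this diff; it assumes a 2D ``y``
passed to ``XGBRegressor`` is handled the same way, with one model built per target):

.. code-block:: python

  import xgboost as xgb
  from sklearn.datasets import make_regression

  # y has shape (n_samples, n_targets); one model is built per target.
  X, y = make_regression(n_samples=128, n_features=16, n_targets=3, random_state=0)
  reg = xgb.XGBRegressor(tree_method="hist")
  reg.fit(X, y)
  # Predictions should come back with the same (n_samples, n_targets) shape.
  assert reg.predict(X).shape == y.shape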
14 changes: 13 additions & 1 deletion python-package/xgboost/sklearn.py
@@ -1215,6 +1215,14 @@ def intercept_(self) -> np.ndarray:
def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> PredtT:
    assert len(prediction.shape) <= 2
    if len(prediction.shape) == 2 and prediction.shape[1] == n_classes:
        # multi-class
        return prediction
    if (
        len(prediction.shape) == 2
        and n_classes == 2
        and prediction.shape[1] >= n_classes
    ):
        # multi-label
        return prediction
    # binary logistic function
    classone_probs = prediction
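
With the new multi-label branch in place, ``predict_proba`` on a multi-label
``XGBClassifier`` returns one positive-class probability per label. A small sketch
under the same setup as the tutorial example; the shape assertion follows from the
branch above returning the prediction unchanged:

.. code-block:: python

  import xgboost as xgb
  from sklearn.datasets import make_multilabel_classification

  X, y = make_multilabel_classification(
      n_samples=32, n_classes=5, n_labels=3, random_state=0
  )
  clf = xgb.XGBClassifier(tree_method="hist").fit(X, y)
  # One column per label: the multi-label branch returns the matrix as-is.
  assert clf.predict_proba(X).shape == (32, 5)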
@@ -1374,9 +1382,13 @@ def predict(
            # If output_margin is active, simply return the scores
            return class_probs

        if len(class_probs.shape) > 1 and self.n_classes_ != 2:
            # turns softprob into softmax
            column_indexes: np.ndarray = np.argmax(class_probs, axis=1)  # type: ignore
        elif len(class_probs.shape) > 1 and class_probs.shape[1] != 1:
            # multi-label
            column_indexes = np.zeros(class_probs.shape)
            column_indexes[class_probs > 0.5] = 1
        else:
            # turns soft logit into class label
            column_indexes = np.repeat(0, class_probs.shape[0])
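
A standalone sketch of the new multi-label branch: instead of an argmax across
classes, each label column is thresholded at 0.5 independently (toy probabilities,
illustrative only):

.. code-block:: python

  import numpy as np

  class_probs = np.array([[0.9, 0.2, 0.7],
                          [0.1, 0.6, 0.4]])
  # Each label is decided independently, mirroring the elif branch above.
  column_indexes = np.zeros(class_probs.shape)
  column_indexes[class_probs > 0.5] = 1
  print(column_indexes)  # [[1. 0. 1.]
                         #  [0. 1. 0.]]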
18 changes: 18 additions & 0 deletions tests/python/test_with_sklearn.py
@@ -1194,6 +1194,24 @@ def test_estimator_type():
    cls.load_model(path)  # no error


def test_multilabel_classification() -> None:
    from sklearn.datasets import make_multilabel_classification

    X, y = make_multilabel_classification(
        n_samples=32, n_classes=5, n_labels=3, random_state=0
    )
    clf = xgb.XGBClassifier(tree_method="hist")
    clf.fit(X, y)
    booster = clf.get_booster()
    learner = json.loads(booster.save_config())["learner"]
    assert int(learner["learner_model_param"]["num_target"]) == 5

    np.testing.assert_allclose(clf.predict(X), y)
    predt = (clf.predict_proba(X) > 0.5).astype(np.int64)
    np.testing.assert_allclose(clf.predict(X), predt)
    assert predt.dtype == np.int64

def run_data_initialization(DMatrix, model, X, y):
"""Assert that we don't create duplicated DMatrix."""
