[pyspark] Fixing xgboost.spark python doc (#8200)

Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
dmlc · Aug 25, 2022 · 651f0a8 · 651f0a8
1 parent d03794c
commit 651f0a8
Showing 1 changed file with 37 additions and 27 deletions.
diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py
@@ -141,7 +141,7 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
 
     SparkXGBClassifier doesn't support `validate_features` and `output_margin` param.
 
-    SparkXGBRegressor doesn't support setting `nthread` xgboost param, instead, the `nthread`
+    SparkXGBClassifier doesn't support setting `nthread` xgboost param, instead, the `nthread`
     param for each xgboost worker will be set equal to `spark.task.cpus` config value.
 
 
@@ -243,15 +243,15 @@ def _validate_params(self):
 
 class SparkXGBRanker(_SparkXGBEstimator):
     """SparkXGBRanker is a PySpark ML estimator. It implements the XGBoost
-    classification algorithm based on XGBoost python library, and it can be used in
+    ranking algorithm based on XGBoost python library, and it can be used in
     PySpark Pipeline and PySpark ML meta algorithms like
     :py:class:`~pyspark.ml.tuning.CrossValidator`/
     :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/
     :py:class:`~pyspark.ml.classification.OneVsRest`
 
     SparkXGBRanker automatically supports most of the parameters in
-    `xgboost.XGBClassifier` constructor and most of the parameters used in
-    :py:class:`xgboost.XGBClassifier` fit and predict method.
+    `xgboost.XGBRanker` constructor and most of the parameters used in
+    :py:class:`xgboost.XGBRanker` fit and predict method.
 
     SparkXGBRanker doesn't support setting `gpu_id` but support another param `use_gpu`,
     see doc below for more details.
@@ -273,21 +273,21 @@ class SparkXGBRanker(_SparkXGBEstimator):
 
     callbacks:
         The export and import of the callback functions are at best effort. For
-        details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc.
+        details, see :py:attr:`xgboost.spark.SparkXGBRanker.callbacks` param doc.
     validation_indicator_col:
-        For params related to `xgboost.XGBClassifier` training with
+        For params related to `xgboost.XGBRanker` training with
         evaluation dataset's supervision,
-        set :py:attr:`xgboost.spark.SparkXGBClassifier.validation_indicator_col`
-        parameter instead of setting the `eval_set` parameter in `xgboost.XGBClassifier`
+        set :py:attr:`xgboost.spark.XGBRanker.validation_indicator_col`
+        parameter instead of setting the `eval_set` parameter in `xgboost.XGBRanker`
         fit method.
     weight_col:
         To specify the weight of the training and validation dataset, set
-        :py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting
-        `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier`
+        :py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting
+        `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRanker`
         fit method.
     xgb_model:
         Set the value to be the instance returned by
-        :func:`xgboost.spark.SparkXGBClassifierModel.get_booster`.
+        :func:`xgboost.spark.SparkXGBRankerModel.get_booster`.
     num_workers:
         Integer that specifies the number of XGBoost workers to use.
         Each XGBoost worker corresponds to one spark task.
@@ -313,23 +313,33 @@ class SparkXGBRanker(_SparkXGBEstimator):
     Examples
     --------
 
-    >>> from xgboost.spark import SparkXGBClassifier
+    >>> from xgboost.spark import SparkXGBRanker
     >>> from pyspark.ml.linalg import Vectors
-    >>> df_train = spark.createDataFrame([
-    ...     (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0),
-    ...     (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0),
-    ...     (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0),
-    ...     (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0),
-    ... ], ["features", "label", "isVal", "weight"])
-    >>> df_test = spark.createDataFrame([
-    ...     (Vectors.dense(1.0, 2.0, 3.0), ),
-    ... ], ["features"])
-    >>> xgb_classifier = SparkXGBClassifier(max_depth=5, missing=0.0,
-    ...     validation_indicator_col='isVal', weight_col='weight',
-    ...     early_stopping_rounds=1, eval_metric='logloss')
-    >>> xgb_clf_model = xgb_classifier.fit(df_train)
-    >>> xgb_clf_model.transform(df_test).show()
-
+    >>> ranker = SparkXGBRanker(qid_col="qid")
+    >>> df_train = spark.createDataFrame(
+    ...     [
+    ...         (Vectors.dense(1.0, 2.0, 3.0), 0, 0),
+    ...         (Vectors.dense(4.0, 5.0, 6.0), 1, 0),
+    ...         (Vectors.dense(9.0, 4.0, 8.0), 2, 0),
+    ...         (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
+    ...         (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
+    ...         (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
+    ...     ],
+    ...     ["features", "label", "qid"],
+    ... )
+    >>> df_test = spark.createDataFrame(
+    ...     [
+    ...         (Vectors.dense(1.5, 2.0, 3.0), 0),
+    ...         (Vectors.dense(4.5, 5.0, 6.0), 0),
+    ...         (Vectors.dense(9.0, 4.5, 8.0), 0),
+    ...         (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1),
+    ...         (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1),
+    ...         (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1),
+    ...     ],
+    ...     ["features", "qid"],
+    ... )
+    >>> model = ranker.fit(df_train)
+    >>> model.transform(df_test).show()
     """
 
     def __init__(self, **kwargs):