From 3da5a69dc9f10c41fc665530240c9121368cb78d Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Thu, 15 Oct 2020 15:26:29 +0800
Subject: [PATCH] Fix typo in dask interface: rename `sample_weights` to `sample_weight`. (#6240)

---
 python-package/xgboost/dask.py | 25 +++++++++++++------------
 tests/python/test_with_dask.py | 13 ++++++++-----
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
index 7a2221f27cdb..ed3aaae5e5ba 100644
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -987,7 +987,7 @@ def inplace_predict(client, model, data,
 
 
 async def _evaluation_matrices(client, validation_set,
-                               sample_weights, missing):
+                               sample_weight, missing):
     '''
     Parameters
     ----------
@@ -1010,8 +1010,8 @@ async def _evaluation_matrices(client, validation_set,
     if validation_set is not None:
         assert isinstance(validation_set, list)
         for i, e in enumerate(validation_set):
-            w = (sample_weights[i]
-                 if sample_weights is not None else None)
+            w = (sample_weight[i]
+                 if sample_weight is not None else None)
             dmat = await DaskDMatrix(client=client, data=e[0], label=e[1],
                                      weight=w, missing=missing)
             evals.append((dmat, 'validation_{}'.format(i)))
@@ -1027,7 +1027,7 @@ class DaskScikitLearnBase(XGBModel):
 
     # pylint: disable=arguments-differ
     def fit(self, X, y,
-            sample_weights=None,
+            sample_weight=None,
             base_margin=None,
             eval_set=None,
             sample_weight_eval_set=None,
@@ -1086,13 +1086,13 @@ def client(self, clt):
                    ['estimators', 'model'])
 class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
     # pylint: disable=missing-class-docstring
-    async def _fit_async(self, X, y, sample_weights, base_margin, eval_set,
+    async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
                          sample_weight_eval_set, early_stopping_rounds,
                          verbose):
         dtrain = await DaskDMatrix(client=self.client,
                                    data=X,
                                    label=y,
-                                   weight=sample_weights,
+                                   weight=sample_weight,
                                    base_margin=base_margin,
                                    missing=self.missing)
         params = self.get_xgb_params()
@@ -1115,7 +1115,7 @@ async def _fit_async(self, X, y, sample_weights, base_margin, eval_set,
     def fit(self,
             X,
             y,
-            sample_weights=None,
+            sample_weight=None,
             base_margin=None,
             eval_set=None,
             sample_weight_eval_set=None,
@@ -1125,7 +1125,7 @@ def fit(self,
         return self.client.sync(self._fit_async,
                                 X=X,
                                 y=y,
-                                sample_weights=sample_weights,
+                                sample_weight=sample_weight,
                                 base_margin=base_margin,
                                 eval_set=eval_set,
                                 sample_weight_eval_set=sample_weight_eval_set,
@@ -1150,17 +1150,18 @@ def predict(self, data, output_margin=False, base_margin=None):
                                 output_margin=output_margin,
                                 base_margin=base_margin)
 
+
 @xgboost_model_doc(
     'Implementation of the scikit-learn API for XGBoost classification.',
     ['estimators', 'model'])
 class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
-    async def _fit_async(self, X, y, sample_weights, base_margin, eval_set,
+    async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
                          sample_weight_eval_set, early_stopping_rounds,
                          verbose):
         dtrain = await DaskDMatrix(client=self.client,
                                    data=X,
                                    label=y,
-                                   weight=sample_weights,
+                                   weight=sample_weight,
                                    base_margin=base_margin,
                                    missing=self.missing)
         params = self.get_xgb_params()
@@ -1196,7 +1197,7 @@ async def _fit_async(self, X, y, sample_weights, base_margin, eval_set,
     def fit(self,
             X,
             y,
-            sample_weights=None,
+            sample_weight=None,
             base_margin=None,
             eval_set=None,
             sample_weight_eval_set=None,
@@ -1206,7 +1207,7 @@ def fit(self,
         return self.client.sync(self._fit_async,
                                 X=X,
                                 y=y,
-                                sample_weights=sample_weights,
+                                sample_weight=sample_weight,
                                 base_margin=base_margin,
                                 eval_set=eval_set,
                                 sample_weight_eval_set=sample_weight_eval_set,
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index 8b4a56ac2024..532934ecc9f5 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -40,10 +40,13 @@
 kWorkers = 5
 
 
-def generate_array():
+def generate_array(with_weights=False):
     partition_size = 20
     X = da.random.random((kRows, kCols), partition_size)
     y = da.random.random(kRows, partition_size)
+    if with_weights:
+        w = da.random.random(kRows, partition_size)
+        return X, y, w
     return X, y
 
 
@@ -252,11 +255,11 @@ def test_dask_missing_value_cls():
 def test_dask_regressor():
     with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
-            X, y = generate_array()
+            X, y, w = generate_array(with_weights=True)
             regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
             regressor.set_params(tree_method='hist')
             regressor.client = client
-            regressor.fit(X, y, eval_set=[(X, y)])
+            regressor.fit(X, y, sample_weight=w, eval_set=[(X, y)])
             prediction = regressor.predict(X)
 
             assert prediction.ndim == 1
@@ -274,12 +277,12 @@ def test_dask_regressor():
 def test_dask_classifier():
     with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
-            X, y = generate_array()
+            X, y, w = generate_array(with_weights=True)
             y = (y * 10).astype(np.int32)
             classifier = xgb.dask.DaskXGBClassifier(
                 verbosity=1, n_estimators=2, eval_metric='merror')
             classifier.client = client
-            classifier.fit(X, y, eval_set=[(X, y)])
+            classifier.fit(X, y, sample_weight=w, eval_set=[(X, y)])
             prediction = classifier.predict(X)
 
             assert prediction.ndim == 1