From 3da5a69dc9f10c41fc665530240c9121368cb78d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Oct 2020 15:26:29 +0800 Subject: [PATCH] Fix typo in dask interface. (#6240) --- python-package/xgboost/dask.py | 25 +++++++++++++------------ tests/python/test_with_dask.py | 13 ++++++++----- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 7a2221f27cdb..ed3aaae5e5ba 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -987,7 +987,7 @@ def inplace_predict(client, model, data, async def _evaluation_matrices(client, validation_set, - sample_weights, missing): + sample_weight, missing): ''' Parameters ---------- @@ -1010,8 +1010,8 @@ async def _evaluation_matrices(client, validation_set, if validation_set is not None: assert isinstance(validation_set, list) for i, e in enumerate(validation_set): - w = (sample_weights[i] - if sample_weights is not None else None) + w = (sample_weight[i] + if sample_weight is not None else None) dmat = await DaskDMatrix(client=client, data=e[0], label=e[1], weight=w, missing=missing) evals.append((dmat, 'validation_{}'.format(i))) @@ -1027,7 +1027,7 @@ class DaskScikitLearnBase(XGBModel): # pylint: disable=arguments-differ def fit(self, X, y, - sample_weights=None, + sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, @@ -1086,13 +1086,13 @@ def client(self, clt): ['estimators', 'model']) class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase): # pylint: disable=missing-class-docstring - async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, + async def _fit_async(self, X, y, sample_weight, base_margin, eval_set, sample_weight_eval_set, early_stopping_rounds, verbose): dtrain = await DaskDMatrix(client=self.client, data=X, label=y, - weight=sample_weights, + weight=sample_weight, base_margin=base_margin, missing=self.missing) params = self.get_xgb_params() @@ -1115,7 +1115,7 @@ async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, def fit(self, X, y, - sample_weights=None, + sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, @@ -1125,7 +1125,7 @@ def fit(self, return self.client.sync(self._fit_async, X=X, y=y, - sample_weights=sample_weights, + sample_weight=sample_weight, base_margin=base_margin, eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, @@ -1150,17 +1150,18 @@ def predict(self, data, output_margin=False, base_margin=None): output_margin=output_margin, base_margin=base_margin) + @xgboost_model_doc( 'Implementation of the scikit-learn API for XGBoost classification.', ['estimators', 'model']) class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): - async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, + async def _fit_async(self, X, y, sample_weight, base_margin, eval_set, sample_weight_eval_set, early_stopping_rounds, verbose): dtrain = await DaskDMatrix(client=self.client, data=X, label=y, - weight=sample_weights, + weight=sample_weight, base_margin=base_margin, missing=self.missing) params = self.get_xgb_params() @@ -1196,7 +1197,7 @@ async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, def fit(self, X, y, - sample_weights=None, + sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, @@ -1206,7 +1207,7 @@ def fit(self, return self.client.sync(self._fit_async, X=X, y=y, - sample_weights=sample_weights, + sample_weight=sample_weight, base_margin=base_margin, eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 8b4a56ac2024..532934ecc9f5 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -40,10 +40,13 @@ kWorkers = 5 -def generate_array(): +def generate_array(with_weights=False): partition_size = 20 X = da.random.random((kRows, kCols), partition_size) y = da.random.random(kRows, partition_size) + if with_weights: + w = da.random.random(kRows, partition_size) + return X, y, w return X, y @@ -252,11 +255,11 @@ def test_dask_missing_value_cls(): def test_dask_regressor(): with LocalCluster(n_workers=kWorkers) as cluster: with Client(cluster) as client: - X, y = generate_array() + X, y, w = generate_array(with_weights=True) regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2) regressor.set_params(tree_method='hist') regressor.client = client - regressor.fit(X, y, eval_set=[(X, y)]) + regressor.fit(X, y, sample_weight=w, eval_set=[(X, y)]) prediction = regressor.predict(X) assert prediction.ndim == 1 @@ -274,12 +277,12 @@ def test_dask_regressor(): def test_dask_classifier(): with LocalCluster(n_workers=kWorkers) as cluster: with Client(cluster) as client: - X, y = generate_array() + X, y, w = generate_array(with_weights=True) y = (y * 10).astype(np.int32) classifier = xgb.dask.DaskXGBClassifier( verbosity=1, n_estimators=2, eval_metric='merror') classifier.client = client - classifier.fit(X, y, eval_set=[(X, y)]) + classifier.fit(X, y, sample_weight=w, eval_set=[(X, y)]) prediction = classifier.predict(X) assert prediction.ndim == 1