From a172649b9a454ae7901e86adb3e083dda9a64702 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 24 Jun 2021 02:29:56 +0800 Subject: [PATCH 1/2] Tests for dask skl categorical data support. --- python-package/xgboost/sklearn.py | 6 ++++ tests/python-gpu/test_gpu_with_dask.py | 42 +++++++++++++++++--------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index d9b928551c48..76e5d7e9c735 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -632,6 +632,12 @@ def _configure_fit( eval_metric = None else: params.update({"eval_metric": eval_metric}) + if self.enable_categorical and params.get("tree_method", None) != "gpu_hist": + raise ValueError( + "Experimental support for categorical data is not implemented for" + " current tree method yet." + ) + return model, feval, params def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None: diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index a08f99079c62..1819ce189e50 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -211,20 +211,34 @@ def test_categorical(local_cuda_cluster: LocalCUDACluster) -> None: ) assert tm.non_increasing(by_builtin_results["Train"]["rmse"]) - model = output["booster"] - with tempfile.TemporaryDirectory() as tempdir: - path = os.path.join(tempdir, "model.json") - model.save_model(path) - with open(path, "r") as fd: - categorical = json.load(fd) - - categories_sizes = np.array( - categorical["learner"]["gradient_booster"]["model"]["trees"][-1][ - "categories_sizes" - ] - ) - assert categories_sizes.shape[0] != 0 - np.testing.assert_allclose(categories_sizes, 1) + def check_model_output(model): + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, "model.json") + model.save_model(path) + with open(path, "r") as fd: + categorical = json.load(fd) + + categories_sizes = np.array( + categorical["learner"]["gradient_booster"]["model"]["trees"][-1][ + "categories_sizes" + ] + ) + assert categories_sizes.shape[0] != 0 + np.testing.assert_allclose(categories_sizes, 1) + + check_model_output(output["booster"]) + reg = dxgb.DaskXGBRegressor( + enable_categorical=True, n_estimators=10, tree_method="gpu_hist" + ) + reg.fit(X, y) + + check_model_output(reg.get_booster()) + + reg = dxgb.DaskXGBRegressor( + enable_categorical=True, n_estimators=10 + ) + with pytest.raises(ValueError): + reg.fit(X, y) def to_cp(x: Any, DMatrixT: Type) -> Any: From f85aff30dc562dc8201b740ce32391741be7950f Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 24 Jun 2021 02:36:33 +0800 Subject: [PATCH 2/2] mypy. --- tests/python-gpu/test_gpu_with_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index 1819ce189e50..54087b85d1e9 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -211,7 +211,7 @@ def test_categorical(local_cuda_cluster: LocalCUDACluster) -> None: ) assert tm.non_increasing(by_builtin_results["Train"]["rmse"]) - def check_model_output(model): + def check_model_output(model: dxgb.Booster) -> None: with tempfile.TemporaryDirectory() as tempdir: path = os.path.join(tempdir, "model.json") model.save_model(path)