From c31e3efa7c2b886ebc9c0a63b4ed6a5b027214d7 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Fri, 11 Dec 2020 19:30:00 -0800 Subject: [PATCH] Pass correct split_type to GPU predictor (#6491) * Pass correct split_type to GPU predictor * Add a test --- src/predictor/gpu_predictor.cu | 6 ++++- tests/python-gpu/test_gpu_prediction.py | 35 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 66ff4c7e68ec..90e786f08b87 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -272,7 +272,11 @@ PredictKernel(Data data, common::Span d_nodes, d_categories.subspan(d_cat_tree_segments[tree_idx - tree_begin], d_cat_tree_segments[tree_idx - tree_begin + 1] - d_cat_tree_segments[tree_idx - tree_begin]); - float leaf = GetLeafWeight(global_idx, d_tree, d_tree_split_types, + auto tree_split_types = + d_tree_split_types.subspan(d_tree_segments[tree_idx - tree_begin], + d_tree_segments[tree_idx - tree_begin + 1] - + d_tree_segments[tree_idx - tree_begin]); + float leaf = GetLeafWeight(global_idx, d_tree, tree_split_types, tree_cat_ptrs, tree_categories, &loader); diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 1427fcd58165..e2c2afff9f5b 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -3,8 +3,17 @@ import numpy as np import xgboost as xgb +from xgboost.compat import PANDAS_INSTALLED + from hypothesis import given, strategies, assume, settings, note +if PANDAS_INSTALLED: + from hypothesis.extra.pandas import column, data_frames, range_indexes +else: + def noop(*args, **kwargs): + pass + column, data_frames, range_indexes = noop, noop, noop + sys.path.append("tests/python") import testing as tm from test_predict import run_threaded_predict # noqa @@ -259,3 +268,29 @@ def test_predict_leaf_dart(self, param, dataset): param['booster'] = 'dart' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) + + @pytest.mark.skipif(**tm.no_sklearn()) + @pytest.mark.skipif(**tm.no_pandas()) + @given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)), + column('x1', elements=strategies.integers(min_value=0, max_value=5))], + index=range_indexes(min_size=20, max_size=50))) + @settings(deadline=None) + def test_predict_categorical_split(self, df): + from sklearn.metrics import mean_squared_error + + df = df.astype('category') + x0, x1 = df['x0'].to_numpy(), df['x1'].to_numpy() + y = (x0 * 10 - 20) + (x1 - 2) + dtrain = xgb.DMatrix(df, label=y, enable_categorical=True) + + params = {'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', + 'enable_experimental_json_serialization': True, + 'max_depth': 3, 'learning_rate': 1.0, 'base_score': 0.0, 'eval_metric': 'rmse'} + + eval_history = {} + bst = xgb.train(params, dtrain, num_boost_round=5, evals=[(dtrain, 'train')], + verbose_eval=False, evals_result=eval_history) + + pred = bst.predict(dtrain) + rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False) + np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)