diff --git a/src/common/categorical.h b/src/common/categorical.h index 3706c4f2370d..fedada7bd700 100644 --- a/src/common/categorical.h +++ b/src/common/categorical.h @@ -42,9 +42,9 @@ inline XGBOOST_DEVICE bool Decision(common::Span cats, bst_cat_t return !s_cats.Check(cat); } -inline void CheckCat(bst_cat_t cat) { - CHECK_GE(cat, 0) << "Invalid categorical value detected. Categorical value " - "should be non-negative."; +inline void InvalidCategory() { + LOG(FATAL) << "Invalid categorical value detected. Categorical value " + "should be non-negative."; } struct IsCatOp { diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 07949d18bfe8..d89951915d4f 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -580,6 +580,19 @@ void SketchContainer::AllReduce() { timer_.Stop(__func__); } +namespace { +struct InvalidCat { + Span values; + Span ptrs; + Span ft; + + XGBOOST_DEVICE bool operator()(size_t i) { + auto fidx = dh::SegmentId(ptrs, i); + return IsCat(ft, fidx) && values[i] < 0; + } +}; +} // anonymous namespace + void SketchContainer::MakeCuts(HistogramCuts* p_cuts) { timer_.Start(__func__); dh::safe_cuda(cudaSetDevice(device_)); @@ -669,6 +682,19 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) { assert(idx+1 < in_column.size()); out_column[idx] = in_column[idx+1].value; }); + + if (has_categorical_) { + dh::XGBCachingDeviceAllocator alloc; + auto ptrs = p_cuts->cut_ptrs_.ConstDeviceSpan(); + auto it = thrust::make_counting_iterator(0ul); + CHECK_EQ(p_cuts->Ptrs().back(), out_cut_values.size()); + auto invalid = + thrust::any_of(thrust::cuda::par(alloc), it, it + out_cut_values.size(), + InvalidCat{out_cut_values, ptrs, d_ft}); + if (invalid) { + InvalidCategory(); + } + } timer_.Stop(__func__); } } // namespace common diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index cbe63d243da4..561a364b5371 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -580,7 +580,9 @@ struct 
GPUHistMakerDevice { CHECK_LT(candidate.split.fvalue, std::numeric_limits::max()) << "Categorical feature value too large."; auto cat = common::AsCat(candidate.split.fvalue); - common::CheckCat(cat); + if (cat < 0) { + common::InvalidCategory(); + } std::vector split_cats(LBitField32::ComputeStorageSize(std::max(cat+1, 1)), 0); LBitField32 cats_bits(split_cats); cats_bits.Set(cat); diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 8a4822ba9105..505d38778c8b 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -101,6 +101,7 @@ def test_invalid_categorical(self): X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10) y = rng.normal(loc=0, scale=1, size=100) + # Check is performed during sketching. Xy = xgb.DMatrix(X, y, feature_types=["c"] * 10) with pytest.raises(ValueError): xgb.train({"tree_method": "gpu_hist"}, Xy) @@ -108,7 +109,6 @@ def test_invalid_categorical(self): X, y = cp.array(X), cp.array(y) with pytest.raises(ValueError): Xy = xgb.DeviceQuantileDMatrix(X, y, feature_types=["c"] * 10) - xgb.train({"tree_method": "gpu_hist"}, Xy) @pytest.mark.skipif(**tm.no_cupy()) @given(parameter_strategy, strategies.integers(1, 20),