From 14afdb4d92c7d84e60cce6b4d4a37e1734e4eecd Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 24 Sep 2020 19:28:57 +0800 Subject: [PATCH] Support categorical data in ellpack. (#6140) --- src/data/ellpack_page.cu | 47 ++++++++----- src/data/ellpack_page.cuh | 12 ++-- src/data/ellpack_page_source.cu | 5 +- tests/cpp/common/test_hist_util.h | 13 ---- tests/cpp/data/test_ellpack_page.cu | 42 +++++++++++- tests/cpp/helpers.cc | 9 ++- tests/cpp/helpers.h | 22 +++++++ tests/cpp/histogram_helpers.h | 2 +- tests/cpp/tree/gpu_hist/test_histogram.cu | 80 ++++++++++++++++++++++- 9 files changed, 189 insertions(+), 43 deletions(-) diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index 39e845f2d765..6ce18340f771 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -1,10 +1,10 @@ /*! - * Copyright 2019 XGBoost contributors + * Copyright 2019-2020 XGBoost contributors */ - #include #include #include +#include "../common/categorical.h" #include "../common/hist_util.cuh" #include "../common/random.h" #include "./ellpack_page.cuh" @@ -33,6 +33,7 @@ __global__ void CompressBinEllpackKernel( const Entry* __restrict__ entries, // One batch of input data const float* __restrict__ cuts, // HistogramCuts::cut_values_ const uint32_t* __restrict__ cut_rows, // HistogramCuts::cut_ptrs_ + common::Span feature_types, size_t base_row, // batch_row_begin size_t n_rows, size_t row_stride, @@ -51,11 +52,19 @@ __global__ void CompressBinEllpackKernel( // {feature_cuts, ncuts} forms the array of cuts of `feature'. const float* feature_cuts = &cuts[cut_rows[feature]]; int ncuts = cut_rows[feature + 1] - cut_rows[feature]; + bool is_cat = common::IsCat(feature_types, ifeature); // Assigning the bin in current entry. // S.t.: fvalue < feature_cuts[bin] - bin = thrust::upper_bound(thrust::seq, feature_cuts, feature_cuts + ncuts, - fvalue) - - feature_cuts; + if (is_cat) { + auto it = dh::MakeTransformIterator( + feature_cuts, [](float v) { return common::AsCat(v); }); + bin = thrust::lower_bound(thrust::seq, it, it + ncuts, common::AsCat(fvalue)) - it; + } else { + bin = thrust::upper_bound(thrust::seq, feature_cuts, feature_cuts + ncuts, + fvalue) - + feature_cuts; + } + if (bin >= ncuts) { bin = ncuts - 1; } @@ -83,14 +92,13 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts, } EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts, - const SparsePage& page, bool is_dense, - size_t row_stride) - : cuts_(std::move(cuts)), - is_dense(is_dense), - n_rows(page.Size()), + const SparsePage &page, bool is_dense, + size_t row_stride, + common::Span feature_types) + : cuts_(std::move(cuts)), is_dense(is_dense), n_rows(page.Size()), row_stride(row_stride) { this->InitCompressedData(device); - this->CreateHistIndices(device, page); + this->CreateHistIndices(device, page, feature_types); } // Construct an ELLPACK matrix in memory. @@ -108,12 +116,14 @@ EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) monitor_.Stop("Quantiles"); monitor_.Start("InitCompressedData"); - InitCompressedData(param.gpu_id); + this->InitCompressedData(param.gpu_id); monitor_.Stop("InitCompressedData"); + dmat->Info().feature_types.SetDevice(param.gpu_id); + auto ft = dmat->Info().feature_types.ConstDeviceSpan(); monitor_.Start("BinningCompression"); for (const auto& batch : dmat->GetBatches()) { - CreateHistIndices(param.gpu_id, batch); + CreateHistIndices(param.gpu_id, batch, ft); } monitor_.Stop("BinningCompression"); } @@ -365,7 +375,8 @@ void EllpackPageImpl::InitCompressedData(int device) { // Compress a CSR page into ELLPACK. void EllpackPageImpl::CreateHistIndices(int device, - const SparsePage& row_batch) { + const SparsePage& row_batch, + common::Span feature_types) { if (row_batch.Size() == 0) return; unsigned int null_gidx_value = NumSymbols() - 1; @@ -397,9 +408,9 @@ void EllpackPageImpl::CreateHistIndices(int device, size_t n_entries = ent_cnt_end - ent_cnt_begin; dh::device_vector entries_d(n_entries); // copy data entries to device. - dh::safe_cuda(cudaMemcpy(entries_d.data().get(), - data_vec.data() + ent_cnt_begin, - n_entries * sizeof(Entry), cudaMemcpyDefault)); + dh::safe_cuda(cudaMemcpyAsync(entries_d.data().get(), + data_vec.data() + ent_cnt_begin, + n_entries * sizeof(Entry), cudaMemcpyDefault)); const dim3 block3(32, 8, 1); // 256 threads const dim3 grid3(common::DivRoundUp(batch_nrows, block3.x), common::DivRoundUp(row_stride, block3.y), 1); @@ -408,7 +419,7 @@ void EllpackPageImpl::CreateHistIndices(int device, CompressBinEllpackKernel, common::CompressedBufferWriter(NumSymbols()), gidx_buffer.DevicePointer(), row_ptrs.data().get(), entries_d.data().get(), device_accessor.gidx_fvalue_map.data(), - device_accessor.feature_segments.data(), + device_accessor.feature_segments.data(), feature_types, row_batch.base_rowid + batch_row_begin, batch_nrows, row_stride, null_gidx_value); } diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh index ce2648f2a413..128e8a7ee9d1 100644 --- a/src/data/ellpack_page.cuh +++ b/src/data/ellpack_page.cuh @@ -118,10 +118,12 @@ class EllpackPageImpl { */ EllpackPageImpl(int device, common::HistogramCuts cuts, bool is_dense, size_t row_stride, size_t n_rows); - + /*! + * \brief Constructor used for external memory. + */ EllpackPageImpl(int device, common::HistogramCuts cuts, - const SparsePage& page, - bool is_dense, size_t row_stride); + const SparsePage &page, bool is_dense, size_t row_stride, + common::Span feature_types); /*! * \brief Constructor from an existing DMatrix. @@ -184,8 +186,8 @@ class EllpackPageImpl { * @param row_batch The CSR page. */ void CreateHistIndices(int device, - const SparsePage& row_batch - ); + const SparsePage& row_batch, + common::Span feature_types); /*! * \brief Initialize the buffer to store compressed features. */ diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index 3cf0a064a07f..1729c9d8c420 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -55,6 +55,7 @@ void EllpackPageSource::WriteEllpackPages(int device, DMatrix* dmat, SparsePage temp_host_page; writer.Alloc(&page); auto* impl = page->Impl(); + auto ft = dmat->Info().feature_types.ConstDeviceSpan(); size_t bytes_write = 0; double tstart = dmlc::GetTime(); @@ -66,7 +67,7 @@ void EllpackPageSource::WriteEllpackPages(int device, DMatrix* dmat, if (mem_cost_bytes >= page_size_) { bytes_write += mem_cost_bytes; *impl = EllpackPageImpl(device, cuts, temp_host_page, dmat->IsDense(), - row_stride); + row_stride, ft); writer.PushWrite(std::move(page)); writer.Alloc(&page); impl = page->Impl(); @@ -79,7 +80,7 @@ void EllpackPageSource::WriteEllpackPages(int device, DMatrix* dmat, } if (temp_host_page.Size() != 0) { *impl = EllpackPageImpl(device, cuts, temp_host_page, dmat->IsDense(), - row_stride); + row_stride, ft); writer.PushWrite(std::move(page)); } } diff --git a/tests/cpp/common/test_hist_util.h b/tests/cpp/common/test_hist_util.h index d025e5ea60bf..9b025ed9b17a 100644 --- a/tests/cpp/common/test_hist_util.h +++ b/tests/cpp/common/test_hist_util.h @@ -60,19 +60,6 @@ inline data::CupyAdapter AdapterFromData(const thrust::device_vector &x, } #endif -inline std::vector GenerateRandomCategoricalSingleColumn(int n, - int num_categories) { - std::vector x(n); - std::mt19937 rng(0); - std::uniform_int_distribution dist(0, num_categories - 1); - std::generate(x.begin(), x.end(), [&]() { return dist(rng); }); - // Make sure each category is present - for(auto i = 0; i < num_categories; i++) { - x[i] = i; - } - return x; -} - inline std::shared_ptr GetDMatrixFromData(const std::vector &x, int num_rows, int num_columns) { data::DenseAdapter adapter(x.data(), num_rows, num_columns); diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu index f92e6a7f9a4d..23d566068619 100644 --- a/tests/cpp/data/test_ellpack_page.cu +++ b/tests/cpp/data/test_ellpack_page.cu @@ -1,5 +1,5 @@ /*! - * Copyright 2019 XGBoost contributors + * Copyright 2019-2020 XGBoost contributors */ #include @@ -9,6 +9,7 @@ #include "../histogram_helpers.h" #include "gtest/gtest.h" +#include "../../../src/common/categorical.h" #include "../../../src/common/hist_util.h" #include "../../../src/data/ellpack_page.cuh" @@ -77,6 +78,45 @@ TEST(EllpackPage, BuildGidxSparse) { } } +TEST(EllpackPage, FromCategoricalBasic) { + using common::AsCat; + size_t constexpr kRows = 1000, kCats = 13, kCols = 1; + size_t max_bins = 8; + auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats); + auto m = GetDMatrixFromData(x, kRows, 1); + auto& h_ft = m->Info().feature_types.HostVector(); + h_ft.resize(kCols, FeatureType::kCategorical); + + BatchParam p(0, max_bins); + auto ellpack = EllpackPage(m.get(), p); + auto accessor = ellpack.Impl()->GetDeviceAccessor(0); + ASSERT_EQ(kCats, accessor.NumBins()); + + auto x_copy = x; + std::sort(x_copy.begin(), x_copy.end()); + auto n_uniques = std::unique(x_copy.begin(), x_copy.end()) - x_copy.begin(); + ASSERT_EQ(n_uniques, kCats); + + std::vector h_cuts_ptr(accessor.feature_segments.size()); + dh::CopyDeviceSpanToVector(&h_cuts_ptr, accessor.feature_segments); + std::vector h_cuts_values(accessor.gidx_fvalue_map.size()); + dh::CopyDeviceSpanToVector(&h_cuts_values, accessor.gidx_fvalue_map); + + ASSERT_EQ(h_cuts_ptr.size(), 2); + ASSERT_EQ(h_cuts_values.size(), kCats); + + std::vector const &h_gidx_buffer = + ellpack.Impl()->gidx_buffer.HostVector(); + auto h_gidx_iter = common::CompressedIterator( + h_gidx_buffer.data(), accessor.NumSymbols()); + + for (size_t i = 0; i < x.size(); ++i) { + auto bin = h_gidx_iter[i]; + auto bin_value = h_cuts_values.at(bin); + ASSERT_EQ(AsCat(x[i]), AsCat(bin_value)); + } +} + struct ReadRowFunction { EllpackDeviceAccessor matrix; int row; diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index 858b651981fb..1b319a8873ff 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -17,6 +17,7 @@ #include "helpers.h" #include "xgboost/c_api.h" #include "../../src/data/adapter.h" +#include "../../src/data/simple_dmatrix.h" #include "../../src/gbm/gbtree_model.h" #include "xgboost/predictor.h" @@ -350,6 +351,13 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label, return out; } +std::shared_ptr +GetDMatrixFromData(const std::vector &x, int num_rows, int num_columns){ + data::DenseAdapter adapter(x.data(), num_rows, num_columns); + return std::shared_ptr(new data::SimpleDMatrix( + &adapter, std::numeric_limits::quiet_NaN(), 1)); +} + std::unique_ptr CreateSparsePageDMatrix( size_t n_entries, size_t page_size, std::string tmp_file) { // Create sufficiently large data to make two row pages @@ -539,5 +547,4 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) { return RMMAllocatorPtr(nullptr, DeleteRMMResource); } #endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1 - } // namespace xgboost diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 5d4ce6cefa68..02bde0598f74 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -42,6 +42,12 @@ struct LearnerModelParam; class GradientBooster; } +template +Float RelError(Float l, Float r) { + static_assert(std::is_floating_point::value, ""); + return std::abs(1.0f - l / r); +} + bool FileExists(const std::string& filename); int64_t GetFileSize(const std::string& filename); @@ -254,6 +260,22 @@ class RandomDataGenerator { #endif }; +inline std::vector +GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) { + std::vector x(n); + std::mt19937 rng(0); + std::uniform_int_distribution dist(0, num_categories - 1); + std::generate(x.begin(), x.end(), [&]() { return dist(rng); }); + // Make sure each category is present + for(size_t i = 0; i < num_categories; i++) { + x[i] = i; + } + return x; +} + +std::shared_ptr GetDMatrixFromData(const std::vector &x, + int num_rows, int num_columns); + std::unique_ptr CreateSparsePageDMatrix( size_t n_entries, size_t page_size, std::string tmp_file); diff --git a/tests/cpp/histogram_helpers.h b/tests/cpp/histogram_helpers.h index 013020784045..127f6fe44da8 100644 --- a/tests/cpp/histogram_helpers.h +++ b/tests/cpp/histogram_helpers.h @@ -45,7 +45,7 @@ inline std::unique_ptr BuildEllpackPage( } auto page = std::unique_ptr( - new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride)); + new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride, {})); return page; } diff --git a/tests/cpp/tree/gpu_hist/test_histogram.cu b/tests/cpp/tree/gpu_hist/test_histogram.cu index 99cc4b835fec..4ff7ec1062b0 100644 --- a/tests/cpp/tree/gpu_hist/test_histogram.cu +++ b/tests/cpp/tree/gpu_hist/test_histogram.cu @@ -1,6 +1,7 @@ #include #include #include "../../helpers.h" +#include "../../../../src/common/categorical.h" #include "../../../../src/tree/gpu_hist/row_partitioner.cuh" #include "../../../../src/tree/gpu_hist/histogram.cuh" @@ -30,7 +31,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) { FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size, sizeof(Gradient)); - + auto rounding = CreateRoundingFactor(gpair.DeviceSpan()); BuildGradientHistogram(page->GetDeviceAccessor(0), feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), @@ -67,7 +68,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) { // Use a single feature group to compute the baseline. FeatureGroups single_group(page->Cuts()); - + dh::device_vector baseline(num_bins); BuildGradientHistogram(page->GetDeviceAccessor(0), single_group.DeviceAccessor(0), @@ -97,5 +98,80 @@ TEST(Histogram, GPUDeterministic) { } } } + +std::vector OneHotEncodeFeature(std::vector x, size_t num_cat) { + std::vector ret(x.size() * num_cat, 0); + size_t n_rows = x.size(); + for (size_t r = 0; r < n_rows; ++r) { + bst_cat_t cat = common::AsCat(x[r]); + ret.at(num_cat * r + cat) = 1; + } + return ret; +} + +// Test 1 vs rest categorical histogram is equivalent to one hot encoded data. +void TestGPUHistogramCategorical(size_t num_categories) { + size_t constexpr kRows = 340; + size_t constexpr kBins = 256; + auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories); + auto cat_m = GetDMatrixFromData(x, kRows, 1); + cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical); + BatchParam batch_param{0, static_cast(kBins), 0}; + tree::RowPartitioner row_partitioner(0, kRows); + auto ridx = row_partitioner.GetRows(0); + dh::device_vector cat_hist(num_categories); + auto gpair = GenerateRandomGradients(kRows, 0, 2); + gpair.SetDevice(0); + auto rounding = CreateRoundingFactor(gpair.DeviceSpan()); + // Generate hist with cat data. + for (auto const &batch : cat_m->GetBatches(batch_param)) { + auto* page = batch.Impl(); + FeatureGroups single_group(page->Cuts()); + BuildGradientHistogram(page->GetDeviceAccessor(0), + single_group.DeviceAccessor(0), + gpair.DeviceSpan(), ridx, dh::ToSpan(cat_hist), + rounding); + } + + // Generate hist with one hot encoded data. + auto x_encoded = OneHotEncodeFeature(x, num_categories); + auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories); + dh::device_vector encode_hist(2 * num_categories); + for (auto const &batch : encode_m->GetBatches(batch_param)) { + auto* page = batch.Impl(); + FeatureGroups single_group(page->Cuts()); + BuildGradientHistogram(page->GetDeviceAccessor(0), + single_group.DeviceAccessor(0), + gpair.DeviceSpan(), ridx, dh::ToSpan(encode_hist), + rounding); + } + + std::vector h_cat_hist(cat_hist.size()); + thrust::copy(cat_hist.begin(), cat_hist.end(), h_cat_hist.begin()); + auto cat_sum = std::accumulate(h_cat_hist.begin(), h_cat_hist.end(), GradientPairPrecise{}); + + std::vector h_encode_hist(encode_hist.size()); + thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin()); + + for (size_t c = 0; c < num_categories; ++c) { + auto zero = h_encode_hist[c * 2]; + auto one = h_encode_hist[c * 2 + 1]; + + auto chosen = h_cat_hist[c]; + auto not_chosen = cat_sum - chosen; + + ASSERT_LE(RelError(zero.GetGrad(), not_chosen.GetGrad()), kRtEps); + ASSERT_LE(RelError(zero.GetHess(), not_chosen.GetHess()), kRtEps); + + ASSERT_LE(RelError(one.GetGrad(), chosen.GetGrad()), kRtEps); + ASSERT_LE(RelError(one.GetHess(), chosen.GetHess()), kRtEps); + } +} + +TEST(Histogram, GPUHistCategorical) { + for (size_t num_categories = 2; num_categories < 8; ++num_categories) { + TestGPUHistogramCategorical(num_categories); + } +} } // namespace tree } // namespace xgboost