From 43ae4ad9371ff39a069bbf010727877c005d8e66 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 11 Aug 2022 15:07:06 +0800 Subject: [PATCH] Merge kernels. --- src/data/ellpack_page.cu | 25 +++++++++---------------- src/data/iterative_dmatrix.h | 9 +++------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index 0834bb6fd7a3..cf04ab16e7bf 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -288,7 +288,8 @@ ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch) namespace { void CopyGHistToEllpack(GHistIndexMatrix const& page, common::Span d_row_ptr, - size_t row_stride, common::CompressedByteT* d_compressed_buffer) { + size_t row_stride, common::CompressedByteT* d_compressed_buffer, + size_t null) { dh::device_vector data(page.index.begin(), page.index.end()); auto d_data = dh::ToSpan(data); @@ -305,9 +306,10 @@ void CopyGHistToEllpack(GHistIndexMatrix const& page, common::Span auto r_begin = d_row_ptr[ridx]; auto r_end = d_row_ptr[ridx + 1]; - size_t rsize = r_end - r_begin; + size_t r_size = r_end - r_begin; - if (ifeature >= rsize) { + if (ifeature >= r_size) { + writer.AtomicWriteSymbol(d_compressed_buffer, null, idx); return; } @@ -320,15 +322,10 @@ void CopyGHistToEllpack(GHistIndexMatrix const& page, common::Span using T = decltype(t); auto ptr = reinterpret_cast(d_data.data()); auto bin_idx = ptr[r_begin + ifeature] + offset; - writer.AtomicWriteSymbol(d_compressed_buffer, bin_idx, ridx * row_stride + ifeature); + writer.AtomicWriteSymbol(d_compressed_buffer, bin_idx, idx); }); }); } - -void RowCountsFromIndptr(common::Span d_row_ptr, common::Span row_counts) { - dh::LaunchN(row_counts.size(), - [=] XGBOOST_DEVICE(size_t i) { row_counts[i] = d_row_ptr[i + 1] - d_row_ptr[i]; }); -} } // anonymous namespace EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page, @@ -344,17 +341,13 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& pag monitor_.Stop("InitCompressedData"); // copy gidx - auto accessor = this->GetDeviceAccessor(ctx->gpu_id, ft); common::CompressedByteT* d_compressed_buffer = gidx_buffer.DevicePointer(); dh::device_vector row_ptr(page.row_ptr); auto d_row_ptr = dh::ToSpan(row_ptr); - CopyGHistToEllpack(page, d_row_ptr, row_stride, d_compressed_buffer); - // write null value - dh::device_vector row_counts(page.Size()); - auto row_counts_span = dh::ToSpan(row_counts); - RowCountsFromIndptr(d_row_ptr, row_counts_span); - WriteNullValues(this, ctx->gpu_id, row_counts_span); + auto accessor = this->GetDeviceAccessor(ctx->gpu_id, ft); + auto null = accessor.NullValue(); + CopyGHistToEllpack(page, d_row_ptr, row_stride, d_compressed_buffer, null); } // A functor that copies the data from one EllpackPage to another. diff --git a/src/data/iterative_dmatrix.h b/src/data/iterative_dmatrix.h index 976b87d56ee8..06d061382ba8 100644 --- a/src/data/iterative_dmatrix.h +++ b/src/data/iterative_dmatrix.h @@ -40,12 +40,9 @@ namespace data { * * - The CPU format and the GPU format are different, the former uses a CSR + CSC for * histogram index while the latter uses only Ellpack. This results into a design that - * we can obtain the GPU format from CPU but not the other way around since we can't - * recover the CSC from Ellpack. More concretely, if users want to construct a CPU - * version of `QuantileDMatrix`, input data must be on CPU. However, if users want to - * have a GPU version of `QuantileDMatrix`, data can be on either place. We can fix this - * by retaining the feature index information in ellpack if there are feature - * requests. + * we can obtain the GPU format from CPU but the other way around is not yet + * supported. We can search the bin value from ellpack to recover the feature index when + * we support copying data from GPU to CPU. */ class IterativeDMatrix : public DMatrix { MetaInfo info_;