From 8392d80e950bafe0549d47d4e94e89cd96973503 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Mon, 7 Feb 2022 18:30:28 +0800 Subject: [PATCH] fix. --- src/c_api/c_api_utils.h | 1 + src/common/column_matrix.h | 2 +- src/common/partition_builder.h | 2 +- src/data/ellpack_page_source.cu | 9 ++++++++- src/data/ellpack_page_source.h | 22 ++++++++++++---------- src/data/gradient_index_format.cc | 2 +- src/data/gradient_index_page_source.cc | 6 ++++-- src/data/gradient_index_page_source.h | 4 ++-- src/data/sparse_page_dmatrix.cc | 1 - src/data/sparse_page_source.h | 9 +++------ src/tree/hist/histogram.h | 2 +- 11 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h index 3cd8b42fce31..1a004bcd2f0f 100644 --- a/src/c_api/c_api_utils.h +++ b/src/c_api/c_api_utils.h @@ -167,6 +167,7 @@ inline float GetMissing(Json const &config) { class XGBoostAPIGuard { #if defined(XGBOOST_USE_CUDA) int32_t device_id_ {0}; + void SetGPUAttribute(); void RestoreGPUAttribute(); #else diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h index 14adff2c5fbd..e6b7765228a4 100644 --- a/src/common/column_matrix.h +++ b/src/common/column_matrix.h @@ -1,5 +1,5 @@ /*! - * Copyright 2017 by Contributors + * Copyright 2017-2022 by Contributors * \file column_matrix.h * \brief Utility for fast column-wise access * \author Philip Cho diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index f707e1a704a1..811a56ddfef4 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -1,5 +1,5 @@ /*! - * Copyright 2021 by Contributors + * Copyright 2021-2022 by Contributors * \file row_set.h * \brief Quick Utility to compute subset of rows * \author Philip Cho, Tianqi Chen diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index 6d79250a0a63..872cb0cc657f 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -1,5 +1,5 @@ /*! - * Copyright 2019-2021 XGBoost contributors + * Copyright 2019-2022 XGBoost contributors */ #include #include @@ -12,6 +12,13 @@ namespace data { void EllpackPageSource::Fetch() { dh::safe_cuda(cudaSetDevice(param_.gpu_id)); if (!this->ReadCache()) { + if (count_ != 0 && !sync_) { + // source is initialized to be the 0th page during construction, so when count_ is 0 + // there's no need to increment the source. + ++(*source_); + } + // This is not read from cache so we still need it to be synced with sparse page source. + CHECK_EQ(count_, source_->Iter()); auto const &csr = source_->Page(); this->page_.reset(new EllpackPage{}); auto *impl = this->page_->Impl(); diff --git a/src/data/ellpack_page_source.h b/src/data/ellpack_page_source.h index 9a1551d53749..dc080247287c 100644 --- a/src/data/ellpack_page_source.h +++ b/src/data/ellpack_page_source.h @@ -1,5 +1,5 @@ /*! - * Copyright 2019-2021 by XGBoost Contributors + * Copyright 2019-2022 by XGBoost Contributors */ #ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_ @@ -25,15 +25,17 @@ class EllpackPageSource : public PageSourceIncMixIn { std::unique_ptr cuts_; public: - EllpackPageSource( - float missing, int nthreads, bst_feature_t n_features, size_t n_batches, - std::shared_ptr cache, BatchParam param, - std::unique_ptr cuts, bool is_dense, - size_t row_stride, common::Span feature_types, - std::shared_ptr source) - : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache), - is_dense_{is_dense}, row_stride_{row_stride}, param_{std::move(param)}, - feature_types_{feature_types}, cuts_{std::move(cuts)} { + EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches, + std::shared_ptr cache, BatchParam param, + std::unique_ptr cuts, bool is_dense, size_t row_stride, + common::Span feature_types, + std::shared_ptr source) + : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false), + is_dense_{is_dense}, + row_stride_{row_stride}, + param_{std::move(param)}, + feature_types_{feature_types}, + cuts_{std::move(cuts)} { this->source_ = source; this->Fetch(); } diff --git a/src/data/gradient_index_format.cc b/src/data/gradient_index_format.cc index 150e77f1086e..3032499465eb 100644 --- a/src/data/gradient_index_format.cc +++ b/src/data/gradient_index_format.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2021 XGBoost contributors + * Copyright 2021-2022 XGBoost contributors */ #include "sparse_page_writer.h" #include "gradient_index.h" diff --git a/src/data/gradient_index_page_source.cc b/src/data/gradient_index_page_source.cc index 71276c0265be..09d8ada8070b 100644 --- a/src/data/gradient_index_page_source.cc +++ b/src/data/gradient_index_page_source.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2021 by XGBoost Contributors + * Copyright 2021-2022 by XGBoost Contributors */ #include "gradient_index_page_source.h" @@ -7,7 +7,9 @@ namespace xgboost { namespace data { void GradientIndexPageSource::Fetch() { if (!this->ReadCache()) { - if (count_ != 0) { + if (count_ != 0 && !sync_) { + // source is initialized to be the 0th page during construction, so when count_ is 0 + // there's no need to increment the source. ++(*source_); } // This is not read from cache so we still need it to be synced with sparse page source. diff --git a/src/data/gradient_index_page_source.h b/src/data/gradient_index_page_source.h index f98f5f0a7333..e21a2907093b 100644 --- a/src/data/gradient_index_page_source.h +++ b/src/data/gradient_index_page_source.h @@ -1,5 +1,5 @@ /*! - * Copyright 2021 by XGBoost Contributors + * Copyright 2021-2022 by XGBoost Contributors */ #ifndef XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_ #define XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_ @@ -26,7 +26,7 @@ class GradientIndexPageSource : public PageSourceIncMixIn { common::Span feature_types, float sparse_thresh, std::shared_ptr source) : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, - !std::isnan(sparse_thresh)), + std::isnan(sparse_thresh)), cuts_{std::move(cuts)}, is_dense_{is_dense}, max_bin_per_feat_{max_bin_per_feat}, diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index 96a81861284b..55e0ec0d38c1 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -169,7 +169,6 @@ BatchSet SparsePageDMatrix::GetGradientIndex(const BatchParam auto sorted_sketch = param.regen; auto cuts = common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess); - this->InitializeSparsePage(); // reset after use. batch_param_ = param; ghist_index_source_.reset(); diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 053d42df45f1..0a3e32e75e1f 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -292,7 +292,9 @@ class PageSourceIncMixIn : public SparsePageSourceImpl { protected: std::shared_ptr source_; using Super = SparsePageSourceImpl; - bool sync_{true}; // synchronize the row page. + // synchronize the row page, `hist` and `gpu_hist` don't need the original sparse page + // so we avoid fetching it. + bool sync_{true}; public: PageSourceIncMixIn(float missing, int nthreads, bst_feature_t n_features, uint32_t n_batches, @@ -307,11 +309,6 @@ class PageSourceIncMixIn : public SparsePageSourceImpl { ++this->count_; this->at_end_ = this->count_ == this->n_batches_; - if (this->at_end_) { - CHECK_EQ(this->count_, this->n_batches_); - } else { - CHECK_LT(this->count_, this->n_batches_); - } if (this->at_end_) { this->cache_info_->Commit(); diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 5c1c89c27584..6020de28d529 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -1,5 +1,5 @@ /*! - * Copyright 2021 by XGBoost Contributors + * Copyright 2021-2022 by XGBoost Contributors */ #ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_ #define XGBOOST_TREE_HIST_HISTOGRAM_H_