From 8570ba52da96ba46bf495957252fadc8d6532315 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 31 Jan 2024 10:48:27 -0500 Subject: [PATCH 01/55] Add additional data split mode to cover the secure vertical pipeline --- include/xgboost/data.h | 8 ++++++-- src/tree/hist/evaluate_splits.h | 13 +++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 08d3d119a8ff..fcffa98eae4e 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -40,7 +40,7 @@ enum class DataType : uint8_t { enum class FeatureType : uint8_t { kNumerical = 0, kCategorical = 1 }; -enum class DataSplitMode : int { kRow = 0, kCol = 1 }; +enum class DataSplitMode : int { kRow = 0, kCol = 1, kColS = 2 }; /*! * \brief Meta information about dataset, always sit in memory. @@ -186,7 +186,11 @@ class MetaInfo { } /** @brief Whether the data is split column-wise. */ - bool IsColumnSplit() const { return data_split_mode == DataSplitMode::kCol; } + bool IsColumnSplit() const { return (data_split_mode == DataSplitMode::kCol) || (data_split_mode == DataSplitMode::kColS); } + + /** @brief Whether the data is split column-wise for secure computation. */ + bool IsSecureCompute() const { return data_split_mode == DataSplitMode::kColS; } + /** @brief Whether this is a learning to rank data. */ bool IsRanking() const { return !group_ptr_.empty(); } diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index bc534d351f17..340e81f015c2 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -41,6 +41,7 @@ class HistEvaluator { std::shared_ptr column_sampler_; TreeEvaluator tree_evaluator_; bool is_col_split_{false}; + bool is_secure_{false}; FeatureInteractionConstraintHost interaction_constraints_; std::vector snode_; @@ -393,6 +394,11 @@ class HistEvaluator { } } + + // Print the info about modes of data split. + std::cout<< "Data split mode: " << (is_col_split_ ? 
"column-wise" : "row-wise") << std::endl; + std::cout<< "Secure compute mode: " << (is_secure_ ? "secure" : "plain") << std::endl; + if (is_col_split_) { // With column-wise data split, we gather the best splits from all the workers and update the // expand entries accordingly. @@ -477,7 +483,8 @@ class HistEvaluator { param_{param}, column_sampler_{std::move(sampler)}, tree_evaluator_{*param, static_cast(info.num_col_), DeviceOrd::CPU()}, - is_col_split_{info.IsColumnSplit()} { + is_col_split_{info.IsColumnSplit()}, + is_secure_{info.IsSecureCompute()}{ interaction_constraints_.Configure(*param, info.num_col_); column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), param_->colsample_bynode, param_->colsample_bylevel, @@ -493,6 +500,7 @@ class HistMultiEvaluator { std::shared_ptr column_sampler_; Context const *ctx_; bool is_col_split_{false}; + bool is_secure_{false}; private: static double MultiCalcSplitGain(TrainParam const ¶m, @@ -753,7 +761,8 @@ class HistMultiEvaluator { : param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx}, - is_col_split_{info.IsColumnSplit()} { + is_col_split_{info.IsColumnSplit()}, + is_secure_{info.IsSecureCompute()} { interaction_constraints_.Configure(*param, info.num_col_); column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), param_->colsample_bynode, param_->colsample_bylevel, From 2d00db61837392b12df75d681cf241eba6309464 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 31 Jan 2024 15:03:25 -0500 Subject: [PATCH 02/55] Add IsSecure info and update corresponding functions --- include/xgboost/data.h | 4 ++-- src/tree/hist/evaluate_splits.h | 16 ++++++++++++++-- src/tree/hist/histogram.h | 8 +++++--- src/tree/updater_approx.cc | 2 +- src/tree/updater_quantile_hist.cc | 4 ++-- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/include/xgboost/data.h b/include/xgboost/data.h index fcffa98eae4e..263f323b91be 100644 --- a/include/xgboost/data.h +++ 
b/include/xgboost/data.h @@ -188,8 +188,8 @@ class MetaInfo { /** @brief Whether the data is split column-wise. */ bool IsColumnSplit() const { return (data_split_mode == DataSplitMode::kCol) || (data_split_mode == DataSplitMode::kColS); } - /** @brief Whether the data is split column-wise for secure computation. */ - bool IsSecureCompute() const { return data_split_mode == DataSplitMode::kColS; } + /** @brief Whether the data is split column-wise with secure computation. */ + bool IsSecure() const { return data_split_mode == DataSplitMode::kColS; } /** @brief Whether this is a learning to rank data. */ bool IsRanking() const { return !group_ptr_.empty(); } diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 340e81f015c2..9971fb8ba743 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -402,7 +402,19 @@ class HistEvaluator { if (is_col_split_) { // With column-wise data split, we gather the best splits from all the workers and update the // expand entries accordingly. 
+ + //Print entry information before allgather + std::cout << "Entries Before allgather: " << std::endl; + for (size_t i = 0; i < entries.size(); ++i) { + std::cout << "Entry " << i << " nid: " << entries[i].nid << " gain: " << entries[i].split.loss_chg << std::endl; + } auto all_entries = Allgather(entries); + //Print entry information after allgather + std::cout << "Entries After allgather: " << std::endl; + for (size_t i = 0; i < all_entries.size(); ++i) { + std::cout << "Entry " << i << " nid: " << all_entries[i].nid << " gain: " << all_entries[i].split.loss_chg << std::endl; + } + for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { entries[nidx_in_set].split.Update( @@ -484,7 +496,7 @@ class HistEvaluator { column_sampler_{std::move(sampler)}, tree_evaluator_{*param, static_cast(info.num_col_), DeviceOrd::CPU()}, is_col_split_{info.IsColumnSplit()}, - is_secure_{info.IsSecureCompute()}{ + is_secure_{info.IsSecure()}{ interaction_constraints_.Configure(*param, info.num_col_); column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), param_->colsample_bynode, param_->colsample_bylevel, @@ -762,7 +774,7 @@ class HistMultiEvaluator { column_sampler_{std::move(sampler)}, ctx_{ctx}, is_col_split_{info.IsColumnSplit()}, - is_secure_{info.IsSecureCompute()} { + is_secure_{info.IsSecure()} { interaction_constraints_.Configure(*param, info.num_col_); column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), param_->colsample_bynode, param_->colsample_bylevel, diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 033d2221ec89..074c8be32c3f 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -50,6 +50,7 @@ class HistogramBuilder { // Whether XGBoost is running in distributed environment. 
bool is_distributed_{false}; bool is_col_split_{false}; + bool is_secure_{false}; public: /** @@ -60,13 +61,14 @@ class HistogramBuilder { * of using global rabit variable. */ void Reset(Context const *ctx, bst_bin_t total_bins, BatchParam const &p, bool is_distributed, - bool is_col_split, HistMakerTrainParam const *param) { + bool is_col_split, bool is_secure, HistMakerTrainParam const *param) { n_threads_ = ctx->Threads(); param_ = p; hist_.Reset(total_bins, param->max_cached_hist_node); buffer_.Init(total_bins); is_distributed_ = is_distributed; is_col_split_ = is_col_split; + is_secure_ = is_secure; } template @@ -329,12 +331,12 @@ class MultiHistogramBuilder { [[nodiscard]] auto &Histogram(bst_target_t t) { return target_builders_[t].Histogram(); } void Reset(Context const *ctx, bst_bin_t total_bins, bst_target_t n_targets, BatchParam const &p, - bool is_distributed, bool is_col_split, HistMakerTrainParam const *param) { + bool is_distributed, bool is_col_split, bool is_secure, HistMakerTrainParam const *param) { ctx_ = ctx; target_builders_.resize(n_targets); CHECK_GE(n_targets, 1); for (auto &v : target_builders_) { - v.Reset(ctx, total_bins, p, is_distributed, is_col_split, param); + v.Reset(ctx, total_bins, p, is_distributed, is_col_split, is_secure, param); } } }; diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index 94e7547ee209..42546188ff52 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -92,7 +92,7 @@ class GloablApproxBuilder { } histogram_builder_.Reset(ctx_, n_total_bins, p_tree->NumTargets(), BatchSpec(*param_, hess), - collective::IsDistributed(), p_fmat->Info().IsColumnSplit(), + collective::IsDistributed(), p_fmat->Info().IsColumnSplit(), p_fmat->Info().IsSecure(), hist_param_); monitor_->Stop(__func__); } diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index c2aaedafac95..2403aa8a6bdd 100644 --- a/src/tree/updater_quantile_hist.cc +++ 
b/src/tree/updater_quantile_hist.cc @@ -169,7 +169,7 @@ class MultiTargetHistBuilder { bst_target_t n_targets = p_tree->NumTargets(); histogram_builder_ = std::make_unique(); histogram_builder_->Reset(ctx_, n_total_bins, n_targets, HistBatch(param_), - collective::IsDistributed(), p_fmat->Info().IsColumnSplit(), + collective::IsDistributed(), p_fmat->Info().IsColumnSplit(), p_fmat->Info().IsSecure(), hist_param_); evaluator_ = std::make_unique(ctx_, p_fmat->Info(), param_, col_sampler_); @@ -358,7 +358,7 @@ class HistUpdater { fmat->Info().IsColumnSplit()); } histogram_builder_->Reset(ctx_, n_total_bins, 1, HistBatch(param_), collective::IsDistributed(), - fmat->Info().IsColumnSplit(), hist_param_); + fmat->Info().IsColumnSplit(), fmat->Info().IsSecure(), hist_param_); evaluator_ = std::make_unique(ctx_, this->param_, fmat->Info(), col_sampler_); p_last_tree_ = p_tree; monitor_->Stop(__func__); From ab17f5a3b4c264a5d8652430f5f2cd227f6b3800 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 31 Jan 2024 17:44:43 -0500 Subject: [PATCH 03/55] Modify evaluate_splits to block non-label owners to perform hist compute under secure scenario --- src/tree/hist/evaluate_splits.h | 95 +++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 9971fb8ba743..43c932777c2a 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -347,54 +347,67 @@ class HistEvaluator { auto evaluator = tree_evaluator_.GetEvaluator(); auto const& cut_ptrs = cut.Ptrs(); - common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) { - auto tidx = omp_get_thread_num(); - auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx]; - auto best = &entry->split; - auto nidx = entry->nid; - auto histogram = hist[nidx]; - auto features_set = features[nidx_in_set]->ConstHostSpan(); - for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); 
fidx_in_set++) { - auto fidx = features_set[fidx_in_set]; - bool is_cat = common::IsCat(feature_types, fidx); - if (!interaction_constraints_.Query(nidx, fidx)) { - continue; - } - if (is_cat) { - auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx]; - if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) { - EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best); - } else { - std::vector sorted_idx(n_bins); - std::iota(sorted_idx.begin(), sorted_idx.end(), 0); - auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins); - // Sort the histogram to get contiguous partitions. - std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) { - auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) < - evaluator.CalcWeightCat(*param_, feat_hist[r]); - return ret; - }); - EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best); - EnumeratePart<-1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best); + + // print current rank + std::cout << "------------------------" << std::endl; + std::cout << "rank: " << collective::GetRank() << std::endl; + std::cout << "n_threads = " << n_threads << std::endl; + + // Under secure vertical setting, only the label owner is able to evaluate the split + // based on the global histogram. 
The other parties will only receive the final best split information + // Hence the below computation is not performed by the non-label owners under secure vertical setting + if ((!is_secure_) || (collective::GetRank() == 0)) { + // Evaluate the splits for each feature + common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) { + auto tidx = omp_get_thread_num(); + auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx]; + auto best = &entry->split; + auto nidx = entry->nid; + auto histogram = hist[nidx]; + auto features_set = features[nidx_in_set]->ConstHostSpan(); + for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) { + auto fidx = features_set[fidx_in_set]; + bool is_cat = common::IsCat(feature_types, fidx); + if (!interaction_constraints_.Query(nidx, fidx)) { + continue; } - } else { - auto grad_stats = EnumerateSplit<+1>(cut, histogram, fidx, nidx, evaluator, best); - if (SplitContainsMissingValues(grad_stats, snode_[nidx])) { - EnumerateSplit<-1>(cut, histogram, fidx, nidx, evaluator, best); + if (is_cat) { + auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx]; + if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) { + EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best); + } + else { + std::vector sorted_idx(n_bins); + std::iota(sorted_idx.begin(), sorted_idx.end(), 0); + auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins); + // Sort the histogram to get contiguous partitions. 
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) { + auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) < + evaluator.CalcWeightCat(*param_, feat_hist[r]); + return ret; + }); + EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best); + EnumeratePart<-1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best); + } + } + else { + auto grad_stats = EnumerateSplit<+1>(cut, histogram, fidx, nidx, evaluator, best); + if (SplitContainsMissingValues(grad_stats, snode_[nidx])) { + EnumerateSplit<-1>(cut, histogram, fidx, nidx, evaluator, best); + } } } - } - }); + }); - for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); - ++nidx_in_set) { - for (auto tidx = 0; tidx < n_threads; ++tidx) { - entries[nidx_in_set].split.Update( - tloc_candidates[n_threads * nidx_in_set + tidx].split); + for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); + ++nidx_in_set) { + for (auto tidx = 0; tidx < n_threads; ++tidx) { + entries[nidx_in_set].split.Update( + tloc_candidates[n_threads * nidx_in_set + tidx].split); + } } } - // Print the info about modes of data split. std::cout<< "Data split mode: " << (is_col_split_ ? "column-wise" : "row-wise") << std::endl; std::cout<< "Secure compute mode: " << (is_secure_ ? 
"secure" : "plain") << std::endl; From fb1787c88ff543c02f78abada877be0da3c70922 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 2 Feb 2024 17:31:20 -0500 Subject: [PATCH 04/55] Continue using Allgather for best split sync for secure vertical, equivalent to broadcast --- src/tree/hist/evaluate_splits.h | 12 +++++++++++ src/tree/hist/histogram.h | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 43c932777c2a..e061c7dbe3d4 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -295,6 +295,9 @@ class HistEvaluator { auto const world = collective::GetWorldSize(); auto const num_entries = entries.size(); + // print the number of entries + std::cout << "Number of local entries, threads, and wordsize: " << num_entries << " " << ctx_->Threads() << " " << world << std::endl; + // First, gather all the primitive fields. std::vector local_entries(num_entries); std::vector cat_bits; @@ -415,12 +418,21 @@ class HistEvaluator { if (is_col_split_) { // With column-wise data split, we gather the best splits from all the workers and update the // expand entries accordingly. + // Note that under secure vertical setting, only the label owner is able to evaluate the split + // based on the global histogram. 
The other parties will receive the final best splits + // allgather is capable of performing this (0-gain entries for non-label owners), + // but can be replaced with a broadcast in the future //Print entry information before allgather std::cout << "Entries Before allgather: " << std::endl; for (size_t i = 0; i < entries.size(); ++i) { std::cout << "Entry " << i << " nid: " << entries[i].nid << " gain: " << entries[i].split.loss_chg << std::endl; } + + std::cout << "**************" << std::endl; + std::cout << "Allgather entries" << std::endl; + std::cout << "**************" << std::endl; + auto all_entries = Allgather(entries); //Print entry information after allgather std::cout << "Entries After allgather: " << std::endl; diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 074c8be32c3f..fbba6cf48300 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -177,6 +177,10 @@ class HistogramBuilder { std::vector const &nodes_to_build, std::vector const &nodes_to_trick) { auto n_total_bins = buffer_.TotalBins(); + + // Print the bin information + LOG(CONSOLE) << "Total bins: " << n_total_bins; + common::BlockedSpace2d space( nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024); common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) { @@ -192,6 +196,39 @@ class HistogramBuilder { reinterpret_cast(this->hist_[first_nidx].data()), n); } + if (is_distributed_ && is_col_split_ && is_secure_) { + // Under secure mode, we perform allgather for all nodes + CHECK(!nodes_to_build.empty()); + + // print histogram info before allgather + LOG(CONSOLE) << "Before allgather"; + for (size_t i = 0; i < nodes_to_build.size(); ++i) { + auto const nidx = nodes_to_build[i]; + auto const &hist = this->hist_[nidx]; + LOG(CONSOLE) << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms"; + } + + LOG(CONSOLE) << "********* Allgather histograms for all nodes 
*********"; + // allgather histograms for all nodes + // First, gather all the primitive fields. + /**auto const num_entries = entries.size(); + std::vector local_entries(num_entries); + std::vector cat_bits; + std::vector cat_bits_sizes; + for (std::size_t i = 0; i < num_entries; i++) { + local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes); + } + auto all_entries = collective::Allgather(local_entries); + // Gather all the cat_bits. + auto gathered = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes); +**/ + + auto all_entries = collective::Allgather(this->hist_[nodes_to_build.front()].data()); + LOG(CONSOLE) << "After allgather " << all_entries.size() << " entries"; + + } + common::BlockedSpace2d const &subspace = nodes_to_trick.size() == nodes_to_build.size() ? space From 7a2a2b846fef8cea089dbcb9d37c0d72e600d3cf Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 6 Feb 2024 17:15:11 -0500 Subject: [PATCH 05/55] Modify histogram sync scheme for secure vertical case, can identify global best split, but need to further apply split correctly --- src/common/quantile.cc | 45 +++++++++++++++++- src/tree/hist/evaluate_splits.h | 52 +++++++++++++++------ src/tree/hist/histogram.h | 82 ++++++++++++++++++++++++--------- 3 files changed, 143 insertions(+), 36 deletions(-) diff --git a/src/common/quantile.cc b/src/common/quantile.cc index c74db99e4006..a9eb7dabc50b 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -366,6 +366,39 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b } } +template +void AddCutPointSecure(typename SketchType::SummaryContainer const &summary, int max_bin, + HistogramCuts *cuts) { + // For secure vertical pipeline, we fill the cut values corresponding to empty columns + // with a vector of minimum value + const float mval = 1e-5f; + size_t required_cuts = std::min(summary.size, static_cast(max_bin)); + // make a copy of required_cuts for mode selection + size_t 
required_cuts_original = required_cuts; + // Sync the required_cuts across all workers + collective::Allreduce(&required_cuts, 1); + + // add the cut points + auto &cut_values = cuts->cut_values_.HostVector(); + // if not empty column, fill the cut values with the actual values + if (required_cuts_original > 0) { + // we use the min_value as the first (0th) element, hence starting from 1. + for (size_t i = 1; i < required_cuts; ++i) { + bst_float cpt = summary.data[i].value; + if (i == 1 || cpt > cut_values.back()) { + cut_values.push_back(cpt); + } + } + } + // if empty column, fill the cut values with minimum value + else { + for (size_t i = 1; i < required_cuts; ++i) { + cut_values.push_back(mval); + } + } +} + + auto AddCategories(std::set const &categories, HistogramCuts *cuts) { if (std::any_of(categories.cbegin(), categories.cend(), InvalidCat)) { InvalidCategory(); @@ -415,11 +448,21 @@ void SketchContainerImpl::MakeCuts(Context const *ctx, MetaInfo const float max_cat{-1.f}; for (size_t fid = 0; fid < reduced.size(); ++fid) { size_t max_num_bins = std::min(num_cuts[fid], max_bins_); + // If vertical and secure mode, we need to sync the max_num_bins aross workers + if (info.IsVerticalFederated() && info.IsSecure()) { + collective::Allreduce(&max_num_bins, 1); + } typename WQSketch::SummaryContainer const &a = final_summaries[fid]; if (IsCat(feature_types_, fid)) { max_cat = std::max(max_cat, AddCategories(categories_.at(fid), p_cuts)); } else { - AddCutPoint(a, max_num_bins, p_cuts); + // use special AddCutPoint scheme for secure vertical federated learning + if (info.IsVerticalFederated() && info.IsSecure()) { + AddCutPointSecure(a, max_num_bins, p_cuts); + } + else { + AddCutPoint(a, max_num_bins, p_cuts); + } // push a value that is greater than anything const bst_float cpt = (a.size > 0) ? 
a.data[a.size - 1].value : p_cuts->min_vals_.HostVector()[fid]; diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index e061c7dbe3d4..2d241e0f4e60 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -296,7 +296,7 @@ class HistEvaluator { auto const num_entries = entries.size(); // print the number of entries - std::cout << "Number of local entries, threads, and wordsize: " << num_entries << " " << ctx_->Threads() << " " << world << std::endl; + //std::cout << "Number of local entries, threads, and wordsize: " << num_entries << " " << ctx_->Threads() << " " << world << std::endl; // First, gather all the primitive fields. std::vector local_entries(num_entries); @@ -351,10 +351,39 @@ class HistEvaluator { auto const& cut_ptrs = cut.Ptrs(); - // print current rank - std::cout << "------------------------" << std::endl; - std::cout << "rank: " << collective::GetRank() << std::endl; - std::cout << "n_threads = " << n_threads << std::endl; + + + // Print the details of the histogram and the features to a file + // according to the rank + std::ofstream file_0, file_1; + file_0.open("histogram_0.txt", std::ios_base::app); + file_1.open("histogram_1.txt", std::ios_base::app); + if (collective::GetRank() == 0) { + for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + auto nidx = entries[nidx_in_set].nid; + auto features_set = column_sampler_->GetFeatureSet(tree.GetDepth(nidx))->ConstHostSpan(); + file_0 << std::endl << "Features for node " << nidx << std::endl; + for (size_t i = 0; i < features_set.size(); i++) { + file_0 << "Feature " << features_set[i] << " Cut " << cut.Ptrs()[i] << " " << cut.Ptrs()[i+1] << std::endl; + } + } + } else { + for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + auto nidx = entries[nidx_in_set].nid; + auto features_set = column_sampler_->GetFeatureSet(tree.GetDepth(nidx))->ConstHostSpan(); + file_1 << std::endl << "Features for node 
" << nidx << std::endl; + for (size_t i = 0; i < features_set.size(); i++) { + file_1 << "Feature " << features_set[i] << " Cut " << cut.Ptrs()[i] << " " << cut.Ptrs()[i+1] << std::endl; + } + } + } + file_0.close(); + file_1.close(); + + + + + // Under secure vertical setting, only the label owner is able to evaluate the split // based on the global histogram. The other parties will only receive the final best split information @@ -411,10 +440,6 @@ class HistEvaluator { } } - // Print the info about modes of data split. - std::cout<< "Data split mode: " << (is_col_split_ ? "column-wise" : "row-wise") << std::endl; - std::cout<< "Secure compute mode: " << (is_secure_ ? "secure" : "plain") << std::endl; - if (is_col_split_) { // With column-wise data split, we gather the best splits from all the workers and update the // expand entries accordingly. @@ -423,23 +448,24 @@ class HistEvaluator { // allgather is capable of performing this (0-gain entries for non-label owners), // but can be replaced with a broadcast in the future + //Print entry information before allgather std::cout << "Entries Before allgather: " << std::endl; for (size_t i = 0; i < entries.size(); ++i) { std::cout << "Entry " << i << " nid: " << entries[i].nid << " gain: " << entries[i].split.loss_chg << std::endl; } - - std::cout << "**************" << std::endl; std::cout << "Allgather entries" << std::endl; - std::cout << "**************" << std::endl; auto all_entries = Allgather(entries); + + //Print entry information after allgather std::cout << "Entries After allgather: " << std::endl; for (size_t i = 0; i < all_entries.size(); ++i) { std::cout << "Entry " << i << " nid: " << all_entries[i].nid << " gain: " << all_entries[i].split.loss_chg << std::endl; } + for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { entries[nidx_in_set].split.Update( @@ -675,7 +701,7 @@ class HistMultiEvaluator { } 
CHECK(!features.empty()); - std::int32_t n_threads = ctx_->Threads(); + std::int32_t n_threads = ctx_->Threads(); std::size_t const grain_size = std::max(1, features.front()->Size() / n_threads); common::BlockedSpace2d space( entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); }, diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index fbba6cf48300..a4ae98396f89 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -197,35 +197,73 @@ class HistogramBuilder { } if (is_distributed_ && is_col_split_ && is_secure_) { - // Under secure mode, we perform allgather for all nodes + // Under secure vertical mode, we perform allgather for all nodes CHECK(!nodes_to_build.empty()); - // print histogram info before allgather - LOG(CONSOLE) << "Before allgather"; + + + + + + // print the details of histograms to a file + std::ofstream file_0, file_1; + file_0.open("hist_before_allgather_0.txt", std::ios_base::app); + file_1.open("hist_before_allgather_1.txt", std::ios_base::app); + for (size_t i = 0; i < nodes_to_build.size(); ++i) { + auto const nidx = nodes_to_build[i]; + auto const &hist = this->hist_[nidx]; + if (collective::GetRank() == 0) { + file_0 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_0 << "Histogram " << j << ": " << hist[j] << std::endl; + } + } else { + file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_1 << "Histogram " << j << ": " << hist[j] << std::endl; + } + } + } + file_0.close(); + file_1.close(); + + + LOG(CONSOLE) << "********* Allgather histograms for all nodes *********"; + // in theory the operation is AllGather, but with current system functionality, + // we use AllReduce to simulate the AllGather operation + 
auto first_nidx = nodes_to_build.front(); + std::size_t n = n_total_bins * nodes_to_build.size() * 2; + collective::Allreduce( + reinterpret_cast(this->hist_[first_nidx].data()), n); + + + + + // print the details of histograms to a file + file_0.open("hist_after_allgather_0.txt", std::ios_base::app); + file_1.open("hist_after_allgather_1.txt", std::ios_base::app); for (size_t i = 0; i < nodes_to_build.size(); ++i) { auto const nidx = nodes_to_build[i]; auto const &hist = this->hist_[nidx]; - LOG(CONSOLE) << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms"; - } - - LOG(CONSOLE) << "********* Allgather histograms for all nodes *********"; - // allgather histograms for all nodes - // First, gather all the primitive fields. - /**auto const num_entries = entries.size(); - std::vector local_entries(num_entries); - std::vector cat_bits; - std::vector cat_bits_sizes; - for (std::size_t i = 0; i < num_entries; i++) { - local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes); + if (collective::GetRank() == 0) { + file_0 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_0 << "Histogram " << j << ": " << hist[j] << std::endl; + } + } else { + file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_1 << "Histogram " << j << ": " << hist[j] << std::endl; + } + } } - auto all_entries = collective::Allgather(local_entries); - // Gather all the cat_bits. 
- auto gathered = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes); -**/ + file_0.close(); + file_1.close(); - auto all_entries = collective::Allgather(this->hist_[nodes_to_build.front()].data()); - LOG(CONSOLE) << "After allgather " << all_entries.size() << " entries"; } From 3ca31424419abe6fa2a7334f001e050e4fcc4bc7 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 7 Feb 2024 18:26:25 -0500 Subject: [PATCH 06/55] Sync cut information across clients, full pipeline works for testing case --- src/common/quantile.cc | 11 +++++-- src/tree/hist/evaluate_splits.h | 49 ++++++++++++++-------------- src/tree/hist/histogram.h | 57 +++++++++++++-------------------- 3 files changed, 56 insertions(+), 61 deletions(-) diff --git a/src/common/quantile.cc b/src/common/quantile.cc index a9eb7dabc50b..eaed4b12f3dc 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -390,10 +390,10 @@ void AddCutPointSecure(typename SketchType::SummaryContainer const &summary, int } } } - // if empty column, fill the cut values with minimum value + // if empty column, fill the cut values with 0 else { for (size_t i = 1; i < required_cuts; ++i) { - cut_values.push_back(mval); + cut_values.push_back(0.0); } } } @@ -478,6 +478,13 @@ void SketchContainerImpl::MakeCuts(Context const *ctx, MetaInfo const p_cuts->cut_ptrs_.HostVector().push_back(cut_size); } + if (info.IsVerticalFederated() && info.IsSecure()) { + // cut values need to be synced across all workers via Allreduce + auto cut_val = p_cuts->cut_values_.HostVector().data(); + std::size_t n = p_cuts->cut_values_.HostVector().size(); + collective::Allreduce(cut_val, n); + } + p_cuts->SetCategorical(this->has_categorical_, max_cat); monitor_.Stop(__func__); } diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 2d241e0f4e60..2ff07462e887 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -278,10 +278,17 @@ class HistEvaluator { split_pt = cut_val[i - 1]; } 
best.Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum); + + //log best + //if (fidx == 5) { + // LOG(CONSOLE) << "Best cut for feature " << fidx << " is at " << best.split_value << " with gain " << best.loss_chg << std::endl; + //} + + + } } } - p_best->Update(best); return left_sum; } @@ -352,7 +359,7 @@ class HistEvaluator { - +/* // Print the details of the histogram and the features to a file // according to the rank std::ofstream file_0, file_1; @@ -379,10 +386,7 @@ class HistEvaluator { } file_0.close(); file_1.close(); - - - - +*/ // Under secure vertical setting, only the label owner is able to evaluate the split @@ -424,6 +428,11 @@ class HistEvaluator { } else { auto grad_stats = EnumerateSplit<+1>(cut, histogram, fidx, nidx, evaluator, best); + + // print the best split for each feature + // std::cout << "Best split for feature " << fidx << " is " << best->split_value << " with gain " << best->loss_chg << std::endl; + + if (SplitContainsMissingValues(grad_stats, snode_[nidx])) { EnumerateSplit<-1>(cut, histogram, fidx, nidx, evaluator, best); } @@ -448,30 +457,22 @@ class HistEvaluator { // allgather is capable of performing this (0-gain entries for non-label owners), // but can be replaced with a broadcast in the future - - //Print entry information before allgather - std::cout << "Entries Before allgather: " << std::endl; - for (size_t i = 0; i < entries.size(); ++i) { - std::cout << "Entry " << i << " nid: " << entries[i].nid << " gain: " << entries[i].split.loss_chg << std::endl; - } - std::cout << "Allgather entries" << std::endl; - auto all_entries = Allgather(entries); - - //Print entry information after allgather - std::cout << "Entries After allgather: " << std::endl; - for (size_t i = 0; i < all_entries.size(); ++i) { - std::cout << "Entry " << i << " nid: " << all_entries[i].nid << " gain: " << all_entries[i].split.loss_chg << std::endl; - } - - for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { for 
(std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - entries[nidx_in_set].split.Update( - all_entries[worker * entries.size() + nidx_in_set].split); + entries[nidx_in_set].split.Update( + all_entries[worker * entries.size() + nidx_in_set].split); } } + + // Print entry information after AllGather + for (size_t i = 0; i < entries.size(); ++i) { + LOG(CONSOLE) << "After AllGather rank: " << collective::GetRank() << " nid: " << entries[i].nid + << " gain: " << entries[i].split.loss_chg << " fid: " << entries[i].split.sindex << " split: " + << entries[i].split.split_value << std::endl; + } + } } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index a4ae98396f89..b437a77c6f22 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -199,12 +199,7 @@ class HistogramBuilder { if (is_distributed_ && is_col_split_ && is_secure_) { // Under secure vertical mode, we perform allgather for all nodes CHECK(!nodes_to_build.empty()); - - - - - - + /* // print the details of histograms to a file std::ofstream file_0, file_1; file_0.open("hist_before_allgather_0.txt", std::ios_base::app); @@ -228,43 +223,35 @@ class HistogramBuilder { } file_0.close(); file_1.close(); - - - LOG(CONSOLE) << "********* Allgather histograms for all nodes *********"; +*/ // in theory the operation is AllGather, but with current system functionality, // we use AllReduce to simulate the AllGather operation auto first_nidx = nodes_to_build.front(); std::size_t n = n_total_bins * nodes_to_build.size() * 2; collective::Allreduce( reinterpret_cast(this->hist_[first_nidx].data()), n); - - - - - // print the details of histograms to a file - file_0.open("hist_after_allgather_0.txt", std::ios_base::app); - file_1.open("hist_after_allgather_1.txt", std::ios_base::app); - for (size_t i = 0; i < nodes_to_build.size(); ++i) { - auto const nidx = nodes_to_build[i]; - auto const &hist = this->hist_[nidx]; - if (collective::GetRank() == 0) { - file_0 << 
std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_0 << "Histogram " << j << ": " << hist[j] << std::endl; - } - } else { - file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_1 << "Histogram " << j << ": " << hist[j] << std::endl; - } +/* // print the details of histograms to a file + file_0.open("hist_after_allgather_0.txt", std::ios_base::app); + file_1.open("hist_after_allgather_1.txt", std::ios_base::app); + for (size_t i = 0; i < nodes_to_build.size(); ++i) { + auto const nidx = nodes_to_build[i]; + auto const &hist = this->hist_[nidx]; + if (collective::GetRank() == 0) { + file_0 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_0 << "Histogram " << j << ": " << hist[j] << std::endl; + } + } else { + file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " + << hist.size() << " histograms" << std::endl; + for (size_t j = 0; j < hist.size(); ++j) { + file_1 << "Histogram " << j << ": " << hist[j] << std::endl; } + } } - file_0.close(); - file_1.close(); - - + file_0.close(); + file_1.close();*/ } common::BlockedSpace2d const &subspace = From 22dd522187311202c9b4c0d1912f147e7bcd5521 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 8 Feb 2024 09:46:03 -0500 Subject: [PATCH 07/55] Code cleanup, phase 1 of alternative vertical pipeline finished --- src/tree/hist/evaluate_splits.h | 48 ------------------------------- src/tree/hist/histogram.h | 50 --------------------------------- 2 files changed, 98 deletions(-) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 2ff07462e887..1ef805c07968 100644 --- 
a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -278,14 +278,6 @@ class HistEvaluator { split_pt = cut_val[i - 1]; } best.Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum); - - //log best - //if (fidx == 5) { - // LOG(CONSOLE) << "Best cut for feature " << fidx << " is at " << best.split_value << " with gain " << best.loss_chg << std::endl; - //} - - - } } } @@ -357,38 +349,6 @@ class HistEvaluator { auto evaluator = tree_evaluator_.GetEvaluator(); auto const& cut_ptrs = cut.Ptrs(); - - -/* - // Print the details of the histogram and the features to a file - // according to the rank - std::ofstream file_0, file_1; - file_0.open("histogram_0.txt", std::ios_base::app); - file_1.open("histogram_1.txt", std::ios_base::app); - if (collective::GetRank() == 0) { - for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - auto nidx = entries[nidx_in_set].nid; - auto features_set = column_sampler_->GetFeatureSet(tree.GetDepth(nidx))->ConstHostSpan(); - file_0 << std::endl << "Features for node " << nidx << std::endl; - for (size_t i = 0; i < features_set.size(); i++) { - file_0 << "Feature " << features_set[i] << " Cut " << cut.Ptrs()[i] << " " << cut.Ptrs()[i+1] << std::endl; - } - } - } else { - for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - auto nidx = entries[nidx_in_set].nid; - auto features_set = column_sampler_->GetFeatureSet(tree.GetDepth(nidx))->ConstHostSpan(); - file_1 << std::endl << "Features for node " << nidx << std::endl; - for (size_t i = 0; i < features_set.size(); i++) { - file_1 << "Feature " << features_set[i] << " Cut " << cut.Ptrs()[i] << " " << cut.Ptrs()[i+1] << std::endl; - } - } - } - file_0.close(); - file_1.close(); -*/ - - // Under secure vertical setting, only the label owner is able to evaluate the split // based on the global histogram. 
The other parties will only receive the final best split information // Hence the below computation is not performed by the non-label owners under secure vertical setting @@ -465,14 +425,6 @@ class HistEvaluator { all_entries[worker * entries.size() + nidx_in_set].split); } } - - // Print entry information after AllGather - for (size_t i = 0; i < entries.size(); ++i) { - LOG(CONSOLE) << "After AllGather rank: " << collective::GetRank() << " nid: " << entries[i].nid - << " gain: " << entries[i].split.loss_chg << " fid: " << entries[i].split.sindex << " split: " - << entries[i].split.split_value << std::endl; - } - } } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index b437a77c6f22..9d5d22eed782 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -178,9 +178,6 @@ class HistogramBuilder { std::vector const &nodes_to_trick) { auto n_total_bins = buffer_.TotalBins(); - // Print the bin information - LOG(CONSOLE) << "Total bins: " << n_total_bins; - common::BlockedSpace2d space( nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024); common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) { @@ -199,59 +196,12 @@ class HistogramBuilder { if (is_distributed_ && is_col_split_ && is_secure_) { // Under secure vertical mode, we perform allgather for all nodes CHECK(!nodes_to_build.empty()); - /* - // print the details of histograms to a file - std::ofstream file_0, file_1; - file_0.open("hist_before_allgather_0.txt", std::ios_base::app); - file_1.open("hist_before_allgather_1.txt", std::ios_base::app); - for (size_t i = 0; i < nodes_to_build.size(); ++i) { - auto const nidx = nodes_to_build[i]; - auto const &hist = this->hist_[nidx]; - if (collective::GetRank() == 0) { - file_0 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_0 << "Histogram " << j << ": " << 
hist[j] << std::endl; - } - } else { - file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_1 << "Histogram " << j << ": " << hist[j] << std::endl; - } - } - } - file_0.close(); - file_1.close(); -*/ // in theory the operation is AllGather, but with current system functionality, // we use AllReduce to simulate the AllGather operation auto first_nidx = nodes_to_build.front(); std::size_t n = n_total_bins * nodes_to_build.size() * 2; collective::Allreduce( reinterpret_cast(this->hist_[first_nidx].data()), n); -/* // print the details of histograms to a file - file_0.open("hist_after_allgather_0.txt", std::ios_base::app); - file_1.open("hist_after_allgather_1.txt", std::ios_base::app); - for (size_t i = 0; i < nodes_to_build.size(); ++i) { - auto const nidx = nodes_to_build[i]; - auto const &hist = this->hist_[nidx]; - if (collective::GetRank() == 0) { - file_0 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_0 << "Histogram " << j << ": " << hist[j] << std::endl; - } - } else { - file_1 << std::endl << "Rank: " << collective::GetRank() << " Node: " << nidx << " has " - << hist.size() << " histograms" << std::endl; - for (size_t j = 0; j < hist.size(); ++j) { - file_1 << "Histogram " << j << ": " << hist[j] << std::endl; - } - } - } - file_0.close(); - file_1.close();*/ } common::BlockedSpace2d const &subspace = From 52e8951554481410e687d098d67e147fdb1a5589 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 8 Feb 2024 09:50:55 -0500 Subject: [PATCH 08/55] Code clean --- src/tree/hist/evaluate_splits.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 1ef805c07968..1ee896102086 100644 --- 
a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -294,9 +294,6 @@ class HistEvaluator { auto const world = collective::GetWorldSize(); auto const num_entries = entries.size(); - // print the number of entries - //std::cout << "Number of local entries, threads, and wordsize: " << num_entries << " " << ctx_->Threads() << " " << world << std::endl; - // First, gather all the primitive fields. std::vector local_entries(num_entries); std::vector cat_bits; @@ -421,8 +418,8 @@ class HistEvaluator { for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - entries[nidx_in_set].split.Update( - all_entries[worker * entries.size() + nidx_in_set].split); + entries[nidx_in_set].split.Update( + all_entries[worker * entries.size() + nidx_in_set].split); } } } @@ -654,7 +651,7 @@ class HistMultiEvaluator { } CHECK(!features.empty()); - std::int32_t n_threads = ctx_->Threads(); + std::int32_t n_threads = ctx_->Threads(); std::size_t const grain_size = std::max(1, features.front()->Size() / n_threads); common::BlockedSpace2d space( entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); }, From e9eef1539222386ec616c84ef3ad0ae4e9520a09 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 12 Feb 2024 11:32:56 -0500 Subject: [PATCH 09/55] change kColS to kColSecure to avoid confusion with kCols --- include/xgboost/data.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 263f323b91be..bdd44491e040 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -40,7 +40,7 @@ enum class DataType : uint8_t { enum class FeatureType : uint8_t { kNumerical = 0, kCategorical = 1 }; -enum class DataSplitMode : int { kRow = 0, kCol = 1, kColS = 2 }; +enum class DataSplitMode : int { kRow = 0, kCol = 1, kColSecure = 2 }; /*! 
* \brief Meta information about dataset, always sit in memory. @@ -186,10 +186,10 @@ class MetaInfo { } /** @brief Whether the data is split column-wise. */ - bool IsColumnSplit() const { return (data_split_mode == DataSplitMode::kCol) || (data_split_mode == DataSplitMode::kColS); } + bool IsColumnSplit() const { return (data_split_mode == DataSplitMode::kCol) || (data_split_mode == DataSplitMode::kColSecure); } /** @brief Whether the data is split column-wise with secure computation. */ - bool IsSecure() const { return data_split_mode == DataSplitMode::kColS; } + bool IsSecure() const { return data_split_mode == DataSplitMode::kColSecure; } /** @brief Whether this is a learning to rank data. */ bool IsRanking() const { return !group_ptr_.empty(); } From 91c8a2faf2b42149ae4f88a7e0e7388eef4b163c Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 12 Feb 2024 19:50:56 -0500 Subject: [PATCH 10/55] Replace allreduce with allgather, functional but inefficient version --- src/tree/hist/histogram.h | 131 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 4 deletions(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 9d5d22eed782..0072abcb4c27 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -196,12 +196,135 @@ class HistogramBuilder { if (is_distributed_ && is_col_split_ && is_secure_) { // Under secure vertical mode, we perform allgather for all nodes CHECK(!nodes_to_build.empty()); - // in theory the operation is AllGather, but with current system functionality, - // we use AllReduce to simulate the AllGather operation auto first_nidx = nodes_to_build.front(); std::size_t n = n_total_bins * nodes_to_build.size() * 2; - collective::Allreduce( - reinterpret_cast(this->hist_[first_nidx].data()), n); + + // in theory the operation is AllGather, with current system functionality, + // it can be achieved with AllReduce for the special hist data structure + + //collective::Allreduce( + // 
reinterpret_cast(this->hist_[first_nidx].data()), n); + + // implementation with AllGather, note that only Label Owner needs + // the global histogram + + if (collective::GetRank() == 0) { + //print the entries to file for debug + std::ofstream file_hist; + file_hist.open("hist_before_allgather_0.txt", std::ios_base::app); + file_hist << "********************************" << std::endl; + file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; + //iterate through the nodes to build + for (auto i = 0; i < nodes_to_build.size(); i++) { + auto hist = this->hist_[nodes_to_build[i]]; + auto hist_size = hist.size(); + file_hist << "node " << i << " hist_size: " << hist_size << std::endl; + // get item with iterator + size_t j = 0; + for (auto it = hist.begin(); it != hist.end(); it++) { + if ((j < 10) || ((j>2000)&&(j<2010))) { + file_hist << "hist_item " << j << ": " << *it << std::endl; + } + j++; + } + } + file_hist.close(); + } + + if (collective::GetRank() == 1) { + //print the entries to file for debug + std::ofstream file_hist; + file_hist.open("hist_before_allgather_1.txt", std::ios_base::app); + file_hist << "********************************" << std::endl; + file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; + //iterate through the nodes to build + for (auto i = 0; i < nodes_to_build.size(); i++) { + auto hist = this->hist_[nodes_to_build[i]]; + auto hist_size = hist.size(); + file_hist << "node " << i << " hist_size: " << hist_size << std::endl; + // get item with iterator + size_t j = 0; + for (auto it = hist.begin(); it != hist.end(); it++) { + if ((j < 10) || ((j>2000)&&(j<2010))) { + file_hist << "hist_item " << j << ": " << *it << std::endl; + } + j++; + } + } + file_hist.close(); + } + + // Perform AllGather at finest granularity (histogram entries) + for (auto i = 0; i < nodes_to_build.size(); i++) { + auto hist = this->hist_[nodes_to_build[i]]; + auto hist_size = hist.size(); + // get item with iterator + size_t j = 0; 
+ for (auto it = hist.begin(); it != hist.end(); it++) { + auto item = *it; + // perform AllGather for each histogram entry + auto hist_entries = collective::Allgather(item); + if (collective::GetRank() == 0) { + // DECRYPT the received entries HERE!!!!!!!!! + // only perform update for the label owner + this->hist_[nodes_to_build[i]][j] = hist_entries[0]; + for (size_t k = 1; k < hist_entries.size(); k++) { + // update the global histogram with the received entries + this->hist_[nodes_to_build[i]][j] += hist_entries[k]; + } + } + j++; + } + } + + if (collective::GetRank() == 0) { + //print the entries to file for debug + std::ofstream file_hist; + file_hist.open("hist_after_allgather_0.txt", std::ios_base::app); + file_hist << "********************************" << std::endl; + file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; + //iterate through the nodes to build + for (auto i = 0; i < nodes_to_build.size(); i++) { + auto hist = this->hist_[nodes_to_build[i]]; + auto hist_size = hist.size(); + file_hist << "node " << i << " hist_size: " << hist_size << std::endl; + // get item with iterator + size_t j = 0; + for (auto it = hist.begin(); it != hist.end(); it++) { + if ((j < 10) || ((j>2000)&&(j<2010))) { + file_hist << "hist_item " << j << ": " << *it << std::endl; + } + j++; + } + } + file_hist.close(); + } + + if (collective::GetRank() == 1) { + //print the entries to file for debug + std::ofstream file_hist; + file_hist.open("hist_after_allgather_1.txt", std::ios_base::app); + file_hist << "********************************" << std::endl; + file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; + //iterate through the nodes to build + for (auto i = 0; i < nodes_to_build.size(); i++) { + auto hist = this->hist_[nodes_to_build[i]]; + auto hist_size = hist.size(); + file_hist << "node " << i << " hist_size: " << hist_size << std::endl; + // get item with iterator + size_t j = 0; + for (auto it = hist.begin(); it != hist.end(); 
it++) { + if ((j < 10) || ((j>2000)&&(j<2010))) { + file_hist << "hist_item " << j << ": " << *it << std::endl; + } + j++; + } + } + file_hist.close(); + } + + + } common::BlockedSpace2d const &subspace = From 8340c268689ce763c119f1c9321d441e75eeb0fe Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 13 Feb 2024 13:06:34 -0500 Subject: [PATCH 11/55] Update AllGather behavior from individual pair to bulk by adopting new histogram transmission data structure of a flat vector --- src/learner.cc | 9 +++++ src/tree/hist/histogram.h | 84 ++++++++++++++++++++++++++++++++++----- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index db72f71644cb..886878b422a1 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -1472,6 +1472,15 @@ class LearnerImpl : public LearnerIO { void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, std::int32_t iter, linalg::Matrix* out_gpair) { out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength()); + + // print meta info + if (info.IsSecure()) { + std::cout << "secure mode" << std::endl; + } else { + std::cout << "clean mode" << std::endl; + } + + collective::ApplyWithLabels(info, out_gpair->Data(), [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 0072abcb4c27..2bc3a95b838b 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -205,6 +205,15 @@ class HistogramBuilder { //collective::Allreduce( // reinterpret_cast(this->hist_[first_nidx].data()), n); + + + + + + + + + // implementation with AllGather, note that only Label Owner needs // the global histogram @@ -254,7 +263,21 @@ class HistogramBuilder { file_hist.close(); } - // Perform AllGather at finest granularity (histogram entries) + + + + + + + + + + + // Collect the histogram entries from all nodes + // allocate memory for the received entries as a flat vector + std::vector hist_flat; + 
hist_flat.resize(n); + // iterate through the nodes to build for (auto i = 0; i < nodes_to_build.size(); i++) { auto hist = this->hist_[nodes_to_build[i]]; auto hist_size = hist.size(); @@ -262,21 +285,45 @@ class HistogramBuilder { size_t j = 0; for (auto it = hist.begin(); it != hist.end(); it++) { auto item = *it; - // perform AllGather for each histogram entry - auto hist_entries = collective::Allgather(item); - if (collective::GetRank() == 0) { - // DECRYPT the received entries HERE!!!!!!!!! - // only perform update for the label owner - this->hist_[nodes_to_build[i]][j] = hist_entries[0]; - for (size_t k = 1; k < hist_entries.size(); k++) { + hist_flat[i * hist_size + j] = item.GetGrad(); + hist_flat[i * hist_size + j + 1] = item.GetHess(); + j = j + 2; + } + } + // Perform AllGather + auto hist_entries = collective::AllgatherV(hist_flat); + // Update histogram for data owner + if (collective::GetRank() == 0) { + // skip rank 0, as local hist already contains its own entries + for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { + // iterate through the nodes to build + for (auto node_idx = 0; node_idx < nodes_to_build.size(); node_idx++) { + // get the histogram of the node + auto hist = this->hist_[nodes_to_build[node_idx]]; + // get item with iterator + size_t hist_item_idx = 0; + for (auto it = hist.begin(); it != hist.end(); it++) { + auto flat_idx = (rank_idx + node_idx) * n + hist_item_idx*2; + // DECRYPT the received entries HERE!!!!!!!!! 
+ auto hist_item_grad = hist_entries[flat_idx]; + auto hist_item_hess = hist_entries[flat_idx + 1]; + // compose a gradient pair + auto hist_item_temp = GradientPairPrecise(hist_item_grad, hist_item_hess); // update the global histogram with the received entries - this->hist_[nodes_to_build[i]][j] += hist_entries[k]; + *it += hist_item_temp; + hist_item_idx += 1; } } - j++; } } + + + + + + + if (collective::GetRank() == 0) { //print the entries to file for debug std::ofstream file_hist; @@ -325,6 +372,23 @@ class HistogramBuilder { + + + + + + + + + + + + + + + + + } common::BlockedSpace2d const &subspace = From 42a9df147e30738b5798ab29b4ba2a4235a629e1 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 13 Feb 2024 13:31:41 -0500 Subject: [PATCH 12/55] comment out the record printing --- src/collective/aggregator.h | 34 ++++++++++++++++++++++++++++++++++ src/learner.cc | 11 +++++------ src/tree/hist/histogram.h | 28 ++++++---------------------- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index f2a9ff528366..1d1e9dcab5a3 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -102,6 +102,40 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function } } + +template +void ApplyWithLabelsEncrypted(MetaInfo const& info, HostDeviceVector* result, Function&& function) { + if (info.IsVerticalFederated()) { + // We assume labels are only available on worker 0, so the calculation is done there and result + // broadcast to other workers. 
+ std::string message; + if (collective::GetRank() == 0) { + try { + std::forward(function)(); + } catch (dmlc::Error& e) { + message = e.what(); + } + } + + collective::Broadcast(&message, 0); + if (!message.empty()) { + LOG(FATAL) << &message[0]; + return; + } + + std::size_t size{}; + if (collective::GetRank() == 0) { + size = result->Size(); + } + collective::Broadcast(&size, sizeof(std::size_t), 0); + + result->Resize(size); + collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + } else { + std::forward(function)(); + } +} + /** * @brief Find the global max of the given value across all workers. * diff --git a/src/learner.cc b/src/learner.cc index 886878b422a1..21d9de98e72b 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -1473,16 +1473,15 @@ class LearnerImpl : public LearnerIO { std::int32_t iter, linalg::Matrix* out_gpair) { out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength()); - // print meta info + // calculate gradient and communicate with or without encryption if (info.IsSecure()) { - std::cout << "secure mode" << std::endl; + collective::ApplyWithLabelsEncrypted(info, out_gpair->Data(), + [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); } else { - std::cout << "clean mode" << std::endl; + collective::ApplyWithLabels(info, out_gpair->Data(), + [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); } - - collective::ApplyWithLabels(info, out_gpair->Data(), - [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); } /*! \brief random number transformation seed. */ diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 2bc3a95b838b..ba4e3b46c808 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -194,29 +194,13 @@ class HistogramBuilder { } if (is_distributed_ && is_col_split_ && is_secure_) { - // Under secure vertical mode, we perform allgather for all nodes + // Under secure vertical mode, we perform allgather to get the global histogram. 
+ // note that only Label Owner needs the global histogram CHECK(!nodes_to_build.empty()); - auto first_nidx = nodes_to_build.front(); std::size_t n = n_total_bins * nodes_to_build.size() * 2; - // in theory the operation is AllGather, with current system functionality, - // it can be achieved with AllReduce for the special hist data structure - - //collective::Allreduce( - // reinterpret_cast(this->hist_[first_nidx].data()), n); - - - - - - - - - - - // implementation with AllGather, note that only Label Owner needs - // the global histogram + /* if (collective::GetRank() == 0) { //print the entries to file for debug std::ofstream file_hist; @@ -262,7 +246,7 @@ class HistogramBuilder { } file_hist.close(); } - +*/ @@ -323,7 +307,7 @@ class HistogramBuilder { - +/* if (collective::GetRank() == 0) { //print the entries to file for debug std::ofstream file_hist; @@ -369,7 +353,7 @@ class HistogramBuilder { } file_hist.close(); } - +*/ From 41e5abbf899a55aa3f07c5d9606cdf4101b82575 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 20 Feb 2024 10:55:21 -0500 Subject: [PATCH 13/55] fix pointer bug for histsync with allgather --- src/collective/aggregator.h | 38 ++++++- src/learner.cc | 3 +- src/tree/hist/histogram.h | 216 ++++++++---------------------------- 3 files changed, 85 insertions(+), 172 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 1d1e9dcab5a3..472e53c2b617 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -102,7 +102,7 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function } } - +// Same as above, but with encyption on the result template void ApplyWithLabelsEncrypted(MetaInfo const& info, HostDeviceVector* result, Function&& function) { if (info.IsVerticalFederated()) { @@ -129,8 +129,44 @@ void ApplyWithLabelsEncrypted(MetaInfo const& info, HostDeviceVector* result, } collective::Broadcast(&size, sizeof(std::size_t), 0); + // save to vector and encrypt + if 
(collective::GetRank() == 0) { + // check the max and min value of the result vector + float max_g = std::numeric_limits::min(); + float min_g = std::numeric_limits::max(); + float max_h = std::numeric_limits::min(); + float min_h = std::numeric_limits::max(); + std::vector result_vector_g, result_vector_h; + for (int i = 0; i < size; i++) { + result_vector_g.push_back(result->HostVector()[i].GetGrad()); + result_vector_h.push_back(result->HostVector()[i].GetHess()); + + if (result->HostVector()[i].GetGrad() > max_g) { + max_g = result->HostVector()[i].GetGrad(); + } + if (result->HostVector()[i].GetGrad() < min_g) { + min_g = result->HostVector()[i].GetGrad(); + } + if (result->HostVector()[i].GetHess() > max_h) { + max_h = result->HostVector()[i].GetHess(); + } + if (result->HostVector()[i].GetHess() < min_h) { + min_h = result->HostVector()[i].GetHess(); + } + } + // print 1 sample + std::cout << " g[0]: " << result_vector_g[0] << " h[0]: " << result_vector_h[0] << std::endl; + // print max and min + std::cout << "max_g: " << max_g << " min_g: " << min_g << " max_h: " << max_h << " min_h: " << min_h << std::endl; + } + result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + + // print 1 sample + std::cout << "Rank: " << collective::GetRank() << " after broadcast - g: " << result->HostVector()[0].GetGrad() << " h: " << result->HostVector()[0].GetHess() << std::endl; + + } else { std::forward(function)(); } diff --git a/src/learner.cc b/src/learner.cc index 21d9de98e72b..93f73adb7688 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -1472,12 +1472,11 @@ class LearnerImpl : public LearnerIO { void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, std::int32_t iter, linalg::Matrix* out_gpair) { out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength()); - // calculate gradient and communicate with or without encryption if (info.IsSecure()) { collective::ApplyWithLabelsEncrypted(info, 
out_gpair->Data(), [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); - } else { + } else { collective::ApplyWithLabels(info, out_gpair->Data(), [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index ba4e3b46c808..7a007caa4085 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -199,177 +199,55 @@ class HistogramBuilder { CHECK(!nodes_to_build.empty()); std::size_t n = n_total_bins * nodes_to_build.size() * 2; - - /* - if (collective::GetRank() == 0) { - //print the entries to file for debug - std::ofstream file_hist; - file_hist.open("hist_before_allgather_0.txt", std::ios_base::app); - file_hist << "********************************" << std::endl; - file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; - //iterate through the nodes to build - for (auto i = 0; i < nodes_to_build.size(); i++) { - auto hist = this->hist_[nodes_to_build[i]]; - auto hist_size = hist.size(); - file_hist << "node " << i << " hist_size: " << hist_size << std::endl; - // get item with iterator - size_t j = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - if ((j < 10) || ((j>2000)&&(j<2010))) { - file_hist << "hist_item " << j << ": " << *it << std::endl; - } - j++; - } - } - file_hist.close(); - } - - if (collective::GetRank() == 1) { - //print the entries to file for debug - std::ofstream file_hist; - file_hist.open("hist_before_allgather_1.txt", std::ios_base::app); - file_hist << "********************************" << std::endl; - file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; - //iterate through the nodes to build - for (auto i = 0; i < nodes_to_build.size(); i++) { - auto hist = this->hist_[nodes_to_build[i]]; - auto hist_size = hist.size(); - file_hist << "node " << i << " hist_size: " << hist_size << std::endl; - // get item with iterator - size_t j = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - if ((j < 
10) || ((j>2000)&&(j<2010))) { - file_hist << "hist_item " << j << ": " << *it << std::endl; - } - j++; - } - } - file_hist.close(); - } -*/ - - - - - - - - - - - // Collect the histogram entries from all nodes - // allocate memory for the received entries as a flat vector - std::vector hist_flat; - hist_flat.resize(n); - // iterate through the nodes to build - for (auto i = 0; i < nodes_to_build.size(); i++) { - auto hist = this->hist_[nodes_to_build[i]]; - auto hist_size = hist.size(); - // get item with iterator - size_t j = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - auto item = *it; - hist_flat[i * hist_size + j] = item.GetGrad(); - hist_flat[i * hist_size + j + 1] = item.GetHess(); - j = j + 2; - } - } - // Perform AllGather - auto hist_entries = collective::AllgatherV(hist_flat); - // Update histogram for data owner - if (collective::GetRank() == 0) { - // skip rank 0, as local hist already contains its own entries - for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { - // iterate through the nodes to build - for (auto node_idx = 0; node_idx < nodes_to_build.size(); node_idx++) { - // get the histogram of the node - auto hist = this->hist_[nodes_to_build[node_idx]]; - // get item with iterator - size_t hist_item_idx = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - auto flat_idx = (rank_idx + node_idx) * n + hist_item_idx*2; - // DECRYPT the received entries HERE!!!!!!!!! 
- auto hist_item_grad = hist_entries[flat_idx]; - auto hist_item_hess = hist_entries[flat_idx + 1]; - // compose a gradient pair - auto hist_item_temp = GradientPairPrecise(hist_item_grad, hist_item_hess); - // update the global histogram with the received entries - *it += hist_item_temp; - hist_item_idx += 1; - } - } + // Option 1: in theory the operation is AllGather, but with current system functionality, + // we use AllReduce to simulate the AllGather operation + //auto first_nidx = nodes_to_build.front(); + //collective::Allreduce( + // reinterpret_cast(this->hist_[first_nidx].data()), n); + + + + // Option 2: use AllGather instead of AllReduce + // Collect the histogram entries from all nodes + // allocate memory for the received entries as a flat vector + std::vector hist_flat; + hist_flat.resize(n); + // iterate through the nodes_to_build + std::cout << "nodes_to_build.size() = " << nodes_to_build.size() << std::endl; + // front pointer + auto it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); + auto hist_size = this->hist_[nodes_to_build.front()].size(); + std::cout<< "n=" << n << std::endl; + std::cout << "hist_size = " << hist_size << std::endl; + for (size_t i = 0; i < n; i++) { + // get item with iterator + auto item = *it; + hist_flat[i] = item; + it++; + } + std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; + + // Perform AllGather + auto hist_entries = collective::Allgather(hist_flat); + + // Update histogram for data owner + if (collective::GetRank() == 0) { + // skip rank 0, as local hist already contains its own entries + std::cout << "hist_entries.size() = " << hist_entries.size() << std::endl; + // reposition iterator to the beginning of the vector + it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); + for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { + // iterate through the flat vector + for (size_t i = 0; i < n; i++) { + auto flat_idx = rank_idx * n + i; + // DECRYPT 
the received entries HERE!!!!!!!!! + auto hist_item = hist_entries[flat_idx]; + // update the global histogram with the received entries + *it += hist_item; + it++; } } - - - - - - - -/* - if (collective::GetRank() == 0) { - //print the entries to file for debug - std::ofstream file_hist; - file_hist.open("hist_after_allgather_0.txt", std::ios_base::app); - file_hist << "********************************" << std::endl; - file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; - //iterate through the nodes to build - for (auto i = 0; i < nodes_to_build.size(); i++) { - auto hist = this->hist_[nodes_to_build[i]]; - auto hist_size = hist.size(); - file_hist << "node " << i << " hist_size: " << hist_size << std::endl; - // get item with iterator - size_t j = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - if ((j < 10) || ((j>2000)&&(j<2010))) { - file_hist << "hist_item " << j << ": " << *it << std::endl; - } - j++; - } - } - file_hist.close(); - } - - if (collective::GetRank() == 1) { - //print the entries to file for debug - std::ofstream file_hist; - file_hist.open("hist_after_allgather_1.txt", std::ios_base::app); - file_hist << "********************************" << std::endl; - file_hist << "nodes to build: " << nodes_to_build.size() << std::endl; - //iterate through the nodes to build - for (auto i = 0; i < nodes_to_build.size(); i++) { - auto hist = this->hist_[nodes_to_build[i]]; - auto hist_size = hist.size(); - file_hist << "node " << i << " hist_size: " << hist_size << std::endl; - // get item with iterator - size_t j = 0; - for (auto it = hist.begin(); it != hist.end(); it++) { - if ((j < 10) || ((j>2000)&&(j<2010))) { - file_hist << "hist_item " << j << ": " << *it << std::endl; - } - j++; - } - } - file_hist.close(); - } -*/ - - - - - - - - - - - - - - - - + } From d91be10dbce6d42a28469f3d09ac1b48732d65e0 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 23 Feb 2024 10:06:25 -0500 Subject: [PATCH 14/55] identify the HE adding 
locations --- src/collective/aggregator.h | 4 +- src/tree/hist/histogram.h | 67 ++++++++++++++++++++++++++++--- src/tree/updater_quantile_hist.cc | 12 +++++- 3 files changed, 75 insertions(+), 8 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 472e53c2b617..fe1ce3ed1f07 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -155,9 +155,9 @@ void ApplyWithLabelsEncrypted(MetaInfo const& info, HostDeviceVector* result, } } // print 1 sample - std::cout << " g[0]: " << result_vector_g[0] << " h[0]: " << result_vector_h[0] << std::endl; + //std::cout << " g[0]: " << result_vector_g[0] << " h[0]: " << result_vector_h[0] << std::endl; // print max and min - std::cout << "max_g: " << max_g << " min_g: " << min_g << " max_h: " << max_h << " min_h: " << min_h << std::endl; + //std::cout << "max_g: " << max_g << " min_g: " << min_g << " max_h: " << max_h << " min_h: " << min_h << std::endl; } result->Resize(size); diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 7a007caa4085..b543683a0fb4 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,7 +76,29 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { + + + if ((collective::GetRank() == 1)) { + std::cout << "Current samples on nodes: " << std::endl; + // print info on all nodes + for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { + auto size = row_set_collection[nit].Size(); + std::cout << "Node " << nit << " has " << size << " rows." << std::endl; + } + + + for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { + std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." 
<< std::endl; + } + std::cout << std::endl; + + } + + + + // Parallel processing by nodes and data in each node + bool print_once = true; common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { const auto tid = static_cast(omp_get_thread_num()); bst_node_t const nidx = nodes_to_build[nid_in_set]; @@ -86,6 +108,19 @@ class HistogramBuilder { auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set, elem.begin + end_of_row_set, nidx); auto hist = buffer_.GetInitializedHist(tid, nid_in_set); + + // print info + //if ((collective::GetRank() == 0) && print_once ) { + //std::cout << "Sample of row set for node " << nidx << ": "; + //std::cout << "Size: " << row_set_collection[nidx].Size() << ", "; + //for (auto i = 0; i < 10; i++) { + // std::cout << rid_set.begin[i] << ", "; + //} + //std::cout << std::endl; + //print_once = false; + //} + + if (rid_set.Size() != 0) { common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); } @@ -156,6 +191,11 @@ class HistogramBuilder { if (page_idx == 0) { // Add the local histogram cache to the parallel buffer before processing the first page. 
auto n_nodes = nodes_to_build.size(); + + if ((collective::GetRank() == 0)) { + std::cout << "Building histogram for " << n_nodes << " nodes" << std::endl; + } + std::vector target_hists(n_nodes); for (size_t i = 0; i < n_nodes; ++i) { auto const nidx = nodes_to_build[i]; @@ -213,19 +253,19 @@ class HistogramBuilder { std::vector hist_flat; hist_flat.resize(n); // iterate through the nodes_to_build - std::cout << "nodes_to_build.size() = " << nodes_to_build.size() << std::endl; + //std::cout << "nodes_to_build.size() = " << nodes_to_build.size() << std::endl; // front pointer auto it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); auto hist_size = this->hist_[nodes_to_build.front()].size(); - std::cout<< "n=" << n << std::endl; - std::cout << "hist_size = " << hist_size << std::endl; + //std::cout<< "n=" << n << std::endl; + //std::cout << "hist_size = " << hist_size << std::endl; for (size_t i = 0; i < n; i++) { // get item with iterator auto item = *it; hist_flat[i] = item; it++; } - std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; + //std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; // Perform AllGather auto hist_entries = collective::Allgather(hist_flat); @@ -233,7 +273,7 @@ class HistogramBuilder { // Update histogram for data owner if (collective::GetRank() == 0) { // skip rank 0, as local hist already contains its own entries - std::cout << "hist_entries.size() = " << hist_entries.size() << std::endl; + //std::cout << "hist_entries.size() = " << hist_entries.size() << std::endl; // reposition iterator to the beginning of the vector it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { @@ -317,6 +357,10 @@ class MultiHistogramBuilder { linalg::MatrixView gpair, ExpandEntry const &best, BatchParam const ¶m, bool force_read_by_column = false) { auto n_targets = p_tree->NumTargets(); + + + std::cout << "Root n_targets = " 
<< n_targets << std::endl; + CHECK_EQ(gpair.Shape(1), n_targets); CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0)); CHECK_EQ(target_builders_.size(), n_targets); @@ -357,6 +401,16 @@ class MultiHistogramBuilder { std::vector nodes_to_sub(valid_candidates.size()); AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub); + + // print index for nodes_to_build and nodes_to_sub + if (collective::GetRank() == 0) { + for (int i = 0; i < nodes_to_build.size(); i++) { + std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; "; + std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl; + } + } + + // use the first builder for getting number of valid nodes. target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true); CHECK_GE(nodes_to_build.size(), nodes_to_sub.size()); @@ -373,6 +427,9 @@ class MultiHistogramBuilder { CHECK_EQ(gpair.Shape(1), p_tree->NumTargets()); for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); + if (collective::GetRank() == 0) { + std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl; + } CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_); this->target_builders_[t].BuildHist(page_idx, space, page, partitioners[page_idx].Partitions(), nodes_to_build, diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 2403aa8a6bdd..28407c7659b1 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -385,7 +385,15 @@ class HistUpdater { monitor_->Start(__func__); CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0)); - this->histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, node, + + + + std::cout<<"InitRoot: --------------------------------------"<histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, node, HistBatch(param_)); { @@ -439,6 +447,8 @@ class HistUpdater { std::vector const &valid_candidates, linalg::MatrixView 
gpair) { monitor_->Start(__func__); + + std::cout << "BuildHistogram: --------------------------------------" << std::endl; this->histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree, partitioner_, valid_candidates, gpair, HistBatch(param_)); monitor_->Stop(__func__); From dd60317fe7a4432a22174ef9e0a9a2a19e256977 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 6 Mar 2024 18:04:59 -0500 Subject: [PATCH 15/55] revise and simplify template code --- CMakeCache.txt | 38 +++++++++++++ CMakeFiles/clion-log.txt | 4 ++ CMakeFiles/cmake.check_cache | 1 + src/collective/aggregator.h | 100 +++++++++++------------------------ src/learner.cc | 12 ++--- 5 files changed, 77 insertions(+), 78 deletions(-) create mode 100644 CMakeCache.txt create mode 100644 CMakeFiles/clion-log.txt create mode 100644 CMakeFiles/cmake.check_cache diff --git a/CMakeCache.txt b/CMakeCache.txt new file mode 100644 index 000000000000..8b7711cf0afd --- /dev/null +++ b/CMakeCache.txt @@ -0,0 +1,38 @@ +# This is the CMakeCache file. +# For build in directory: /media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. 
+ +######################## +# EXTERNAL cache entries +######################## + + +######################## +# INTERNAL cache entries +######################## + +//This is the directory where this CMakeCache.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=22 +//Patch version of cmake used to create the current loaded cache +CMAKE_CACHE_PATCH_VERSION:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. +CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//Path to CMake installation. +CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.22 + diff --git a/CMakeFiles/clion-log.txt b/CMakeFiles/clion-log.txt new file mode 100644 index 000000000000..6c56fd32b6ab --- /dev/null +++ b/CMakeFiles/clion-log.txt @@ -0,0 +1,4 @@ +Cannot generate into /media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 +It is already used for unknown project + +Please either delete it manually or select another generation directory diff --git a/CMakeFiles/cmake.check_cache b/CMakeFiles/cmake.check_cache new file mode 100644 index 000000000000..3dccd731726d --- /dev/null +++ b/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index fe1ce3ed1f07..ca6b68ee61f7 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -69,7 +69,7 @@ void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&& * @param result The HostDeviceVector storing the results. 
* @param function The function used to calculate the results. */ -template +template void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function&& function) { if (info.IsVerticalFederated()) { // We assume labels are only available on worker 0, so the calculation is done there and result @@ -91,84 +91,46 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function std::size_t size{}; if (collective::GetRank() == 0) { - size = result->Size(); + size = result->Size(); } collective::Broadcast(&size, sizeof(std::size_t), 0); - result->Resize(size); - collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); - } else { - std::forward(function)(); - } -} - -// Same as above, but with encyption on the result -template -void ApplyWithLabelsEncrypted(MetaInfo const& info, HostDeviceVector* result, Function&& function) { - if (info.IsVerticalFederated()) { - // We assume labels are only available on worker 0, so the calculation is done there and result - // broadcast to other workers. 
- std::string message; - if (collective::GetRank() == 0) { - try { - std::forward(function)(); - } catch (dmlc::Error& e) { - message = e.what(); - } - } - - collective::Broadcast(&message, 0); - if (!message.empty()) { - LOG(FATAL) << &message[0]; - return; - } - - std::size_t size{}; - if (collective::GetRank() == 0) { - size = result->Size(); - } - collective::Broadcast(&size, sizeof(std::size_t), 0); - - // save to vector and encrypt - if (collective::GetRank() == 0) { - // check the max and min value of the result vector - float max_g = std::numeric_limits::min(); - float min_g = std::numeric_limits::max(); - float max_h = std::numeric_limits::min(); - float min_h = std::numeric_limits::max(); - std::vector result_vector_g, result_vector_h; + if (info.IsSecure() && is_gpair) { + // Under secure mode, gpairs will be processed to vector and encrypt + // information only available on rank 0 + if (collective::GetRank() == 0) { + std::vector vector_g, vector_h; for (int i = 0; i < size; i++) { - result_vector_g.push_back(result->HostVector()[i].GetGrad()); - result_vector_h.push_back(result->HostVector()[i].GetHess()); - - if (result->HostVector()[i].GetGrad() > max_g) { - max_g = result->HostVector()[i].GetGrad(); - } - if (result->HostVector()[i].GetGrad() < min_g) { - min_g = result->HostVector()[i].GetGrad(); - } - if (result->HostVector()[i].GetHess() > max_h) { - max_h = result->HostVector()[i].GetHess(); - } - if (result->HostVector()[i].GetHess() < min_h) { - min_h = result->HostVector()[i].GetHess(); - } + auto gpair = result->HostVector()[i]; + // cast from GradientPair to float pointer + auto gpair_ptr = reinterpret_cast(&gpair); + // save to vector + vector_g.push_back(gpair_ptr[0]); + vector_h.push_back(gpair_ptr[1]); } - // print 1 sample - //std::cout << " g[0]: " << result_vector_g[0] << " h[0]: " << result_vector_h[0] << std::endl; - // print max and min - //std::cout << "max_g: " << max_g << " min_g: " << min_g << " max_h: " << max_h << " min_h: " 
<< min_h << std::endl; + // provide the vectors to the processor interface + + } + // broadcast the encrypted data + result->Resize(size); + collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + } else { + // clear text mode + result->Resize(size); + collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } - result->Resize(size); - collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + /* // print 1 sample - std::cout << "Rank: " << collective::GetRank() << " after broadcast - g: " << result->HostVector()[0].GetGrad() << " h: " << result->HostVector()[0].GetHess() << std::endl; - - + if (is_gpair) { + std::cout << "Rank: " << collective::GetRank() << " after broadcast - g: " + << reinterpret_cast(&result->HostVector()[0])[0] << " h: " + << reinterpret_cast(&result->HostVector()[0])[1] << std::endl; + } + */ } else { - std::forward(function)(); + std::forward(function)(); } } diff --git a/src/learner.cc b/src/learner.cc index 93f73adb7688..09c0b3f99afc 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -846,7 +846,7 @@ class LearnerConfiguration : public Learner { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) { base_score->Reshape(1); - collective::ApplyWithLabels(info, base_score->Data(), + collective::ApplyWithLabels(info, base_score->Data(), [&] { UsePtr(obj_)->InitEstimation(info, base_score); }); } }; @@ -1472,15 +1472,9 @@ class LearnerImpl : public LearnerIO { void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, std::int32_t iter, linalg::Matrix* out_gpair) { out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength()); - // calculate gradient and communicate with or without encryption - if (info.IsSecure()) { - collective::ApplyWithLabelsEncrypted(info, out_gpair->Data(), + // calculate gradient and communicate + collective::ApplyWithLabels(info, out_gpair->Data(), [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); - } else { - 
collective::ApplyWithLabels(info, out_gpair->Data(), - [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); - } - } /*! \brief random number transformation seed. */ From 8da824c0f8caece8d735a7122ca94a7330e3e1b1 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 6 Mar 2024 18:05:54 -0500 Subject: [PATCH 16/55] revise and simplify template code --- CMakeCache.txt | 38 ------------------------------------ CMakeFiles/clion-log.txt | 4 ---- CMakeFiles/cmake.check_cache | 1 - 3 files changed, 43 deletions(-) delete mode 100644 CMakeCache.txt delete mode 100644 CMakeFiles/clion-log.txt delete mode 100644 CMakeFiles/cmake.check_cache diff --git a/CMakeCache.txt b/CMakeCache.txt deleted file mode 100644 index 8b7711cf0afd..000000000000 --- a/CMakeCache.txt +++ /dev/null @@ -1,38 +0,0 @@ -# This is the CMakeCache file. -# For build in directory: /media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 -# It was generated by CMake: /usr/bin/cmake -# You can edit this file to change values found and used by cmake. -# If you do not want to change any of the values, simply exit the editor. -# If you do want to change a value, simply edit, save, and exit the editor. -# The syntax for the file is as follows: -# KEY:TYPE=VALUE -# KEY is the name of a variable in the cache. -# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. -# VALUE is the current value for the KEY. 
- -######################## -# EXTERNAL cache entries -######################## - - -######################## -# INTERNAL cache entries -######################## - -//This is the directory where this CMakeCache.txt was created -CMAKE_CACHEFILE_DIR:INTERNAL=/media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 -//Major version of cmake used to create the current loaded cache -CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 -//Minor version of cmake used to create the current loaded cache -CMAKE_CACHE_MINOR_VERSION:INTERNAL=22 -//Patch version of cmake used to create the current loaded cache -CMAKE_CACHE_PATCH_VERSION:INTERNAL=1 -//Path to CMake executable. -CMAKE_COMMAND:INTERNAL=/usr/bin/cmake -//Path to cpack program executable. -CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack -//Path to ctest program executable. -CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest -//Path to CMake installation. -CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.22 - diff --git a/CMakeFiles/clion-log.txt b/CMakeFiles/clion-log.txt deleted file mode 100644 index 6c56fd32b6ab..000000000000 --- a/CMakeFiles/clion-log.txt +++ /dev/null @@ -1,4 +0,0 @@ -Cannot generate into /media/ziyuexu/Research/Experiment/SecureFedXGBoost/XGBoost/xgboost_SecureBoostP2 -It is already used for unknown project - -Please either delete it manually or select another generation directory diff --git a/CMakeFiles/cmake.check_cache b/CMakeFiles/cmake.check_cache deleted file mode 100644 index 3dccd731726d..000000000000 --- a/CMakeFiles/cmake.check_cache +++ /dev/null @@ -1 +0,0 @@ -# This file is generated by cmake for dependency checking of the CMakeCache.txt file From fb9f4fa6ceef4065de5c45edc804bddee9605070 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 13 Mar 2024 11:45:15 -0400 Subject: [PATCH 17/55] prepare aggregator for gh broadcast --- src/collective/aggregator.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/collective/aggregator.h 
b/src/collective/aggregator.h index ca6b68ee61f7..ab7764eb4373 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -110,12 +110,26 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function } // provide the vectors to the processor interface + + + + + + + + + + } - // broadcast the encrypted data + // make broadcast call on the prepared data buffer + // (to local gRPC handler for further encryption) + + //collective::Broadcast(gh_buffer, size_of_buffer, 0); + result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } else { - // clear text mode + // clear text mode, broadcast the data directly result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } From e77f8c65ca9245faca99c58aa2fc845a8a33bab7 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 14 Mar 2024 16:55:24 -0400 Subject: [PATCH 18/55] prepare histogram for histindex and row index for allgather --- src/collective/aggregator.h | 13 +++++++----- src/learner.cc | 8 +++++++- src/tree/hist/histogram.h | 34 ++++++++++++++++++++++++++++--- src/tree/updater_quantile_hist.cc | 11 +--------- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index ab7764eb4373..aad8b54bbf4d 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -99,18 +99,21 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 if (collective::GetRank() == 0) { - std::vector vector_g, vector_h; + std::vector vector_gh; for (int i = 0; i < size; i++) { auto gpair = result->HostVector()[i]; // cast from GradientPair to float pointer auto gpair_ptr = reinterpret_cast(&gpair); // save to vector - vector_g.push_back(gpair_ptr[0]); - vector_h.push_back(gpair_ptr[1]); + vector_gh.push_back(gpair_ptr[0]); + 
vector_gh.push_back(gpair_ptr[1]); } // provide the vectors to the processor interface + // print vector size for rank 1 - + if (collective::GetRank() == 0) { + std::cout << "DATA size of gpairs: " << vector_gh.size() << std::endl; + } @@ -125,7 +128,7 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector* result, Function // (to local gRPC handler for further encryption) //collective::Broadcast(gh_buffer, size_of_buffer, 0); - + result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } else { diff --git a/src/learner.cc b/src/learner.cc index 09c0b3f99afc..17909e4bc093 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -350,7 +350,6 @@ struct LearnerTrainParam : public XGBoostParameter { } }; - DMLC_REGISTER_PARAMETER(LearnerModelParamLegacy); DMLC_REGISTER_PARAMETER(LearnerTrainParam); @@ -493,6 +492,13 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); + + + + std::cout<<"configure interface here?????????????????????????"<need_configuration_ = false; if (ctx_.validate_parameters) { this->ValidateParameters(); diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index b543683a0fb4..a031ccf4101e 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -78,25 +78,50 @@ class HistogramBuilder { common::Span gpair_h, bool force_read_by_column) { - if ((collective::GetRank() == 1)) { + + + + + + if ((collective::GetRank() == 0)) { std::cout << "Current samples on nodes: " << std::endl; // print info on all nodes for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { auto size = row_set_collection[nit].Size(); std::cout << "Node " << nit << " has " << size << " rows." 
<< std::endl; + // print the first and last indexes of the rows with iterator + if (size > 0) { + std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl; + } } + std::cout << std::endl; - + // print info on the nodes to build for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl; } std::cout << std::endl; + std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl; + std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl; + auto cut_ptrs = gidx.Cuts().Ptrs(); + //auto cut_values = gidx.Cuts().Values(); + //std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl; + std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl; + for (auto i = 0; i < cut_ptrs.size()-1; ++i) { + // std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; "; + std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] "; + } + std::cout << std::endl; } + + + + // Parallel processing by nodes and data in each node bool print_once = true; common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { @@ -246,7 +271,6 @@ class HistogramBuilder { // reinterpret_cast(this->hist_[first_nidx].data()), n); - // Option 2: use AllGather instead of AllReduce // Collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector @@ -267,6 +291,10 @@ class HistogramBuilder { } //std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; + if (collective::GetRank() == 0) { + std::cout << "---------------------CALL AllGather for node building-------------------- " << 
std::endl; + } + // Perform AllGather auto hist_entries = collective::Allgather(hist_flat); diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 28407c7659b1..69979969890a 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -385,15 +385,7 @@ class HistUpdater { monitor_->Start(__func__); CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0)); - - - - std::cout<<"InitRoot: --------------------------------------"<histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, node, + this->histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, node, HistBatch(param_)); { @@ -448,7 +440,6 @@ class HistUpdater { linalg::MatrixView gpair) { monitor_->Start(__func__); - std::cout << "BuildHistogram: --------------------------------------" << std::endl; this->histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree, partitioner_, valid_candidates, gpair, HistBatch(param_)); monitor_->Stop(__func__); From 8405791e97c9cab3970aa5f4a35148de4ad7d783 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 15 Mar 2024 13:02:55 -0400 Subject: [PATCH 19/55] fix conflicts --- src/collective/aggregator.h | 24 +----- src/common/quantile.cc | 33 -------- src/learner.cc | 8 +- src/tree/hist/histogram.h | 132 +++++++++++------------------- src/tree/updater_quantile_hist.cc | 1 - 5 files changed, 50 insertions(+), 148 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index b628b54d400f..a664361d4282 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -92,7 +92,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* std::size_t size{}; if (collective::GetRank() == 0) { - size = result->Size(); + size = result->Size(); } collective::Broadcast(&size, sizeof(std::size_t), 0); @@ -111,25 +111,13 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* } // provide the vectors to the 
processor interface // print vector size for rank 1 - if (collective::GetRank() == 0) { std::cout << "DATA size of gpairs: " << vector_gh.size() << std::endl; } - - - - - - - - - } // make broadcast call on the prepared data buffer // (to local gRPC handler for further encryption) - //collective::Broadcast(gh_buffer, size_of_buffer, 0); - result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } else { @@ -137,16 +125,6 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } - - - /* - // print 1 sample - if (is_gpair) { - std::cout << "Rank: " << collective::GetRank() << " after broadcast - g: " - << reinterpret_cast(&result->HostVector()[0])[0] << " h: " - << reinterpret_cast(&result->HostVector()[0])[1] << std::endl; - } - */ } else { std::forward(function)(); } diff --git a/src/common/quantile.cc b/src/common/quantile.cc index ecb95bb4016a..49a2594e4a52 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -387,39 +387,6 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b } } -template -void AddCutPointSecure(typename SketchType::SummaryContainer const &summary, int max_bin, - HistogramCuts *cuts) { - // For secure vertical pipeline, we fill the cut values corresponding to empty columns - // with a vector of minimum value - const float mval = 1e-5f; - size_t required_cuts = std::min(summary.size, static_cast(max_bin)); - // make a copy of required_cuts for mode selection - size_t required_cuts_original = required_cuts; - // Sync the required_cuts across all workers - collective::Allreduce(&required_cuts, 1); - - // add the cut points - auto &cut_values = cuts->cut_values_.HostVector(); - // if not empty column, fill the cut values with the actual values - if (required_cuts_original > 0) { - // we use the min_value as the first (0th) element, hence starting from 1. 
- for (size_t i = 1; i < required_cuts; ++i) { - bst_float cpt = summary.data[i].value; - if (i == 1 || cpt > cut_values.back()) { - cut_values.push_back(cpt); - } - } - } - // if empty column, fill the cut values with 0 - else { - for (size_t i = 1; i < required_cuts; ++i) { - cut_values.push_back(0.0); - } - } -} - - auto AddCategories(std::set const &categories, HistogramCuts *cuts) { if (std::any_of(categories.cbegin(), categories.cend(), InvalidCat)) { InvalidCategory(); diff --git a/src/learner.cc b/src/learner.cc index a7325c793cd5..310e16ea7d77 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -350,6 +350,7 @@ struct LearnerTrainParam : public XGBoostParameter { } }; + DMLC_REGISTER_PARAMETER(LearnerModelParamLegacy); DMLC_REGISTER_PARAMETER(LearnerTrainParam); @@ -492,12 +493,7 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); - - - - std::cout<<"configure interface here?????????????????????????"<need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 2d29c103ef60..1a7d156f6956 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,54 +76,39 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { - - - - - - - - if ((collective::GetRank() == 0)) { - std::cout << "Current samples on nodes: " << std::endl; - // print info on all nodes - for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { - auto size = row_set_collection[nit].Size(); - std::cout << "Node " << nit << " has " << size << " rows." 
<< std::endl; - // print the first and last indexes of the rows with iterator - if (size > 0) { - std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl; - } - } - std::cout << std::endl; - - // print info on the nodes to build - for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { - std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl; - } - std::cout << std::endl; - - std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl; - std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl; - auto cut_ptrs = gidx.Cuts().Ptrs(); - //auto cut_values = gidx.Cuts().Values(); - //std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl; - std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl; - for (auto i = 0; i < cut_ptrs.size()-1; ++i) { - // std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; "; - std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] "; - } - std::cout << std::endl; + //Print out all kinds if information for interface integration + if ((collective::GetRank() == 0)) { + std::cout << "Current samples on nodes: " << std::endl; + // print info on all nodes + for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { + auto size = row_set_collection[nit].Size(); + std::cout << "Node " << nit << " has " << size << " rows." 
<< std::endl; + // print the first and last indexes of the rows with iterator + if (size > 0) { + std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl; + } } - - - - - - - + std::cout << std::endl; + // print info on the nodes to build + for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { + std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl; + } + std::cout << std::endl; + std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl; + std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl; + auto cut_ptrs = gidx.Cuts().Ptrs(); + //auto cut_values = gidx.Cuts().Values(); + //std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl; + std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl; + for (auto i = 0; i < cut_ptrs.size()-1; ++i) { + //std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; "; + std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] "; + } + std::cout << std::endl; + } + // Call the interface to transmit the row set collection and gidx to the secure worker // Parallel processing by nodes and data in each node - bool print_once = true; common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { const auto tid = static_cast(omp_get_thread_num()); bst_node_t const nidx = nodes_to_build[nid_in_set]; @@ -133,19 +118,6 @@ class HistogramBuilder { auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set, elem.begin + end_of_row_set, nidx); auto hist = buffer_.GetInitializedHist(tid, nid_in_set); - - // print info - //if ((collective::GetRank() == 0) && print_once ) { - //std::cout << 
"Sample of row set for node " << nidx << ": "; - //std::cout << "Size: " << row_set_collection[nidx].Size() << ", "; - //for (auto i = 0; i < 10; i++) { - // std::cout << rid_set.begin[i] << ", "; - //} - //std::cout << std::endl; - //print_once = false; - //} - - if (rid_set.Size() != 0) { common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); } @@ -216,11 +188,6 @@ class HistogramBuilder { if (page_idx == 0) { // Add the local histogram cache to the parallel buffer before processing the first page. auto n_nodes = nodes_to_build.size(); - - if ((collective::GetRank() == 0)) { - std::cout << "Building histogram for " << n_nodes << " nodes" << std::endl; - } - std::vector target_hists(n_nodes); for (size_t i = 0; i < n_nodes; ++i) { auto const nidx = nodes_to_build[i]; @@ -266,11 +233,11 @@ class HistogramBuilder { // Option 1: in theory the operation is AllGather, but with current system functionality, // we use AllReduce to simulate the AllGather operation - //auto first_nidx = nodes_to_build.front(); - //collective::Allreduce( - // reinterpret_cast(this->hist_[first_nidx].data()), n); - + auto first_nidx = nodes_to_build.front(); + collective::Allreduce( + reinterpret_cast(this->hist_[first_nidx].data()), n); +/* // Option 2: use AllGather instead of AllReduce // Collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector @@ -290,10 +257,9 @@ class HistogramBuilder { it++; } //std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; - - if (collective::GetRank() == 0) { - std::cout << "---------------------CALL AllGather for node building-------------------- " << std::endl; - } + if (collective::GetRank() == 0) { + std::cout << "---------------CALL AllGather for node building-------------- " << std::endl; + } // Perform AllGather auto hist_entries = collective::Allgather(hist_flat); @@ -316,7 +282,7 @@ class HistogramBuilder { } } } - +*/ } @@ -385,10 +351,6 @@ class MultiHistogramBuilder { 
linalg::MatrixView gpair, ExpandEntry const &best, BatchParam const ¶m, bool force_read_by_column = false) { auto n_targets = p_tree->NumTargets(); - - - std::cout << "Root n_targets = " << n_targets << std::endl; - CHECK_EQ(gpair.Shape(1), n_targets); CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0)); CHECK_EQ(target_builders_.size(), n_targets); @@ -429,16 +391,14 @@ class MultiHistogramBuilder { std::vector nodes_to_sub(valid_candidates.size()); AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub); - // print index for nodes_to_build and nodes_to_sub if (collective::GetRank() == 0) { - for (int i = 0; i < nodes_to_build.size(); i++) { - std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; "; - std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl; - } + for (int i = 0; i < nodes_to_build.size(); i++) { + std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; "; + std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl; + } } - // use the first builder for getting number of valid nodes. 
target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true); CHECK_GE(nodes_to_build.size(), nodes_to_sub.size()); @@ -455,9 +415,11 @@ class MultiHistogramBuilder { CHECK_EQ(gpair.Shape(1), p_tree->NumTargets()); for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); - if (collective::GetRank() == 0) { - std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl; - } + + if (collective::GetRank() == 0) { + std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl; + } + CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_); this->target_builders_[t].BuildHist(page_idx, space, page, partitioners[page_idx].Partitions(), nodes_to_build, diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 61c87f3fa5f1..b042f1631f2e 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -440,7 +440,6 @@ class HistUpdater { std::vector const &valid_candidates, linalg::MatrixView gpair) { monitor_->Start(__func__); - this->histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree, partitioner_, valid_candidates, gpair, HistBatch(param_)); monitor_->Stop(__func__); From db7d518210ec808b2741a07e901e401d89b833aa Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 15 Mar 2024 14:03:44 -0400 Subject: [PATCH 20/55] fix conflicts --- src/collective/aggregator.h | 6 +++-- src/learner.cc | 4 ++- src/tree/hist/histogram.h | 50 +++++++++++++++++++++---------------- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index a664361d4282..941a34bc9a83 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -112,12 +112,14 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // provide the vectors to the processor interface // print vector size for rank 1 if (collective::GetRank() == 0) { - std::cout << "DATA size of 
gpairs: " << vector_gh.size() << std::endl; + std::cout << "-----------Call Interface for gp encryption and broadcast" + << ", size of gpairs: " << vector_gh.size() + << " ----------------------" << std::endl; } } // make broadcast call on the prepared data buffer // (to local gRPC handler for further encryption) - //collective::Broadcast(gh_buffer, size_of_buffer, 0); + // collective::Broadcast(gh_buffer, size_of_buffer, 0); result->Resize(size); collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); } else { diff --git a/src/learner.cc b/src/learner.cc index 310e16ea7d77..f287b9dddff0 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -493,7 +493,9 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); - std::cout<<"configure interface here???????????????"<need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 1a7d156f6956..027b7a776492 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,8 +76,9 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { - //Print out all kinds if information for interface integration + // Print out all kinds if information for interface integration if ((collective::GetRank() == 0)) { + std::cout << "--------------Node Hist----------------" << std::endl; std::cout << "Current samples on nodes: " << std::endl; // print info on all nodes for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { @@ -85,28 +86,36 @@ class HistogramBuilder { std::cout << "Node " << nit << " has " << size << " rows." 
<< std::endl; // print the first and last indexes of the rows with iterator if (size > 0) { - std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl; + std::cout << "First index for node " << nit << " is " + << *row_set_collection[nit].begin << " and last index is " + << *(row_set_collection[nit].end - 1) << std::endl; } } - std::cout << std::endl; // print info on the nodes to build for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { - std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl; + std::cout << "Building local histogram for node ID: " << *nit + << " with " << row_set_collection[*nit].Size() + << " samples." << std::endl; } - std::cout << std::endl; - std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl; - std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl; + std::cout << "GHistIndexMatrix will not change with size " << gidx.index.Size() << std::endl; auto cut_ptrs = gidx.Cuts().Ptrs(); - //auto cut_values = gidx.Cuts().Values(); - //std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl; - std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl; + auto cut_values = gidx.Cuts().Values(); + // cut points: feature 0 start (0), feature 1 start, feature 2 start, ... feature n start + // cut value: cut for feature 0 slot 0, ..., cut for feature 0 slot m, cut for feature 1 slot 0, ... 
+ std::cout << "size of the cut points and cut values: " + << cut_ptrs.size() << " " << cut_values.size() << std::endl; + std::cout << "first sample falls to: [feature_id, slot #, slot cutValue]: " << std::endl; for (auto i = 0; i < cut_ptrs.size()-1; ++i) { - //std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; "; - std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] "; + auto slot_number = gidx.GetGindex(0, i); + std::cout << "[" << i << ", " << slot_number << ", "<< cut_values[slot_number] << "] "; } std::cout << std::endl; + std::cout << "------------------------------" << std::endl; } // Call the interface to transmit the row set collection and gidx to the secure worker + if ((collective::GetRank() == 0)) { + std::cout << "---------------CALL interface to transmit the row and gidx------------" << std::endl; + } // Parallel processing by nodes and data in each node common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { @@ -233,11 +242,11 @@ class HistogramBuilder { // Option 1: in theory the operation is AllGather, but with current system functionality, // we use AllReduce to simulate the AllGather operation - auto first_nidx = nodes_to_build.front(); - collective::Allreduce( - reinterpret_cast(this->hist_[first_nidx].data()), n); + //auto first_nidx = nodes_to_build.front(); + //collective::Allreduce( + // reinterpret_cast(this->hist_[first_nidx].data()), n); + -/* // Option 2: use AllGather instead of AllReduce // Collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector @@ -263,7 +272,10 @@ class HistogramBuilder { // Perform AllGather auto hist_entries = collective::Allgather(hist_flat); - + // Call interface here to post-process the messages + if (collective::GetRank() == 0) { + std::cout << "---------------CALL Interface for post processing-------------- " << std::endl; + } // Update histogram 
for data owner if (collective::GetRank() == 0) { // skip rank 0, as local hist already contains its own entries @@ -274,7 +286,6 @@ class HistogramBuilder { // iterate through the flat vector for (size_t i = 0; i < n; i++) { auto flat_idx = rank_idx * n + i; - // DECRYPT the received entries HERE!!!!!!!!! auto hist_item = hist_entries[flat_idx]; // update the global histogram with the received entries *it += hist_item; @@ -282,9 +293,6 @@ class HistogramBuilder { } } } -*/ - - } common::BlockedSpace2d const &subspace = From dd6adde6c9709db02ef080457b5b44ca2543d3f6 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 15 Mar 2024 14:10:56 -0400 Subject: [PATCH 21/55] fix format --- src/tree/hist/histogram.h | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 027b7a776492..ae69e41d850e 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -101,7 +101,7 @@ class HistogramBuilder { auto cut_ptrs = gidx.Cuts().Ptrs(); auto cut_values = gidx.Cuts().Values(); // cut points: feature 0 start (0), feature 1 start, feature 2 start, ... feature n start - // cut value: cut for feature 0 slot 0, ..., cut for feature 0 slot m, cut for feature 1 slot 0, ... + // cut value: cut for feature 0 slot 0, ..., feature 0 slot m, feature 1 slot 0, ... 
std::cout << "size of the cut points and cut values: " << cut_ptrs.size() << " " << cut_values.size() << std::endl; std::cout << "first sample falls to: [feature_id, slot #, slot cutValue]: " << std::endl; @@ -114,7 +114,7 @@ class HistogramBuilder { } // Call the interface to transmit the row set collection and gidx to the secure worker if ((collective::GetRank() == 0)) { - std::cout << "---------------CALL interface to transmit the row and gidx------------" << std::endl; + std::cout << "---------------CALL interface to transmit row & gidx------------" << std::endl; } // Parallel processing by nodes and data in each node @@ -240,46 +240,29 @@ class HistogramBuilder { CHECK(!nodes_to_build.empty()); std::size_t n = n_total_bins * nodes_to_build.size() * 2; - // Option 1: in theory the operation is AllGather, but with current system functionality, - // we use AllReduce to simulate the AllGather operation - //auto first_nidx = nodes_to_build.front(); - //collective::Allreduce( - // reinterpret_cast(this->hist_[first_nidx].data()), n); - - - // Option 2: use AllGather instead of AllReduce - // Collect the histogram entries from all nodes + // Use AllGather to collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector std::vector hist_flat; hist_flat.resize(n); // iterate through the nodes_to_build - //std::cout << "nodes_to_build.size() = " << nodes_to_build.size() << std::endl; - // front pointer auto it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); auto hist_size = this->hist_[nodes_to_build.front()].size(); - //std::cout<< "n=" << n << std::endl; - //std::cout << "hist_size = " << hist_size << std::endl; for (size_t i = 0; i < n; i++) { // get item with iterator auto item = *it; hist_flat[i] = item; it++; } - //std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl; - if (collective::GetRank() == 0) { - std::cout << "---------------CALL AllGather for node building-------------- " << 
std::endl; - } // Perform AllGather auto hist_entries = collective::Allgather(hist_flat); // Call interface here to post-process the messages if (collective::GetRank() == 0) { - std::cout << "---------------CALL Interface for post processing-------------- " << std::endl; + std::cout << "---------------CALL Interface for processing-------------- " << std::endl; } // Update histogram for data owner if (collective::GetRank() == 0) { // skip rank 0, as local hist already contains its own entries - //std::cout << "hist_entries.size() = " << hist_entries.size() << std::endl; // reposition iterator to the beginning of the vector it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { From 9567e67cfc9f346555c4aee589eb68167c7983c1 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 19 Mar 2024 17:13:05 -0400 Subject: [PATCH 22/55] fix allgather logic and update unit test --- src/collective/aggregator.h | 2 + src/tree/hist/histogram.h | 53 ++++++++++++++++----------- tests/cpp/tree/hist/test_histogram.cc | 14 ++++++- 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 941a34bc9a83..32aa0f42170f 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -97,6 +97,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* collective::Broadcast(&size, sizeof(std::size_t), 0); if (info.IsSecure() && is_gpair) { + // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 if (collective::GetRank() == 0) { @@ -120,6 +121,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // make broadcast call on the prepared data buffer // (to local gRPC handler for further encryption) // collective::Broadcast(gh_buffer, size_of_buffer, 0); + result->Resize(size); collective::Broadcast(result->HostPointer(), size * 
sizeof(T), 0); } else { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index ae69e41d850e..681e3e9cc142 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,8 +76,9 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { + // Print out all kinds if information for interface integration - if ((collective::GetRank() == 0)) { + if (is_distributed_ && is_col_split_ && is_secure_ && (collective::GetRank() == 0)) { std::cout << "--------------Node Hist----------------" << std::endl; std::cout << "Current samples on nodes: " << std::endl; // print info on all nodes @@ -111,10 +112,10 @@ class HistogramBuilder { } std::cout << std::endl; std::cout << "------------------------------" << std::endl; - } - // Call the interface to transmit the row set collection and gidx to the secure worker - if ((collective::GetRank() == 0)) { - std::cout << "---------------CALL interface to transmit row & gidx------------" << std::endl; + // Call the interface to transmit the row set collection and gidx to the secure worker + if ((collective::GetRank() == 0)) { + std::cout << "---------------CALL interface to transmit row & gidx------------" << std::endl; + } } // Parallel processing by nodes and data in each node @@ -238,19 +239,22 @@ class HistogramBuilder { // Under secure vertical mode, we perform allgather to get the global histogram. 
// note that only Label Owner needs the global histogram CHECK(!nodes_to_build.empty()); + + // Front item of nodes_to_build + auto first_nidx = nodes_to_build.front(); + // *2 because we have a pair of g and h for each histogram item std::size_t n = n_total_bins * nodes_to_build.size() * 2; // Use AllGather to collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector std::vector hist_flat; - hist_flat.resize(n); // iterate through the nodes_to_build - auto it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); - auto hist_size = this->hist_[nodes_to_build.front()].size(); + auto it = reinterpret_cast(this->hist_[first_nidx].data()); + auto hist_size = this->hist_[first_nidx].size(); for (size_t i = 0; i < n; i++) { // get item with iterator - auto item = *it; - hist_flat[i] = item; + double item = *it; + hist_flat.push_back(item); it++; } @@ -260,22 +264,27 @@ class HistogramBuilder { if (collective::GetRank() == 0) { std::cout << "---------------CALL Interface for processing-------------- " << std::endl; } - // Update histogram for data owner + + // Update histogram for label owner if (collective::GetRank() == 0) { - // skip rank 0, as local hist already contains its own entries - // reposition iterator to the beginning of the vector - it = reinterpret_cast(this->hist_[nodes_to_build.front()].data()); - for (auto rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { - // iterate through the flat vector - for (size_t i = 0; i < n; i++) { - auto flat_idx = rank_idx * n + i; - auto hist_item = hist_entries[flat_idx]; - // update the global histogram with the received entries - *it += hist_item; - it++; + // iterator of the beginning of the vector + auto it = reinterpret_cast(this->hist_[first_nidx].data()); + // iterate through the hist vector of the label owner + for (size_t i = 0; i < n; i++) { + // skip rank 0, as local hist already contains its own entries + // get the sum of the entries from 
other ranks + double hist_sum = 0.0; + for (int rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { + int flat_idx = rank_idx * n + i; + hist_sum += hist_entries.at(flat_idx); } + // add other parties' sum to rank 0's record + // to get the global histogram + *it += hist_sum; + it++; } } + } common::BlockedSpace2d const &subspace = diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 76428d1d83b4..eea6b1d6a434 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -293,8 +293,18 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ collective::Allreduce(&grad, 1); collective::Allreduce(&hess, 1); } - ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); - ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); + if (is_distributed && !is_col_split) { + // row split, all party holds the same data + ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); + ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); + } + if (is_distributed && is_col_split && is_secure) { + // secure col split, only rank 0 holds the global histogram + if (collective::GetRank() == 0) { + ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); + ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); + } + } } } From 53800f2644054d1ec4f9c11f106d5a1f649cb58b Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 19 Mar 2024 17:19:21 -0400 Subject: [PATCH 23/55] fix linting --- src/collective/aggregator.h | 1 - src/tree/hist/histogram.h | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 32aa0f42170f..255ad7abe1ce 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -97,7 +97,6 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* collective::Broadcast(&size, sizeof(std::size_t), 
0); if (info.IsSecure() && is_gpair) { - // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 if (collective::GetRank() == 0) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 681e3e9cc142..96d1d665bbc2 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,7 +76,6 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { - // Print out all kinds if information for interface integration if (is_distributed_ && is_col_split_ && is_secure_ && (collective::GetRank() == 0)) { std::cout << "--------------Node Hist----------------" << std::endl; @@ -105,7 +104,7 @@ class HistogramBuilder { // cut value: cut for feature 0 slot 0, ..., feature 0 slot m, feature 1 slot 0, ... std::cout << "size of the cut points and cut values: " << cut_ptrs.size() << " " << cut_values.size() << std::endl; - std::cout << "first sample falls to: [feature_id, slot #, slot cutValue]: " << std::endl; + std::cout << "first sample falls to: [feature_id, slot #, cutValue]: " << std::endl; for (auto i = 0; i < cut_ptrs.size()-1; ++i) { auto slot_number = gidx.GetGindex(0, i); std::cout << "[" << i << ", " << slot_number << ", "<< cut_values[slot_number] << "] "; @@ -284,9 +283,7 @@ class HistogramBuilder { it++; } } - } - common::BlockedSpace2d const &subspace = nodes_to_trick.size() == nodes_to_build.size() ? 
space From b7e70f1e4b70ecd40d233465e7292983fe55a543 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 20 Mar 2024 11:01:25 -0400 Subject: [PATCH 24/55] fix linting and other unit test issues --- src/tree/hist/histogram.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 96d1d665bbc2..1d0aeb92ace4 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -81,14 +81,14 @@ class HistogramBuilder { std::cout << "--------------Node Hist----------------" << std::endl; std::cout << "Current samples on nodes: " << std::endl; // print info on all nodes - for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) { - auto size = row_set_collection[nit].Size(); - std::cout << "Node " << nit << " has " << size << " rows." << std::endl; + for (std::size_t nid = 0; nid < row_set_collection.Size(); ++nid) { + auto size = row_set_collection[nid].Size(); + std::cout << "Node " << nid << " has " << size << " rows." 
<< std::endl; // print the first and last indexes of the rows with iterator if (size > 0) { - std::cout << "First index for node " << nit << " is " - << *row_set_collection[nit].begin << " and last index is " - << *(row_set_collection[nit].end - 1) << std::endl; + std::cout << "First index for node " << nid << " is " + << *row_set_collection[nid].begin << " and last index is " + << *(row_set_collection[nid].end - 1) << std::endl; } } // print info on the nodes to build @@ -105,7 +105,7 @@ class HistogramBuilder { std::cout << "size of the cut points and cut values: " << cut_ptrs.size() << " " << cut_values.size() << std::endl; std::cout << "first sample falls to: [feature_id, slot #, cutValue]: " << std::endl; - for (auto i = 0; i < cut_ptrs.size()-1; ++i) { + for (std::size_t i = 0; i < cut_ptrs.size()-1; ++i) { auto slot_number = gidx.GetGindex(0, i); std::cout << "[" << i << ", " << slot_number << ", "<< cut_values[slot_number] << "] "; } @@ -113,7 +113,7 @@ class HistogramBuilder { std::cout << "------------------------------" << std::endl; // Call the interface to transmit the row set collection and gidx to the secure worker if ((collective::GetRank() == 0)) { - std::cout << "---------------CALL interface to transmit row & gidx------------" << std::endl; + std::cout << "------------CALL interface to transmit row & gidx---------" << std::endl; } } @@ -249,7 +249,6 @@ class HistogramBuilder { std::vector hist_flat; // iterate through the nodes_to_build auto it = reinterpret_cast(this->hist_[first_nidx].data()); - auto hist_size = this->hist_[first_nidx].size(); for (size_t i = 0; i < n; i++) { // get item with iterator double item = *it; @@ -273,7 +272,7 @@ class HistogramBuilder { // skip rank 0, as local hist already contains its own entries // get the sum of the entries from other ranks double hist_sum = 0.0; - for (int rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) { + for (std::size_t rank_idx = 1; rank_idx < hist_entries.size()/n; rank_idx++) 
{ int flat_idx = rank_idx * n + i; hist_sum += hist_entries.at(flat_idx); } @@ -390,7 +389,7 @@ class MultiHistogramBuilder { // print index for nodes_to_build and nodes_to_sub if (collective::GetRank() == 0) { - for (int i = 0; i < nodes_to_build.size(); i++) { + for (std::size_t i = 0; i < nodes_to_build.size(); i++) { std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; "; std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl; } From 49e8fd69a52e9bb12d4b875d4856906e979891b6 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 20 Mar 2024 11:17:10 -0400 Subject: [PATCH 25/55] fix linting and other unit test issues --- src/collective/aggregator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 255ad7abe1ce..3a38c9866141 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -101,7 +101,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // information only available on rank 0 if (collective::GetRank() == 0) { std::vector vector_gh; - for (int i = 0; i < size; i++) { + for (std::size_t i = 0; i < size; i++) { auto gpair = result->HostVector()[i]; // cast from GradientPair to float pointer auto gpair_ptr = reinterpret_cast(&gpair); From da0f7a6dc5befa1f2a87a37c5827293c9581d83a Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 22 Mar 2024 18:14:09 -0400 Subject: [PATCH 26/55] integration with interface initial attempt --- src/collective/aggregator.h | 61 +++++++++--- src/learner.cc | 10 +- src/processing/plugins/dummy_processor.cc | 104 +++++++++++++++++++++ src/processing/plugins/dummy_processor.h | 44 +++++++++ src/processing/processor.h | 108 ++++++++++++++++++++++ src/processing/processor_loader.cc | 62 +++++++++++++ src/tree/hist/histogram.h | 44 ++++++--- 7 files changed, 406 insertions(+), 27 deletions(-) create mode 100644 src/processing/plugins/dummy_processor.cc create mode 100644 
src/processing/plugins/dummy_processor.h create mode 100644 src/processing/processor.h create mode 100644 src/processing/processor_loader.cc diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 3a38c9866141..2bcc35f842bb 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -14,6 +14,7 @@ #include "communicator-inl.h" #include "xgboost/collective/result.h" // for Result #include "xgboost/data.h" // for MetaINfo +#include "../processing/processor.h" // for Processor namespace xgboost::collective { @@ -99,6 +100,18 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* if (info.IsSecure() && is_gpair) { // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 + + xgboost::processing::ProcessorLoader loader; + auto processor = loader.load("dummy"); + if (collective::GetRank() == 0) { + processor->Initialize(true, {}); + } else { + processor->Initialize(false, {}); + } + + std::size_t buffer_size{}; + std::int8_t *buffer; + //common::Span buffer; if (collective::GetRank() == 0) { std::vector vector_gh; for (std::size_t i = 0; i < size; i++) { @@ -109,20 +122,46 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* vector_gh.push_back(gpair_ptr[0]); vector_gh.push_back(gpair_ptr[1]); } - // provide the vectors to the processor interface - // print vector size for rank 1 - if (collective::GetRank() == 0) { - std::cout << "-----------Call Interface for gp encryption and broadcast" - << ", size of gpairs: " << vector_gh.size() - << " ----------------------" << std::endl; + // provide the vectors to the processor interface + // print vector size for rank 1 + if (collective::GetRank() == 0) { + std::cout << "-----------Call Interface for gp encryption and broadcast" + << ", size of gpairs: " << vector_gh.size() + << " ----------------------" << std::endl; + auto buf = processor->ProcessGHPairs(vector_gh); + buffer_size = 
buf.size(); + buffer = reinterpret_cast(buf.data()); + std::cout << "buffer size: " << buffer_size << std::endl; } } - // make broadcast call on the prepared data buffer - // (to local gRPC handler for further encryption) - // collective::Broadcast(gh_buffer, size_of_buffer, 0); + // broadcast the buffer size + collective::Broadcast(&buffer_size, sizeof(std::size_t), 0); - result->Resize(size); - collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + // prepare buffer on passive parties for satisfying broadcast mpi call + if (collective::GetRank() != 0) { + buffer = reinterpret_cast(malloc(buffer_size)); + } + // broadcast the data buffer holding processed gpairs + collective::Broadcast(buffer, buffer_size, 0); + + // call HandleGHPairs + xgboost::common::Span buf = xgboost::common::Span(buffer, buffer_size); + processor->HandleGHPairs(buf); + + + + + // update the result vector with the broadcasted data + //result->Resize(size); + //collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); + //for (std::size_t i = 0; i < size; i++) { + // auto gpair_ptr = reinterpret_cast(&result->HostVector()[i]); + // gpair_ptr[0] = buffer[i * 2]; + // gpair_ptr[1] = buffer[i * 2 + 1]; + //} + //processor->FreeBuffer(buf); + //processor->Shutdown(); + //loader.unload(); } else { // clear text mode, broadcast the data directly result->Resize(size); diff --git a/src/learner.cc b/src/learner.cc index f287b9dddff0..b27bb1281d8e 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -62,7 +62,7 @@ #include "xgboost/predictor.h" // for PredictionContainer, PredictionCacheEntry #include "xgboost/string_view.h" // for operator<<, StringView #include "xgboost/task.h" // for ObjInfo - +#include "processing/processor.h" // for Processor namespace { const char* kMaxDeltaStepDefaultValue = "0.7"; } // anonymous namespace @@ -496,6 +496,14 @@ class LearnerConfiguration : public Learner { if ((collective::GetRank() == 0)) { std::cout << "configure interface 
here???????????????" << std::endl; } + xgboost::processing::ProcessorLoader loader; + auto processor = loader.load("dummy"); + if (collective::GetRank() == 0) { + processor->Initialize(true, {}); + } else { + processor->Initialize(false, {}); + } + this->need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc new file mode 100644 index 000000000000..f07f92835662 --- /dev/null +++ b/src/processing/plugins/dummy_processor.cc @@ -0,0 +1,104 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#include "./dummy_processor.h" + +using std::vector; +using std::cout; +using std::endl; + +const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 +const int kPrefixLen = 24; + +xgboost::common::Span DummyProcessor::ProcessGHPairs(vector &pairs) { + cout << "ProcessGHPairs called with pairs size: " << pairs.size() << endl; + + auto buf_size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x + + // This memory needs to be freed + char *buf = static_cast(calloc(buf_size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &xgboost::processing::kDataTypeGHPairs, 8); + + // Simulate encryption by duplicating value 10 times + int index = kPrefixLen; + for (auto value : pairs) { + for (int i = 0; i < 10; i++) { + memcpy(buf+index, &value, 8); + index += 8; + } + } + + // Save pairs for future operations + this->gh_pairs_ = &pairs; + + return xgboost::common::Span(reinterpret_cast(buf), buf_size); +} + +xgboost::common::Span DummyProcessor::HandleGHPairs(xgboost::common::Span buffer) { + cout << "HandleGHPairs called with buffer size: " << buffer.size() << endl; + + // For dummy, this call is used to set gh_pairs for passive sites + if (!active_) { + int8_t *ptr = buffer.data() + kPrefixLen; + double *pairs = reinterpret_cast(ptr); + size_t num = (buffer.size() - kPrefixLen) / 8; + 
gh_pairs_ = new vector(pairs, pairs + num); + } + + return buffer; +} + +xgboost::common::Span DummyProcessor::ProcessAggregation( + std::vector const &nodes_to_build, xgboost::common::RowSetCollection const &row_set) { + auto total_bin_size = gidx_->Cuts().Values().size(); + auto histo_size = total_bin_size*2; + auto buf_size = kPrefixLen + 8*histo_size*nodes_to_build.size(); + std::int8_t *buf = static_cast(calloc(buf_size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &xgboost::processing::kDataTypeHisto, 8); + + double *histo = reinterpret_cast(buf + kPrefixLen); + for (auto &node_id : nodes_to_build) { + auto elem = row_set[node_id]; + for (auto it = elem.begin; it != elem.end; ++it) { + auto row_id = *it; + for (std::size_t f = 0; f < gidx_->Cuts().Ptrs().size()-1; f++) { + auto slot = gidx_->GetGindex(row_id, f); + if (slot < 0) { + continue; + } + + auto g = (*gh_pairs_)[row_id*2]; + auto h = (*gh_pairs_)[row_id*2+1]; + histo[slot*2] += g; + histo[slot*2+1] += h; + } + } + histo += histo_size; + } + + return xgboost::common::Span(reinterpret_cast(buf), buf_size); +} + +std::vector DummyProcessor::HandleAggregation(xgboost::common::Span buffer) { + std::vector result = std::vector(); + + int8_t* ptr = buffer.data(); + auto rest_size = buffer.size(); + + while (rest_size > kPrefixLen) { + std::int64_t *size_ptr = reinterpret_cast(ptr + 8); + double *array_start = reinterpret_cast(ptr + kPrefixLen); + auto array_size = (*size_ptr - kPrefixLen)/8; + result.insert(result.end(), array_start, array_start + array_size); + + rest_size -= *size_ptr; + ptr = ptr + *size_ptr; + } + + return result; +} + diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h new file mode 100644 index 000000000000..9511cf7f56f6 --- /dev/null +++ b/src/processing/plugins/dummy_processor.h @@ -0,0 +1,44 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#pragma once +#include 
+#include +#include +#include "../processor.h" + +class DummyProcessor: public xgboost::processing::Processor { + private: + bool active_ = false; + const std::map *params_; + std::vector *gh_pairs_{nullptr}; + const xgboost::GHistIndexMatrix *gidx_; + + public: + void Initialize(bool active, std::map params) override { + this->active_ = active; + this->params_ = ¶ms; + } + + void Shutdown() override { + this->gh_pairs_ = nullptr; + this->gidx_ = nullptr; + } + + void FreeBuffer(xgboost::common::Span buffer) override { + free(buffer.data()); + } + + xgboost::common::Span ProcessGHPairs(std::vector &pairs) override; + + xgboost::common::Span HandleGHPairs(xgboost::common::Span buffer) override; + + void InitAggregationContext(xgboost::GHistIndexMatrix const &gidx) override { + this->gidx_ = &gidx; + } + + xgboost::common::Span ProcessAggregation(std::vector const &nodes_to_build, + xgboost::common::RowSetCollection const &row_set) override; + + std::vector HandleAggregation(xgboost::common::Span buffer) override; +}; diff --git a/src/processing/processor.h b/src/processing/processor.h new file mode 100644 index 000000000000..933be77af58b --- /dev/null +++ b/src/processing/processor.h @@ -0,0 +1,108 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#pragma once + +#include +#include +#include +#include +#include +#include "../data/gradient_index.h" + +namespace xgboost::processing { + +const char kLibraryPath[] = "LIBRARY_PATH"; +const char kDummyProcessor[] = "dummy"; +const char kLoadFunc[] = "LoadProcessor"; + +// Data type definition +const int kDataTypeGHPairs = 1; +const int kDataTypeHisto = 2; + +/*! \brief An processor interface to handle tasks that require external library through plugins */ +class Processor { + public: + /*! + * \brief Initialize the processor + * + * \param active If true, this is the active node + * \param params Optional parameters + */ + virtual void Initialize(bool active, std::map params) = 0; + + /*! 
+ * \brief Shutdown the processor and free all the resources + * + */ + virtual void Shutdown() = 0; + + /*! + * \brief Free buffer + * + * \param buffer Any buffer returned by the calls from the plugin + */ + virtual void FreeBuffer(common::Span buffer) = 0; + + /*! + * \brief Preparing g & h pairs to be sent to other clients by active client + * + * \param pairs g&h pairs in a vector (g1, h1, g2, h2 ...) for every sample + * + * \return The encoded buffer to be sent + */ + virtual common::Span ProcessGHPairs(std::vector& pairs) = 0; + + /*! + * \brief Handle buffers with encoded pairs received from broadcast + * + * \param The encoded buffer + * + * \return The encoded buffer + */ + virtual common::Span HandleGHPairs(common::Span buffer) = 0; + + /*! + * \brief Initialize aggregation context by providing global GHistIndexMatrix + * + * \param gidx The matrix for every sample with its feature and slot assignment + */ + virtual void InitAggregationContext(GHistIndexMatrix const &gidx) = 0; + + /*! + * \brief Prepare row set for aggregation + * + * \param row_set Information for node IDs and its sample IDs + * + * \return The encoded buffer to be sent via AllGather + */ + virtual common::Span ProcessAggregation(std::vector const &nodes_to_build, + common::RowSetCollection const &row_set) = 0; + + /*! 
+ * \brief Handle all gather result + * + * \param buffers Buffer from all gather, only buffer from active site is needed + * + * \return A flattened vector of histograms for each site, each node in the form of + * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 + */ + virtual std::vector HandleAggregation(common::Span buffer) = 0; +}; + +class ProcessorLoader { + private: + std::map params; + void *handle = NULL; + + + public: + ProcessorLoader(): params{} {} + + ProcessorLoader(std::map& params): params(params) {} + + Processor* load(const std::string& plugin_name); + + void unload(); +}; +} // namespace xgboost::processing diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc new file mode 100644 index 000000000000..47a31f482d46 --- /dev/null +++ b/src/processing/processor_loader.cc @@ -0,0 +1,62 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#include +#include + +#include "./processor.h" +#include "plugins/dummy_processor.h" + +namespace xgboost::processing { + using LoadFunc = Processor *(const char *); + + Processor* ProcessorLoader::load(const std::string& plugin_name) { + // Dummy processor for unit testing without loading a shared library + if (plugin_name == kDummyProcessor) { + return new DummyProcessor(); + } + + auto lib_name = "libproc_" + plugin_name; + + auto extension = +#if defined(__APPLE__) || defined(__MACH__) + ".dylib"; +#else + ".so"; +#endif + auto lib_file_name = lib_name + extension; + + std::string lib_path; + + if (params.find(kLibraryPath) == params.end()) { + lib_path = lib_file_name; + } else { + auto p = params[kLibraryPath]; + if (p.back() != '/') { + p += '/'; + } + lib_path = p + lib_file_name; + } + + handle = dlopen(lib_path.c_str(), RTLD_LAZY); + if (!handle) { + std::cerr << "Failed to load the dynamic library: " << dlerror() << std::endl; + return NULL; + } + + void* func_ptr = dlsym(handle, kLoadFunc); + + if (!func_ptr) { + std::cerr << "Failed to 
find loader function: " << dlerror() << std::endl; + return NULL; + } + + auto func = reinterpret_cast(func_ptr); + + return (*func)(plugin_name.c_str()); + } + + void ProcessorLoader::unload() { + dlclose(handle); + } +} // namespace xgboost::processing diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 1d0aeb92ace4..2a24e37200a0 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -27,6 +27,7 @@ #include "xgboost/logging.h" // for CHECK_GE #include "xgboost/span.h" // for Span #include "xgboost/tree_model.h" // for RegTree +#include "../../processing/processor.h" // for Processor namespace xgboost::tree { /** @@ -111,26 +112,39 @@ class HistogramBuilder { } std::cout << std::endl; std::cout << "------------------------------" << std::endl; + } + + if (is_distributed_ && is_col_split_ && is_secure_) { // Call the interface to transmit the row set collection and gidx to the secure worker + // for encrypted histogram compuation if ((collective::GetRank() == 0)) { std::cout << "------------CALL interface to transmit row & gidx---------" << std::endl; } - } - - // Parallel processing by nodes and data in each node - common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { - const auto tid = static_cast(omp_get_thread_num()); - bst_node_t const nidx = nodes_to_build[nid_in_set]; - auto elem = row_set_collection[nidx]; - auto start_of_row_set = std::min(r.begin(), elem.Size()); - auto end_of_row_set = std::min(r.end(), elem.Size()); - auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set, - elem.begin + end_of_row_set, nidx); - auto hist = buffer_.GetInitializedHist(tid, nid_in_set); - if (rid_set.Size() != 0) { - common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); + xgboost::processing::ProcessorLoader loader; + auto processor = loader.load("dummy"); + if (collective::GetRank() == 0) { + processor->Initialize(true, {}); + } else { + 
processor->Initialize(false, {}); } - }); + processor->InitAggregationContext(gidx); + processor->ProcessAggregation(nodes_to_build, row_set_collection); + } else { + // Parallel processing by nodes and data in each node + common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { + const auto tid = static_cast(omp_get_thread_num()); + bst_node_t const nidx = nodes_to_build[nid_in_set]; + auto elem = row_set_collection[nidx]; + auto start_of_row_set = std::min(r.begin(), elem.Size()); + auto end_of_row_set = std::min(r.end(), elem.Size()); + auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set, + elem.begin + end_of_row_set, nidx); + auto hist = buffer_.GetInitializedHist(tid, nid_in_set); + if (rid_set.Size() != 0) { + common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); + } + }); + } } /** From 406cda3b0d3c5473d52dd7b7283ddb697ed91696 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 22 Mar 2024 18:41:10 -0400 Subject: [PATCH 27/55] integration with interface initial attempt --- src/collective/aggregator.h | 12 ++---------- src/learner.cc | 8 ++++---- src/processing/processor.h | 3 +++ src/tree/hist/histogram.h | 11 ++--------- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 2bcc35f842bb..539893793a85 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -101,14 +101,6 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 - xgboost::processing::ProcessorLoader loader; - auto processor = loader.load("dummy"); - if (collective::GetRank() == 0) { - processor->Initialize(true, {}); - } else { - processor->Initialize(false, {}); - } - std::size_t buffer_size{}; std::int8_t *buffer; //common::Span buffer; @@ -128,7 +120,7 @@ void ApplyWithLabels(Context const*, 
MetaInfo const& info, HostDeviceVector* std::cout << "-----------Call Interface for gp encryption and broadcast" << ", size of gpairs: " << vector_gh.size() << " ----------------------" << std::endl; - auto buf = processor->ProcessGHPairs(vector_gh); + auto buf = processor_instance->ProcessGHPairs(vector_gh); buffer_size = buf.size(); buffer = reinterpret_cast(buf.data()); std::cout << "buffer size: " << buffer_size << std::endl; @@ -146,7 +138,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // call HandleGHPairs xgboost::common::Span buf = xgboost::common::Span(buffer, buffer_size); - processor->HandleGHPairs(buf); + processor_instance->HandleGHPairs(buf); diff --git a/src/learner.cc b/src/learner.cc index b27bb1281d8e..b88a4ed4df9c 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -68,7 +68,7 @@ const char* kMaxDeltaStepDefaultValue = "0.7"; } // anonymous namespace DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy); - +xgboost::processing::Processor *processor_instance; namespace xgboost { Learner::~Learner() = default; namespace { @@ -497,11 +497,11 @@ class LearnerConfiguration : public Learner { std::cout << "configure interface here???????????????" 
<< std::endl; } xgboost::processing::ProcessorLoader loader; - auto processor = loader.load("dummy"); + processor_instance = loader.load("dummy"); if (collective::GetRank() == 0) { - processor->Initialize(true, {}); + processor_instance->Initialize(true, {}); } else { - processor->Initialize(false, {}); + processor_instance->Initialize(false, {}); } diff --git a/src/processing/processor.h b/src/processing/processor.h index 933be77af58b..928062d79b0c 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -105,4 +105,7 @@ class ProcessorLoader { void unload(); }; + } // namespace xgboost::processing + +extern xgboost::processing::Processor *processor_instance; \ No newline at end of file diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 2a24e37200a0..405d2099c103 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -120,15 +120,8 @@ class HistogramBuilder { if ((collective::GetRank() == 0)) { std::cout << "------------CALL interface to transmit row & gidx---------" << std::endl; } - xgboost::processing::ProcessorLoader loader; - auto processor = loader.load("dummy"); - if (collective::GetRank() == 0) { - processor->Initialize(true, {}); - } else { - processor->Initialize(false, {}); - } - processor->InitAggregationContext(gidx); - processor->ProcessAggregation(nodes_to_build, row_set_collection); + processor_instance->InitAggregationContext(gidx); + processor_instance->ProcessAggregation(nodes_to_build, row_set_collection); } else { // Parallel processing by nodes and data in each node common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { From f6c63aa4ba08c11cd6e354a308a99aa568e7343f Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 22 Mar 2024 19:33:25 -0400 Subject: [PATCH 28/55] integration with interface initial attempt --- src/collective/aggregator.h | 15 --------------- src/learner.cc | 19 +++++++++++-------- src/processing/plugins/dummy_processor.cc | 20 
+++++++++----------- src/processing/plugins/dummy_processor.h | 2 +- src/processing/processor.h | 2 +- src/tree/hist/histogram.h | 18 +++++++++++++----- 6 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 539893793a85..198e97505f71 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -139,21 +139,6 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // call HandleGHPairs xgboost::common::Span buf = xgboost::common::Span(buffer, buffer_size); processor_instance->HandleGHPairs(buf); - - - - - // update the result vector with the broadcasted data - //result->Resize(size); - //collective::Broadcast(result->HostPointer(), size * sizeof(T), 0); - //for (std::size_t i = 0; i < size; i++) { - // auto gpair_ptr = reinterpret_cast(&result->HostVector()[i]); - // gpair_ptr[0] = buffer[i * 2]; - // gpair_ptr[1] = buffer[i * 2 + 1]; - //} - //processor->FreeBuffer(buf); - //processor->Shutdown(); - //loader.unload(); } else { // clear text mode, broadcast the data directly result->Resize(size); diff --git a/src/learner.cc b/src/learner.cc index b88a4ed4df9c..2d82d531f9a1 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -494,16 +494,19 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); if ((collective::GetRank() == 0)) { - std::cout << "configure interface here???????????????" 
<< std::endl; + std::cout << "Initialize interface" << std::endl; } - xgboost::processing::ProcessorLoader loader; - processor_instance = loader.load("dummy"); - if (collective::GetRank() == 0) { - processor_instance->Initialize(true, {}); - } else { - processor_instance->Initialize(false, {}); - } + xgboost::processing::ProcessorLoader loader; + processor_instance = loader.load("dummy"); + if (collective::GetRank() == 0) { + processor_instance->Initialize(true, {}); + } else { + processor_instance->Initialize(false, {}); + } + //processor_instance->FreeBuffer(buf); + //processor_instance->Shutdown(); + //loader.unload(); this->need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index f07f92835662..fbbf523141c5 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -44,7 +44,10 @@ xgboost::common::Span DummyProcessor::HandleGHPairs(xgboost::common::Spa int8_t *ptr = buffer.data() + kPrefixLen; double *pairs = reinterpret_cast(ptr); size_t num = (buffer.size() - kPrefixLen) / 8; - gh_pairs_ = new vector(pairs, pairs + num); + gh_pairs_ = new vector(); + for (int i = 0; i < num; i += 10) { + gh_pairs_->push_back(pairs[i]); + } } return buffer; @@ -83,22 +86,17 @@ xgboost::common::Span DummyProcessor::ProcessAggregation( return xgboost::common::Span(reinterpret_cast(buf), buf_size); } -std::vector DummyProcessor::HandleAggregation(xgboost::common::Span buffer) { +std::vector DummyProcessor::HandleAggregation(std::vector> buffers) { std::vector result = std::vector(); - int8_t* ptr = buffer.data(); - auto rest_size = buffer.size(); - - while (rest_size > kPrefixLen) { + for (auto buf : buffers) { + int8_t *ptr = buf.data(); std::int64_t *size_ptr = reinterpret_cast(ptr + 8); double *array_start = reinterpret_cast(ptr + kPrefixLen); - auto array_size = (*size_ptr - kPrefixLen)/8; + auto array_size = (*size_ptr - 
kPrefixLen) / 8; result.insert(result.end(), array_start, array_start + array_size); - - rest_size -= *size_ptr; - ptr = ptr + *size_ptr; } - + return result; } diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h index 9511cf7f56f6..dc1d937ba4d0 100644 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/dummy_processor.h @@ -40,5 +40,5 @@ class DummyProcessor: public xgboost::processing::Processor { xgboost::common::Span ProcessAggregation(std::vector const &nodes_to_build, xgboost::common::RowSetCollection const &row_set) override; - std::vector HandleAggregation(xgboost::common::Span buffer) override; + std::vector HandleAggregation(std::vector> buffers) override; }; diff --git a/src/processing/processor.h b/src/processing/processor.h index 928062d79b0c..effc52c6ff7d 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -87,7 +87,7 @@ class Processor { * \return A flattened vector of histograms for each site, each node in the form of * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 */ - virtual std::vector HandleAggregation(common::Span buffer) = 0; + virtual std::vector HandleAggregation(std::vector> buffers) = 0; }; class ProcessorLoader { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 405d2099c103..8f4bccfeb5dc 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -52,7 +52,7 @@ class HistogramBuilder { bool is_distributed_{false}; bool is_col_split_{false}; bool is_secure_{false}; - + xgboost::common::Span hist_data; public: /** * @brief Reset the builder, should be called before growing a new tree. 
@@ -121,7 +121,8 @@ class HistogramBuilder { std::cout << "------------CALL interface to transmit row & gidx---------" << std::endl; } processor_instance->InitAggregationContext(gidx); - processor_instance->ProcessAggregation(nodes_to_build, row_set_collection); + // get the encrypted histogram from the secure worker + hist_data = processor_instance->ProcessAggregation(nodes_to_build, row_set_collection); } else { // Parallel processing by nodes and data in each node common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { @@ -246,29 +247,33 @@ class HistogramBuilder { // note that only Label Owner needs the global histogram CHECK(!nodes_to_build.empty()); + // Front item of nodes_to_build auto first_nidx = nodes_to_build.front(); // *2 because we have a pair of g and h for each histogram item std::size_t n = n_total_bins * nodes_to_build.size() * 2; - + /* // Use AllGather to collect the histogram entries from all nodes // allocate memory for the received entries as a flat vector std::vector hist_flat; // iterate through the nodes_to_build - auto it = reinterpret_cast(this->hist_[first_nidx].data()); + auto it = reinterpret_cast(hist_data.data()); for (size_t i = 0; i < n; i++) { // get item with iterator double item = *it; hist_flat.push_back(item); it++; } +*/ // Perform AllGather - auto hist_entries = collective::Allgather(hist_flat); + auto hist_entries = collective::Allgather(hist_data); // Call interface here to post-process the messages if (collective::GetRank() == 0) { std::cout << "---------------CALL Interface for processing-------------- " << std::endl; } + std::vector hist_aggr = processor_instance->HandleAggregation(hist_entries); + std::cout << "aggregated size: " << hist_aggr.size() << std::endl; // Update histogram for label owner if (collective::GetRank() == 0) { @@ -276,6 +281,8 @@ class HistogramBuilder { auto it = reinterpret_cast(this->hist_[first_nidx].data()); // iterate through the hist vector of the label 
owner for (size_t i = 0; i < n; i++) { + *it = hist_aggr[i]; + /* // skip rank 0, as local hist already contains its own entries // get the sum of the entries from other ranks double hist_sum = 0.0; @@ -286,6 +293,7 @@ class HistogramBuilder { // add other parties' sum to rank 0's record // to get the global histogram *it += hist_sum; + */ it++; } } From f223df7162d9f6234d2b596af43931378f6f0ede Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 1 Apr 2024 10:25:05 -0400 Subject: [PATCH 29/55] functional integration with interface --- src/processing/plugins/dummy_processor.cc | 46 +++++++++++++++++++---- src/processing/plugins/dummy_processor.h | 2 +- src/processing/processor.h | 4 +- src/tree/hist/histogram.h | 17 +++++---- 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index fbbf523141c5..1699ca4515ca 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -10,6 +10,10 @@ using std::endl; const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 const int kPrefixLen = 24; +bool ValidDam(std::int8_t *buffer) { + return memcmp(buffer, kSignature, strlen(kSignature)) == 0; +} + xgboost::common::Span DummyProcessor::ProcessGHPairs(vector &pairs) { cout << "ProcessGHPairs called with pairs size: " << pairs.size() << endl; @@ -31,13 +35,18 @@ xgboost::common::Span DummyProcessor::ProcessGHPairs(vector &pai } // Save pairs for future operations - this->gh_pairs_ = &pairs; + this->gh_pairs_ = new vector(pairs); return xgboost::common::Span(reinterpret_cast(buf), buf_size); } xgboost::common::Span DummyProcessor::HandleGHPairs(xgboost::common::Span buffer) { - cout << "HandleGHPairs called with buffer size: " << buffer.size() << endl; + cout << "HandleGHPairs called with buffer size: " << buffer.size() << " Active: " << active_ << endl; + + if (!ValidDam(buffer.data())) { + cout << "Invalid buffer 
received" << endl; + return buffer; + } // For dummy, this call is used to set gh_pairs for passive sites if (!active_) { @@ -48,6 +57,7 @@ xgboost::common::Span DummyProcessor::HandleGHPairs(xgboost::common::Spa for (int i = 0; i < num; i += 10) { gh_pairs_->push_back(pairs[i]); } + cout << "GH Pairs saved. Size: " << gh_pairs_->size() << endl; } return buffer; @@ -58,6 +68,7 @@ xgboost::common::Span DummyProcessor::ProcessAggregation( auto total_bin_size = gidx_->Cuts().Values().size(); auto histo_size = total_bin_size*2; auto buf_size = kPrefixLen + 8*histo_size*nodes_to_build.size(); + cout << "ProcessAggregation called with bin size: " << total_bin_size << " Buffer Size: " << buf_size << endl; std::int8_t *buf = static_cast(calloc(buf_size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); @@ -74,6 +85,15 @@ xgboost::common::Span DummyProcessor::ProcessAggregation( continue; } + if (slot >= total_bin_size) { + cout << "Slot too big, ignored: " << slot << endl; + continue; + } + + if (row_id >= gh_pairs_->size()/2) { + cout << "Row ID too big: " << row_id << endl; + } + auto g = (*gh_pairs_)[row_id*2]; auto h = (*gh_pairs_)[row_id*2+1]; histo[slot*2] += g; @@ -86,17 +106,29 @@ xgboost::common::Span DummyProcessor::ProcessAggregation( return xgboost::common::Span(reinterpret_cast(buf), buf_size); } -std::vector DummyProcessor::HandleAggregation(std::vector> buffers) { +std::vector DummyProcessor::HandleAggregation(xgboost::common::Span buffer) { + cout << "HandleAggregation called with buffer size: " << buffer.size() << endl; std::vector result = std::vector(); - for (auto buf : buffers) { - int8_t *ptr = buf.data(); + int8_t* ptr = buffer.data(); + auto rest_size = buffer.size(); + + while (rest_size > kPrefixLen) { + if (!ValidDam(ptr)) { + cout << "Invalid buffer at offset " << buffer.size() - rest_size << endl; + continue; + } std::int64_t *size_ptr = reinterpret_cast(ptr + 8); double *array_start = reinterpret_cast(ptr + 
kPrefixLen); - auto array_size = (*size_ptr - kPrefixLen) / 8; + auto array_size = (*size_ptr - kPrefixLen)/8; + cout << "Histo size for buffer: " << array_size << endl; result.insert(result.end(), array_start, array_start + array_size); + cout << "Result size: " << result.size() << endl; + rest_size -= *size_ptr; + ptr = ptr + *size_ptr; } + + cout << "Total histo size: " << result.size() << endl; return result; } - diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h index dc1d937ba4d0..9511cf7f56f6 100644 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/dummy_processor.h @@ -40,5 +40,5 @@ class DummyProcessor: public xgboost::processing::Processor { xgboost::common::Span ProcessAggregation(std::vector const &nodes_to_build, xgboost::common::RowSetCollection const &row_set) override; - std::vector HandleAggregation(std::vector> buffers) override; + std::vector HandleAggregation(xgboost::common::Span buffer) override; }; diff --git a/src/processing/processor.h b/src/processing/processor.h index effc52c6ff7d..952acfbe60b6 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -82,12 +82,12 @@ class Processor { /*! 
* \brief Handle all gather result * - * \param buffers Buffer from all gather, only buffer from active site is needed + * \param buffer Buffer from all gather, only buffer from active site is needed * * \return A flattened vector of histograms for each site, each node in the form of * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 */ - virtual std::vector HandleAggregation(std::vector> buffers) = 0; + virtual std::vector HandleAggregation(xgboost::common::Span buffer) = 0; }; class ProcessorLoader { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 8f4bccfeb5dc..1a4c63b95a2d 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -267,12 +267,15 @@ class HistogramBuilder { */ // Perform AllGather - auto hist_entries = collective::Allgather(hist_data); + auto hist_vec = std::vector(hist_data.data(), hist_data.data() + hist_data.size()); + + auto hist_entries = collective::Allgather(hist_vec); // Call interface here to post-process the messages if (collective::GetRank() == 0) { std::cout << "---------------CALL Interface for processing-------------- " << std::endl; } - std::vector hist_aggr = processor_instance->HandleAggregation(hist_entries); + auto hist_span = common::Span(hist_entries.data(), hist_entries.size()); + std::vector hist_aggr = processor_instance->HandleAggregation(hist_span); std::cout << "aggregated size: " << hist_aggr.size() << std::endl; // Update histogram for label owner @@ -280,20 +283,18 @@ class HistogramBuilder { // iterator of the beginning of the vector auto it = reinterpret_cast(this->hist_[first_nidx].data()); // iterate through the hist vector of the label owner + std::cout << "Histogram size n=" << n << std::endl; for (size_t i = 0; i < n; i++) { - *it = hist_aggr[i]; - /* // skip rank 0, as local hist already contains its own entries // get the sum of the entries from other ranks double hist_sum = 0.0; - for (std::size_t rank_idx = 1; rank_idx < 
hist_entries.size()/n; rank_idx++) { + for (std::size_t rank_idx = 0; rank_idx < hist_aggr.size()/n; rank_idx++) { int flat_idx = rank_idx * n + i; - hist_sum += hist_entries.at(flat_idx); + hist_sum += hist_aggr[flat_idx]; } // add other parties' sum to rank 0's record // to get the global histogram - *it += hist_sum; - */ + *it = hist_sum; it++; } } From d881d846be0d077547eca38aa995d1447e9d82de Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 1 Apr 2024 10:46:31 -0400 Subject: [PATCH 30/55] remove debugging prints --- src/collective/aggregator.h | 20 +++------- src/learner.cc | 7 ---- src/tree/hist/histogram.h | 78 +------------------------------------ 3 files changed, 8 insertions(+), 97 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 198e97505f71..da7459db7f97 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -100,10 +100,8 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* if (info.IsSecure() && is_gpair) { // Under secure mode, gpairs will be processed to vector and encrypt // information only available on rank 0 - std::size_t buffer_size{}; std::int8_t *buffer; - //common::Span buffer; if (collective::GetRank() == 0) { std::vector vector_gh; for (std::size_t i = 0; i < size; i++) { @@ -115,24 +113,18 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* vector_gh.push_back(gpair_ptr[1]); } // provide the vectors to the processor interface - // print vector size for rank 1 - if (collective::GetRank() == 0) { - std::cout << "-----------Call Interface for gp encryption and broadcast" - << ", size of gpairs: " << vector_gh.size() - << " ----------------------" << std::endl; - auto buf = processor_instance->ProcessGHPairs(vector_gh); - buffer_size = buf.size(); - buffer = reinterpret_cast(buf.data()); - std::cout << "buffer size: " << buffer_size << std::endl; - } + auto buf = processor_instance->ProcessGHPairs(vector_gh); + buffer_size = 
buf.size(); + buffer = reinterpret_cast(buf.data()); } - // broadcast the buffer size - collective::Broadcast(&buffer_size, sizeof(std::size_t), 0); + // broadcast the buffer size for other ranks to prepare + collective::Broadcast(&buffer_size, sizeof(std::size_t), 0); // prepare buffer on passive parties for satisfying broadcast mpi call if (collective::GetRank() != 0) { buffer = reinterpret_cast(malloc(buffer_size)); } + // broadcast the data buffer holding processed gpairs collective::Broadcast(buffer, buffer_size, 0); diff --git a/src/learner.cc b/src/learner.cc index 2d82d531f9a1..4f27516e0ff4 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -493,10 +493,6 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); - if ((collective::GetRank() == 0)) { - std::cout << "Initialize interface" << std::endl; - } - xgboost::processing::ProcessorLoader loader; processor_instance = loader.load("dummy"); if (collective::GetRank() == 0) { @@ -504,9 +500,6 @@ class LearnerConfiguration : public Learner { } else { processor_instance->Initialize(false, {}); } - //processor_instance->FreeBuffer(buf); - //processor_instance->Shutdown(); - //loader.unload(); this->need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 1a4c63b95a2d..9b024e56c220 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -77,49 +77,9 @@ class HistogramBuilder { std::vector const &nodes_to_build, common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { - // Print out all kinds if information for interface integration - if (is_distributed_ && is_col_split_ && is_secure_ && (collective::GetRank() == 0)) { - std::cout << "--------------Node Hist----------------" << std::endl; - std::cout << "Current samples on nodes: " << std::endl; - // print info on all nodes - for (std::size_t nid = 0; nid < row_set_collection.Size(); ++nid) { - auto 
size = row_set_collection[nid].Size(); - std::cout << "Node " << nid << " has " << size << " rows." << std::endl; - // print the first and last indexes of the rows with iterator - if (size > 0) { - std::cout << "First index for node " << nid << " is " - << *row_set_collection[nid].begin << " and last index is " - << *(row_set_collection[nid].end - 1) << std::endl; - } - } - // print info on the nodes to build - for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) { - std::cout << "Building local histogram for node ID: " << *nit - << " with " << row_set_collection[*nit].Size() - << " samples." << std::endl; - } - std::cout << "GHistIndexMatrix will not change with size " << gidx.index.Size() << std::endl; - auto cut_ptrs = gidx.Cuts().Ptrs(); - auto cut_values = gidx.Cuts().Values(); - // cut points: feature 0 start (0), feature 1 start, feature 2 start, ... feature n start - // cut value: cut for feature 0 slot 0, ..., feature 0 slot m, feature 1 slot 0, ... - std::cout << "size of the cut points and cut values: " - << cut_ptrs.size() << " " << cut_values.size() << std::endl; - std::cout << "first sample falls to: [feature_id, slot #, cutValue]: " << std::endl; - for (std::size_t i = 0; i < cut_ptrs.size()-1; ++i) { - auto slot_number = gidx.GetGindex(0, i); - std::cout << "[" << i << ", " << slot_number << ", "<< cut_values[slot_number] << "] "; - } - std::cout << std::endl; - std::cout << "------------------------------" << std::endl; - } - if (is_distributed_ && is_col_split_ && is_secure_) { // Call the interface to transmit the row set collection and gidx to the secure worker // for encrypted histogram compuation - if ((collective::GetRank() == 0)) { - std::cout << "------------CALL interface to transmit row & gidx---------" << std::endl; - } processor_instance->InitAggregationContext(gidx); // get the encrypted histogram from the secure worker hist_data = processor_instance->ProcessAggregation(nodes_to_build, row_set_collection); @@ 
-247,53 +207,31 @@ class HistogramBuilder { // note that only Label Owner needs the global histogram CHECK(!nodes_to_build.empty()); - // Front item of nodes_to_build auto first_nidx = nodes_to_build.front(); // *2 because we have a pair of g and h for each histogram item std::size_t n = n_total_bins * nodes_to_build.size() * 2; - /* - // Use AllGather to collect the histogram entries from all nodes - // allocate memory for the received entries as a flat vector - std::vector hist_flat; - // iterate through the nodes_to_build - auto it = reinterpret_cast(hist_data.data()); - for (size_t i = 0; i < n; i++) { - // get item with iterator - double item = *it; - hist_flat.push_back(item); - it++; - } -*/ // Perform AllGather auto hist_vec = std::vector(hist_data.data(), hist_data.data() + hist_data.size()); - auto hist_entries = collective::Allgather(hist_vec); // Call interface here to post-process the messages - if (collective::GetRank() == 0) { - std::cout << "---------------CALL Interface for processing-------------- " << std::endl; - } auto hist_span = common::Span(hist_entries.data(), hist_entries.size()); std::vector hist_aggr = processor_instance->HandleAggregation(hist_span); - std::cout << "aggregated size: " << hist_aggr.size() << std::endl; // Update histogram for label owner if (collective::GetRank() == 0) { // iterator of the beginning of the vector auto it = reinterpret_cast(this->hist_[first_nidx].data()); // iterate through the hist vector of the label owner - std::cout << "Histogram size n=" << n << std::endl; for (size_t i = 0; i < n; i++) { - // skip rank 0, as local hist already contains its own entries - // get the sum of the entries from other ranks + // get the sum of the entries from all ranks double hist_sum = 0.0; for (std::size_t rank_idx = 0; rank_idx < hist_aggr.size()/n; rank_idx++) { int flat_idx = rank_idx * n + i; hist_sum += hist_aggr[flat_idx]; } - // add other parties' sum to rank 0's record - // to get the global histogram + // 
update rank 0's record with the global histogram *it = hist_sum; it++; } @@ -403,14 +341,6 @@ class MultiHistogramBuilder { std::vector nodes_to_sub(valid_candidates.size()); AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub); - // print index for nodes_to_build and nodes_to_sub - if (collective::GetRank() == 0) { - for (std::size_t i = 0; i < nodes_to_build.size(); i++) { - std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; "; - std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl; - } - } - // use the first builder for getting number of valid nodes. target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true); CHECK_GE(nodes_to_build.size(), nodes_to_sub.size()); @@ -428,10 +358,6 @@ class MultiHistogramBuilder { for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); - if (collective::GetRank() == 0) { - std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl; - } - CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_); this->target_builders_[t].BuildHist(page_idx, space, page, partitioners[page_idx].Partitions(), nodes_to_build, From 2997cf72d1e2ae234d9cfb891c6ad289ec41af22 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 1 Apr 2024 10:51:58 -0400 Subject: [PATCH 31/55] remove processor from another PR --- src/processing/plugins/dummy_processor.cc | 134 ---------------------- src/processing/plugins/dummy_processor.h | 44 ------- src/processing/processor.h | 111 ------------------ src/processing/processor_loader.cc | 62 ---------- src/tree/hist/histogram.h | 4 +- 5 files changed, 3 insertions(+), 352 deletions(-) delete mode 100644 src/processing/plugins/dummy_processor.cc delete mode 100644 src/processing/plugins/dummy_processor.h delete mode 100644 src/processing/processor.h delete mode 100644 src/processing/processor_loader.cc diff --git a/src/processing/plugins/dummy_processor.cc 
b/src/processing/plugins/dummy_processor.cc deleted file mode 100644 index 1699ca4515ca..000000000000 --- a/src/processing/plugins/dummy_processor.cc +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2014-2024 by XGBoost Contributors - */ -#include "./dummy_processor.h" - -using std::vector; -using std::cout; -using std::endl; - -const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 -const int kPrefixLen = 24; - -bool ValidDam(std::int8_t *buffer) { - return memcmp(buffer, kSignature, strlen(kSignature)) == 0; -} - -xgboost::common::Span DummyProcessor::ProcessGHPairs(vector &pairs) { - cout << "ProcessGHPairs called with pairs size: " << pairs.size() << endl; - - auto buf_size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x - - // This memory needs to be freed - char *buf = static_cast(calloc(buf_size, 1)); - memcpy(buf, kSignature, strlen(kSignature)); - memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &xgboost::processing::kDataTypeGHPairs, 8); - - // Simulate encryption by duplicating value 10 times - int index = kPrefixLen; - for (auto value : pairs) { - for (int i = 0; i < 10; i++) { - memcpy(buf+index, &value, 8); - index += 8; - } - } - - // Save pairs for future operations - this->gh_pairs_ = new vector(pairs); - - return xgboost::common::Span(reinterpret_cast(buf), buf_size); -} - -xgboost::common::Span DummyProcessor::HandleGHPairs(xgboost::common::Span buffer) { - cout << "HandleGHPairs called with buffer size: " << buffer.size() << " Active: " << active_ << endl; - - if (!ValidDam(buffer.data())) { - cout << "Invalid buffer received" << endl; - return buffer; - } - - // For dummy, this call is used to set gh_pairs for passive sites - if (!active_) { - int8_t *ptr = buffer.data() + kPrefixLen; - double *pairs = reinterpret_cast(ptr); - size_t num = (buffer.size() - kPrefixLen) / 8; - gh_pairs_ = new vector(); - for (int i = 0; i < num; i += 10) { - gh_pairs_->push_back(pairs[i]); - } - cout << "GH Pairs 
saved. Size: " << gh_pairs_->size() << endl; - } - - return buffer; -} - -xgboost::common::Span DummyProcessor::ProcessAggregation( - std::vector const &nodes_to_build, xgboost::common::RowSetCollection const &row_set) { - auto total_bin_size = gidx_->Cuts().Values().size(); - auto histo_size = total_bin_size*2; - auto buf_size = kPrefixLen + 8*histo_size*nodes_to_build.size(); - cout << "ProcessAggregation called with bin size: " << total_bin_size << " Buffer Size: " << buf_size << endl; - std::int8_t *buf = static_cast(calloc(buf_size, 1)); - memcpy(buf, kSignature, strlen(kSignature)); - memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &xgboost::processing::kDataTypeHisto, 8); - - double *histo = reinterpret_cast(buf + kPrefixLen); - for (auto &node_id : nodes_to_build) { - auto elem = row_set[node_id]; - for (auto it = elem.begin; it != elem.end; ++it) { - auto row_id = *it; - for (std::size_t f = 0; f < gidx_->Cuts().Ptrs().size()-1; f++) { - auto slot = gidx_->GetGindex(row_id, f); - if (slot < 0) { - continue; - } - - if (slot >= total_bin_size) { - cout << "Slot too big, ignored: " << slot << endl; - continue; - } - - if (row_id >= gh_pairs_->size()/2) { - cout << "Row ID too big: " << row_id << endl; - } - - auto g = (*gh_pairs_)[row_id*2]; - auto h = (*gh_pairs_)[row_id*2+1]; - histo[slot*2] += g; - histo[slot*2+1] += h; - } - } - histo += histo_size; - } - - return xgboost::common::Span(reinterpret_cast(buf), buf_size); -} - -std::vector DummyProcessor::HandleAggregation(xgboost::common::Span buffer) { - cout << "HandleAggregation called with buffer size: " << buffer.size() << endl; - std::vector result = std::vector(); - - int8_t* ptr = buffer.data(); - auto rest_size = buffer.size(); - - while (rest_size > kPrefixLen) { - if (!ValidDam(ptr)) { - cout << "Invalid buffer at offset " << buffer.size() - rest_size << endl; - continue; - } - std::int64_t *size_ptr = reinterpret_cast(ptr + 8); - double *array_start = reinterpret_cast(ptr + kPrefixLen); - 
auto array_size = (*size_ptr - kPrefixLen)/8; - cout << "Histo size for buffer: " << array_size << endl; - result.insert(result.end(), array_start, array_start + array_size); - cout << "Result size: " << result.size() << endl; - rest_size -= *size_ptr; - ptr = ptr + *size_ptr; - } - - cout << "Total histo size: " << result.size() << endl; - - return result; -} diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h deleted file mode 100644 index 9511cf7f56f6..000000000000 --- a/src/processing/plugins/dummy_processor.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2014-2024 by XGBoost Contributors - */ -#pragma once -#include -#include -#include -#include "../processor.h" - -class DummyProcessor: public xgboost::processing::Processor { - private: - bool active_ = false; - const std::map *params_; - std::vector *gh_pairs_{nullptr}; - const xgboost::GHistIndexMatrix *gidx_; - - public: - void Initialize(bool active, std::map params) override { - this->active_ = active; - this->params_ = ¶ms; - } - - void Shutdown() override { - this->gh_pairs_ = nullptr; - this->gidx_ = nullptr; - } - - void FreeBuffer(xgboost::common::Span buffer) override { - free(buffer.data()); - } - - xgboost::common::Span ProcessGHPairs(std::vector &pairs) override; - - xgboost::common::Span HandleGHPairs(xgboost::common::Span buffer) override; - - void InitAggregationContext(xgboost::GHistIndexMatrix const &gidx) override { - this->gidx_ = &gidx; - } - - xgboost::common::Span ProcessAggregation(std::vector const &nodes_to_build, - xgboost::common::RowSetCollection const &row_set) override; - - std::vector HandleAggregation(xgboost::common::Span buffer) override; -}; diff --git a/src/processing/processor.h b/src/processing/processor.h deleted file mode 100644 index 952acfbe60b6..000000000000 --- a/src/processing/processor.h +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Copyright 2014-2024 by XGBoost Contributors - */ -#pragma once - -#include -#include 
-#include -#include -#include -#include "../data/gradient_index.h" - -namespace xgboost::processing { - -const char kLibraryPath[] = "LIBRARY_PATH"; -const char kDummyProcessor[] = "dummy"; -const char kLoadFunc[] = "LoadProcessor"; - -// Data type definition -const int kDataTypeGHPairs = 1; -const int kDataTypeHisto = 2; - -/*! \brief An processor interface to handle tasks that require external library through plugins */ -class Processor { - public: - /*! - * \brief Initialize the processor - * - * \param active If true, this is the active node - * \param params Optional parameters - */ - virtual void Initialize(bool active, std::map params) = 0; - - /*! - * \brief Shutdown the processor and free all the resources - * - */ - virtual void Shutdown() = 0; - - /*! - * \brief Free buffer - * - * \param buffer Any buffer returned by the calls from the plugin - */ - virtual void FreeBuffer(common::Span buffer) = 0; - - /*! - * \brief Preparing g & h pairs to be sent to other clients by active client - * - * \param pairs g&h pairs in a vector (g1, h1, g2, h2 ...) for every sample - * - * \return The encoded buffer to be sent - */ - virtual common::Span ProcessGHPairs(std::vector& pairs) = 0; - - /*! - * \brief Handle buffers with encoded pairs received from broadcast - * - * \param The encoded buffer - * - * \return The encoded buffer - */ - virtual common::Span HandleGHPairs(common::Span buffer) = 0; - - /*! - * \brief Initialize aggregation context by providing global GHistIndexMatrix - * - * \param gidx The matrix for every sample with its feature and slot assignment - */ - virtual void InitAggregationContext(GHistIndexMatrix const &gidx) = 0; - - /*! - * \brief Prepare row set for aggregation - * - * \param row_set Information for node IDs and its sample IDs - * - * \return The encoded buffer to be sent via AllGather - */ - virtual common::Span ProcessAggregation(std::vector const &nodes_to_build, - common::RowSetCollection const &row_set) = 0; - - /*! 
- * \brief Handle all gather result - * - * \param buffer Buffer from all gather, only buffer from active site is needed - * - * \return A flattened vector of histograms for each site, each node in the form of - * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 - */ - virtual std::vector HandleAggregation(xgboost::common::Span buffer) = 0; -}; - -class ProcessorLoader { - private: - std::map params; - void *handle = NULL; - - - public: - ProcessorLoader(): params{} {} - - ProcessorLoader(std::map& params): params(params) {} - - Processor* load(const std::string& plugin_name); - - void unload(); -}; - -} // namespace xgboost::processing - -extern xgboost::processing::Processor *processor_instance; \ No newline at end of file diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc deleted file mode 100644 index 47a31f482d46..000000000000 --- a/src/processing/processor_loader.cc +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Copyright 2014-2024 by XGBoost Contributors - */ -#include -#include - -#include "./processor.h" -#include "plugins/dummy_processor.h" - -namespace xgboost::processing { - using LoadFunc = Processor *(const char *); - - Processor* ProcessorLoader::load(const std::string& plugin_name) { - // Dummy processor for unit testing without loading a shared library - if (plugin_name == kDummyProcessor) { - return new DummyProcessor(); - } - - auto lib_name = "libproc_" + plugin_name; - - auto extension = -#if defined(__APPLE__) || defined(__MACH__) - ".dylib"; -#else - ".so"; -#endif - auto lib_file_name = lib_name + extension; - - std::string lib_path; - - if (params.find(kLibraryPath) == params.end()) { - lib_path = lib_file_name; - } else { - auto p = params[kLibraryPath]; - if (p.back() != '/') { - p += '/'; - } - lib_path = p + lib_file_name; - } - - handle = dlopen(lib_path.c_str(), RTLD_LAZY); - if (!handle) { - std::cerr << "Failed to load the dynamic library: " << dlerror() << std::endl; - return 
NULL; - } - - void* func_ptr = dlsym(handle, kLoadFunc); - - if (!func_ptr) { - std::cerr << "Failed to find loader function: " << dlerror() << std::endl; - return NULL; - } - - auto func = reinterpret_cast(func_ptr); - - return (*func)(plugin_name.c_str()); - } - - void ProcessorLoader::unload() { - dlclose(handle); - } -} // namespace xgboost::processing diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 9b024e56c220..32d5d8d5a3a7 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -53,6 +53,7 @@ class HistogramBuilder { bool is_col_split_{false}; bool is_secure_{false}; xgboost::common::Span hist_data; + public: /** * @brief Reset the builder, should be called before growing a new tree. @@ -213,7 +214,8 @@ class HistogramBuilder { std::size_t n = n_total_bins * nodes_to_build.size() * 2; // Perform AllGather - auto hist_vec = std::vector(hist_data.data(), hist_data.data() + hist_data.size()); + auto hist_vec = std::vector(hist_data.data(), + hist_data.data() + hist_data.size()); auto hist_entries = collective::Allgather(hist_vec); // Call interface here to post-process the messages auto hist_span = common::Span(hist_entries.data(), hist_entries.size()); From 3a1f9acf92463a0c369836fa4ba4a32828bfa3d6 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 12 Apr 2024 10:49:44 -0400 Subject: [PATCH 32/55] Update the processor functions according to new processor implementation --- src/collective/aggregator.h | 11 ++++++----- src/learner.cc | 15 +++++++-------- src/tree/hist/histogram.h | 31 ++++++++++++++++++++++++++----- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index da7459db7f97..c1f5329bec15 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -113,9 +113,10 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* vector_gh.push_back(gpair_ptr[1]); } // provide the vectors to the processor 
interface - auto buf = processor_instance->ProcessGHPairs(vector_gh); - buffer_size = buf.size(); - buffer = reinterpret_cast(buf.data()); + size_t size; + auto buf = processor_instance->ProcessGHPairs(size, vector_gh); + buffer_size = size; + buffer = reinterpret_cast(buf); } // broadcast the buffer size for other ranks to prepare @@ -129,8 +130,8 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* collective::Broadcast(buffer, buffer_size, 0); // call HandleGHPairs - xgboost::common::Span buf = xgboost::common::Span(buffer, buffer_size); - processor_instance->HandleGHPairs(buf); + size_t size; + processor_instance->HandleGHPairs(size, buffer, buffer_size); } else { // clear text mode, broadcast the data directly result->Resize(size); diff --git a/src/learner.cc b/src/learner.cc index 4f27516e0ff4..c114fd695168 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -68,7 +68,7 @@ const char* kMaxDeltaStepDefaultValue = "0.7"; } // anonymous namespace DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy); -xgboost::processing::Processor *processor_instance; +processing::Processor *processor_instance; namespace xgboost { Learner::~Learner() = default; namespace { @@ -493,13 +493,12 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); - xgboost::processing::ProcessorLoader loader; - processor_instance = loader.load("dummy"); - if (collective::GetRank() == 0) { - processor_instance->Initialize(true, {}); - } else { - processor_instance->Initialize(false, {}); - } + std::map loader_params = {{"LIBRARY_PATH", "/tmp"}}; + std::map proc_params = {}; + auto plugin_name = "dummy"; + processing::ProcessorLoader loader(loader_params); + processor_instance = loader.load(plugin_name); + processor_instance->Initialize(collective::GetRank() == 0, proc_params); this->need_configuration_ = false; if (ctx_.validate_parameters) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 32d5d8d5a3a7..bc71cecf3570 100644 --- 
a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -79,11 +79,33 @@ class HistogramBuilder { common::RowSetCollection const &row_set_collection, common::Span gpair_h, bool force_read_by_column) { if (is_distributed_ && is_col_split_ && is_secure_) { - // Call the interface to transmit the row set collection and gidx to the secure worker + // Call the interface to transmit gidx information to the secure worker // for encrypted histogram compuation - processor_instance->InitAggregationContext(gidx); + auto slots = std::vector(); + auto num_rows = row_set_collection[0].Size(); + auto cuts = gidx.Cuts().Ptrs(); + for (int row = 0; row < num_rows; row++) { + for (int f = 0; f < cuts.size()-1; f++) { + auto slot = gidx.GetGindex(row, f); + slots.push_back(slot); + } + } + processor_instance->InitAggregationContext(cuts,slots); + // Further use the row set collection info to // get the encrypted histogram from the secure worker - hist_data = processor_instance->ProcessAggregation(nodes_to_build, row_set_collection); + auto node_map = std::map>(); + for (auto node : nodes_to_build) { + auto rows = std::vector(); + auto elem = row_set_collection[node]; + for (auto it = elem.begin; it != elem.end; ++it) { + auto row_id = *it; + rows.push_back(row_id); + } + node_map.insert({node, rows}); + } + size_t buf_size; + auto buf = processor_instance->ProcessAggregation(buf_size, node_map); + hist_data = xgboost::common::Span(static_cast(buf), buf_size); } else { // Parallel processing by nodes and data in each node common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { @@ -218,8 +240,7 @@ class HistogramBuilder { hist_data.data() + hist_data.size()); auto hist_entries = collective::Allgather(hist_vec); // Call interface here to post-process the messages - auto hist_span = common::Span(hist_entries.data(), hist_entries.size()); - std::vector hist_aggr = processor_instance->HandleAggregation(hist_span); + std::vector hist_aggr = 
processor_instance->HandleAggregation(hist_entries.data(), hist_entries.size()); // Update histogram for label owner if (collective::GetRank() == 0) { From 11076044aaed91c676f4ac195c34c55dc40f0b64 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 12 Apr 2024 14:23:12 -0400 Subject: [PATCH 33/55] Move processor interface init from learner to communicator --- src/collective/communicator.cc | 9 ++ src/learner.cc | 11 +- src/processing/plugins/dummy_processor.cc | 139 ++++++++++++++++++++++ src/processing/plugins/dummy_processor.h | 53 +++++++++ src/processing/processor.h | 114 ++++++++++++++++++ src/processing/processor_loader.cc | 62 ++++++++++ 6 files changed, 379 insertions(+), 9 deletions(-) create mode 100755 src/processing/plugins/dummy_processor.cc create mode 100755 src/processing/plugins/dummy_processor.h create mode 100755 src/processing/processor.h create mode 100755 src/processing/processor_loader.cc diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index 7fabe50b465d..aee7c0051325 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -10,6 +10,8 @@ #if defined(XGBOOST_USE_FEDERATED) #include "../../plugin/federated/federated_communicator.h" +#include "../processing/processor.h" +processing::Processor *processor_instance; #endif namespace xgboost::collective { @@ -39,6 +41,13 @@ void Communicator::Init(Json const& config) { case CommunicatorType::kFederated: { #if defined(XGBOOST_USE_FEDERATED) communicator_.reset(FederatedCommunicator::Create(config)); + std::cout << "!!!!!!!! Communicator Initialization!!!!!!!!!!!!!!!!!!!! 
" << std::endl; + auto plugin_name = "dummy"; + std::map loader_params = {{"LIBRARY_PATH", "/tmp"}}; + std::map proc_params = {}; + processing::ProcessorLoader loader(loader_params); + processor_instance = loader.load(plugin_name); + processor_instance->Initialize(collective::GetRank() == 0, proc_params); #else LOG(FATAL) << "XGBoost is not compiled with Federated Learning support."; #endif diff --git a/src/learner.cc b/src/learner.cc index c114fd695168..b8212e2b2868 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -62,13 +62,13 @@ #include "xgboost/predictor.h" // for PredictionContainer, PredictionCacheEntry #include "xgboost/string_view.h" // for operator<<, StringView #include "xgboost/task.h" // for ObjInfo -#include "processing/processor.h" // for Processor + namespace { const char* kMaxDeltaStepDefaultValue = "0.7"; } // anonymous namespace DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy); -processing::Processor *processor_instance; + namespace xgboost { Learner::~Learner() = default; namespace { @@ -493,13 +493,6 @@ class LearnerConfiguration : public Learner { this->ConfigureMetrics(args); - std::map loader_params = {{"LIBRARY_PATH", "/tmp"}}; - std::map proc_params = {}; - auto plugin_name = "dummy"; - processing::ProcessorLoader loader(loader_params); - processor_instance = loader.load(plugin_name); - processor_instance->Initialize(collective::GetRank() == 0, proc_params); - this->need_configuration_ = false; if (ctx_.validate_parameters) { this->ValidateParameters(); diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc new file mode 100755 index 000000000000..e667d4096362 --- /dev/null +++ b/src/processing/plugins/dummy_processor.cc @@ -0,0 +1,139 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#include +#include "./dummy_processor.h" + +using std::vector; +using std::cout; +using std::endl; + +const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 +const int64_t 
kPrefixLen = 24; + +bool ValidDam(void *buffer) { + return memcmp(buffer, kSignature, strlen(kSignature)) == 0; +} + +void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { + cout << "ProcessGHPairs called with pairs size: " << pairs.size() << endl; + + size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x + + int64_t buf_size = size; + // This memory needs to be freed + char *buf = static_cast(calloc(size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &processing::kDataTypeGHPairs, 8); + + // Simulate encryption by duplicating value 10 times + int index = kPrefixLen; + for (auto value : pairs) { + for (int i = 0; i < 10; i++) { + memcpy(buf+index, &value, 8); + index += 8; + } + } + + // Save pairs for future operations + this->gh_pairs_ = new vector(pairs); + + return buf; +} + + +void* DummyProcessor::HandleGHPairs(size_t &size, void *buffer, size_t buf_size) { + cout << "HandleGHPairs called with buffer size: " << buf_size << " Active: " << active_ << endl; + + if (!ValidDam(buffer)) { + cout << "Invalid buffer received" << endl; + return buffer; + } + + // For dummy, this call is used to set gh_pairs for passive sites + if (!active_) { + int8_t *ptr = static_cast(buffer); + ptr += kPrefixLen; + double *pairs = reinterpret_cast(ptr); + size_t num = (buf_size - kPrefixLen) / 8; + gh_pairs_ = new vector(); + for (int i = 0; i < num; i += 10) { + gh_pairs_->push_back(pairs[i]); + } + cout << "GH Pairs saved. 
Size: " << gh_pairs_->size() << endl; + } + + return buffer; +} + +void *DummyProcessor::ProcessAggregation(size_t &size, std::map> nodes) { + auto total_bin_size = cuts_.back(); + auto histo_size = total_bin_size*2; + size = kPrefixLen + 8*histo_size*nodes.size(); + int64_t buf_size = size; + cout << "ProcessAggregation called with bin size: " << total_bin_size << " Buffer Size: " << buf_size << endl; + std::int8_t *buf = static_cast(calloc(buf_size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &processing::kDataTypeHisto, 8); + + double *histo = reinterpret_cast(buf + kPrefixLen); + for ( const auto &node : nodes ) { + auto rows = node.second; + for (const auto &row_id : rows) { + + auto num = cuts_.size() - 1; + for (std::size_t f = 0; f < num; f++) { + auto slot = slots_[f + num*row_id]; + if (slot < 0) { + continue; + } + + if (slot >= total_bin_size) { + cout << "Slot too big, ignored: " << slot << endl; + continue; + } + + if (row_id >= gh_pairs_->size()/2) { + cout << "Row ID too big: " << row_id << endl; + } + + auto g = (*gh_pairs_)[row_id*2]; + auto h = (*gh_pairs_)[row_id*2+1]; + histo[slot*2] += g; + histo[slot*2+1] += h; + } + } + histo += histo_size; + } + + return buf; +} + +std::vector DummyProcessor::HandleAggregation(void *buffer, size_t buf_size) { + cout << "HandleAggregation called with buffer size: " << buf_size << endl; + std::vector result = std::vector(); + + int8_t* ptr = static_cast(buffer); + auto rest_size = buf_size; + + while (rest_size > kPrefixLen) { + if (!ValidDam(ptr)) { + cout << "Invalid buffer at offset " << buf_size - rest_size << endl; + continue; + } + std::int64_t *size_ptr = reinterpret_cast(ptr + 8); + double *array_start = reinterpret_cast(ptr + kPrefixLen); + auto array_size = (*size_ptr - kPrefixLen)/8; + cout << "Histo size for buffer: " << array_size << endl; + result.insert(result.end(), array_start, array_start + array_size); + cout << "Result size: " << 
result.size() << endl; + rest_size -= *size_ptr; + ptr = ptr + *size_ptr; + } + + cout << "Total histo size: " << result.size() << endl; + + return result; +} diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h new file mode 100755 index 000000000000..0116588bdf4d --- /dev/null +++ b/src/processing/plugins/dummy_processor.h @@ -0,0 +1,53 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#pragma once +#include +#include +#include +#include +#include "../processor.h" + +class DummyProcessor: public processing::Processor { + private: + bool active_ = false; + const std::map *params_{nullptr}; + std::vector *gh_pairs_{nullptr}; + std::vector cuts_; + std::vector slots_; + + public: + void Initialize(bool active, std::map params) override { + this->active_ = active; + this->params_ = ¶ms; + } + + void Shutdown() override { + this->gh_pairs_ = nullptr; + this->cuts_.clear(); + this->slots_.clear(); + } + + void FreeBuffer(void *buffer) override { + free(buffer); + } + + void* ProcessGHPairs(size_t &size, std::vector& pairs) override; + + void* HandleGHPairs(size_t &size, void *buffer, size_t buf_size) override; + + void InitAggregationContext(const std::vector &cuts, std::vector &slots) override { + std::cout << "InitAggregationContext called with cuts size: " << cuts.size()-1 << + " number of slot: " << slots.size() << std::endl; + this->cuts_ = cuts; + if (this->slots_.empty()) { + this->slots_ = slots; + } else { + std::cout << "Multiple calls to InitAggregationContext" << std::endl; + } + } + + void *ProcessAggregation(size_t &size, std::map> nodes) override; + + std::vector HandleAggregation(void *buffer, size_t buf_size) override; +}; diff --git a/src/processing/processor.h b/src/processing/processor.h new file mode 100755 index 000000000000..aae83f56abf6 --- /dev/null +++ b/src/processing/processor.h @@ -0,0 +1,114 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#pragma once + +#include 
+#include +#include +#include + +namespace processing { + +const char kLibraryPath[] = "LIBRARY_PATH"; +const char kDummyProcessor[] = "dummy"; +const char kLoadFunc[] = "LoadProcessor"; + +// Data type definition +const int64_t kDataTypeGHPairs = 1; +const int64_t kDataTypeHisto = 2; + +/*! \brief An processor interface to handle tasks that require external library through plugins */ +class Processor { + public: + /*! + * \brief Initialize the processor + * + * \param active If true, this is the active node + * \param params Optional parameters + */ + virtual void Initialize(bool active, std::map params) = 0; + + /*! + * \brief Shutdown the processor and free all the resources + * + */ + virtual void Shutdown() = 0; + + /*! + * \brief Free buffer + * + * \param buffer Any buffer returned by the calls from the plugin + */ + virtual void FreeBuffer(void* buffer) = 0; + + /*! + * \brief Preparing g & h pairs to be sent to other clients by active client + * + * \param size The size of the buffer + * \param pairs g&h pairs in a vector (g1, h1, g2, h2 ...) for every sample + * + * \return The encoded buffer to be sent + */ + virtual void* ProcessGHPairs(size_t &size, std::vector& pairs) = 0; + + /*! + * \brief Handle buffers with encoded pairs received from broadcast + * + * \param size Output buffer size + * \param The encoded buffer + * \param The encoded buffer size + * + * \return The encoded buffer + */ + virtual void* HandleGHPairs(size_t &size, void *buffer, size_t buf_size) = 0; + + /*! + * \brief Initialize aggregation context by providing global GHistIndexMatrix + * + * \param cuts The cut point for each feature + * \param slots The slot assignment in a flattened matrix for each feature/row. The size is num_feature*num_row + */ + virtual void InitAggregationContext(const std::vector &cuts, std::vector &slots) = 0; + + /*! 
+ * \brief Prepare row set for aggregation + * + * \param size The output buffer size + * \param nodes Map of node and the rows belong to this node + * + * \return The encoded buffer to be sent via AllGather + */ + virtual void *ProcessAggregation(size_t &size, std::map> nodes) = 0; + + /*! + * \brief Handle all gather result + * + * \param buffer Buffer from all gather, only buffer from active site is needed + * \param buf_size The size of the buffer + * + * \return A flattened vector of histograms for each site, each node in the form of + * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 + */ + virtual std::vector HandleAggregation(void *buffer, size_t buf_size) = 0; +}; + +class ProcessorLoader { + private: + std::map params; + void *handle = NULL; + + + public: + ProcessorLoader(): params{} {} + + ProcessorLoader(std::map& params): params(params) {} + + Processor* load(const std::string& plugin_name); + + void unload(); +}; + +} // namespace processing + +extern processing::Processor *processor_instance; \ No newline at end of file diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc new file mode 100755 index 000000000000..677797521085 --- /dev/null +++ b/src/processing/processor_loader.cc @@ -0,0 +1,62 @@ +/** + * Copyright 2014-2024 by XGBoost Contributors + */ +#include +#include + +#include "./processor.h" +#include "plugins/dummy_processor.h" + +namespace processing { + using LoadFunc = Processor *(const char *); + + Processor* ProcessorLoader::load(const std::string& plugin_name) { + // Dummy processor for unit testing without loading a shared library + if (plugin_name == kDummyProcessor) { + return new DummyProcessor(); + } + + auto lib_name = "libproc_" + plugin_name; + + auto extension = +#if defined(__APPLE__) || defined(__MACH__) + ".dylib"; +#else + ".so"; +#endif + auto lib_file_name = lib_name + extension; + + std::string lib_path; + + if (params.find(kLibraryPath) == params.end()) { + 
lib_path = lib_file_name; + } else { + auto p = params[kLibraryPath]; + if (p.back() != '/') { + p += '/'; + } + lib_path = p + lib_file_name; + } + + handle = dlopen(lib_path.c_str(), RTLD_LAZY); + if (!handle) { + std::cerr << "Failed to load the dynamic library: " << dlerror() << std::endl; + return NULL; + } + + void* func_ptr = dlsym(handle, kLoadFunc); + + if (!func_ptr) { + std::cerr << "Failed to find loader function: " << dlerror() << std::endl; + return NULL; + } + + auto func = reinterpret_cast(func_ptr); + + return (*func)(plugin_name.c_str()); + } + + void ProcessorLoader::unload() { + dlclose(handle); + } +} // namespace processing From 30b7ed5d5221b4ea86eb3fafc1e20d07603a7319 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 12 Apr 2024 17:18:06 -0400 Subject: [PATCH 34/55] Move processor interface init from learner to communicator functional --- src/collective/communicator.cc | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index aee7c0051325..14be1b036c21 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -41,13 +41,25 @@ void Communicator::Init(Json const& config) { case CommunicatorType::kFederated: { #if defined(XGBOOST_USE_FEDERATED) communicator_.reset(FederatedCommunicator::Create(config)); - std::cout << "!!!!!!!! Communicator Initialization!!!!!!!!!!!!!!!!!!!! 
" << std::endl; - auto plugin_name = "dummy"; - std::map loader_params = {{"LIBRARY_PATH", "/tmp"}}; - std::map proc_params = {}; - processing::ProcessorLoader loader(loader_params); - processor_instance = loader.load(plugin_name); - processor_instance->Initialize(collective::GetRank() == 0, proc_params); + // Get processor configs + std::string plugin_name{}; + std::string loader_params_key{}; + std::string loader_params_map{}; + std::string proc_params_key{}; + std::string proc_params_map{}; + plugin_name = OptionalArg(config, "plugin_name", plugin_name); + loader_params_key = OptionalArg(config, "loader_params_key", loader_params_key); + loader_params_map = OptionalArg(config, "loader_params_map", loader_params_map); + proc_params_key = OptionalArg(config, "proc_params_key", proc_params_key); + proc_params_map = OptionalArg(config, "proc_params_map", proc_params_map); + // Initialize processor if plugin_name is provided + if (!plugin_name.empty()){ + std::map loader_params = {{loader_params_key, loader_params_map}}; + std::map proc_params = {{proc_params_key, proc_params_map}}; + processing::ProcessorLoader loader(loader_params); + processor_instance = loader.load(plugin_name); + processor_instance->Initialize(collective::GetRank() == 0, proc_params); + } #else LOG(FATAL) << "XGBoost is not compiled with Federated Learning support."; #endif From a3ddf7d2e0a236830eb09949bcb9361e3ea43ab5 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 15 Apr 2024 11:00:41 -0400 Subject: [PATCH 35/55] switch to allgatherV for encrypted message with varying lenghts --- src/tree/hist/histogram.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index bc71cecf3570..758f993597b6 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -238,7 +238,7 @@ class HistogramBuilder { // Perform AllGather auto hist_vec = std::vector(hist_data.data(), hist_data.data() + hist_data.size()); - auto 
hist_entries = collective::Allgather(hist_vec); + auto hist_entries = collective::AllgatherV(hist_vec); // Call interface here to post-process the messages std::vector hist_aggr = processor_instance->HandleAggregation(hist_entries.data(), hist_entries.size()); From 3123b51292a3c7cf626585c7527862c38df09074 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 19 Apr 2024 14:30:16 -0400 Subject: [PATCH 36/55] consolidate with processor interface PR --- src/processing/plugins/dummy_processor.cc | 14 ++-- src/processing/plugins/dummy_processor.h | 5 +- src/processing/processor.h | 4 -- src/processing/processor_loader.cc | 0 tests/cpp/processing/test_processor.cc | 80 +++++++++++++++++++++++ 5 files changed, 92 insertions(+), 11 deletions(-) mode change 100755 => 100644 src/processing/plugins/dummy_processor.cc mode change 100755 => 100644 src/processing/plugins/dummy_processor.h mode change 100755 => 100644 src/processing/processor.h mode change 100755 => 100644 src/processing/processor_loader.cc create mode 100644 tests/cpp/processing/test_processor.cc diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc old mode 100755 new mode 100644 index e667d4096362..84510ebc015a --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -2,6 +2,7 @@ * Copyright 2014-2024 by XGBoost Contributors */ #include +#include #include "./dummy_processor.h" using std::vector; @@ -11,8 +12,8 @@ using std::endl; const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 const int64_t kPrefixLen = 24; -bool ValidDam(void *buffer) { - return memcmp(buffer, kSignature, strlen(kSignature)) == 0; +bool ValidDam(void *buffer, size_t size) { + return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; } void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { @@ -25,7 +26,7 @@ void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { char *buf 
= static_cast(calloc(size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &processing::kDataTypeGHPairs, 8); + memcpy(buf + 16, &kDataTypeGHPairs, 8); // Simulate encryption by duplicating value 10 times int index = kPrefixLen; @@ -46,7 +47,8 @@ void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { void* DummyProcessor::HandleGHPairs(size_t &size, void *buffer, size_t buf_size) { cout << "HandleGHPairs called with buffer size: " << buf_size << " Active: " << active_ << endl; - if (!ValidDam(buffer)) { + size = buf_size; + if (!ValidDam(buffer, size)) { cout << "Invalid buffer received" << endl; return buffer; } @@ -76,7 +78,7 @@ void *DummyProcessor::ProcessAggregation(size_t &size, std::map(calloc(buf_size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &processing::kDataTypeHisto, 8); + memcpy(buf + 16, &kDataTypeHisto, 8); double *histo = reinterpret_cast(buf + kPrefixLen); for ( const auto &node : nodes ) { @@ -119,7 +121,7 @@ std::vector DummyProcessor::HandleAggregation(void *buffer, size_t buf_s auto rest_size = buf_size; while (rest_size > kPrefixLen) { - if (!ValidDam(ptr)) { + if (!ValidDam(ptr, rest_size)) { cout << "Invalid buffer at offset " << buf_size - rest_size << endl; continue; } diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h old mode 100755 new mode 100644 index 0116588bdf4d..0692c3a24b41 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/dummy_processor.h @@ -3,11 +3,14 @@ */ #pragma once #include -#include #include #include #include "../processor.h" +// Data type definition +const int64_t kDataTypeGHPairs = 1; +const int64_t kDataTypeHisto = 2; + class DummyProcessor: public processing::Processor { private: bool active_ = false; diff --git a/src/processing/processor.h b/src/processing/processor.h old mode 100755 new mode 100644 index 
aae83f56abf6..3a977d9cfd09 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -14,10 +14,6 @@ const char kLibraryPath[] = "LIBRARY_PATH"; const char kDummyProcessor[] = "dummy"; const char kLoadFunc[] = "LoadProcessor"; -// Data type definition -const int64_t kDataTypeGHPairs = 1; -const int64_t kDataTypeHisto = 2; - /*! \brief An processor interface to handle tasks that require external library through plugins */ class Processor { public: diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc old mode 100755 new mode 100644 diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc new file mode 100644 index 000000000000..65c84837d80f --- /dev/null +++ b/tests/cpp/processing/test_processor.cc @@ -0,0 +1,80 @@ +/*! + * Copyright 2024 XGBoost contributors + */ +#include + +#include "../../../src/processing/processor.h" + + +class ProcessorTest : public testing::Test { + public: + void SetUp() override { + auto loader = processing::ProcessorLoader(); + processor_ = loader.load("dummy"); + processor_->Initialize(true, {}); + } + + void TearDown() override { + processor_->Shutdown(); + processor_ = nullptr; + } + + protected: + processing::Processor *processor_ = nullptr; + + // Test data, 4 Rows, 2 Features + std::vector gh_pairs_ = {1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1}; // 4 Rows, 8 GH Pairs + std::vector cuts_ = {0, 4, 10}; // 2 features, one has 4 bins, another 6 + std::vector slots_ = { + 0, 4, + 1, 9, + 3, 7, + 0, 4 + }; + + std::vector node0_ = {0, 2}; + std::vector node1_ = {1, 3}; + + std::map> nodes_ = {{0, node0_}, {1, node1_}}; +}; + +TEST_F(ProcessorTest, TestLoading) { + auto base_class = dynamic_cast(processor_); + ASSERT_NE(base_class, nullptr); +} + +TEST_F(ProcessorTest, TestGHEncoding) { + size_t buf_size; + auto buffer = processor_->ProcessGHPairs(buf_size, gh_pairs_); + size_t expected_size = 24; // DAM header size + expected_size += gh_pairs_.size()*10*8; // Dummy 
plugin duplicate each number 10x to simulate encryption + ASSERT_EQ(buf_size, expected_size); + + size_t new_size; + auto new_buffer = processor_->HandleGHPairs(new_size, buffer, buf_size); + // Dummy plugin doesn't change buffer + ASSERT_EQ(new_size, buf_size); + ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); +} + +TEST_F(ProcessorTest, TestAggregation) { + size_t buf_size; + processor_->ProcessGHPairs(buf_size, gh_pairs_); // Pass the GH pairs to the plugin + + processor_->InitAggregationContext(cuts_, slots_); + auto buffer = processor_->ProcessAggregation(buf_size, nodes_); + auto histos = processor_->HandleAggregation(buffer, buf_size); + std::vector expected_histos = { + 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, + 0, 0, 0, 0, 5.1, 6.1, 0, 0, 0, 0, + 7.1, 8.1, 3.1, 4.1, 0, 0, 0, 0, 7.1, 8.1, + 0, 0, 0, 0, 0, 0, 0, 0, 3.1, 4.1 + }; + + ASSERT_EQ(expected_histos.size(), histos.size()) << "Histograms have different sizes"; + + for (int i = 0; i < histos.size(); ++i) { + EXPECT_EQ(expected_histos[i], histos[i]) << "Histogram differs at index " << i; + } +} + From 73225a05a6e47156175c3a0bc7a6e6bc5de2c1f3 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 23 Apr 2024 17:43:03 -0400 Subject: [PATCH 37/55] remove prints and fix format --- src/collective/communicator.cc | 47 +++--- src/processing/plugins/dummy_processor.cc | 183 +++++++++------------- src/processing/plugins/dummy_processor.h | 7 +- src/processing/processor.h | 8 +- src/processing/processor_loader.cc | 3 +- src/tree/hist/histogram.h | 6 +- 6 files changed, 115 insertions(+), 139 deletions(-) diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index 14be1b036c21..8f2d103469d0 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -1,6 +1,7 @@ /*! 
* Copyright 2022 XGBoost contributors */ +#include #include "communicator.h" #include "comm.h" @@ -9,9 +10,9 @@ #include "rabit_communicator.h" #if defined(XGBOOST_USE_FEDERATED) -#include "../../plugin/federated/federated_communicator.h" -#include "../processing/processor.h" -processing::Processor *processor_instance; + #include "../../plugin/federated/federated_communicator.h" + #include "../processing/processor.h" + processing::Processor *processor_instance; #endif namespace xgboost::collective { @@ -40,26 +41,26 @@ void Communicator::Init(Json const& config) { } case CommunicatorType::kFederated: { #if defined(XGBOOST_USE_FEDERATED) - communicator_.reset(FederatedCommunicator::Create(config)); - // Get processor configs - std::string plugin_name{}; - std::string loader_params_key{}; - std::string loader_params_map{}; - std::string proc_params_key{}; - std::string proc_params_map{}; - plugin_name = OptionalArg(config, "plugin_name", plugin_name); - loader_params_key = OptionalArg(config, "loader_params_key", loader_params_key); - loader_params_map = OptionalArg(config, "loader_params_map", loader_params_map); - proc_params_key = OptionalArg(config, "proc_params_key", proc_params_key); - proc_params_map = OptionalArg(config, "proc_params_map", proc_params_map); - // Initialize processor if plugin_name is provided - if (!plugin_name.empty()){ - std::map loader_params = {{loader_params_key, loader_params_map}}; - std::map proc_params = {{proc_params_key, proc_params_map}}; - processing::ProcessorLoader loader(loader_params); - processor_instance = loader.load(plugin_name); - processor_instance->Initialize(collective::GetRank() == 0, proc_params); - } + communicator_.reset(FederatedCommunicator::Create(config)); + // Get processor configs + std::string plugin_name{}; + std::string loader_params_key{}; + std::string loader_params_map{}; + std::string proc_params_key{}; + std::string proc_params_map{}; + plugin_name = OptionalArg(config, "plugin_name", plugin_name); + 
loader_params_key = OptionalArg(config, "loader_params_key", loader_params_key); + loader_params_map = OptionalArg(config, "loader_params_map", loader_params_map); + proc_params_key = OptionalArg(config, "proc_params_key", proc_params_key); + proc_params_map = OptionalArg(config, "proc_params_map", proc_params_map); + // Initialize processor if plugin_name is provided + if (!plugin_name.empty()) { + std::map loader_params = {{loader_params_key, loader_params_map}}; + std::map proc_params = {{proc_params_key, proc_params_map}}; + processing::ProcessorLoader loader(loader_params); + processor_instance = loader.load(plugin_name); + processor_instance->Initialize(collective::GetRank() == 0, proc_params); + } #else LOG(FATAL) << "XGBoost is not compiled with Federated Learning support."; #endif diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index 84510ebc015a..be4ed5b24d46 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -5,137 +5,110 @@ #include #include "./dummy_processor.h" -using std::vector; -using std::cout; -using std::endl; - const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 const int64_t kPrefixLen = 24; bool ValidDam(void *buffer, size_t size) { - return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; + return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; } void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { - cout << "ProcessGHPairs called with pairs size: " << pairs.size() << endl; - - size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x - - int64_t buf_size = size; - // This memory needs to be freed - char *buf = static_cast(calloc(size, 1)); - memcpy(buf, kSignature, strlen(kSignature)); - memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &kDataTypeGHPairs, 8); - - // Simulate encryption by duplicating value 10 times - int 
index = kPrefixLen; - for (auto value : pairs) { - for (int i = 0; i < 10; i++) { - memcpy(buf+index, &value, 8); - index += 8; - } + size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x + + int64_t buf_size = size; + // This memory needs to be freed + char *buf = static_cast(calloc(size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &kDataTypeGHPairs, 8); + + // Simulate encryption by duplicating value 10 times + int index = kPrefixLen; + for (auto value : pairs) { + for (int i = 0; i < 10; i++) { + memcpy(buf+index, &value, 8); + index += 8; } + } - // Save pairs for future operations - this->gh_pairs_ = new vector(pairs); + // Save pairs for future operations + this->gh_pairs_ = new std::vector(pairs); - return buf; + return buf; } void* DummyProcessor::HandleGHPairs(size_t &size, void *buffer, size_t buf_size) { - cout << "HandleGHPairs called with buffer size: " << buf_size << " Active: " << active_ << endl; - - size = buf_size; - if (!ValidDam(buffer, size)) { - cout << "Invalid buffer received" << endl; - return buffer; - } - - // For dummy, this call is used to set gh_pairs for passive sites - if (!active_) { - int8_t *ptr = static_cast(buffer); - ptr += kPrefixLen; - double *pairs = reinterpret_cast(ptr); - size_t num = (buf_size - kPrefixLen) / 8; - gh_pairs_ = new vector(); - for (int i = 0; i < num; i += 10) { - gh_pairs_->push_back(pairs[i]); - } - cout << "GH Pairs saved. 
Size: " << gh_pairs_->size() << endl; + size = buf_size; + if (!ValidDam(buffer, size)) { + return buffer; + } + + // For dummy, this call is used to set gh_pairs for passive sites + if (!active_) { + int8_t *ptr = static_cast(buffer); + ptr += kPrefixLen; + double *pairs = reinterpret_cast(ptr); + size_t num = (buf_size - kPrefixLen) / 8; + gh_pairs_ = new std::vector(); + for (int i = 0; i < num; i += 10) { + gh_pairs_->push_back(pairs[i]); } + } - return buffer; + return buffer; } void *DummyProcessor::ProcessAggregation(size_t &size, std::map> nodes) { - auto total_bin_size = cuts_.back(); - auto histo_size = total_bin_size*2; - size = kPrefixLen + 8*histo_size*nodes.size(); - int64_t buf_size = size; - cout << "ProcessAggregation called with bin size: " << total_bin_size << " Buffer Size: " << buf_size << endl; - std::int8_t *buf = static_cast(calloc(buf_size, 1)); - memcpy(buf, kSignature, strlen(kSignature)); - memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &kDataTypeHisto, 8); - - double *histo = reinterpret_cast(buf + kPrefixLen); - for ( const auto &node : nodes ) { - auto rows = node.second; - for (const auto &row_id : rows) { - - auto num = cuts_.size() - 1; - for (std::size_t f = 0; f < num; f++) { - auto slot = slots_[f + num*row_id]; - if (slot < 0) { - continue; - } - - if (slot >= total_bin_size) { - cout << "Slot too big, ignored: " << slot << endl; - continue; - } - - if (row_id >= gh_pairs_->size()/2) { - cout << "Row ID too big: " << row_id << endl; - } - - auto g = (*gh_pairs_)[row_id*2]; - auto h = (*gh_pairs_)[row_id*2+1]; - histo[slot*2] += g; - histo[slot*2+1] += h; - } + auto total_bin_size = cuts_.back(); + auto histo_size = total_bin_size*2; + size = kPrefixLen + 8*histo_size*nodes.size(); + int64_t buf_size = size; + std::int8_t *buf = static_cast(calloc(buf_size, 1)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &kDataTypeHisto, 8); + + double *histo = reinterpret_cast(buf + 
kPrefixLen); + for ( const auto &node : nodes ) { + auto rows = node.second; + for (const auto &row_id : rows) { + auto num = cuts_.size() - 1; + for (std::size_t f = 0; f < num; f++) { + auto slot = slots_[f + num*row_id]; + if ((slot < 0) || (slot >= total_bin_size)) { + continue; } - histo += histo_size; + + auto g = (*gh_pairs_)[row_id*2]; + auto h = (*gh_pairs_)[row_id*2+1]; + histo[slot*2] += g; + histo[slot*2+1] += h; + } } + histo += histo_size; + } - return buf; + return buf; } std::vector DummyProcessor::HandleAggregation(void *buffer, size_t buf_size) { - cout << "HandleAggregation called with buffer size: " << buf_size << endl; - std::vector result = std::vector(); + std::vector result = std::vector(); - int8_t* ptr = static_cast(buffer); - auto rest_size = buf_size; + int8_t* ptr = static_cast(buffer); + auto rest_size = buf_size; - while (rest_size > kPrefixLen) { - if (!ValidDam(ptr, rest_size)) { - cout << "Invalid buffer at offset " << buf_size - rest_size << endl; - continue; - } - std::int64_t *size_ptr = reinterpret_cast(ptr + 8); - double *array_start = reinterpret_cast(ptr + kPrefixLen); - auto array_size = (*size_ptr - kPrefixLen)/8; - cout << "Histo size for buffer: " << array_size << endl; - result.insert(result.end(), array_start, array_start + array_size); - cout << "Result size: " << result.size() << endl; - rest_size -= *size_ptr; - ptr = ptr + *size_ptr; + while (rest_size > kPrefixLen) { + if (!ValidDam(ptr, rest_size)) { + continue; } - - cout << "Total histo size: " << result.size() << endl; - - return result; + std::int64_t *size_ptr = reinterpret_cast(ptr + 8); + double *array_start = reinterpret_cast(ptr + kPrefixLen); + auto array_size = (*size_ptr - kPrefixLen)/8; + result.insert(result.end(), array_start, array_start + array_size); + rest_size -= *size_ptr; + ptr = ptr + *size_ptr; + } + + return result; } diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h index 
0692c3a24b41..d1a680b6144b 100644 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/dummy_processor.h @@ -39,14 +39,11 @@ class DummyProcessor: public processing::Processor { void* HandleGHPairs(size_t &size, void *buffer, size_t buf_size) override; - void InitAggregationContext(const std::vector &cuts, std::vector &slots) override { - std::cout << "InitAggregationContext called with cuts size: " << cuts.size()-1 << - " number of slot: " << slots.size() << std::endl; + void InitAggregationContext(const std::vector &cuts, + std::vector &slots) override { this->cuts_ = cuts; if (this->slots_.empty()) { this->slots_ = slots; - } else { - std::cout << "Multiple calls to InitAggregationContext" << std::endl; } } diff --git a/src/processing/processor.h b/src/processing/processor.h index 3a977d9cfd09..6b2fe28b849c 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -63,9 +63,11 @@ class Processor { * \brief Initialize aggregation context by providing global GHistIndexMatrix * * \param cuts The cut point for each feature - * \param slots The slot assignment in a flattened matrix for each feature/row. The size is num_feature*num_row + * \param slots The slot assignment in a flattened matrix for each feature/row. + * The size is num_feature*num_row */ - virtual void InitAggregationContext(const std::vector &cuts, std::vector &slots) = 0; + virtual void InitAggregationContext(const std::vector &cuts, + std::vector &slots) = 0; /*! 
* \brief Prepare row set for aggregation @@ -107,4 +109,4 @@ class ProcessorLoader { } // namespace processing -extern processing::Processor *processor_instance; \ No newline at end of file +extern processing::Processor *processor_instance; diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc index 677797521085..0d4bda0acefa 100644 --- a/src/processing/processor_loader.cc +++ b/src/processing/processor_loader.cc @@ -1,8 +1,9 @@ /** * Copyright 2014-2024 by XGBoost Contributors */ +#include "dlfcn.h" + #include -#include #include "./processor.h" #include "plugins/dummy_processor.h" diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 758f993597b6..7f30333b64c6 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -10,6 +10,7 @@ #include // for function #include // for move #include // for vector +#include // for map #include "../../collective/communicator-inl.h" // for Allreduce #include "../../collective/communicator.h" // for Operation @@ -90,7 +91,7 @@ class HistogramBuilder { slots.push_back(slot); } } - processor_instance->InitAggregationContext(cuts,slots); + processor_instance->InitAggregationContext(cuts, slots); // Further use the row set collection info to // get the encrypted histogram from the secure worker auto node_map = std::map>(); @@ -240,7 +241,8 @@ class HistogramBuilder { hist_data.data() + hist_data.size()); auto hist_entries = collective::AllgatherV(hist_vec); // Call interface here to post-process the messages - std::vector hist_aggr = processor_instance->HandleAggregation(hist_entries.data(), hist_entries.size()); + std::vector hist_aggr = processor_instance->HandleAggregation( + hist_entries.data(), hist_entries.size()); // Update histogram for label owner if (collective::GetRank() == 0) { From e85b1fb6e817f61af4eff5f89d2e3187039ffaf9 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 10:46:16 -0400 Subject: [PATCH 38/55] fix linting over reference pass --- 
src/collective/aggregator.h | 4 ++-- src/processing/plugins/dummy_processor.cc | 20 ++++++++++---------- src/processing/plugins/dummy_processor.h | 8 ++++---- src/processing/processor.h | 10 +++++----- src/processing/processor_loader.cc | 3 ++- src/tree/hist/histogram.h | 2 +- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index c1f5329bec15..9f9028c8e25b 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -114,7 +114,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* } // provide the vectors to the processor interface size_t size; - auto buf = processor_instance->ProcessGHPairs(size, vector_gh); + auto buf = processor_instance->ProcessGHPairs(&size, vector_gh); buffer_size = size; buffer = reinterpret_cast(buf); } @@ -131,7 +131,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector* // call HandleGHPairs size_t size; - processor_instance->HandleGHPairs(size, buffer, buffer_size); + processor_instance->HandleGHPairs(&size, buffer, buffer_size); } else { // clear text mode, broadcast the data directly result->Resize(size); diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index be4ed5b24d46..271571030b56 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -12,12 +12,12 @@ bool ValidDam(void *buffer, size_t size) { return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; } -void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { - size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x +void* DummyProcessor::ProcessGHPairs(size_t *size, const std::vector& pairs) { + *size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x - int64_t buf_size = size; + int64_t buf_size = *size; // This memory needs to be freed - char *buf = 
static_cast(calloc(size, 1)); + char *buf = static_cast(calloc(*size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); memcpy(buf + 16, &kDataTypeGHPairs, 8); @@ -38,9 +38,9 @@ void* DummyProcessor::ProcessGHPairs(size_t &size, std::vector& pairs) { } -void* DummyProcessor::HandleGHPairs(size_t &size, void *buffer, size_t buf_size) { - size = buf_size; - if (!ValidDam(buffer, size)) { +void* DummyProcessor::HandleGHPairs(size_t *size, void *buffer, size_t buf_size) { + *size = buf_size; + if (!ValidDam(buffer, *size)) { return buffer; } @@ -59,11 +59,11 @@ void* DummyProcessor::HandleGHPairs(size_t &size, void *buffer, size_t buf_size) return buffer; } -void *DummyProcessor::ProcessAggregation(size_t &size, std::map> nodes) { +void *DummyProcessor::ProcessAggregation(size_t *size, std::map> nodes) { auto total_bin_size = cuts_.back(); auto histo_size = total_bin_size*2; - size = kPrefixLen + 8*histo_size*nodes.size(); - int64_t buf_size = size; + *size = kPrefixLen + 8*histo_size*nodes.size(); + int64_t buf_size = *size; std::int8_t *buf = static_cast(calloc(buf_size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/dummy_processor.h index d1a680b6144b..c1690e61e20d 100644 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/dummy_processor.h @@ -35,19 +35,19 @@ class DummyProcessor: public processing::Processor { free(buffer); } - void* ProcessGHPairs(size_t &size, std::vector& pairs) override; + void* ProcessGHPairs(size_t *size, const std::vector& pairs) override; - void* HandleGHPairs(size_t &size, void *buffer, size_t buf_size) override; + void* HandleGHPairs(size_t *size, void *buffer, size_t buf_size) override; void InitAggregationContext(const std::vector &cuts, - std::vector &slots) override { + const std::vector &slots) override { this->cuts_ = cuts; if (this->slots_.empty()) { this->slots_ = 
slots; } } - void *ProcessAggregation(size_t &size, std::map> nodes) override; + void *ProcessAggregation(size_t *size, std::map> nodes) override; std::vector HandleAggregation(void *buffer, size_t buf_size) override; }; diff --git a/src/processing/processor.h b/src/processing/processor.h index 6b2fe28b849c..7927fc5015b9 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -46,7 +46,7 @@ class Processor { * * \return The encoded buffer to be sent */ - virtual void* ProcessGHPairs(size_t &size, std::vector& pairs) = 0; + virtual void* ProcessGHPairs(size_t *size, const std::vector& pairs) = 0; /*! * \brief Handle buffers with encoded pairs received from broadcast @@ -57,7 +57,7 @@ class Processor { * * \return The encoded buffer */ - virtual void* HandleGHPairs(size_t &size, void *buffer, size_t buf_size) = 0; + virtual void* HandleGHPairs(size_t *size, void *buffer, size_t buf_size) = 0; /*! * \brief Initialize aggregation context by providing global GHistIndexMatrix @@ -67,7 +67,7 @@ class Processor { * The size is num_feature*num_row */ virtual void InitAggregationContext(const std::vector &cuts, - std::vector &slots) = 0; + const std::vector &slots) = 0; /*! * \brief Prepare row set for aggregation @@ -77,7 +77,7 @@ class Processor { * * \return The encoded buffer to be sent via AllGather */ - virtual void *ProcessAggregation(size_t &size, std::map> nodes) = 0; + virtual void *ProcessAggregation(size_t *size, std::map> nodes) = 0; /*! 
* \brief Handle all gather result @@ -100,7 +100,7 @@ class ProcessorLoader { public: ProcessorLoader(): params{} {} - ProcessorLoader(std::map& params): params(params) {} + explicit ProcessorLoader(const std::map& params): params(params) {} Processor* load(const std::string& plugin_name); diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc index 0d4bda0acefa..89c6964a8a89 100644 --- a/src/processing/processor_loader.cc +++ b/src/processing/processor_loader.cc @@ -1,10 +1,11 @@ /** * Copyright 2014-2024 by XGBoost Contributors */ -#include "dlfcn.h" #include +#include "dlfcn.h" + #include "./processor.h" #include "plugins/dummy_processor.h" diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 7f30333b64c6..4477897f0c7c 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -105,7 +105,7 @@ class HistogramBuilder { node_map.insert({node, rows}); } size_t buf_size; - auto buf = processor_instance->ProcessAggregation(buf_size, node_map); + auto buf = processor_instance->ProcessAggregation(&buf_size, node_map); hist_data = xgboost::common::Span(static_cast(buf), buf_size); } else { // Parallel processing by nodes and data in each node From 57750b407e0944139b8ec703dca19274e60dda3e Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 11:24:41 -0400 Subject: [PATCH 39/55] fix undefined symbol issue --- src/collective/communicator.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index 8f2d103469d0..e762be7e3208 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -11,10 +11,11 @@ #if defined(XGBOOST_USE_FEDERATED) #include "../../plugin/federated/federated_communicator.h" - #include "../processing/processor.h" - processing::Processor *processor_instance; #endif +#include "../processing/processor.h" +processing::Processor *processor_instance; + namespace xgboost::collective { 
thread_local std::unique_ptr Communicator::communicator_{new NoOpCommunicator()}; thread_local CommunicatorType Communicator::type_{}; From fa2665ac6e62f06f599b6068026f58463fd893d5 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 11:54:48 -0400 Subject: [PATCH 40/55] fix processor test --- tests/cpp/processing/test_processor.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 65c84837d80f..2198ff4ad4bd 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -5,7 +5,6 @@ #include "../../../src/processing/processor.h" - class ProcessorTest : public testing::Test { public: void SetUp() override { @@ -45,13 +44,13 @@ TEST_F(ProcessorTest, TestLoading) { TEST_F(ProcessorTest, TestGHEncoding) { size_t buf_size; - auto buffer = processor_->ProcessGHPairs(buf_size, gh_pairs_); + auto buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); size_t expected_size = 24; // DAM header size expected_size += gh_pairs_.size()*10*8; // Dummy plugin duplicate each number 10x to simulate encryption ASSERT_EQ(buf_size, expected_size); size_t new_size; - auto new_buffer = processor_->HandleGHPairs(new_size, buffer, buf_size); + auto new_buffer = processor_->HandleGHPairs(&new_size, buffer, buf_size); // Dummy plugin doesn't change buffer ASSERT_EQ(new_size, buf_size); ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); @@ -59,10 +58,10 @@ TEST_F(ProcessorTest, TestGHEncoding) { TEST_F(ProcessorTest, TestAggregation) { size_t buf_size; - processor_->ProcessGHPairs(buf_size, gh_pairs_); // Pass the GH pairs to the plugin + processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin processor_->InitAggregationContext(cuts_, slots_); - auto buffer = processor_->ProcessAggregation(buf_size, nodes_); + auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); auto histos = 
processor_->HandleAggregation(buffer, buf_size); std::vector expected_histos = { 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, From 87d2fdb20bd7ae6c5de2766560939b3f92e521ab Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 15:12:19 -0400 Subject: [PATCH 41/55] secure vertical relies on processor, move the unit test --- tests/cpp/tree/hist/test_histogram.cc | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index eea6b1d6a434..481ae18c180c 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -228,7 +228,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ Context ctx; auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); - if (is_col_split && !is_secure) { + if (is_col_split) { p_fmat = std::shared_ptr{ p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; } @@ -254,7 +254,6 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ row_indices.resize(kNRows); std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); - CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)}; std::vector nodes_to_build{node.nid}; std::vector dummy_sub; @@ -288,23 +287,13 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ GradientPairPrecise sol = histogram_expected[i]; double grad = sol.GetGrad(); double hess = sol.GetHess(); - if (is_distributed && (!is_col_split || (is_secure && is_col_split))) { - // the solution also needs to be allreduce - collective::Allreduce(&grad, 1); - collective::Allreduce(&hess, 1); - } if (is_distributed && !is_col_split) { // row split, all party holds the same data - ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); - ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); - } - if (is_distributed && 
is_col_split && is_secure) { - // secure col split, only rank 0 holds the global histogram - if (collective::GetRank() == 0) { - ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); - ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); - } + collective::Allreduce(&grad, 1); + collective::Allreduce(&hess, 1); } + ASSERT_NEAR(grad, histogram.Histogram()[nid][i].GetGrad(), kEps); + ASSERT_NEAR(hess, histogram.Histogram()[nid][i].GetHess(), kEps); } } @@ -327,11 +316,6 @@ TEST(CPUHistogram, BuildHistDistColSplit) { RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true, false); } -TEST(CPUHistogram, BuildHistDistColSplitSecure) { - auto constexpr kWorkers = 4; - RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true, true); - RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true, true); -} namespace { template From 9941293a27f4e392b81399391cd35a16dfdfed2c Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 15:51:42 -0400 Subject: [PATCH 42/55] type correction --- tests/cpp/processing/test_processor.cc | 98 +++++++++++++------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 2198ff4ad4bd..0cefcc65bd9f 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -7,73 +7,73 @@ class ProcessorTest : public testing::Test { public: - void SetUp() override { - auto loader = processing::ProcessorLoader(); - processor_ = loader.load("dummy"); - processor_->Initialize(true, {}); - } + void SetUp() override { + auto loader = processing::ProcessorLoader(); + processor_ = loader.load("dummy"); + processor_->Initialize(true, {}); + } - void TearDown() override { - processor_->Shutdown(); - processor_ = nullptr; - } + void TearDown() override { + processor_->Shutdown(); + processor_ = nullptr; + } protected: - processing::Processor 
*processor_ = nullptr; + processing::Processor *processor_ = nullptr; - // Test data, 4 Rows, 2 Features - std::vector gh_pairs_ = {1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1}; // 4 Rows, 8 GH Pairs - std::vector cuts_ = {0, 4, 10}; // 2 features, one has 4 bins, another 6 - std::vector slots_ = { - 0, 4, - 1, 9, - 3, 7, - 0, 4 - }; + // Test data, 4 Rows, 2 Features + std::vector gh_pairs_ = {1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1}; // 4 Rows, 8 GH Pairs + std::vector cuts_ = {0, 4, 10}; // 2 features, one has 4 bins, another 6 + std::vector slots_ = { + 0, 4, + 1, 9, + 3, 7, + 0, 4 + }; - std::vector node0_ = {0, 2}; - std::vector node1_ = {1, 3}; + std::vector node0_ = {0, 2}; + std::vector node1_ = {1, 3}; - std::map> nodes_ = {{0, node0_}, {1, node1_}}; + std::map> nodes_ = {{0, node0_}, {1, node1_}}; }; TEST_F(ProcessorTest, TestLoading) { - auto base_class = dynamic_cast(processor_); - ASSERT_NE(base_class, nullptr); + auto base_class = dynamic_cast(processor_); + ASSERT_NE(base_class, nullptr); } TEST_F(ProcessorTest, TestGHEncoding) { - size_t buf_size; - auto buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); - size_t expected_size = 24; // DAM header size - expected_size += gh_pairs_.size()*10*8; // Dummy plugin duplicate each number 10x to simulate encryption - ASSERT_EQ(buf_size, expected_size); + size_t buf_size; + auto buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); + size_t expected_size = 24; // DAM header size + expected_size += gh_pairs_.size()*10*8; // Dummy plugin duplicate each number 10x to simulate encryption + ASSERT_EQ(buf_size, expected_size); - size_t new_size; - auto new_buffer = processor_->HandleGHPairs(&new_size, buffer, buf_size); - // Dummy plugin doesn't change buffer - ASSERT_EQ(new_size, buf_size); - ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); + size_t new_size; + auto new_buffer = processor_->HandleGHPairs(&new_size, buffer, buf_size); + // Dummy plugin doesn't change buffer + ASSERT_EQ(new_size, 
buf_size); + ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); } TEST_F(ProcessorTest, TestAggregation) { - size_t buf_size; - processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin + size_t buf_size; + processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin - processor_->InitAggregationContext(cuts_, slots_); - auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); - auto histos = processor_->HandleAggregation(buffer, buf_size); - std::vector expected_histos = { - 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, - 0, 0, 0, 0, 5.1, 6.1, 0, 0, 0, 0, - 7.1, 8.1, 3.1, 4.1, 0, 0, 0, 0, 7.1, 8.1, - 0, 0, 0, 0, 0, 0, 0, 0, 3.1, 4.1 - }; + processor_->InitAggregationContext(cuts_, slots_); + auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); + auto histos = processor_->HandleAggregation(buffer, buf_size); + std::vector expected_histos = { + 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, + 0, 0, 0, 0, 5.1, 6.1, 0, 0, 0, 0, + 7.1, 8.1, 3.1, 4.1, 0, 0, 0, 0, 7.1, 8.1, + 0, 0, 0, 0, 0, 0, 0, 0, 3.1, 4.1 + }; - ASSERT_EQ(expected_histos.size(), histos.size()) << "Histograms have different sizes"; + ASSERT_EQ(expected_histos.size(), histos.size()) << "Histograms have different sizes"; - for (int i = 0; i < histos.size(); ++i) { - EXPECT_EQ(expected_histos[i], histos[i]) << "Histogram differs at index " << i; - } + for (size_t i = 0; i < histos.size(); ++i) { + EXPECT_EQ(expected_histos[i], histos[i]) << "Histogram differs at index " << i; + } } From dd4f44027ccf1a866a1c98250475bf5978bc6f7e Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 16:17:35 -0400 Subject: [PATCH 43/55] type correction --- src/tree/hist/histogram.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 4477897f0c7c..55c0d66283ef 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -85,8 +85,8 @@ class 
HistogramBuilder { auto slots = std::vector(); auto num_rows = row_set_collection[0].Size(); auto cuts = gidx.Cuts().Ptrs(); - for (int row = 0; row < num_rows; row++) { - for (int f = 0; f < cuts.size()-1; f++) { + for (std::size_t row = 0; row < num_rows; row++) { + for (std::size_t f = 0; f < cuts.size()-1; f++) { auto slot = gidx.GetGindex(row, f); slots.push_back(slot); } @@ -104,12 +104,12 @@ class HistogramBuilder { } node_map.insert({node, rows}); } - size_t buf_size; + std::size_t buf_size; auto buf = processor_instance->ProcessAggregation(&buf_size, node_map); hist_data = xgboost::common::Span(static_cast(buf), buf_size); } else { // Parallel processing by nodes and data in each node - common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { + common::ParallelFor2d(space, this->n_threads_, [&](std::size_t nid_in_set, common::Range1d r) { const auto tid = static_cast(omp_get_thread_num()); bst_node_t const nidx = nodes_to_build[nid_in_set]; auto elem = row_set_collection[nidx]; @@ -190,7 +190,7 @@ class HistogramBuilder { // Add the local histogram cache to the parallel buffer before processing the first page. auto n_nodes = nodes_to_build.size(); std::vector target_hists(n_nodes); - for (size_t i = 0; i < n_nodes; ++i) { + for (std::size_t i = 0; i < n_nodes; ++i) { auto const nidx = nodes_to_build[i]; target_hists[i] = hist_[nidx]; } @@ -213,7 +213,7 @@ class HistogramBuilder { common::BlockedSpace2d space( nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024); - common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) { + common::ParallelFor2d(space, this->n_threads_, [&](std::size_t node, common::Range1d r) { // Merging histograms from each thread. 
this->buffer_.ReduceHist(node, r.begin(), r.end()); }); @@ -249,7 +249,7 @@ class HistogramBuilder { // iterator of the beginning of the vector auto it = reinterpret_cast(this->hist_[first_nidx].data()); // iterate through the hist vector of the label owner - for (size_t i = 0; i < n; i++) { + for (std::size_t i = 0; i < n; i++) { // get the sum of the entries from all ranks double hist_sum = 0.0; for (std::size_t rank_idx = 0; rank_idx < hist_aggr.size()/n; rank_idx++) { @@ -297,7 +297,7 @@ common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners, // has significant variance. std::vector partition_size(nodes_to_build.size(), 0); for (auto const &partition : partitioners) { - size_t k = 0; + std::size_t k = 0; for (auto nidx : nodes_to_build) { auto n_rows_in_node = partition.Partitions()[nidx].Size(); partition_size[k] = std::max(partition_size[k], n_rows_in_node); @@ -305,7 +305,7 @@ common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners, } } common::BlockedSpace2d space{ - nodes_to_build.size(), [&](size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256}; + nodes_to_build.size(), [&](std::size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256}; return space; } From 5b2dfe6a3b8bead475ffd42f02084d1029dabb7a Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 24 Apr 2024 16:22:31 -0400 Subject: [PATCH 44/55] extra linting from last change --- src/tree/hist/histogram.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 55c0d66283ef..d34c43cd4565 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -109,7 +109,8 @@ class HistogramBuilder { hist_data = xgboost::common::Span(static_cast(buf), buf_size); } else { // Parallel processing by nodes and data in each node - common::ParallelFor2d(space, this->n_threads_, [&](std::size_t nid_in_set, common::Range1d r) { + common::ParallelFor2d(space, this->n_threads_, + 
[&](std::size_t nid_in_set, common::Range1d r) { const auto tid = static_cast(omp_get_thread_num()); bst_node_t const nidx = nodes_to_build[nid_in_set]; auto elem = row_set_collection[nidx]; @@ -305,7 +306,8 @@ common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners, } } common::BlockedSpace2d space{ - nodes_to_build.size(), [&](std::size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256}; + nodes_to_build.size(), + [&](std::size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256}; return space; } From 80d3b89c4754216af4325fadbfa37b4c1b5b190b Mon Sep 17 00:00:00 2001 From: Zhihong Zhang Date: Wed, 24 Apr 2024 17:42:39 -0400 Subject: [PATCH 45/55] Added Windows support --- src/processing/processor.h | 7 ++++-- src/processing/processor_loader.cc | 40 ++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/processing/processor.h b/src/processing/processor.h index 7927fc5015b9..66de650f3216 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -94,8 +94,11 @@ class Processor { class ProcessorLoader { private: std::map params; - void *handle = NULL; - +#if defined(_WIN32) + HMODULE handle_ = NULL; +#else + void *handle_ = NULL; +#endif public: ProcessorLoader(): params{} {} diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc index 89c6964a8a89..6bb0480cb2b7 100644 --- a/src/processing/processor_loader.cc +++ b/src/processing/processor_loader.cc @@ -4,7 +4,11 @@ #include -#include "dlfcn.h" +#if defined(_WIN32) +#include +#else +#include +#endif #include "./processor.h" #include "plugins/dummy_processor.h" @@ -21,7 +25,9 @@ namespace processing { auto lib_name = "libproc_" + plugin_name; auto extension = -#if defined(__APPLE__) || defined(__MACH__) +#if defined(_WIN32) + ".dll"; +#elif defined(__APPLE__) || defined(__MACH__) ".dylib"; #else ".so"; @@ -34,31 +40,49 @@ namespace processing { lib_path = lib_file_name; } else { auto p = 
params[kLibraryPath]; - if (p.back() != '/') { + if (p.back() != '/' && p.back() != '\\') { p += '/'; } lib_path = p + lib_file_name; } - handle = dlopen(lib_path.c_str(), RTLD_LAZY); - if (!handle) { - std::cerr << "Failed to load the dynamic library: " << dlerror() << std::endl; +#if defined(_WIN32) + HMODULE handle_ = LoadLibrary(lib_path.c_str()); + if (!handle_) { + std::cerr << "Failed to load the dynamic library" << std::endl; return NULL; } - void* func_ptr = dlsym(handle, kLoadFunc); + void* func_ptr = GetProcAddress(handle_, kLoadFunc); + if (!func_ptr) { + std::cerr << "Failed to find loader function." << std::endl; + return NULL; + } +#else + handle_ = dlopen(lib_path.c_str(), RTLD_LAZY); + if (!handle_) { + std::cerr << "Failed to load the dynamic library: " << dlerror() << std::endl; + return NULL; + } + void* func_ptr = dlsym(handle_, kLoadFunc); if (!func_ptr) { std::cerr << "Failed to find loader function: " << dlerror() << std::endl; return NULL; } +#endif + auto func = reinterpret_cast(func_ptr); return (*func)(plugin_name.c_str()); } void ProcessorLoader::unload() { - dlclose(handle); +#if defined(_WIN32) + FreeLibrary(handle_); +#else + dlclose(handle_); +#endif } } // namespace processing From 338270774aab6bc2b563c09b3c246785fe146e14 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 25 Apr 2024 10:06:01 -0400 Subject: [PATCH 46/55] fix for cstdint types --- src/processing/plugins/dummy_processor.cc | 21 +++++++++++---------- src/processing/processor.h | 3 ++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index 271571030b56..1c2d45580c39 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -3,16 +3,17 @@ */ #include #include +#include #include "./dummy_processor.h" const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 const int64_t kPrefixLen = 24; -bool 
ValidDam(void *buffer, size_t size) { +bool ValidDam(void *buffer, std::size_t size) { return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; } -void* DummyProcessor::ProcessGHPairs(size_t *size, const std::vector& pairs) { +void* DummyProcessor::ProcessGHPairs(std::size_t *size, const std::vector& pairs) { *size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x int64_t buf_size = *size; @@ -25,7 +26,7 @@ void* DummyProcessor::ProcessGHPairs(size_t *size, const std::vector& pa // Simulate encryption by duplicating value 10 times int index = kPrefixLen; for (auto value : pairs) { - for (int i = 0; i < 10; i++) { + for (std::size_t i = 0; i < 10; i++) { memcpy(buf+index, &value, 8); index += 8; } @@ -38,7 +39,7 @@ void* DummyProcessor::ProcessGHPairs(size_t *size, const std::vector& pa } -void* DummyProcessor::HandleGHPairs(size_t *size, void *buffer, size_t buf_size) { +void* DummyProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t buf_size) { *size = buf_size; if (!ValidDam(buffer, *size)) { return buffer; @@ -49,9 +50,9 @@ void* DummyProcessor::HandleGHPairs(size_t *size, void *buffer, size_t buf_size) int8_t *ptr = static_cast(buffer); ptr += kPrefixLen; double *pairs = reinterpret_cast(ptr); - size_t num = (buf_size - kPrefixLen) / 8; + std::size_t num = (buf_size - kPrefixLen) / 8; gh_pairs_ = new std::vector(); - for (int i = 0; i < num; i += 10) { + for (std::size_t i = 0; i < num; i += 10) { gh_pairs_->push_back(pairs[i]); } } @@ -59,12 +60,12 @@ void* DummyProcessor::HandleGHPairs(size_t *size, void *buffer, size_t buf_size) return buffer; } -void *DummyProcessor::ProcessAggregation(size_t *size, std::map> nodes) { +void *DummyProcessor::ProcessAggregation(std::size_t *size, std::map> nodes) { auto total_bin_size = cuts_.back(); auto histo_size = total_bin_size*2; *size = kPrefixLen + 8*histo_size*nodes.size(); int64_t buf_size = *size; - std::int8_t *buf = static_cast(calloc(buf_size, 1)); + 
int8_t *buf = static_cast(calloc(buf_size, 1)); memcpy(buf, kSignature, strlen(kSignature)); memcpy(buf + 8, &buf_size, 8); memcpy(buf + 16, &kDataTypeHisto, 8); @@ -92,7 +93,7 @@ void *DummyProcessor::ProcessAggregation(size_t *size, std::map DummyProcessor::HandleAggregation(void *buffer, size_t buf_size) { +std::vector DummyProcessor::HandleAggregation(void *buffer, std::size_t buf_size) { std::vector result = std::vector(); int8_t* ptr = static_cast(buffer); @@ -102,7 +103,7 @@ std::vector DummyProcessor::HandleAggregation(void *buffer, size_t buf_s if (!ValidDam(ptr, rest_size)) { continue; } - std::int64_t *size_ptr = reinterpret_cast(ptr + 8); + int64_t *size_ptr = reinterpret_cast(ptr + 8); double *array_start = reinterpret_cast(ptr + kPrefixLen); auto array_size = (*size_ptr - kPrefixLen)/8; result.insert(result.end(), array_start, array_start + array_size); diff --git a/src/processing/processor.h b/src/processing/processor.h index 66de650f3216..14cce65b087e 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -3,8 +3,9 @@ */ #pragma once -#include #include +#include +#include #include #include From 2a8f19a046a14b4370244a82db8fac55dfdc2119 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 25 Apr 2024 11:00:19 -0400 Subject: [PATCH 47/55] fix for cstdint types --- src/processing/plugins/dummy_processor.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/dummy_processor.cc index 1c2d45580c39..f04d70a3338d 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/dummy_processor.cc @@ -61,8 +61,8 @@ void* DummyProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t } void *DummyProcessor::ProcessAggregation(std::size_t *size, std::map> nodes) { - auto total_bin_size = cuts_.back(); - auto histo_size = total_bin_size*2; + int total_bin_size = cuts_.back(); + int histo_size = total_bin_size*2; *size = 
kPrefixLen + 8*histo_size*nodes.size(); int64_t buf_size = *size; int8_t *buf = static_cast(calloc(buf_size, 1)); @@ -76,7 +76,7 @@ void *DummyProcessor::ProcessAggregation(std::size_t *size, std::map= total_bin_size)) { continue; } From 9ff293595a26dba1cdf09e85afb6521b10614197 Mon Sep 17 00:00:00 2001 From: Zhihong Zhang Date: Thu, 25 Apr 2024 13:20:18 -0400 Subject: [PATCH 48/55] Added support for horizontal secure XGBoost --- .../{dummy_processor.cc => mock_processor.cc} | 73 ++++++++- .../{dummy_processor.h => mock_processor.h} | 7 +- src/processing/processor.h | 28 +++- src/processing/processor_loader.cc | 18 +-- tests/cpp/processing/test_processor.cc | 146 +++++++++++------- 5 files changed, 192 insertions(+), 80 deletions(-) rename src/processing/plugins/{dummy_processor.cc => mock_processor.cc} (55%) rename src/processing/plugins/{dummy_processor.h => mock_processor.h} (84%) diff --git a/src/processing/plugins/dummy_processor.cc b/src/processing/plugins/mock_processor.cc similarity index 55% rename from src/processing/plugins/dummy_processor.cc rename to src/processing/plugins/mock_processor.cc index f04d70a3338d..2943c2aea284 100644 --- a/src/processing/plugins/dummy_processor.cc +++ b/src/processing/plugins/mock_processor.cc @@ -4,7 +4,7 @@ #include #include #include -#include "./dummy_processor.h" +#include "./mock_processor.h" const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1 const int64_t kPrefixLen = 24; @@ -13,7 +13,7 @@ bool ValidDam(void *buffer, std::size_t size) { return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0; } -void* DummyProcessor::ProcessGHPairs(std::size_t *size, const std::vector& pairs) { +void* MockProcessor::ProcessGHPairs(std::size_t *size, const std::vector& pairs) { *size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x int64_t buf_size = *size; @@ -39,13 +39,13 @@ void* DummyProcessor::ProcessGHPairs(std::size_t *size, const std::vector(buffer); ptr += 
kPrefixLen; @@ -60,7 +60,7 @@ void* DummyProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t return buffer; } -void *DummyProcessor::ProcessAggregation(std::size_t *size, std::map> nodes) { +void *MockProcessor::ProcessAggregation(std::size_t *size, std::map> nodes) { int total_bin_size = cuts_.back(); int histo_size = total_bin_size*2; *size = kPrefixLen + 8*histo_size*nodes.size(); @@ -93,7 +93,7 @@ void *DummyProcessor::ProcessAggregation(std::size_t *size, std::map DummyProcessor::HandleAggregation(void *buffer, std::size_t buf_size) { +std::vector MockProcessor::HandleAggregation(void *buffer, std::size_t buf_size) { std::vector result = std::vector(); int8_t* ptr = static_cast(buffer); @@ -101,7 +101,7 @@ std::vector DummyProcessor::HandleAggregation(void *buffer, std::size_t while (rest_size > kPrefixLen) { if (!ValidDam(ptr, rest_size)) { - continue; + break; } int64_t *size_ptr = reinterpret_cast(ptr + 8); double *array_start = reinterpret_cast(ptr + kPrefixLen); @@ -113,3 +113,62 @@ std::vector DummyProcessor::HandleAggregation(void *buffer, std::size_t return result; } + +void* MockProcessor::ProcessHistograms(std::size_t *size, const std::vector& histograms) { + *size = kPrefixLen + histograms.size()*10*8; // Assume encrypted size is 10x + + int64_t buf_size = *size; + // This memory needs to be freed + char *buf = static_cast(malloc(buf_size)); + memcpy(buf, kSignature, strlen(kSignature)); + memcpy(buf + 8, &buf_size, 8); + memcpy(buf + 16, &kDataTypeAggregatedHisto, 8); + + // Simulate encryption by duplicating value 10 times + int index = kPrefixLen; + for (auto value : histograms) { + for (std::size_t i = 0; i < 10; i++) { + memcpy(buf+index, &value, 8); + index += 8; + } + } + + return buf; +} + +std::vector MockProcessor::HandleHistograms(void *buffer, std::size_t buf_size) { + std::vector result = std::vector(); + + int8_t* ptr = static_cast(buffer); + auto rest_size = buf_size; + + while (rest_size > kPrefixLen) { + if 
(!ValidDam(ptr, rest_size)) { + break; + } + int64_t *size_ptr = reinterpret_cast(ptr + 8); + double *array_start = reinterpret_cast(ptr + kPrefixLen); + auto array_size = (*size_ptr - kPrefixLen)/8; + auto empty = result.empty(); + if (!empty) { + if (result.size() != array_size / 10) { + std::cout << "Histogram size doesn't match " << result.size() << " != " << array_size << std::endl; + return result; + } + } + + for (std::size_t i = 0; i < array_size/10; i++) { + auto value = array_start[i*10]; + if (empty) { + result.push_back(value); + } else { + result[i] += value; + } + } + + rest_size -= *size_ptr; + ptr = ptr + *size_ptr; + } + + return result; +} diff --git a/src/processing/plugins/dummy_processor.h b/src/processing/plugins/mock_processor.h similarity index 84% rename from src/processing/plugins/dummy_processor.h rename to src/processing/plugins/mock_processor.h index c1690e61e20d..4045280edf83 100644 --- a/src/processing/plugins/dummy_processor.h +++ b/src/processing/plugins/mock_processor.h @@ -10,8 +10,9 @@ // Data type definition const int64_t kDataTypeGHPairs = 1; const int64_t kDataTypeHisto = 2; +const int64_t kDataTypeAggregatedHisto = 3; -class DummyProcessor: public processing::Processor { +class MockProcessor: public processing::Processor { private: bool active_ = false; const std::map *params_{nullptr}; @@ -50,4 +51,8 @@ class DummyProcessor: public processing::Processor { void *ProcessAggregation(size_t *size, std::map> nodes) override; std::vector HandleAggregation(void *buffer, size_t buf_size) override; + + void *ProcessHistograms(size_t *size, const std::vector& histograms) override; + + std::vector HandleHistograms(void *buffer, size_t buf_size) override; }; diff --git a/src/processing/processor.h b/src/processing/processor.h index 14cce65b087e..a6994ef9f5b7 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -12,7 +12,7 @@ namespace processing { const char kLibraryPath[] = "LIBRARY_PATH"; -const char 
kDummyProcessor[] = "dummy"; +const char kMockProcessor[] = "mock"; const char kLoadFunc[] = "LoadProcessor"; /*! \brief An processor interface to handle tasks that require external library through plugins */ @@ -76,7 +76,7 @@ class Processor { * \param size The output buffer size * \param nodes Map of node and the rows belong to this node * - * \return The encoded buffer to be sent via AllGather + * \return The encoded buffer to be sent via AllGatherV */ virtual void *ProcessAggregation(size_t *size, std::map> nodes) = 0; @@ -90,16 +90,32 @@ class Processor { * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 */ virtual std::vector HandleAggregation(void *buffer, size_t buf_size) = 0; + + /*! + * \brief Prepare histograms for further processing + * + * \param size The output buffer size + * \param histograms Flattened array of histograms for all features + * + * \return The encoded buffer to be sent via AllGatherV + */ + virtual void *ProcessHistograms(size_t *size, const std::vector& histograms) = 0; + + /*! 
+ * \brief Handle processed histograms + * + * \param buffer Buffer from allgatherV + * \param buf_size The size of the buffer + * + * \return A flattened vector of histograms for all features + */ + virtual std::vector HandleHistograms(void *buffer, size_t buf_size) = 0; }; class ProcessorLoader { private: std::map params; -#if defined(_WIN32) - HMODULE handle_ = NULL; -#else void *handle_ = NULL; -#endif public: ProcessorLoader(): params{} {} diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc index 6bb0480cb2b7..00bb49d03cbe 100644 --- a/src/processing/processor_loader.cc +++ b/src/processing/processor_loader.cc @@ -4,28 +4,28 @@ #include -#if defined(_WIN32) +#if defined(_WIN32) || defined(_WIN64) #include #else #include #endif #include "./processor.h" -#include "plugins/dummy_processor.h" +#include "plugins/mock_processor.h" namespace processing { using LoadFunc = Processor *(const char *); Processor* ProcessorLoader::load(const std::string& plugin_name) { // Dummy processor for unit testing without loading a shared library - if (plugin_name == kDummyProcessor) { - return new DummyProcessor(); + if (plugin_name == kMockProcessor) { + return new MockProcessor(); } auto lib_name = "libproc_" + plugin_name; auto extension = -#if defined(_WIN32) +#if defined(_WIN32) || defined(_WIN64) ".dll"; #elif defined(__APPLE__) || defined(__MACH__) ".dylib"; @@ -46,19 +46,18 @@ namespace processing { lib_path = p + lib_file_name; } -#if defined(_WIN32) - HMODULE handle_ = LoadLibrary(lib_path.c_str()); +#if defined(_WIN32) || defined(_WIN64) + handle_ = reinterpret_cast(LoadLibrary(lib_path.c_str())); if (!handle_) { std::cerr << "Failed to load the dynamic library" << std::endl; return NULL; } - void* func_ptr = GetProcAddress(handle_, kLoadFunc); + void* func_ptr = reinterpret_cast(GetProcAddress((HMODULE)handle_, kLoadFunc)); if (!func_ptr) { std::cerr << "Failed to find loader function." 
<< std::endl; return NULL; } - #else handle_ = dlopen(lib_path.c_str(), RTLD_LAZY); if (!handle_) { @@ -70,7 +69,6 @@ namespace processing { std::cerr << "Failed to find loader function: " << dlerror() << std::endl; return NULL; } - #endif auto func = reinterpret_cast(func_ptr); diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 0cefcc65bd9f..58c575821cdc 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -5,75 +5,109 @@ #include "../../../src/processing/processor.h" +const double kError = 1E-10; + class ProcessorTest : public testing::Test { public: - void SetUp() override { - auto loader = processing::ProcessorLoader(); - processor_ = loader.load("dummy"); - processor_->Initialize(true, {}); - } + void SetUp() override { + auto loader = processing::ProcessorLoader(); + processor_ = loader.load(processing::kMockProcessor); + processor_->Initialize(true, {}); + } - void TearDown() override { - processor_->Shutdown(); - processor_ = nullptr; - } + void TearDown() override { + processor_->Shutdown(); + processor_ = nullptr; + } protected: - processing::Processor *processor_ = nullptr; - - // Test data, 4 Rows, 2 Features - std::vector gh_pairs_ = {1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1}; // 4 Rows, 8 GH Pairs - std::vector cuts_ = {0, 4, 10}; // 2 features, one has 4 bins, another 6 - std::vector slots_ = { - 0, 4, - 1, 9, - 3, 7, - 0, 4 - }; - - std::vector node0_ = {0, 2}; - std::vector node1_ = {1, 3}; - - std::map> nodes_ = {{0, node0_}, {1, node1_}}; + processing::Processor *processor_ = nullptr; + + // Test data, 4 Rows, 2 Features + std::vector gh_pairs_ = {1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1}; // 4 Rows, 8 GH Pairs + std::vector cuts_ = {0, 4, 10}; // 2 features, one has 4 bins, another 6 + std::vector slots_ = { + 0, 4, + 1, 9, + 3, 7, + 0, 4 + }; + + std::vector node0_ = {0, 2}; + std::vector node1_ = {1, 3}; + + std::map> nodes_ = {{0, node0_}, + {1, 
node1_}}; + + std::vector histo1_ = {1.0, 2.0, 3.0, 4.0}; + std::vector histo2_ = {5.0, 6.0, 7.0, 8.0}; }; TEST_F(ProcessorTest, TestLoading) { - auto base_class = dynamic_cast(processor_); - ASSERT_NE(base_class, nullptr); + auto base_class = dynamic_cast(processor_); + ASSERT_NE(base_class, nullptr); } TEST_F(ProcessorTest, TestGHEncoding) { - size_t buf_size; - auto buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); - size_t expected_size = 24; // DAM header size - expected_size += gh_pairs_.size()*10*8; // Dummy plugin duplicate each number 10x to simulate encryption - ASSERT_EQ(buf_size, expected_size); - - size_t new_size; - auto new_buffer = processor_->HandleGHPairs(&new_size, buffer, buf_size); - // Dummy plugin doesn't change buffer - ASSERT_EQ(new_size, buf_size); - ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); + size_t buf_size; + auto buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); + size_t expected_size = 24; // DAM header size + expected_size += gh_pairs_.size() * 10 * 8; // Dummy plugin duplicate each number 10x to simulate encryption + ASSERT_EQ(buf_size, expected_size); + + size_t new_size; + auto new_buffer = processor_->HandleGHPairs(&new_size, buffer, buf_size); + // Dummy plugin doesn't change buffer + ASSERT_EQ(new_size, buf_size); + ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); } TEST_F(ProcessorTest, TestAggregation) { - size_t buf_size; - processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin - - processor_->InitAggregationContext(cuts_, slots_); - auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); - auto histos = processor_->HandleAggregation(buffer, buf_size); - std::vector expected_histos = { - 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, - 0, 0, 0, 0, 5.1, 6.1, 0, 0, 0, 0, - 7.1, 8.1, 3.1, 4.1, 0, 0, 0, 0, 7.1, 8.1, - 0, 0, 0, 0, 0, 0, 0, 0, 3.1, 4.1 - }; - - ASSERT_EQ(expected_histos.size(), histos.size()) << "Histograms have different sizes"; - - for (size_t i = 
0; i < histos.size(); ++i) { - EXPECT_EQ(expected_histos[i], histos[i]) << "Histogram differs at index " << i; - } + size_t buf_size; + processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin + + processor_->InitAggregationContext(cuts_, slots_); + auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); + auto histos = processor_->HandleAggregation(buffer, buf_size); + double expected_result[] = { + 1.1, 2.1, 0, 0, 0, 0, 5.1, 6.1, 1.1, 2.1, + 0, 0, 0, 0, 5.1, 6.1, 0, 0, 0, 0, + 7.1, 8.1, 3.1, 4.1, 0, 0, 0, 0, 7.1, 8.1, + 0, 0, 0, 0, 0, 0, 0, 0, 3.1, 4.1 + }; + + auto expected_size = sizeof(expected_result)/sizeof(expected_result[0]); + + ASSERT_EQ(expected_size, histos.size()) << "Histograms have different sizes"; + + for (size_t i = 0; i < histos.size(); ++i) { + EXPECT_NEAR(expected_result[i], histos[i], kError) << "Histogram differs at index " << i; + } +} + +TEST_F(ProcessorTest, TestHistogramSum) { + + size_t buf1_size, buf2_size; + + auto buf1 = processor_->ProcessHistograms(&buf1_size, histo1_); + auto buf2 = processor_->ProcessHistograms(&buf2_size, histo2_); + + // Simulate allgatherV + auto buf_size = buf1_size + buf2_size; + auto buf = malloc(buf_size); + memcpy(buf, buf1, buf1_size); + memcpy(static_cast(buf) + buf1_size, buf2, buf2_size); + + auto result = processor_->HandleHistograms(buf, buf_size); + + double expected_result[] = {6.0, 8.0, 10.0, 12.0}; + auto expected_size = sizeof(expected_result)/sizeof(expected_result[0]); + ASSERT_EQ(expected_size, result.size()) << "Histograms have different sizes"; + + for (size_t i = 0; i < result.size(); ++i) { + EXPECT_NEAR(expected_result[i], result[i], kError) << "Histogram differs at index " << i; + } + + free(buf); } From 64185036714ee4727082d9072d6e5333d0d05103 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 29 Apr 2024 09:59:55 -0400 Subject: [PATCH 49/55] remove horizontal funcs from this PR --- src/processing/plugins/mock_processor.cc | 59 
------------------------ src/processing/plugins/mock_processor.h | 4 -- src/processing/processor.h | 20 -------- tests/cpp/processing/test_processor.cc | 27 ----------- 4 files changed, 110 deletions(-) diff --git a/src/processing/plugins/mock_processor.cc b/src/processing/plugins/mock_processor.cc index 2943c2aea284..427c53229baf 100644 --- a/src/processing/plugins/mock_processor.cc +++ b/src/processing/plugins/mock_processor.cc @@ -113,62 +113,3 @@ std::vector MockProcessor::HandleAggregation(void *buffer, std::size_t b return result; } - -void* MockProcessor::ProcessHistograms(std::size_t *size, const std::vector& histograms) { - *size = kPrefixLen + histograms.size()*10*8; // Assume encrypted size is 10x - - int64_t buf_size = *size; - // This memory needs to be freed - char *buf = static_cast(malloc(buf_size)); - memcpy(buf, kSignature, strlen(kSignature)); - memcpy(buf + 8, &buf_size, 8); - memcpy(buf + 16, &kDataTypeAggregatedHisto, 8); - - // Simulate encryption by duplicating value 10 times - int index = kPrefixLen; - for (auto value : histograms) { - for (std::size_t i = 0; i < 10; i++) { - memcpy(buf+index, &value, 8); - index += 8; - } - } - - return buf; -} - -std::vector MockProcessor::HandleHistograms(void *buffer, std::size_t buf_size) { - std::vector result = std::vector(); - - int8_t* ptr = static_cast(buffer); - auto rest_size = buf_size; - - while (rest_size > kPrefixLen) { - if (!ValidDam(ptr, rest_size)) { - break; - } - int64_t *size_ptr = reinterpret_cast(ptr + 8); - double *array_start = reinterpret_cast(ptr + kPrefixLen); - auto array_size = (*size_ptr - kPrefixLen)/8; - auto empty = result.empty(); - if (!empty) { - if (result.size() != array_size / 10) { - std::cout << "Histogram size doesn't match " << result.size() << " != " << array_size << std::endl; - return result; - } - } - - for (std::size_t i = 0; i < array_size/10; i++) { - auto value = array_start[i*10]; - if (empty) { - result.push_back(value); - } else { - result[i] += 
value; - } - } - - rest_size -= *size_ptr; - ptr = ptr + *size_ptr; - } - - return result; -} diff --git a/src/processing/plugins/mock_processor.h b/src/processing/plugins/mock_processor.h index 4045280edf83..d9830510691a 100644 --- a/src/processing/plugins/mock_processor.h +++ b/src/processing/plugins/mock_processor.h @@ -51,8 +51,4 @@ class MockProcessor: public processing::Processor { void *ProcessAggregation(size_t *size, std::map> nodes) override; std::vector HandleAggregation(void *buffer, size_t buf_size) override; - - void *ProcessHistograms(size_t *size, const std::vector& histograms) override; - - std::vector HandleHistograms(void *buffer, size_t buf_size) override; }; diff --git a/src/processing/processor.h b/src/processing/processor.h index a6994ef9f5b7..31ac75c84a6b 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -90,26 +90,6 @@ class Processor { * site1_node1, site1_node2 site1_node3, site2_node1, site2_node2, site2_node3 */ virtual std::vector HandleAggregation(void *buffer, size_t buf_size) = 0; - - /*! - * \brief Prepare histograms for further processing - * - * \param size The output buffer size - * \param histograms Flattened array of histograms for all features - * - * \return The encoded buffer to be sent via AllGatherV - */ - virtual void *ProcessHistograms(size_t *size, const std::vector& histograms) = 0; - - /*! 
- * \brief Handle processed histograms - * - * \param buffer Buffer from allgatherV - * \param buf_size The size of the buffer - * - * \return A flattened vector of histograms for all features - */ - virtual std::vector HandleHistograms(void *buffer, size_t buf_size) = 0; }; class ProcessorLoader { diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 58c575821cdc..61a6cbcd2939 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -84,30 +84,3 @@ TEST_F(ProcessorTest, TestAggregation) { EXPECT_NEAR(expected_result[i], histos[i], kError) << "Histogram differs at index " << i; } } - -TEST_F(ProcessorTest, TestHistogramSum) { - - size_t buf1_size, buf2_size; - - auto buf1 = processor_->ProcessHistograms(&buf1_size, histo1_); - auto buf2 = processor_->ProcessHistograms(&buf2_size, histo2_); - - // Simulate allgatherV - auto buf_size = buf1_size + buf2_size; - auto buf = malloc(buf_size); - memcpy(buf, buf1, buf1_size); - memcpy(static_cast(buf) + buf1_size, buf2, buf2_size); - - auto result = processor_->HandleHistograms(buf, buf_size); - - double expected_result[] = {6.0, 8.0, 10.0, 12.0}; - auto expected_size = sizeof(expected_result)/sizeof(expected_result[0]); - ASSERT_EQ(expected_size, result.size()) << "Histograms have different sizes"; - - for (size_t i = 0; i < result.size(); ++i) { - EXPECT_NEAR(expected_result[i], result[i], kError) << "Histogram differs at index " << i; - } - - free(buf); -} - From 3a86daae46ab12cf4a3991c5f2c0beac21fe11ec Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Mon, 29 Apr 2024 14:37:47 -0400 Subject: [PATCH 50/55] change loader and proc params input pattern to align with std map --- src/collective/communicator.cc | 51 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index e762be7e3208..a585a48e2c2b 100644 --- 
a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -21,6 +21,28 @@ thread_local std::unique_ptr Communicator::communicator_{new NoOpC thread_local CommunicatorType Communicator::type_{}; thread_local std::string Communicator::nccl_path_{}; +std::map json_to_map(xgboost::Json const& config, std::string key) { + auto json_map = xgboost::OptionalArg(config, key, xgboost::JsonObject::Map{}); + std::map params{}; + for (auto entry : json_map) { + std::string text; + xgboost::Value* value = &(entry.second.GetValue()); + if (value->Type() == xgboost::Value::ValueKind::kString) { + text = reinterpret_cast(value)->GetString(); + } else if (value->Type() == xgboost::Value::ValueKind::kInteger) { + auto num = reinterpret_cast(value)->GetInteger(); + text = std::to_string(num); + } else if (value->Type() == xgboost::Value::ValueKind::kNumber) { + auto num = reinterpret_cast(value)->GetNumber(); + text = std::to_string(num); + } else { + text = "Unsupported type "; + } + params[entry.first] = text; + } + return params; +} + void Communicator::Init(Json const& config) { auto nccl = OptionalArg(config, "dmlc_nccl_path", std::string{DefaultNcclName()}); nccl_path_ = nccl; @@ -50,30 +72,27 @@ void Communicator::Init(Json const& config) { std::string proc_params_key{}; std::string proc_params_map{}; plugin_name = OptionalArg(config, "plugin_name", plugin_name); - loader_params_key = OptionalArg(config, "loader_params_key", loader_params_key); - loader_params_map = OptionalArg(config, "loader_params_map", loader_params_map); - proc_params_key = OptionalArg(config, "proc_params_key", proc_params_key); - proc_params_map = OptionalArg(config, "proc_params_map", proc_params_map); // Initialize processor if plugin_name is provided if (!plugin_name.empty()) { - std::map loader_params = {{loader_params_key, loader_params_map}}; - std::map proc_params = {{proc_params_key, proc_params_map}}; + std::map loader_params = json_to_map(config, "loader_params"); + std::map 
proc_params = json_to_map(config, "proc_params"); processing::ProcessorLoader loader(loader_params); processor_instance = loader.load(plugin_name); processor_instance->Initialize(collective::GetRank() == 0, proc_params); } #else - LOG(FATAL) << "XGBoost is not compiled with Federated Learning support."; + LOG(FATAL) << "XGBoost is not compiled with Federated Learning support."; #endif - break; - } - case CommunicatorType::kInMemory: - case CommunicatorType::kInMemoryNccl: { - communicator_.reset(InMemoryCommunicator::Create(config)); - break; - } - case CommunicatorType::kUnknown: - LOG(FATAL) << "Unknown communicator type."; + break; + } + + case CommunicatorType::kInMemory: + case CommunicatorType::kInMemoryNccl: { + communicator_.reset(InMemoryCommunicator::Create(config)); + break; + } + case CommunicatorType::kUnknown: + LOG(FATAL) << "Unknown communicator type."; } } From f3967c5698b7764ee3529f9227144ebb21781f46 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 10 May 2024 16:36:47 -0400 Subject: [PATCH 51/55] add processor shutdown --- src/c_api/c_api.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 0f4748bfec27..83b5b07d836d 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -45,6 +45,7 @@ #include "xgboost/span.h" // for Span #include "xgboost/string_view.h" // for StringView, operator<< #include "xgboost/version_config.h" // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS... 
+#include "../processing/processor.h" // for Processor #if defined(XGBOOST_USE_FEDERATED) #include "../../plugin/federated/federated_server.h" @@ -1737,6 +1738,8 @@ XGB_DLL int XGCommunicatorInit(char const* json_config) { XGB_DLL int XGCommunicatorFinalize() { API_BEGIN(); collective::Finalize(); + processor_instance->Shutdown(); + processor_instance = nullptr; API_END(); } From 20bb965e983ca2dc10831456676b96d9c80d768d Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 10 May 2024 17:22:02 -0400 Subject: [PATCH 52/55] move processor shutdown --- src/c_api/c_api.cc | 3 --- src/collective/communicator.cc | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 83b5b07d836d..0f4748bfec27 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -45,7 +45,6 @@ #include "xgboost/span.h" // for Span #include "xgboost/string_view.h" // for StringView, operator<< #include "xgboost/version_config.h" // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS... 
-#include "../processing/processor.h" // for Processor #if defined(XGBOOST_USE_FEDERATED) #include "../../plugin/federated/federated_server.h" @@ -1738,8 +1737,6 @@ XGB_DLL int XGCommunicatorInit(char const* json_config) { XGB_DLL int XGCommunicatorFinalize() { API_BEGIN(); collective::Finalize(); - processor_instance->Shutdown(); - processor_instance = nullptr; API_END(); } diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index a585a48e2c2b..0386ccb97fd2 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -100,6 +100,10 @@ void Communicator::Init(Json const& config) { void Communicator::Finalize() { communicator_->Shutdown(); communicator_.reset(new NoOpCommunicator()); + if (processor_instance != nullptr) { + processor_instance->Shutdown(); + processor_instance = nullptr; + } } #endif } // namespace xgboost::collective From 0be6129d388838d1654106044b037c0c7a7db1d8 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Wed, 15 May 2024 17:21:47 -0400 Subject: [PATCH 53/55] fix memory leakage in processor test --- tests/cpp/processing/test_processor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 61a6cbcd2939..911b347d79d7 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -60,6 +60,10 @@ TEST_F(ProcessorTest, TestGHEncoding) { // Dummy plugin doesn't change buffer ASSERT_EQ(new_size, buf_size); ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); + + // Clean up + free(buffer); + free(new_buffer); } TEST_F(ProcessorTest, TestAggregation) { From 6d1bbe7404efbed221f51f6fbc5af76eafe7679b Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Fri, 17 May 2024 16:58:26 -0400 Subject: [PATCH 54/55] fix double free issue --- src/processing/plugins/mock_processor.cc | 5 ++++- src/processing/plugins/mock_processor.h | 18 +++++++++++++++--- src/processing/processor.h | 6 ++++++ 
src/processing/processor_loader.cc | 8 ++++++-- tests/cpp/processing/test_processor.cc | 11 ++++++++--- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/processing/plugins/mock_processor.cc b/src/processing/plugins/mock_processor.cc index 427c53229baf..918ab9e8e0e6 100644 --- a/src/processing/plugins/mock_processor.cc +++ b/src/processing/plugins/mock_processor.cc @@ -57,7 +57,10 @@ void* MockProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t } } - return buffer; + auto result = malloc(buf_size); + memcpy(result, buffer, buf_size); + + return result; } void *MockProcessor::ProcessAggregation(std::size_t *size, std::map> nodes) { diff --git a/src/processing/plugins/mock_processor.h b/src/processing/plugins/mock_processor.h index d9830510691a..46e2357f2d0c 100644 --- a/src/processing/plugins/mock_processor.h +++ b/src/processing/plugins/mock_processor.h @@ -21,15 +21,27 @@ class MockProcessor: public processing::Processor { std::vector slots_; public: + + ~MockProcessor() { + if (gh_pairs_) { + gh_pairs_->clear(); + delete gh_pairs_; + } + } + void Initialize(bool active, std::map params) override { this->active_ = active; this->params_ = ¶ms; } void Shutdown() override { - this->gh_pairs_ = nullptr; - this->cuts_.clear(); - this->slots_.clear(); + if (gh_pairs_) { + gh_pairs_->clear(); + delete gh_pairs_; + } + gh_pairs_ = nullptr; + cuts_.clear(); + slots_.clear(); } void FreeBuffer(void *buffer) override { diff --git a/src/processing/processor.h b/src/processing/processor.h index 31ac75c84a6b..f3add42cd40f 100644 --- a/src/processing/processor.h +++ b/src/processing/processor.h @@ -18,6 +18,12 @@ const char kLoadFunc[] = "LoadProcessor"; /*! \brief An processor interface to handle tasks that require external library through plugins */ class Processor { public: + /*! + * \brief Virtual destructor + * + */ + virtual ~Processor() = default; + /*! 
* \brief Initialize the processor * diff --git a/src/processing/processor_loader.cc b/src/processing/processor_loader.cc index 00bb49d03cbe..dfb267a35d6e 100644 --- a/src/processing/processor_loader.cc +++ b/src/processing/processor_loader.cc @@ -78,9 +78,13 @@ namespace processing { void ProcessorLoader::unload() { #if defined(_WIN32) - FreeLibrary(handle_); + if (handle_) { + FreeLibrary(handle_); + } #else - dlclose(handle_); + if (handle_) { + dlclose(handle_); + } #endif } } // namespace processing diff --git a/tests/cpp/processing/test_processor.cc b/tests/cpp/processing/test_processor.cc index 911b347d79d7..7ba1292439cf 100644 --- a/tests/cpp/processing/test_processor.cc +++ b/tests/cpp/processing/test_processor.cc @@ -13,10 +13,12 @@ class ProcessorTest : public testing::Test { auto loader = processing::ProcessorLoader(); processor_ = loader.load(processing::kMockProcessor); processor_->Initialize(true, {}); + loader.unload(); } void TearDown() override { processor_->Shutdown(); + delete processor_; processor_ = nullptr; } @@ -62,13 +64,13 @@ TEST_F(ProcessorTest, TestGHEncoding) { ASSERT_EQ(0, memcmp(buffer, new_buffer, buf_size)); // Clean up - free(buffer); - free(new_buffer); + processor_->FreeBuffer(buffer); + processor_->FreeBuffer(new_buffer); } TEST_F(ProcessorTest, TestAggregation) { size_t buf_size; - processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin + auto gh_buffer = processor_->ProcessGHPairs(&buf_size, gh_pairs_); // Pass the GH pairs to the plugin processor_->InitAggregationContext(cuts_, slots_); auto buffer = processor_->ProcessAggregation(&buf_size, nodes_); @@ -87,4 +89,7 @@ TEST_F(ProcessorTest, TestAggregation) { for (size_t i = 0; i < histos.size(); ++i) { EXPECT_NEAR(expected_result[i], histos[i], kError) << "Histogram differs at index " << i; } + + processor_->FreeBuffer(buffer); + processor_->FreeBuffer(gh_buffer); } From 3aa64b36c1a05298156325d74ece2a180787e8e4 Mon Sep 17 00:00:00 2001 From: Ziyue 
Xu Date: Fri, 17 May 2024 17:09:28 -0400 Subject: [PATCH 55/55] linting update --- src/processing/plugins/mock_processor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/processing/plugins/mock_processor.h b/src/processing/plugins/mock_processor.h index 46e2357f2d0c..48ca1e812a44 100644 --- a/src/processing/plugins/mock_processor.h +++ b/src/processing/plugins/mock_processor.h @@ -21,7 +21,6 @@ class MockProcessor: public processing::Processor { std::vector slots_; public: - ~MockProcessor() { if (gh_pairs_) { gh_pairs_->clear();