Implement secure boost scheme phase 2 - enable secure xgboost via processor interface #10124

Open · wants to merge 62 commits into base: vertical-federated-learning
Commits
8570ba5
Add additional data split mode to cover the secure vertical pipeline
ZiyueXu77 Jan 31, 2024
2d00db6
Add IsSecure info and update corresponding functions
ZiyueXu77 Jan 31, 2024
ab17f5a
Modify evaluate_splits to block non-label owners to perform hist comp…
ZiyueXu77 Jan 31, 2024
fb1787c
Continue using Allgather for best split sync for secure vertical, equ…
ZiyueXu77 Feb 2, 2024
7a2a2b8
Modify histogram sync scheme for secure vertical case, can identify g…
ZiyueXu77 Feb 6, 2024
3ca3142
Sync cut informaiton across clients, full pipeline works for testing …
ZiyueXu77 Feb 7, 2024
22dd522
Code cleanup, phase 1 of alternative vertical pipeline finished
ZiyueXu77 Feb 8, 2024
52e8951
Code clean
ZiyueXu77 Feb 8, 2024
e9eef15
change kColS to kColSecure to avoid confusion with kCols
ZiyueXu77 Feb 12, 2024
91c8a2f
Replace allreduce with allgather, functional but inefficient version
ZiyueXu77 Feb 13, 2024
8340c26
Update AllGather behavior from individual pair to bulk by adopting ne…
ZiyueXu77 Feb 13, 2024
42a9df1
comment out the record printing
ZiyueXu77 Feb 13, 2024
41e5abb
fix pointer bug for histsync with allgather
ZiyueXu77 Feb 20, 2024
ea5dc98
Merge branch 'dmlc:master' into SecureBoostP2
ZiyueXu77 Feb 20, 2024
5d542f8
Merge branch 'dmlc:master' into SecureBoostP2
ZiyueXu77 Feb 23, 2024
d91be10
identify the HE adding locations
ZiyueXu77 Feb 23, 2024
dd60317
revise and simplify template code
ZiyueXu77 Mar 6, 2024
8da824c
revise and simplify template code
ZiyueXu77 Mar 6, 2024
fb9f4fa
prepare aggregator for gh broadcast
ZiyueXu77 Mar 13, 2024
e77f8c6
prepare histogram for histindex and row index for allgather
ZiyueXu77 Mar 14, 2024
7ef48c8
Merge branch 'vertical-federated-learning' into SecureBoostP2
ZiyueXu77 Mar 15, 2024
8405791
fix conflicts
ZiyueXu77 Mar 15, 2024
db7d518
fix conflicts
ZiyueXu77 Mar 15, 2024
dd6adde
fix format
ZiyueXu77 Mar 15, 2024
9567e67
fix allgather logic and update unit test
ZiyueXu77 Mar 19, 2024
53800f2
fix linting
ZiyueXu77 Mar 19, 2024
b7e70f1
fix linting and other unit test issues
ZiyueXu77 Mar 20, 2024
49e8fd6
fix linting and other unit test issues
ZiyueXu77 Mar 20, 2024
da0f7a6
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
406cda3
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
f6c63aa
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
f223df7
functional integration with interface
ZiyueXu77 Apr 1, 2024
d881d84
remove debugging prints
ZiyueXu77 Apr 1, 2024
2997cf7
remove processor from another PR
ZiyueXu77 Apr 1, 2024
3a1f9ac
Update the processor functions according to new processor implementation
ZiyueXu77 Apr 12, 2024
1107604
Move processor interface init from learner to communicator
ZiyueXu77 Apr 12, 2024
30b7ed5
Move processor interface init from learner to communicator functional
ZiyueXu77 Apr 12, 2024
a3ddf7d
switch to allgatherV for encrypted message with varying lenghts
ZiyueXu77 Apr 15, 2024
3123b51
consolidate with processor interface PR
ZiyueXu77 Apr 19, 2024
73225a0
remove prints and fix format
ZiyueXu77 Apr 23, 2024
e85b1fb
fix linting over reference pass
ZiyueXu77 Apr 24, 2024
57750b4
fix undefined symbol issue
ZiyueXu77 Apr 24, 2024
fa2665a
fix processor test
ZiyueXu77 Apr 24, 2024
87d2fdb
secure vertical relies on processor, move the unit test
ZiyueXu77 Apr 24, 2024
9941293
type correction
ZiyueXu77 Apr 24, 2024
dd4f440
type correction
ZiyueXu77 Apr 24, 2024
5b2dfe6
extra linting from last change
ZiyueXu77 Apr 24, 2024
80d3b89
Added Windows support
nvidianz Apr 24, 2024
184b67f
Merge pull request #4 from nvidianz/processor-windows-support
ZiyueXu77 Apr 25, 2024
3382707
fix for cstdint types
ZiyueXu77 Apr 25, 2024
2a8f19a
fix for cstdint types
ZiyueXu77 Apr 25, 2024
9ff2935
Added support for horizontal secure XGBoost
nvidianz Apr 25, 2024
38e9d3d
Merge pull request #5 from nvidianz/processor-horizontal-support
ZiyueXu77 Apr 25, 2024
82ad9a8
Merge branch 'vertical-federated-learning' into SecureBoostP2
ZiyueXu77 Apr 29, 2024
6418503
remove horizontal funcs from this PR
ZiyueXu77 Apr 29, 2024
3a86daa
change loader and proc params input pattern to align with std map
ZiyueXu77 Apr 29, 2024
f3967c5
add processor shutdown
ZiyueXu77 May 10, 2024
20bb965
move processor shutdown
ZiyueXu77 May 10, 2024
0be6129
fix memory leakage in processor test
ZiyueXu77 May 15, 2024
47176d8
Merge branch 'vertical-federated-learning' into SecureBoostP2
ZiyueXu77 May 16, 2024
6d1bbe7
fix double free issue
ZiyueXu77 May 17, 2024
3aa64b3
linting update
ZiyueXu77 May 17, 2024
45 changes: 42 additions & 3 deletions src/collective/aggregator.h
@@ -14,6 +14,7 @@
#include "communicator-inl.h"
#include "xgboost/collective/result.h" // for Result
#include "xgboost/data.h" // for MetaINfo
#include "../processing/processor.h" // for Processor

namespace xgboost::collective {

@@ -69,7 +70,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, void* buffer, std::si
* @param result The HostDeviceVector storing the results.
* @param function The function used to calculate the results.
*/
template <typename T, typename Function>
template <bool is_gpair, typename T, typename Function>
void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>* result,
Function&& function) {
if (info.IsVerticalFederated()) {
@@ -96,8 +97,46 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>*
}
collective::Broadcast(&size, sizeof(std::size_t), 0);

result->Resize(size);
collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
if (info.IsSecure() && is_gpair) {
// Under secure mode, gradient pairs are flattened into a double vector and
// handed to the processor plugin for encryption; plaintext values are only
// available on rank 0
std::size_t buffer_size{};
std::int8_t *buffer;
if (collective::GetRank() == 0) {
std::vector<double> vector_gh;
for (std::size_t i = 0; i < size; i++) {
auto gpair = result->HostVector()[i];
// cast from GradientPair to float pointer
auto gpair_ptr = reinterpret_cast<float*>(&gpair);
// save to vector
vector_gh.push_back(gpair_ptr[0]);
vector_gh.push_back(gpair_ptr[1]);
}
// provide the vectors to the processor interface
size_t size;
auto buf = processor_instance->ProcessGHPairs(&size, vector_gh);
buffer_size = size;
buffer = reinterpret_cast<std::int8_t *>(buf);
}

// broadcast the buffer size for other ranks to prepare
collective::Broadcast(&buffer_size, sizeof(std::size_t), 0);
// prepare buffer on passive parties for satisfying broadcast mpi call
if (collective::GetRank() != 0) {
buffer = reinterpret_cast<std::int8_t *>(malloc(buffer_size));
}

// broadcast the data buffer holding processed gpairs
collective::Broadcast(buffer, buffer_size, 0);

// call HandleGHPairs
size_t size;
processor_instance->HandleGHPairs(&size, buffer, buffer_size);
} else {
// clear text mode, broadcast the data directly
result->Resize(size);
collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
}
} else {
std::forward<Function>(function)();
}
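The flattening performed on rank 0 above packs each GradientPair into two consecutive doubles before handing the vector to the processor. A minimal sketch of that step, assuming the GradientPair accessors GetGrad() and GetHess() in place of the reinterpret_cast used in the patch:

std::vector<double> vector_gh;
for (auto const& gpair : result->HostVector()) {
  vector_gh.push_back(gpair.GetGrad());  // gradient
  vector_gh.push_back(gpair.GetHess());  // hessian
}
std::size_t encoded_size{};
void* encoded = processor_instance->ProcessGHPairs(&encoded_size, vector_gh);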
27 changes: 25 additions & 2 deletions src/collective/communicator.cc
@@ -1,6 +1,7 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#include <map>
#include "communicator.h"

#include "comm.h"
@@ -9,9 +10,12 @@
#include "rabit_communicator.h"

#if defined(XGBOOST_USE_FEDERATED)
#include "../../plugin/federated/federated_communicator.h"
#include "../../plugin/federated/federated_communicator.h"
#endif

#include "../processing/processor.h"
processing::Processor *processor_instance;

namespace xgboost::collective {
thread_local std::unique_ptr<Communicator> Communicator::communicator_{new NoOpCommunicator()};
thread_local CommunicatorType Communicator::type_{};
@@ -38,7 +42,26 @@ void Communicator::Init(Json const& config) {
}
case CommunicatorType::kFederated: {
#if defined(XGBOOST_USE_FEDERATED)
communicator_.reset(FederatedCommunicator::Create(config));
communicator_.reset(FederatedCommunicator::Create(config));
// Get processor configs
std::string plugin_name{};
std::string loader_params_key{};
std::string loader_params_map{};
std::string proc_params_key{};
std::string proc_params_map{};
plugin_name = OptionalArg<String>(config, "plugin_name", plugin_name);
loader_params_key = OptionalArg<String>(config, "loader_params_key", loader_params_key);
loader_params_map = OptionalArg<String>(config, "loader_params_map", loader_params_map);
proc_params_key = OptionalArg<String>(config, "proc_params_key", proc_params_key);
proc_params_map = OptionalArg<String>(config, "proc_params_map", proc_params_map);
// Initialize processor if plugin_name is provided
if (!plugin_name.empty()) {
std::map<std::string, std::string> loader_params = {{loader_params_key, loader_params_map}};
std::map<std::string, std::string> proc_params = {{proc_params_key, proc_params_map}};
processing::ProcessorLoader loader(loader_params);
processor_instance = loader.load(plugin_name);
processor_instance->Initialize(collective::GetRank() == 0, proc_params);
}
#else
LOG(FATAL) << "XGBoost is not compiled with Federated Learning support.";
#endif
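For context, a condensed sketch of how the new keys might be supplied to Communicator::Init for the federated backend; the plugin name "mock", the LIBRARY_PATH key, and the /tmp path are illustrative values, and the usual federated settings (server address, world size, rank) are omitted:

Json config{Object{}};
// ... existing federated communicator settings ...
config["plugin_name"] = String("mock");                // selects the processor plugin
config["loader_params_key"] = String("LIBRARY_PATH");  // illustrative loader option
config["loader_params_map"] = String("/tmp");
config["proc_params_key"] = String("");
config["proc_params_map"] = String("");
collective::Communicator::Init(config);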
7 changes: 4 additions & 3 deletions src/learner.cc
@@ -846,7 +846,7 @@ class LearnerConfiguration : public Learner {

void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
base_score->Reshape(1);
collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
collective::ApplyWithLabels<false>(this->Ctx(), info, base_score->Data(),
[&] { UsePtr(obj_)->InitEstimation(info, base_score); });
}
};
@@ -1472,8 +1472,9 @@ class LearnerImpl : public LearnerIO {
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(),
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
// calculate gradient and communicate
collective::ApplyWithLabels<true>(&ctx_, info, out_gpair->Data(),
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
}

/*! \brief random number transformation seed. */
174 changes: 174 additions & 0 deletions src/processing/plugins/mock_processor.cc
@@ -0,0 +1,174 @@
/**
* Copyright 2014-2024 by XGBoost Contributors
*/
#include <iostream>
#include <cstring>
#include <cstdint>
#include "./mock_processor.h"

const char kSignature[] = "NVDADAM1"; // DAM (Direct Accessible Marshalling) V1
const int64_t kPrefixLen = 24;

bool ValidDam(void *buffer, std::size_t size) {
return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0;
}
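// Illustration of the 24-byte DAM prefix used by the Process*/Handle* calls
// below (hypothetical struct, shown for reference only):
//   struct DamHeader {
//     char    signature[8];   // "NVDADAM1"
//     int64_t buffer_size;    // total buffer length, including this prefix
//     int64_t data_type;      // kDataTypeGHPairs, kDataTypeHisto, ...
//   };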

void* MockProcessor::ProcessGHPairs(std::size_t *size, const std::vector<double>& pairs) {
*size = kPrefixLen + pairs.size()*10*8; // Assume encrypted size is 10x

int64_t buf_size = *size;
// This memory needs to be freed
char *buf = static_cast<char *>(calloc(*size, 1));
memcpy(buf, kSignature, strlen(kSignature));
memcpy(buf + 8, &buf_size, 8);
memcpy(buf + 16, &kDataTypeGHPairs, 8);

// Simulate encryption by duplicating value 10 times
int index = kPrefixLen;
for (auto value : pairs) {
for (std::size_t i = 0; i < 10; i++) {
memcpy(buf+index, &value, 8);
index += 8;
}
}

// Save pairs for future operations
this->gh_pairs_ = new std::vector<double>(pairs);

return buf;
}


void* MockProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t buf_size) {
*size = buf_size;
if (!ValidDam(buffer, *size)) {
return buffer;
}

// For mock, this call is used to set gh_pairs for passive sites
if (!active_) {
int8_t *ptr = static_cast<int8_t *>(buffer);
ptr += kPrefixLen;
double *pairs = reinterpret_cast<double *>(ptr);
std::size_t num = (buf_size - kPrefixLen) / 8;
gh_pairs_ = new std::vector<double>();
for (std::size_t i = 0; i < num; i += 10) {
gh_pairs_->push_back(pairs[i]);
}
}

return buffer;
}

void *MockProcessor::ProcessAggregation(std::size_t *size, std::map<int, std::vector<int>> nodes) {
int total_bin_size = cuts_.back();
int histo_size = total_bin_size*2;
*size = kPrefixLen + 8*histo_size*nodes.size();
int64_t buf_size = *size;
int8_t *buf = static_cast<int8_t *>(calloc(buf_size, 1));
memcpy(buf, kSignature, strlen(kSignature));
memcpy(buf + 8, &buf_size, 8);
memcpy(buf + 16, &kDataTypeHisto, 8);

double *histo = reinterpret_cast<double *>(buf + kPrefixLen);
for ( const auto &node : nodes ) {
auto rows = node.second;
for (const auto &row_id : rows) {
auto num = cuts_.size() - 1;
for (std::size_t f = 0; f < num; f++) {
int slot = slots_[f + num*row_id];
if ((slot < 0) || (slot >= total_bin_size)) {
continue;
}

auto g = (*gh_pairs_)[row_id*2];
auto h = (*gh_pairs_)[row_id*2+1];
histo[slot*2] += g;
histo[slot*2+1] += h;
}
}
histo += histo_size;
}

return buf;
}
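// Layout of the buffer returned above: after the 24-byte DAM prefix, each
// entry in `nodes` contributes 2 * total_bin_size doubles, stored as
// interleaved (gradient, hessian) sums per histogram slot.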

std::vector<double> MockProcessor::HandleAggregation(void *buffer, std::size_t buf_size) {
std::vector<double> result = std::vector<double>();

int8_t* ptr = static_cast<int8_t *>(buffer);
auto rest_size = buf_size;

while (rest_size > kPrefixLen) {
if (!ValidDam(ptr, rest_size)) {
break;
}
int64_t *size_ptr = reinterpret_cast<int64_t *>(ptr + 8);
double *array_start = reinterpret_cast<double *>(ptr + kPrefixLen);
auto array_size = (*size_ptr - kPrefixLen)/8;
result.insert(result.end(), array_start, array_start + array_size);
rest_size -= *size_ptr;
ptr = ptr + *size_ptr;
}

return result;
}

void* MockProcessor::ProcessHistograms(std::size_t *size, const std::vector<double>& histograms) {
*size = kPrefixLen + histograms.size()*10*8; // Assume encrypted size is 10x

int64_t buf_size = *size;
// This memory needs to be freed
char *buf = static_cast<char *>(malloc(buf_size));
memcpy(buf, kSignature, strlen(kSignature));
memcpy(buf + 8, &buf_size, 8);
memcpy(buf + 16, &kDataTypeAggregatedHisto, 8);

// Simulate encryption by duplicating value 10 times
int index = kPrefixLen;
for (auto value : histograms) {
for (std::size_t i = 0; i < 10; i++) {
memcpy(buf+index, &value, 8);
index += 8;
}
}

return buf;
}

std::vector<double> MockProcessor::HandleHistograms(void *buffer, std::size_t buf_size) {
std::vector<double> result = std::vector<double>();

int8_t* ptr = static_cast<int8_t *>(buffer);
auto rest_size = buf_size;

while (rest_size > kPrefixLen) {
if (!ValidDam(ptr, rest_size)) {
break;
}
int64_t *size_ptr = reinterpret_cast<int64_t *>(ptr + 8);
double *array_start = reinterpret_cast<double *>(ptr + kPrefixLen);
auto array_size = (*size_ptr - kPrefixLen)/8;
auto empty = result.empty();
if (!empty) {
if (result.size() != array_size / 10) {
std::cout << "Histogram size doesn't match " << result.size() << " != " << array_size << std::endl;
return result;
}
}

for (std::size_t i = 0; i < array_size/10; i++) {
auto value = array_start[i*10];
if (empty) {
result.push_back(value);
} else {
result[i] += value;
}
}

rest_size -= *size_ptr;
ptr = ptr + *size_ptr;
}

return result;
}
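A minimal round trip through the mock plugin, sketching how the calls above fit together (values are illustrative; the caller owns the returned buffers and releases them with FreeBuffer):

MockProcessor proc;
proc.Initialize(/*active=*/true, {});

std::size_t buf_size{};
std::vector<double> gh{0.5, 1.0, -0.25, 1.0};        // two (g, h) pairs
void* encoded = proc.ProcessGHPairs(&buf_size, gh);  // rank-0 side
proc.HandleGHPairs(&buf_size, encoded, buf_size);    // no-op for the active rank; passive ranks recover gh pairs here
proc.FreeBuffer(encoded);
proc.Shutdown();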
58 changes: 58 additions & 0 deletions src/processing/plugins/mock_processor.h
@@ -0,0 +1,58 @@
/**
* Copyright 2014-2024 by XGBoost Contributors
*/
#pragma once
#include <string>
#include <vector>
#include <map>
#include "../processor.h"

// Data type definition
const int64_t kDataTypeGHPairs = 1;
const int64_t kDataTypeHisto = 2;
const int64_t kDataTypeAggregatedHisto = 3;

class MockProcessor: public processing::Processor {
private:
bool active_ = false;
std::map<std::string, std::string> params_{};  // owned copy of the Initialize() params
std::vector<double> *gh_pairs_{nullptr};
std::vector<uint32_t> cuts_;
std::vector<int> slots_;

public:
void Initialize(bool active, std::map<std::string, std::string> params) override {
this->active_ = active;
this->params_ = params;  // copy; keeping a pointer to the by-value argument would dangle
}

void Shutdown() override {
this->gh_pairs_ = nullptr;
this->cuts_.clear();
this->slots_.clear();
}

void FreeBuffer(void *buffer) override {
free(buffer);
}

void* ProcessGHPairs(size_t *size, const std::vector<double>& pairs) override;

void* HandleGHPairs(size_t *size, void *buffer, size_t buf_size) override;

void InitAggregationContext(const std::vector<uint32_t> &cuts,
const std::vector<int> &slots) override {
this->cuts_ = cuts;
if (this->slots_.empty()) {
this->slots_ = slots;
}
}

void *ProcessAggregation(size_t *size, std::map<int, std::vector<int>> nodes) override;

std::vector<double> HandleAggregation(void *buffer, size_t buf_size) override;

void *ProcessHistograms(size_t *size, const std::vector<double>& histograms) override;

std::vector<double> HandleHistograms(void *buffer, size_t buf_size) override;
};
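Loading the plugin mirrors what Communicator::Init does in the diff above; a condensed sketch (the library path, plugin name, and empty processor params are illustrative):

std::map<std::string, std::string> loader_params = {{"LIBRARY_PATH", "/tmp"}};
processing::ProcessorLoader loader(loader_params);
processing::Processor* proc = loader.load("mock");
proc->Initialize(collective::GetRank() == 0, {});
// ... run secure vertical training ...
proc->Shutdown();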