PaddlePaddle · ZeyuChen · Aug 9, 2022 · Jul 22, 2022 · Jul 22, 2022 · Jul 22, 2022
diff --git a/paddle/fluid/operators/graph_send_recv_op.cc b/paddle/fluid/operators/graph_send_recv_op.cc
@@ -58,6 +58,10 @@ class GraphSendRecvOpMaker : public framework::OpProtoAndCheckerMaker {
              "The input tensor with data type float32, float64, int32, int64.");
     AddInput("Src_index", "The source index tensor.");
     AddInput("Dst_index", "The destination index tensor.");
+    AddInput("OutSizeTensor",
+             "(Tensor<int>, optional). The 0th dimension of the output. "
+             "It has a higher priority than Attr(out_size).")
+        .AsDispensable();
     AddOutput("Out", "Output tensor of graph_send_recv op.");
     AddOutput("Dst_count",
               "Count tensor of Dst_index, mainly for MEAN pool_type.")
@@ -68,12 +72,12 @@ class GraphSendRecvOpMaker : public framework::OpProtoAndCheckerMaker {
                          "tensors of Dst_index.")
         .SetDefault("SUM")
         .InEnum({"SUM", "MEAN", "MIN", "MAX"});
-    AddAttr<int64_t>(
+    AddAttr<std::vector<int64_t>>(
         "out_size",
-        "(int64_t, default 0)"
+        "(vector<int64_t>, default {0}) "
         "Define the first dimension of Output tensor."
-        "If set default 0, then the shape of Out is the same with X.")
-        .SetDefault(0);
+        " If set default {0}, then the shape of Out is the same with X.")
+        .SetDefault({0});
     AddComment(R"DOC(
 Graph Learning Send_Recv combine operator.
 

diff --git a/paddle/fluid/pybind/op_function_generator.h b/paddle/fluid/pybind/op_function_generator.h
@@ -225,6 +225,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
       "Bias3",
       "Mean3",
       "Var3"}},
+    {"graph_send_recv", {"X", "Src_index", "Dst_index", "OutSizeTensor"}},
 };
 
 // NOTE(zhiqiu): Like op_ins_map.

diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
@@ -886,7 +886,7 @@
   backward : gelu_grad
 
 - api : graph_send_recv
-  args : (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", int64_t out_size = 0)
+  args : (Tensor x, Tensor src_index, Tensor dst_index, str pool_type, IntArray out_size)
   output : Tensor(out), Tensor(dst_count)
   infer_meta :
     func : GraphSendRecvInferMeta

diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -828,8 +828,8 @@
     func : gelu_grad
 
 - backward_api : graph_send_recv_grad
-  forward : graph_send_recv (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", int64_t out_size = 0) -> Tensor(out), Tensor(dst_count)
-  args : (Tensor x, Tensor src_index, Tensor dst_index, Tensor out, Tensor dst_count, Tensor out_grad, str pool_type = "SUM")
+  forward : graph_send_recv (Tensor x, Tensor src_index, Tensor dst_index, str pool_type, IntArray out_size) -> Tensor(out), Tensor(dst_count)
+  args : (Tensor x, Tensor src_index, Tensor dst_index, Tensor out, Tensor dst_count, Tensor out_grad, str pool_type)
   output : Tensor(x_grad)
   infer_meta :
     func : GeneralUnaryGradInferMeta

diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc
@@ -302,7 +302,7 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                             const MetaTensor& src_index,
                             const MetaTensor& dst_index,
                             const std::string& pool_type,
-                            int64_t out_size,
+                            const IntArray& out_size,
                             MetaTensor* out,
                             MetaTensor* dst_count) {
   auto src_index_dims = src_index.dims();
@@ -345,23 +345,11 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                         "Src_index and Dst_index should have the same shape."));
 
   auto dims = x.dims();
-  if (out_size <= 0) {
-    out->set_dims(dims);
-  } else {
-    std::vector<int64_t> dims_ = phi::vectorize(dims);
-    if (dims_.size() > 0) {
-      dims_[0] = out_size;
-    }
-    out->set_dims(phi::make_ddim(dims_));
-  }
+  out->set_dims(dims);
   out->set_dtype(x.dtype());
 
   if (pool_type == "MEAN") {
-    if (out_size <= 0) {
-      dst_count->set_dims({dims[0]});
-    } else {
-      dst_count->set_dims({out_size});
-    }
+    dst_count->set_dims({dims[0]});
     dst_count->set_dtype(DataType::INT32);
   }
 }

diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/meta_tensor.h"
 
 namespace phi {
@@ -65,7 +66,7 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                             const MetaTensor& src_index,
                             const MetaTensor& dst_index,
                             const std::string& pool_type,
-                            int64_t out_size,
+                            const IntArray& out_size,
                             MetaTensor* out,
                             MetaTensor* dst_count);
 

diff --git a/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc
@@ -88,27 +88,34 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx,
                                        DenseTensor* dst_count = nullptr) {
   const int& index_size = src_index.dims()[0];
 
-  ctx.template Alloc<T>(out);
-  T* p_output = out->data<T>();
   const auto& src_dims = x.dims();
   int64_t memset_size = 1;
   if (out_size <= 0) {
     for (int i = 0; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   } else {
+    // set out dim following out_size.
+    std::vector<int64_t> dims_ = phi::vectorize(src_dims);
+    if (dims_.size() > 0) {
+      dims_[0] = out_size;
+    }
+    out->Resize(phi::make_ddim(dims_));
     memset_size = out_size;
     for (int i = 1; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   }
+
+  ctx.template Alloc<T>(out);
+  T* p_output = out->data<T>();
   const size_t& memset_bytes = memset_size * sizeof(T);
   memset(p_output, 0, memset_bytes);
 
   if (index_size == 0) return;
-
   const IndexT* s_index = src_index.data<IndexT>();
   const IndexT* d_index = dst_index.data<IndexT>();
+
   if (pool_type == "SUM") {
     GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(
         src_dims[0], index_size, s_index, d_index, x, out, pool_type);
@@ -119,10 +126,12 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx,
     GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvMaxFunctor<T>>(
         src_dims[0], index_size, s_index, d_index, x, out, pool_type);
   } else if (pool_type == "MEAN") {
+    int64_t input_size = out_size <= 0 ? src_dims[0] : out_size;
+    dst_count->Resize({input_size});
     ctx.template Alloc<int>(dst_count);
     int* p_dst_count = dst_count->data<int>();
-    memset(p_dst_count, 0, src_dims[0] * sizeof(int));
-    GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(src_dims[0],
+    memset(p_dst_count, 0, input_size * sizeof(int));
+    GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(input_size,
                                                                 index_size,
                                                                 s_index,
                                                                 d_index,
@@ -139,16 +148,29 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count) {
   auto index_type = src_index.dtype();
+  const int64_t n_out = out_size.GetData().empty() ? 0 : out_size.GetData()[0];
   if (index_type == phi::DataType::INT32) {
-    GraphSendRecvOpKernelLaunchHelper<Context, T, int32_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpKernelLaunchHelper<Context, T, int32_t>(ctx,
+                                                           x,
+                                                           src_index,
+                                                           dst_index,
+                                                           pool_type,
+                                                           n_out,
+                                                           out,
+                                                           dst_count);
   } else if (index_type == phi::DataType::INT64) {
-    GraphSendRecvOpKernelLaunchHelper<Context, T, int64_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpKernelLaunchHelper<Context, T, int64_t>(ctx,
+                                                           x,
+                                                           src_index,
+                                                           dst_index,
+                                                           pool_type,
+                                                           n_out,
+                                                           out,
+                                                           dst_count);
   }
 }
 

diff --git a/paddle/phi/kernels/gpu/graph_send_recv_funcs.h b/paddle/phi/kernels/gpu/graph_send_recv_funcs.h
@@ -81,7 +81,7 @@ __global__ void InputResetMaxCUDAKernel(T* output,
                                         size_t input_size,
                                         size_t slice_size) {
   CUDA_KERNEL_LOOP_TYPE(i, input_size * slice_size, int64_t) {
-    if (*(output + i) == std::numeric_limits<T>::min()) {
+    if (*(output + i) == std::numeric_limits<T>::lowest()) {
       *(output + i) = 0;
     }
   }

diff --git a/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu
@@ -37,20 +37,26 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
                                            DenseTensor* out,
                                            DenseTensor* dst_count = nullptr) {
   const int& index_size = src_index.dims()[0];
-  ctx.template Alloc<T>(out);
-  T* p_output = out->data<T>();
   const auto& src_dims = x.dims();
   int64_t memset_size = 1;
   if (out_size <= 0) {
     for (int i = 0; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   } else {
+    // set out dim following out_size.
+    std::vector<int64_t> dims_ = phi::vectorize(src_dims);
+    if (dims_.size() > 0) {
+      dims_[0] = out_size;
+    }
+    out->Resize(phi::make_ddim(dims_));
     memset_size = out_size;
     for (int i = 1; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   }
+  ctx.template Alloc<T>(out);
+  T* p_output = out->data<T>();
   const size_t& memset_bytes = memset_size * sizeof(T);
   if (pool_type == "SUM" || pool_type == "MEAN") {
 #ifdef PADDLE_WITH_HIP
@@ -63,7 +69,7 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
     thrust::fill(thrust::device,
                  p_output_ptr,
                  p_output_ptr + memset_size,
-                 std::numeric_limits<T>::min());
+                 std::numeric_limits<T>::lowest());
   } else if (pool_type == "MIN") {
     thrust::device_ptr<T> p_output_ptr(p_output);
     thrust::fill(thrust::device,
@@ -91,7 +97,7 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
   int64_t max_grid_dimx = ctx.GetCUDAMaxGridDimSize()[0];
   int64_t grid_tmp = (n + block - 1) / block;
   int64_t grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx;
-  int64_t input_size = src_dims[0];
+  int64_t input_size = out_size <= 0 ? src_dims[0] : out_size;
   if (pool_type == "SUM") {
     GraphSendRecvSumCUDAFunctor<T, IndexT> functor;
     GraphSendRecvCUDAKernel<T, IndexT, GraphSendRecvSumCUDAFunctor<T, IndexT>>
@@ -103,9 +109,6 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
 
-    if (out_size > 0) {
-      input_size = out_size;
-    }
     int64_t grid_max_tmp = (input_size * slice_size + block - 1) / block;
     int64_t grid_max =
         grid_max_tmp < max_grid_dimx ? grid_max_tmp : max_grid_dimx;
@@ -117,9 +120,6 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
 
-    if (out_size > 0) {
-      input_size = out_size;
-    }
     int64_t grid_min_tmp = (input_size * slice_size + block - 1) / block;
     int64_t grid_min =
         grid_min_tmp < max_grid_dimx ? grid_min_tmp : max_grid_dimx;
@@ -130,12 +130,9 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
     GraphSendRecvCUDAKernel<T, IndexT, GraphSendRecvSumCUDAFunctor<T, IndexT>>
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
-
+    dst_count->Resize({input_size});
     ctx.template Alloc<int32_t>(dst_count);
-    int32_t* p_dst_count = dst_count->data<int32_t>();
-    if (out_size > 0) {
-      input_size = out_size;
-    }
+    int* p_dst_count = dst_count->data<int>();
 
 #ifdef PADDLE_WITH_HIP
     hipMemset(p_dst_count, 0, input_size * sizeof(int));
@@ -161,16 +158,29 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count) {
   auto index_type = src_index.dtype();
+  const int64_t n_out = out_size.GetData().empty() ? 0 : out_size.GetData()[0];
   if (index_type == phi::DataType::INT32) {
-    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int32_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int32_t>(ctx,
+                                                               x,
+                                                               src_index,
+                                                               dst_index,
+                                                               pool_type,
+                                                               n_out,
+                                                               out,
+                                                               dst_count);
   } else if (index_type == phi::DataType::INT64) {
-    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int64_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int64_t>(ctx,
+                                                               x,
+                                                               src_index,
+                                                               dst_index,
+                                                               pool_type,
+                                                               n_out,
+                                                               out,
+                                                               dst_count);
   }
 }
 

diff --git a/paddle/phi/kernels/graph_send_recv_kernel.h b/paddle/phi/kernels/graph_send_recv_kernel.h
@@ -16,6 +16,7 @@
 
 #include <string>
 
+#include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace phi {
@@ -26,7 +27,7 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count);
 

diff --git a/paddle/phi/ops/compat/graph_send_recv_sig.cc b/paddle/phi/ops/compat/graph_send_recv_sig.cc
@@ -18,10 +18,17 @@ namespace phi {
 
 KernelSignature GraphSendRecvOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
-  return KernelSignature("graph_send_recv",
-                         {"X", "Src_index", "Dst_index"},
-                         {"pool_type", "out_size"},
-                         {"Out", "Dst_count"});
+  if (ctx.HasInput("OutSizeTensor")) {
+    return KernelSignature("graph_send_recv",
+                           {"X", "Src_index", "Dst_index"},
+                           {"pool_type", "OutSizeTensor"},
+                           {"Out", "Dst_count"});
+  } else {
+    return KernelSignature("graph_send_recv",
+                           {"X", "Src_index", "Dst_index"},
+                           {"pool_type", "out_size"},
+                           {"Out", "Dst_count"});
+  }
 }
 
 KernelSignature GraphSendRecvGradOpArgumentMapping(

diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
@@ -78,6 +78,7 @@
 import paddle.reader  # noqa: F401
 import paddle.static  # noqa: F401
 import paddle.vision  # noqa: F401
+import paddle.geometric  # noqa: F401
 
 from .tensor.attribute import is_complex  # noqa: F401
 from .tensor.attribute import is_integer  # noqa: F401