From 1b2bc14cb3307660265f0e31d46e42fadc5ec832 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Wed, 3 Aug 2022 06:54:26 +0000 Subject: [PATCH 01/12] initial commit --- paddle/phi/api/yaml/api.yaml | 12 +- paddle/phi/api/yaml/backward.yaml | 11 + paddle/phi/infermeta/multiary.cc | 75 +++++ paddle/phi/infermeta/multiary.h | 7 + .../kernels/gpu/graph_send_ue_recv_funcs.h | 131 ++++++++ .../kernels/gpu/graph_send_uv_grad_kernel.cu | 295 ++++++++++++++++++ .../phi/kernels/gpu/graph_send_uv_kernel.cu | 166 ++++++++++ .../phi/kernels/graph_send_uv_grad_kernel.h | 33 ++ paddle/phi/kernels/graph_send_uv_kernel.h | 31 ++ .../impl/graph_send_ue_recv_kernel_impl.h | 139 +++++++++ .../tests/unittests/test_graph_send_uv_op.py | 146 +++++++++ 11 files changed, 1045 insertions(+), 1 deletion(-) create mode 100644 paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h create mode 100644 paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu create mode 100644 paddle/phi/kernels/gpu/graph_send_uv_kernel.cu create mode 100644 paddle/phi/kernels/graph_send_uv_grad_kernel.h create mode 100644 paddle/phi/kernels/graph_send_uv_kernel.h create mode 100644 paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h create mode 100644 python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py diff --git a/paddle/phi/api/yaml/api.yaml b/paddle/phi/api/yaml/api.yaml index 1156206ee4b51..c00d64cf853e7 100644 --- a/paddle/phi/api/yaml/api.yaml +++ b/paddle/phi/api/yaml/api.yaml @@ -98,6 +98,16 @@ func : erf backward : erf_grad +- api : graph_send_uv + args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str compute_type = "ADD") + output : Tensor(out) + infer_meta : + func : GraphSendUVInferMeta + kernel : + func : graph_send_uv + data_type : x + backward : graph_send_uv_grad + - api : lgamma args : (Tensor x) output : Tensor(out) @@ -105,7 +115,7 @@ func : UnchangedInferMeta kernel : func : lgamma - backward : lgamma_grad + backward : lgamma_grad - api : mv args : (Tensor x, Tensor vec) diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 53cdc97a716d7..c1d411c54ca50 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -105,6 +105,17 @@ func : erf_grad data_type : out_grad +- backward_api : graph_send_uv_grad + forward : graph_send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str compute_type = "ADD") -> Tensor(out) + args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str compute_type = "ADD") + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + kernel : + func : graph_send_uv_grad + data_type : x + - backward_api : lgamma_grad forward : lgamma(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index a524506c7f07b..7953900d706c0 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -20,6 +20,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/concat_funcs.h" namespace phi { @@ -2410,6 +2411,80 @@ void Yolov3LossInferMeta(const MetaTensor& x, gt_match_mask->set_dtype(x.dtype()); } +void GraphSendUVInferMeta(const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& src_index, + const MetaTensor& dst_index, + const std::string& compute_type, + MetaTensor* out) { + std::cout << "Enter GraphSendUVInferMeta" << std::endl; + + auto src_index_dims = src_index.dims(); + + std::cout << "Check whether enter here.\n"; + if (src_index_dims.size() == 2) { + PADDLE_ENFORCE_EQ(src_index_dims[1], + 1, + phi::errors::InvalidArgument( + "The last dim of Src_index should be 1 when it " + "is 2D, but we get %d", + src_index_dims[1])); + } else { + PADDLE_ENFORCE_EQ( + src_index_dims.size(), + 1, + phi::errors::InvalidArgument( + "The Src_index should be 1D, when it is not 2D, but we get %d", + src_index_dims.size())); + } + + auto dst_index_dims = dst_index.dims(); + if (dst_index_dims.size() == 2) { + PADDLE_ENFORCE_EQ(dst_index_dims[1], + 1, + phi::errors::InvalidArgument( + "The last dim of Dst_index should be 1 when it " + "is 2D, but we get %d", + dst_index_dims[1])); + } else { + PADDLE_ENFORCE_EQ( + dst_index_dims.size(), + 1, + phi::errors::InvalidArgument("The Dst_index should be 1D, " + "when it is not 2D, but we get %d", + dst_index_dims.size())); + } + + PADDLE_ENFORCE_EQ(src_index_dims[0], + dst_index_dims[0], + phi::errors::InvalidArgument( + "Src_index and Dst_index should have the same shape.")); + + // Infer out's shape according to x and y(need broadcasting condition) + out->set_dtype(x.dtype()); + auto x_dims = x.dims(); + auto y_dims = y.dims(); + auto x_dims1 = phi::vectorize(x_dims); + auto y_dims1 = phi::vectorize(y_dims); + std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); + std::vector y_dims2(y_dims1.begin() + 1, y_dims1.end()); + int max_dim = std::max(x_dims2.size(), y_dims2.size()); + int axis = std::abs(static_cast(x_dims2.size() - y_dims2.size())); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + // Only need to broadcast dimensions other than the 0th dimension. 
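+  // Example: x dims [num_nodes_x, 10, 1] and y dims [num_nodes_y, 10, 20]
+  // broadcast over the trailing dims to [10, 20]; with src_index of length
+  // num_edges, the output dims become [num_edges, 10, 20].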
+ phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), + phi::make_ddim(y_dims2), + x_dims_array.data(), + y_dims_array.data(), + out_dims_array.data(), + max_dim, + axis); + out_dims_array.insert(out_dims_array.begin(), src_index_dims[0]); + out->set_dims(phi::make_ddim(out_dims_array)); +} + } // namespace phi PD_REGISTER_INFER_META_FN(batch_norm, phi::BatchNormInferMeta); diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 60342dc58f5c9..8f585350044a1 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -420,4 +420,11 @@ void Yolov3LossInferMeta(const MetaTensor& x, MetaTensor* objectness_mask, MetaTensor* gt_match_mask); +void GraphSendUVInferMeta(const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& src_index, + const MetaTensor& dst_index, + const std::string& compute_type, + MetaTensor* out); + } // namespace phi diff --git a/paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h b/paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h new file mode 100644 index 0000000000000..2853a522b83d0 --- /dev/null +++ b/paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h @@ -0,0 +1,131 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
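+
+// Shared GPU helpers for the graph_send_ue_recv / graph_send_uv kernels:
+// copying host-side broadcast offsets to device memory, choosing a
+// thread-block width, atomic reduce functors, and the fused
+// gather-compute-scatter kernel they parameterize.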
+ +#pragma once +#include + +#include +#include + +#include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" + +namespace phi { + +#define CUDA_MAX_NUM_THREADS 1024 + +inline void CopyBCastOff(const BroadCastInfo& bcast_info, + thrust::device_vector& l_bcastoff, + thrust::device_vector& r_bcastoff) { + l_bcastoff.resize(bcast_info.out_len); + r_bcastoff.resize(bcast_info.out_len); +#ifdef PADDLE_WITH_HIP + hipMemcpy(thrust::raw_pointer_cast(l_bcastoff.data()), + bcast_info.l_offset.data(), + sizeof(int64_t) * bcast_info.out_len, + hipMemcpyHostToDevice); + hipMemcpy(thrust::raw_pointer_cast(r_bcastoff.data()), + bcast_info.r_offset.data(), + sizeof(int64_t) * bcast_info.out_len, + hipMemcpyHostToDevice); +#else + cudaMemcpy(thrust::raw_pointer_cast(l_bcastoff.data()), + bcast_info.l_offset.data(), + sizeof(int64_t) * bcast_info.out_len, + cudaMemcpyHostToDevice); + cudaMemcpy(thrust::raw_pointer_cast(r_bcastoff.data()), + bcast_info.r_offset.data(), + sizeof(int64_t) * bcast_info.out_len, + cudaMemcpyHostToDevice); +#endif +} + +inline int FindNumThreads(int dim, int max_num_threads = CUDA_MAX_NUM_THREADS) { + PADDLE_ENFORCE_GE(dim, + 0, + phi::errors::PreconditionNotMet( + "Required dim >= 0, but received dim = %d", dim)); + if (dim == 0) return 1; + int res = max_num_threads; + while (res > dim) { + res = res >> 1; + } + return res; +} + +template +struct GraphSendUERecvSumCUDAFunctor { + DEVICE inline void operator()(T* output, T val) { + paddle::platform::CudaAtomicAdd(output, val); + } +}; + +template +struct GraphSendUERecvMaxCUDAFunctor { + DEVICE inline void operator()(T* output, T val) { + paddle::platform::CudaAtomicMax(output, val); + } +}; + +template +struct GraphSendUERecvMinCUDAFunctor { + DEVICE inline void operator()(T* output, T val) { + paddle::platform::CudaAtomicMin(output, val); + } +}; + +template +__global__ void GraphSendUERecvCUDAKernel(const T* x_data, + const T* e_data, + const IndexT* src_indices, + const IndexT* dst_indices, + const int64_t* xbcast_off, + const int64_t* ebcast_off, + T* output, + int64_t index_size, + int64_t x_len, + int64_t e_len, + int64_t out_len, + bool use_bcast, + ComputeFunctor cfunctor, + ReduceFunctor rfunctor) { + IndexT ty = blockIdx.y * blockDim.y + threadIdx.y; + const IndexT stride_y = blockDim.y * gridDim.y; + + while (ty < index_size) { + IndexT src = src_indices[ty]; + IndexT dst = dst_indices[ty]; + int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride_x = blockDim.x * gridDim.x; + + const T* x_off = x_data + src * x_len; + const T* e_off = e_data + ty * e_len; + T* out_off = output + dst * out_len; + while (tx < out_len) { + int64_t x_add = use_bcast ? xbcast_off[tx] : tx; + int64_t e_add = use_bcast ? ebcast_off[tx] : tx; + T val = cfunctor(x_off[x_add], e_off[e_add]); + rfunctor(out_off + tx, val); + tx += stride_x; + } + ty += stride_y; + } +} + +} // namespace phi diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu new file mode 100644 index 0000000000000..96ec93ae555e6 --- /dev/null +++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu @@ -0,0 +1,295 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/graph_send_uv_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/gpu/graph_send_recv_funcs.h" +#include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" +#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/reduce_sum_kernel.h" + +namespace phi { + +template +void CalculateGrad(const Context& ctx, + const T* out_grad, + const IndexT* s_index, + const IndexT* d_index, + const phi::DDim& out_grad_dims, + const phi::DDim& x_grad_dims, + const std::string& compute_type, + int64_t index_size, + int64_t slice_size, + T* x_grad, + const DenseTensor& out_grad_tensor, + const DenseTensor& y) { + std::vector reduce_idx; + bool reduce = ReduceGrad(out_grad_dims, x_grad_dims, reduce_idx); + + if (compute_type == "ADD") { +#ifdef PADDLE_WITH_HIP + int block = 256; +#else + int block = 1024; +#endif + int64_t n = slice_size * index_size; + int max_grid_dimx = ctx.GetCUDAMaxGridDimSize()[0]; + int64_t grid_tmp = (n + block - 1) / block; + int64_t grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx; + GraphSendRecvSumCUDAFunctor functor; + if (!reduce) { + GraphSendRecvCUDAKernel> + <<>>(out_grad, + d_index, + s_index, + x_grad, + index_size, + slice_size, + functor); + } else { + const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); + DenseTensor x_grad_v2 = phi::EmptyLike(ctx, out_grad_tensor); + phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); + T* x_grad_v2_data = x_grad_v2.data(); + GraphSendRecvCUDAKernel> + <<>>(out_grad, + d_index, + s_index, + x_grad, + index_size, + bcast_info.out_len, + functor); + // Run reduce sum + DenseTensor x_grad_out = phi::Sum( + ctx, + x_grad_v2, + reduce_idx, + paddle::experimental::CppTypeToDataType::Type(), + true); +#ifdef PADDLE_WITH_HIP + hipMemcpy(x_grad, + x_grad_out.data(), + x_grad_out.numel() * sizeof(T), + hipMemcpyDeviceToDevice); +#else + cudaMemcpy(x_grad, + x_grad_out.data(), + x_grad_out.numel() * sizeof(T), + cudaMemcpyDeviceToDevice); +#endif + } + } else if (compute_type == "MUL") { + const auto& bcast_info = phi::CalcBCastInfo(y.dims(), out_grad_dims); + thrust::device_vector l_bcastoff, r_bcastoff; + if (bcast_info.use_bcast) { + CopyBCastOff(bcast_info, l_bcastoff, r_bcastoff); + } + int64_t out_len = bcast_info.out_len; + const int ntx = FindNumThreads(out_len); + const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int nbx = (out_len + ntx - 1) / ntx; + const int nby = (index_size + nty - 1) / nty; + const dim3 grid_(nbx, nby); + const dim3 block_(ntx, nty); + funcs::MultiplyFunctor mul_functor; + GraphSendUERecvSumCUDAFunctor sum_functor; + const T* y_data = y.data(); + if (!reduce) { + GraphSendUERecvCUDAKernel, + funcs::MultiplyFunctor> + <<>>( + y_data, + out_grad, + d_index, + s_index, + thrust::raw_pointer_cast(l_bcastoff.data()), + 
thrust::raw_pointer_cast(r_bcastoff.data()), + x_grad, + index_size, + bcast_info.l_len, + bcast_info.r_len, + out_len, + bcast_info.use_bcast, + mul_functor, + sum_functor); + } else { + DenseTensor x_grad_v2 = phi::EmptyLike(ctx, out_grad_tensor); + phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); + T* x_grad_v2_data = x_grad_v2.data(); + GraphSendUERecvCUDAKernel, + funcs::MultiplyFunctor> + <<>>( + y_data, + out_grad, + d_index, + s_index, + thrust::raw_pointer_cast(l_bcastoff.data()), + thrust::raw_pointer_cast(r_bcastoff.data()), + x_grad_v2_data, + index_size, + bcast_info.l_len, + bcast_info.r_len, + out_len, + bcast_info.use_bcast, + mul_functor, + sum_functor); + // Run reduce_sum + DenseTensor x_grad_out = phi::Sum( + ctx, + x_grad_v2, + reduce_idx, + paddle::experimental::CppTypeToDataType::Type(), + true); +#ifdef PADDLE_WITH_HIP + hipMemcpy(x_grad, + x_grad_out.data(), + x_grad_out.numel() * sizeof(T), + hipMemcpyDeviceToDevice); +#else + cudaMemcpy(x_grad, + x_grad_out.data(), + x_grad_out.numel() * sizeof(T), + cudaMemcpyDeviceToDevice); +#endif + } + } +} + +template +void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out_grad, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* x_grad, + DenseTensor* y_grad) { + const int& index_size = dst_index.dims()[0]; + + ctx.template Alloc(x_grad); + T* x_grad_data = x_grad->data(); + ctx.template Alloc(y_grad); + T* y_grad_data = y_grad->data(); + const auto& x_grad_dims = x_grad->dims(); + const auto& y_grad_dims = y_grad->dims(); + int64_t memset_size_x = 1, memset_size_y = 1; + int64_t slice_size_x = 1, slice_size_y = 1; + for (int i = 0; i < x_grad_dims.size(); i++) { + memset_size_x *= x_grad_dims[i]; + if (i > 0) slice_size_x *= x_grad_dims[i]; + } + for (int i = 0; i < y_grad_dims.size(); i++) { + memset_size_y *= y_grad_dims[i]; + if (i > 0) slice_size_y *= y_grad_dims[i]; + } + const size_t& memset_bytes_x = memset_size_x * sizeof(T); + const size_t& memset_bytes_y = memset_size_y * sizeof(T); +#ifdef PADDLE_WITH_HIP + hipMemset(x_grad_data, 0, memset_bytes_x); + hipMemset(y_grad_data, 0, memset_bytes_y); +#else + cudaMemset(x_grad_data, 0, memset_bytes_x); + cudaMemset(y_grad_data, 0, memset_bytes_y); +#endif + + if (index_size == 0) return; + + const T* out_grad_data = out_grad.data(); + const IndexT* s_index = src_index.data(); + const IndexT* d_index = dst_index.data(); + // Calculate X grad. + const auto& out_grad_dims = out_grad.dims(); + CalculateGrad(ctx, + out_grad_data, + s_index, + d_index, + out_grad_dims, + x_grad_dims, + compute_type, + index_size, + slice_size_x, + x_grad_data, + out_grad, + y); + // Calculate Y grad. 
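+  // Note: the src/dst index arguments are swapped relative to the X-grad
+  // call above, since the forward pass gathers x by src_index and y by
+  // dst_index.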
+ CalculateGrad(ctx, + out_grad_data, + d_index, + s_index, + out_grad_dims, + y_grad_dims, + compute_type, + index_size, + slice_size_y, + y_grad_data, + out_grad, + x); +} + +template +void GraphSendUVGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const DenseTensor& out_grad, + const std::string& compute_type, + DenseTensor* x_grad, + DenseTensor* y_grad) { + auto index_type = src_index.dtype(); + if (index_type == phi::DataType::INT32) { + GraphSendUVGradOpCUDAKernelLaunchHelper(ctx, + x, + y, + out_grad, + src_index, + dst_index, + compute_type, + x_grad, + y_grad); + } else if (index_type == phi::DataType::INT64) { + GraphSendUVGradOpCUDAKernelLaunchHelper(ctx, + x, + y, + out_grad, + src_index, + dst_index, + compute_type, + x_grad, + y_grad); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(graph_send_uv_grad, + GPU, + ALL_LAYOUT, + phi::GraphSendUVGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu new file mode 100644 index 0000000000000..2a8bb9eed5750 --- /dev/null +++ b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu @@ -0,0 +1,166 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/graph_send_uv_kernel.h" +#include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" +#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" + +#include + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" + +namespace phi { + +template +__global__ void GraphSendUVCUDAKernel(const T* x_data, + const T* y_data, + const IndexT* src_indices, + const IndexT* dst_indices, + const int64_t* xbcast_off, + const int64_t* ybcast_off, + T* output, + int64_t index_size, + int64_t x_len, + int64_t y_len, + int64_t out_len, + bool use_bcast, + ComputeFunctor cfunctor) { + IndexT ty = blockIdx.y * blockDim.y + threadIdx.y; + const IndexT stride_y = blockDim.y * gridDim.y; + + while (ty < index_size) { + IndexT src = src_indices[ty]; + IndexT dst = dst_indices[ty]; + int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride_x = blockDim.x * gridDim.x; + + const T* x_off = x_data + src * x_len; + const T* y_off = y_data + dst * y_len; + T* out_off = output + ty * out_len; + while (tx < out_len) { + int64_t x_add = use_bcast ? xbcast_off[tx] : tx; + int64_t y_add = use_bcast ? 
ybcast_off[tx] : tx; + T val = cfunctor(x_off[x_add], y_off[y_add]); + out_off[tx] = val; + tx += stride_x; + } + ty += stride_y; + } +} + +template +void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* out) { + const int& index_size = src_index.dims()[0]; + auto out_dims = out->dims(); + int64_t memset_size = 1; + for (int i = 0; i < out_dims.size(); i++) { + memset_size *= out_dims[i]; + } + ctx.template Alloc(out); + T* out_data = out->data(); + if (index_size == 0) return; + + const auto& bcast_info = phi::CalcBCastInfo(x.dims(), y.dims()); + const T* x_data = x.data(); + const T* y_data = y.data(); + const IndexT* s_index = src_index.data(); + const IndexT* d_index = dst_index.data(); + + thrust::device_vector x_bcastoff, y_bcastoff; + if (bcast_info.use_bcast) { + CopyBCastOff(bcast_info, x_bcastoff, y_bcastoff); + } + + int64_t out_len = bcast_info.out_len; + const int ntx = FindNumThreads(out_len); + const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int nbx = (out_len + ntx - 1) / ntx; + const int nby = (index_size + nty - 1) / nty; + const dim3 grid(nbx, nby); + const dim3 block(ntx, nty); + if (compute_type == "ADD") { + funcs::AddFunctor add_functor; + GraphSendUVCUDAKernel> + <<>>( + x_data, + y_data, + s_index, + d_index, + thrust::raw_pointer_cast(x_bcastoff.data()), + thrust::raw_pointer_cast(y_bcastoff.data()), + out_data, + index_size, + bcast_info.l_len, + bcast_info.r_len, + out_len, + bcast_info.use_bcast, + add_functor); + } else if (compute_type == "MUL") { + funcs::MultiplyFunctor mul_functor; + GraphSendUVCUDAKernel> + <<>>( + x_data, + y_data, + s_index, + d_index, + thrust::raw_pointer_cast(x_bcastoff.data()), + thrust::raw_pointer_cast(y_bcastoff.data()), + out_data, + index_size, + bcast_info.l_len, + bcast_info.r_len, + out_len, + bcast_info.use_bcast, + mul_functor); + } +} + +template +void GraphSendUVKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* out) { + auto index_type = src_index.dtype(); + if (index_type == phi::DataType::INT32) { + GraphSendUVOpCUDAKernelLaunchHelper( + ctx, x, y, src_index, dst_index, compute_type, out); + } else if (index_type == phi::DataType::INT64) { + GraphSendUVOpCUDAKernelLaunchHelper( + ctx, x, y, src_index, dst_index, compute_type, out); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(graph_send_uv, + GPU, + ALL_LAYOUT, + phi::GraphSendUVKernel, + float, + double, + int, + int64_t, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/graph_send_uv_grad_kernel.h b/paddle/phi/kernels/graph_send_uv_grad_kernel.h new file mode 100644 index 0000000000000..329e4e61e9e74 --- /dev/null +++ b/paddle/phi/kernels/graph_send_uv_grad_kernel.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void GraphSendUVGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const DenseTensor& out_grad, + const std::string& compute_type, + DenseTensor* x_grad, + DenseTensor* y_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/graph_send_uv_kernel.h b/paddle/phi/kernels/graph_send_uv_kernel.h new file mode 100644 index 0000000000000..159a27fc30a3b --- /dev/null +++ b/paddle/phi/kernels/graph_send_uv_kernel.h @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void GraphSendUVKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* out); + +} // namespace phi diff --git a/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h b/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h new file mode 100644 index 0000000000000..35e51fb930c8d --- /dev/null +++ b/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h @@ -0,0 +1,139 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "paddle/phi/kernels/funcs/common_shape.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" + +namespace phi { + +struct BroadCastInfo { + bool use_bcast; + // l_offset[i] indicates the start position of tensor lhs that required to + // compute the i-th element in output, so as r_offset[i]. 
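+  // Example: per-row lhs shape [10, 1] against rhs shape [10, 20] gives
+  // out_len = 200 with l_offset[i] = i / 20 and r_offset[i] = i.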
+ std::vector l_offset, r_offset; + int64_t l_len, r_len, out_len, reduce_size; +}; + +inline bool UseBroadCast(const phi::DDim& l_dims, const phi::DDim& r_dims) { + if (l_dims.size() != r_dims.size()) { + return true; + } + for (int i = 1; i < l_dims.size(); i++) { + if (l_dims[i] != r_dims[i]) { + return true; + } + } + return false; +} + +inline BroadCastInfo CalcBCastInfo(const phi::DDim& l_dims, + const phi::DDim& r_dims) { + BroadCastInfo binfo; + binfo.use_bcast = UseBroadCast(l_dims, r_dims); + binfo.l_len = 1; + binfo.r_len = 1; + for (int i = 1; i < l_dims.size(); i++) { + binfo.l_len *= l_dims[i]; + } + for (int i = 1; i < r_dims.size(); i++) { + binfo.r_len *= r_dims[i]; + } + // TODO(daisiming): Whether to add dot. + binfo.reduce_size = 1; + if (binfo.use_bcast) { + const int max_dim = std::max(l_dims.size(), r_dims.size()) - 1; + int stride_l = 1, stride_r = 1; + binfo.l_offset.emplace_back(0); + binfo.r_offset.emplace_back(0); + int out_len = 1; + for (int i = 0; i < max_dim; i++) { + // Iterate the axis from back to front. + const int dl = + (l_dims.size() - 1 - i < 1) ? 1 : l_dims[l_dims.size() - 1 - i]; + const int dr = + (r_dims.size() - 1 - i < 1) ? 1 : r_dims[r_dims.size() - 1 - i]; + for (int j = 1; j < std::max(dl, dr); j++) { + for (int k = 0; k < out_len; k++) { + binfo.l_offset.emplace_back(binfo.l_offset[k] + + j * (j < dl) * stride_l); + binfo.r_offset.emplace_back(binfo.r_offset[k] + + j * (j < dr) * stride_r); + } + } + out_len *= std::max(dl, dr); + stride_l *= dl; + stride_r *= dr; + } + binfo.out_len = out_len; + } else { + binfo.out_len = binfo.l_len; + } + return binfo; +} + +inline std::vector InferBroadcastShape(const phi::DDim& x_dims, + const phi::DDim& e_dims, + const std::string& type = "x") { + auto x_dims1 = phi::vectorize(x_dims); + auto e_dims1 = phi::vectorize(e_dims); + std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); + std::vector e_dims2(e_dims1.begin() + 1, e_dims1.end()); + int max_dim = std::max(x_dims2.size(), e_dims2.size()); + int axis = std::abs(static_cast(x_dims2.size() - e_dims2.size())); + std::vector x_dims_array(max_dim); + std::vector e_dims_array(max_dim); + std::vector out_dims_array(max_dim); + // Only need to broadcast dimensions other than the 0th dimension. + phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), + phi::make_ddim(e_dims2), + x_dims_array.data(), + e_dims_array.data(), + out_dims_array.data(), + max_dim, + axis); + if (type == "x") { + out_dims_array.insert(out_dims_array.begin(), x_dims[0]); + } else { + out_dims_array.insert(out_dims_array.begin(), e_dims[0]); + } + return out_dims_array; +} + +inline bool ReduceGrad(const phi::DDim& out_grad_dims, + const phi::DDim& x_dims, + std::vector& axis) { + // We must ensure the ndim of out_grad and x are the same. + bool reduce = false; + for (int i = 1; i < out_grad_dims.size(); i++) { + if (out_grad_dims[i] != x_dims[i]) { + reduce = true; + break; + } + } + if (!reduce) return false; + + // Get reduce axis. + for (int i = 1; i < out_grad_dims.size(); i++) { + if (out_grad_dims[i] - x_dims[i] != 0) { + axis.emplace_back(i); + } + } + return true; +} + +} // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py new file mode 100644 index 0000000000000..3d66df06cc5ed --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -0,0 +1,146 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import _test_eager_guard + +from op_test import OpTest + + +def get_broadcast_shape(shp1, shp2): + pad_shp1, pad_shp2 = shp1, shp2 + if len(shp1) > len(shp2): + pad_shp2 = [ + 1, + ] * (len(shp1) - len(shp2)) + shp2 + elif len(shp1) < len(shp2): + pad_shp1 = [ + 1, + ] * (len(shp2) - len(shp1)) + shp1 + for d1, d2 in zip(pad_shp1, pad_shp2): + if d1 != d2 and d1 != 1 and d2 != 1: + raise ValueError + rst = [max(d1, d2) for d1, d2 in zip(pad_shp1, pad_shp2)] + return rst + + +class BroadCastInfo(object): + + def __init__(self, x_shape, e_shape): + self.x_shape = x_shape + self.e_shape = e_shape + + self.calculate_bcastinfo() + + def use_bcast(self): + if len(self.x_shape) != len(self.e_shape): + return True + for i in range(1, len(self.x_shape)): + if self.x_shape[i] != self.e_shape[i]: + return True + return False + + def calculate_bcastinfo(self): + lhs_len = 1 + rhs_len = 1 + for i in range(1, len(self.x_shape)): + lhs_len *= self.x_shape[i] + for i in range(1, len(self.e_shape)): + rhs_len *= self.e_shape[i] + use_b = self.use_bcast() + + if use_b: + max_ndim = max(len(self.x_shape), len(self.e_shape)) - 1 + out_len = 1 + stride_l = stride_r = 1 + lhs_offset = [0] + rhs_offset = [0] + for j in range(0, max_ndim): + dl = 1 if (len(self.x_shape) - 1 - j) < 1 \ + else self.x_shape[len(self.x_shape) - 1 - j] + dr = 1 if (len(self.e_shape) - 1 - j) < 1 \ + else self.e_shape[len(self.e_shape) - 1 - j] + for i in range(1, max(dl, dr)): + for k in range(0, out_len): + lhs_offset.append(lhs_offset[k] + i * + (i < dl) * stride_l) + rhs_offset.append(rhs_offset[k] + i * + (i < dr) * stride_r) + + out_len *= max(dl, dr) + stride_l *= dl + stride_r *= dr + else: + out_len = rhs_len + + self.use_broadcast = use_b + self.out_len = out_len + self.lhs_len = lhs_len + self.rhs_len = rhs_len + if use_b: + self.lhs_offset = lhs_offset + self.rhs_offset = rhs_offset + + +def compute_graph_send_uv(inputs, attributes): + x = inputs['x'] + y = inputs['y'] + src_index = inputs['src_index'] + dst_index = inputs['dst_index'] + compute_type = attributes['compute_type'] + + gather_x = x[src_index] + gather_y = y[dst_index] + + # Calculate forward output. 
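+    # For edge i this computes x[src_index[i]] op y[dst_index[i]], with
+    # NumPy broadcasting over the trailing feature dims.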
+ if compute_type == "ADD": + results = gather_x + gather_y + elif compute_type == "MUL": + results = gather_x * gather_y + return results + + +class TestGraphSendUVOp(OpTest): + + def setUp(self): + paddle.enable_static() + self.op_type = "graph_send_uv" + self.set_config() + self.inputs = { + 'x': self.x, + 'y': self.y, + 'src_index': self.src_index, + 'dst_index': self.dst_index + } + self.attrs = {'compute_type': self.compute_type} + out = compute_graph_send_uv(self.inputs, self.attrs) + self.outputs = {'out': out} + + def test_check_output(self): + self.check_output(check_eager=True) + + # def test_check_grad(self): + # self.check_grad(['x', 'y'], 'Out', check_eager=True) + + def set_config(self): + self.x = np.random.random((10, 20)).astype("float64") + self.y = np.random.random((10, 20)).astype("float64") + index = np.random.randint(0, 10, (15, 2)).astype(np.int64) + self.src_index = index[:, 0] + self.dst_index = index[:, 1] + self.compute_type = 'ADD' From 6f456000ecb5fcf1b516fc56c0d64bc5379847b0 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Wed, 3 Aug 2022 08:57:25 +0000 Subject: [PATCH 02/12] fix op maker bug --- paddle/phi/api/yaml/generator/filters.py | 4 +- paddle/phi/infermeta/multiary.cc | 4 - .../tests/unittests/test_graph_send_uv_op.py | 81 +------------------ 3 files changed, 5 insertions(+), 84 deletions(-) diff --git a/paddle/phi/api/yaml/generator/filters.py b/paddle/phi/api/yaml/generator/filters.py index cda858ab6e74e..de9fdf25e9834 100644 --- a/paddle/phi/api/yaml/generator/filters.py +++ b/paddle/phi/api/yaml/generator/filters.py @@ -86,9 +86,9 @@ def to_opmaker_name(s): def to_opmaker_name_cstr(s): if s.endswith("_grad"): - return '"{}@GRAD"'.format(to_pascal_case(s[:-5])) + return '"{}@GRAD"'.format(s[:-5]) else: - return '"{}"'.format(to_pascal_case(s)) + return '"{}"'.format(s) def to_pascal_case(s): diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 7953900d706c0..e379ee8129134 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -2417,11 +2417,7 @@ void GraphSendUVInferMeta(const MetaTensor& x, const MetaTensor& dst_index, const std::string& compute_type, MetaTensor* out) { - std::cout << "Enter GraphSendUVInferMeta" << std::endl; - auto src_index_dims = src_index.dims(); - - std::cout << "Check whether enter here.\n"; if (src_index_dims.size() == 2) { PADDLE_ENFORCE_EQ(src_index_dims[1], 1, diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py index 3d66df06cc5ed..2affb75ee7a61 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -22,81 +22,6 @@ from op_test import OpTest -def get_broadcast_shape(shp1, shp2): - pad_shp1, pad_shp2 = shp1, shp2 - if len(shp1) > len(shp2): - pad_shp2 = [ - 1, - ] * (len(shp1) - len(shp2)) + shp2 - elif len(shp1) < len(shp2): - pad_shp1 = [ - 1, - ] * (len(shp2) - len(shp1)) + shp1 - for d1, d2 in zip(pad_shp1, pad_shp2): - if d1 != d2 and d1 != 1 and d2 != 1: - raise ValueError - rst = [max(d1, d2) for d1, d2 in zip(pad_shp1, pad_shp2)] - return rst - - -class BroadCastInfo(object): - - def __init__(self, x_shape, e_shape): - self.x_shape = x_shape - self.e_shape = e_shape - - self.calculate_bcastinfo() - - def use_bcast(self): - if len(self.x_shape) != len(self.e_shape): - return True - for i in range(1, len(self.x_shape)): - if self.x_shape[i] != self.e_shape[i]: - 
return True - return False - - def calculate_bcastinfo(self): - lhs_len = 1 - rhs_len = 1 - for i in range(1, len(self.x_shape)): - lhs_len *= self.x_shape[i] - for i in range(1, len(self.e_shape)): - rhs_len *= self.e_shape[i] - use_b = self.use_bcast() - - if use_b: - max_ndim = max(len(self.x_shape), len(self.e_shape)) - 1 - out_len = 1 - stride_l = stride_r = 1 - lhs_offset = [0] - rhs_offset = [0] - for j in range(0, max_ndim): - dl = 1 if (len(self.x_shape) - 1 - j) < 1 \ - else self.x_shape[len(self.x_shape) - 1 - j] - dr = 1 if (len(self.e_shape) - 1 - j) < 1 \ - else self.e_shape[len(self.e_shape) - 1 - j] - for i in range(1, max(dl, dr)): - for k in range(0, out_len): - lhs_offset.append(lhs_offset[k] + i * - (i < dl) * stride_l) - rhs_offset.append(rhs_offset[k] + i * - (i < dr) * stride_r) - - out_len *= max(dl, dr) - stride_l *= dl - stride_r *= dr - else: - out_len = rhs_len - - self.use_broadcast = use_b - self.out_len = out_len - self.lhs_len = lhs_len - self.rhs_len = rhs_len - if use_b: - self.lhs_offset = lhs_offset - self.rhs_offset = rhs_offset - - def compute_graph_send_uv(inputs, attributes): x = inputs['x'] y = inputs['y'] @@ -132,10 +57,10 @@ def setUp(self): self.outputs = {'out': out} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) - # def test_check_grad(self): - # self.check_grad(['x', 'y'], 'Out', check_eager=True) + def test_check_grad(self): + self.check_grad(['x', 'y'], 'out', check_eager=False) def set_config(self): self.x = np.random.random((10, 20)).astype("float64") From c667bd09ce1ffd7b0043a54d764a8eaf99030450 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Thu, 4 Aug 2022 08:20:09 +0000 Subject: [PATCH 03/12] fix mul grad bug --- .../kernels/gpu/graph_send_uv_grad_kernel.cu | 38 ++++++++++++- .../tests/unittests/test_graph_send_uv_op.py | 56 +++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu index 96ec93ae555e6..49741b65bd9f7 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu @@ -27,6 +27,29 @@ namespace phi { +/*template +__global__ void GraphSendUVCUDAKernel(const T* out_grad, + const IndexT* src_indices, + const IndexT* dst_indices, + int64_t index_size, + int64_t slice_size, + T* x_grad) { + Idx ty = blockIdx.y * blockDim.y + threadIdx.y; + const Idx stride_y = blockDim.y * gridDim.y; + while (ty < index_size) { + IndexT src = src_indices[ty]; + IndexT dst = dst_indices[ty]; + int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride_x = blockDim.x * gridDim.x; + + const T* e_off = e_data + ty * slice_size; + T* x_grad_off = x_grad + dst * slice_size; + while (tx < slice_size) { + + } + } +}*/ + template void CalculateGrad(const Context& ctx, const T* out_grad, @@ -64,12 +87,17 @@ void CalculateGrad(const Context& ctx, slice_size, functor); } else { + // TODO(daisiming): 反向很多bug const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); - DenseTensor x_grad_v2 = phi::EmptyLike(ctx, out_grad_tensor); + auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, + out_grad_dims_1.end()); + out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]); + DenseTensor x_grad_v2 = phi::Empty(ctx, out_grad_dims_2); phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); T* x_grad_v2_data = 
x_grad_v2.data(); GraphSendRecvCUDAKernel> - <<>>(out_grad, + <<>>(out_grad, // 觉得有点怪 d_index, s_index, x_grad, @@ -132,7 +160,11 @@ void CalculateGrad(const Context& ctx, mul_functor, sum_functor); } else { - DenseTensor x_grad_v2 = phi::EmptyLike(ctx, out_grad_tensor); + auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, + out_grad_dims_1.end()); + out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]); + DenseTensor x_grad_v2 = phi::Empty(ctx, out_grad_dims_2); phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); T* x_grad_v2_data = x_grad_v2.data(); GraphSendUERecvCUDAKernel Date: Fri, 5 Aug 2022 06:31:36 +0000 Subject: [PATCH 04/12] add unittest --- .../kernels/gpu/graph_send_uv_grad_kernel.cu | 8 +++---- .../tests/unittests/test_graph_send_uv_op.py | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu index 49741b65bd9f7..c4a9d111be491 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu @@ -27,7 +27,7 @@ namespace phi { -/*template +template __global__ void GraphSendUVCUDAKernel(const T* out_grad, const IndexT* src_indices, const IndexT* dst_indices, @@ -45,10 +45,9 @@ __global__ void GraphSendUVCUDAKernel(const T* out_grad, const T* e_off = e_data + ty * slice_size; T* x_grad_off = x_grad + dst * slice_size; while (tx < slice_size) { - } } -}*/ +} template void CalculateGrad(const Context& ctx, @@ -87,7 +86,6 @@ void CalculateGrad(const Context& ctx, slice_size, functor); } else { - // TODO(daisiming): 反向很多bug const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); auto out_grad_dims_1 = phi::vectorize(out_grad_dims); std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, @@ -97,7 +95,7 @@ void CalculateGrad(const Context& ctx, phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); T* x_grad_v2_data = x_grad_v2.data(); GraphSendRecvCUDAKernel> - <<>>(out_grad, // 觉得有点怪 + <<>>(out_grad, d_index, s_index, x_grad, diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py index 0d7e2faeaaf9b..b5ca6364555c5 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -125,3 +125,25 @@ def set_config(self): self.src_index = index[:, 0] self.dst_index = index[:, 1] self.compute_type = 'MUL' + + +class TestCase6(TestGraphSendUVOp): + + def set_config(self): + self.x = np.random.random((10, 10, 1)).astype("float64") + self.y = np.random.random((10, 10, 10)) + index = np.random.randint(0, 10, (15, 2)).astype(np.int64) + self.src_index = index[:, 0] + self.dst_index = index[:, 1] + self.compute_type = 'ADD' + + +class TestCase7(TestGraphSendUVOp): + + def set_config(self): + self.x = np.random.random((10, 10, 1)).astype("float64") + self.y = np.random.random((10, 10, 10)) + index = np.random.randint(0, 10, (15, 2)).astype(np.int64) + self.src_index = index[:, 0] + self.dst_index = index[:, 1] + self.compute_type = 'MUL' From 1f66a0dd60f2d95015889d3311b40aaf23edbafd Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Sun, 7 Aug 2022 07:34:01 +0000 Subject: [PATCH 05/12] fix add grad bug, add cpu kernel --- .../kernels/cpu/graph_send_ue_recv_funcs.h | 46 +++ .../kernels/cpu/graph_send_uv_grad_kernel.cc | 268 ++++++++++++++++++ 
.../phi/kernels/cpu/graph_send_uv_kernel.cc | 125 ++++++++ .../kernels/gpu/graph_send_uv_grad_kernel.cu | 75 ++--- .../phi/kernels/gpu/graph_send_uv_kernel.cu | 2 +- 5 files changed, 478 insertions(+), 38 deletions(-) create mode 100644 paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h create mode 100644 paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc create mode 100644 paddle/phi/kernels/cpu/graph_send_uv_kernel.cc diff --git a/paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h b/paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h new file mode 100644 index 0000000000000..7647415d8e7cb --- /dev/null +++ b/paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" + +namespace phi { + +template +struct GraphAddFunctor { + inline T operator()(const T a, const T b) const { return a + b; } +}; + +template +struct GraphMulFunctor { + inline T operator()(const T a, const T b) const { return a * b; } +}; + +template +struct GraphMaxFunctor { + inline T operator()(const T a, const T b) const { return a < b ? b : a; } +}; + +template +struct GraphMinFunctor { + inline T operator()(const T a, const T b) const { return a < b ? a : b; } +}; + +} // namespace phi diff --git a/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc new file mode 100644 index 0000000000000..4b224726b6ffb --- /dev/null +++ b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc @@ -0,0 +1,268 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/graph_send_uv_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/reduce_sum_kernel.h" + +namespace phi { + +template +void CalculateGrad(const Context& ctx, + const T* out_grad, + const IndexT* s_index, + const IndexT* d_index, + const phi::DDim& out_grad_dims, + const phi::DDim& x_grad_dims, + const std::string& compute_type, + int64_t index_size, + int64_t slice_size, + T* x_grad, + const DenseTensor& out_grad_tensor, + const DenseTensor& y) { + std::vector reduce_idx; + bool reduce = ReduceGrad(out_grad_dims, x_grad_dims, reduce_idx); + + if (compute_type == "ADD") { + if (!reduce) { +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int64_t i = 0; i < index_size; i++) { + IndexT dst = d_index[i]; + T* x_grad_off = x_grad + dst * slice_size; + const T* out_grad_off = out_grad + i * slice_size; + for (int64_t j = 0; j < slice_size; j++) { + if (out_grad_off[j] != 0) { +#ifdef PADDLE_WITH_MKLML +#pragma omp atomic +#endif + x_grad_off[j] += out_grad_off[j]; + } + } + } + } else { + const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); + auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, + out_grad_dims_1.end()); + out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]); + DenseTensor x_grad_v2 = phi::Empty(ctx, out_grad_dims_2); + phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); + T* x_grad_v2_data = x_grad_v2.data(); +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int64_t i = 0; i < index_size; i++) { + IndexT dst = d_index[i]; + T* x_grad_off = x_grad_v2_data + dst * bcast_info.out_len; + const T* out_grad_off = out_grad + i * bcast_info.out_len; + for (int64_t j = 0; j < bcast_info.out_len; j++) { + if (out_grad_off[j] != 0) { +#ifdef PADDLE_WITH_MKLML +#pragma omp atomic +#endif + x_grad_off[j] += out_grad_off[j]; + } + } + } + DenseTensor x_grad_out = phi::Sum( + ctx, + x_grad_v2, + reduce_idx, + paddle::experimental::CppTypeToDataType::Type(), + true); + memcpy(x_grad, x_grad_out.data(), x_grad_out.numel() * sizeof(T)); + } + } else if (compute_type == "MUL") { + const auto& bcast = phi::CalcBCastInfo(y.dims(), out_grad_dims); + const T* y_data = y.data(); + if (!reduce) { +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int64_t i = 0; i < index_size; i++) { + IndexT src = s_index[i]; + IndexT dst = d_index[i]; + T* x_grad_off = x_grad + dst * bcast.out_len; + const T* y_off = y_data + src * bcast.l_len; + const T* out_grad_off = out_grad + i * bcast.r_len; + for (int64_t j = 0; j < bcast.out_len; j++) { + int64_t y_add = bcast.use_bcast ? bcast.l_offset[j] : j; + int64_t o_add = bcast.use_bcast ? 
bcast.r_offset[j] : j;
+          T val = y_off[y_add] * out_grad_off[o_add];
+          if (val != 0) {
+#ifdef PADDLE_WITH_MKLML
+#pragma omp atomic
+#endif
+            x_grad_off[j] += val;
+          }
+        }
+      }
+    } else {
+      auto out_grad_dims_1 = phi::vectorize(out_grad_dims);
+      std::vector<int64_t> out_grad_dims_2(out_grad_dims_1.begin() + 1,
+                                           out_grad_dims_1.end());
+      out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]);
+      DenseTensor x_grad_v2 = phi::Empty<T, Context>(ctx, out_grad_dims_2);
+      phi::funcs::SetConstant<Context, T>()(ctx, &x_grad_v2, T(0));
+      T* x_grad_v2_data = x_grad_v2.data<T>();
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for
+#endif
+      for (int64_t i = 0; i < index_size; i++) {
+        IndexT src = s_index[i];
+        IndexT dst = d_index[i];
+        T* x_grad_off = x_grad_v2_data + dst * bcast.out_len;
+        const T* y_off = y_data + src * bcast.l_len;
+        const T* out_grad_off = out_grad + i * bcast.r_len;
+        for (int64_t j = 0; j < bcast.out_len; j++) {
+          int64_t y_add = bcast.use_bcast ? bcast.l_offset[j] : j;
+          int64_t o_add = bcast.use_bcast ? bcast.r_offset[j] : j;
+          T val = y_off[y_add] * out_grad_off[o_add];
+          if (val != 0) {
+#ifdef PADDLE_WITH_MKLML
+#pragma omp atomic
+#endif
+            x_grad_off[j] += val;
+          }
+        }
+      }
+      DenseTensor x_grad_out = phi::Sum<T, Context>(
+          ctx,
+          x_grad_v2,
+          reduce_idx,
+          paddle::experimental::CppTypeToDataType<T>::Type(),
+          true);
+      memcpy(x_grad, x_grad_out.data<T>(), x_grad_out.numel() * sizeof(T));
+    }
+  }
+}
+
+template <typename Context, typename T, typename IndexT>
+void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx,
+                                         const DenseTensor& x,
+                                         const DenseTensor& y,
+                                         const DenseTensor& out_grad,
+                                         const DenseTensor& src_index,
+                                         const DenseTensor& dst_index,
+                                         const std::string& compute_type,
+                                         DenseTensor* x_grad,
+                                         DenseTensor* y_grad) {
+  const int64_t& index_size = dst_index.dims()[0];
+
+  ctx.template Alloc<T>(x_grad);
+  T* x_grad_data = x_grad->data<T>();
+  ctx.template Alloc<T>(y_grad);
+  T* y_grad_data = y_grad->data<T>();
+  const auto& x_grad_dims = x_grad->dims();
+  const auto& y_grad_dims = y_grad->dims();
+  int64_t memset_size_x = 1, memset_size_y = 1;
+  int64_t slice_size_x = 1, slice_size_y = 1;
+  for (int i = 0; i < x_grad_dims.size(); i++) {
+    memset_size_x *= x_grad_dims[i];
+    if (i > 0) slice_size_x *= x_grad_dims[i];
+  }
+  for (int i = 0; i < y_grad_dims.size(); i++) {
+    memset_size_y *= y_grad_dims[i];
+    if (i > 0) slice_size_y *= y_grad_dims[i];
+  }
+  const size_t& memset_bytes_x = memset_size_x * sizeof(T);
+  const size_t& memset_bytes_y = memset_size_y * sizeof(T);
+  memset(x_grad_data, 0, memset_bytes_x);
+  memset(y_grad_data, 0, memset_bytes_y);
+
+  if (index_size == 0) return;
+  const T* out_grad_data = out_grad.data<T>();
+  const IndexT* s_index = src_index.data<IndexT>();
+  const IndexT* d_index = dst_index.data<IndexT>();
+  const auto& out_grad_dims = out_grad.dims();
+  // Calculate X Grad.
+  CalculateGrad<Context, T, IndexT>(ctx,
+                                    out_grad_data,
+                                    d_index,
+                                    s_index,
+                                    out_grad_dims,
+                                    x_grad_dims,
+                                    compute_type,
+                                    index_size,
+                                    slice_size_x,
+                                    x_grad_data,
+                                    out_grad,
+                                    y);
+  // Calculate Y Grad.
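+  // As in the GPU kernel, the index arguments are swapped here relative to
+  // the X-grad call above, matching how x and y were gathered forward.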
+ CalculateGrad(ctx, + out_grad_data, + s_index, + d_index, + out_grad_dims, + y_grad_dims, + compute_type, + index_size, + slice_size_y, + y_grad_data, + out_grad, + x); +} + +template +void GraphSendUVGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const DenseTensor& out_grad, + const std::string& compute_type, + DenseTensor* x_grad, + DenseTensor* y_grad) { + auto index_type = src_index.dtype(); + if (index_type == phi::DataType::INT32) { + GraphSendUVGradOpKernelLaunchHelper(ctx, + x, + y, + out_grad, + src_index, + dst_index, + compute_type, + x_grad, + y_grad); + } else if (index_type == phi::DataType::INT64) { + GraphSendUVGradOpKernelLaunchHelper(ctx, + x, + y, + out_grad, + src_index, + dst_index, + compute_type, + x_grad, + y_grad); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(graph_send_uv_grad, + CPU, + ALL_LAYOUT, + phi::GraphSendUVGradKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc new file mode 100644 index 0000000000000..3689f51199777 --- /dev/null +++ b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/graph_send_uv_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h" +#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" + +namespace phi { + +template +void GraphSendUVCpuKernel(const BroadCastInfo& bcast, + const T* x_data, + const T* y_data, + const IndexT* src_indices, + const IndexT* dst_indices, + T* output, + int64_t index_size, + ComputeFunctor cfunctor) { +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int64_t i = 0; i < index_size; i++) { + IndexT src = src_indices[i]; + IndexT dst = dst_indices[i]; + T* out_off = output + i * bcast.out_len; + const T* x_off = x_data + src * bcast.l_len; + const T* y_off = y_data + dst * bcast.r_len; + for (int64_t j = 0; j < bcast.out_len; j++) { + int64_t x_add = bcast.use_bcast ? bcast.l_offset[j] : j; + int64_t y_add = bcast.use_bcast ? 
bcast.r_offset[j] : j; + T val = cfunctor(x_off[x_add], y_off[y_add]); + out_off[j] = val; + } + } +} + +template +void GraphSendUVOpKernelLaunchHelper(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* out) { + const int& index_size = src_index.dims()[0]; + auto out_dims = out->dims(); + int64_t memset_size = 1; + for (int i = 0; i < out_dims.size(); i++) { + memset_size *= out_dims[i]; + } + ctx.template Alloc(out); + T* out_data = out->data(); + if (index_size == 0) return; + + const auto& bcast_info = phi::CalcBCastInfo(x.dims(), y.dims()); + const T* x_data = x.data(); + const T* y_data = y.data(); + const IndexT* s_index = src_index.data(); + const IndexT* d_index = dst_index.data(); + if (compute_type == "ADD") { + GraphAddFunctor add_functor; + GraphSendUVCpuKernel>(bcast_info, + x_data, + y_data, + s_index, + d_index, + out_data, + index_size, + add_functor); + } else if (compute_type == "MUL") { + GraphMulFunctor mul_functor; + GraphSendUVCpuKernel>(bcast_info, + x_data, + y_data, + s_index, + d_index, + out_data, + index_size, + mul_functor); + } +} + +template +void GraphSendUVKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& src_index, + const DenseTensor& dst_index, + const std::string& compute_type, + DenseTensor* out) { + auto index_type = src_index.dtype(); + if (index_type == phi::DataType::INT32) { + GraphSendUVOpKernelLaunchHelper( + ctx, x, y, src_index, dst_index, compute_type, out); + } else if (index_type == phi::DataType::INT64) { + GraphSendUVOpKernelLaunchHelper( + ctx, x, y, src_index, dst_index, compute_type, out); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(graph_send_uv, + CPU, + ALL_LAYOUT, + phi::GraphSendUVKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu index c4a9d111be491..ae3c6eea19a39 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu @@ -27,25 +27,28 @@ namespace phi { -template -__global__ void GraphSendUVCUDAKernel(const T* out_grad, - const IndexT* src_indices, - const IndexT* dst_indices, - int64_t index_size, - int64_t slice_size, - T* x_grad) { - Idx ty = blockIdx.y * blockDim.y + threadIdx.y; - const Idx stride_y = blockDim.y * gridDim.y; +template +__global__ void GraphSendUVGradCUDAKernel(const T* out_grad, + const IndexT* src_indices, + const IndexT* dst_indices, + int64_t index_size, + int64_t slice_size, + T* x_grad) { + IndexT ty = blockIdx.y * blockDim.y + threadIdx.y; + const IndexT stride_y = blockDim.y * gridDim.y; while (ty < index_size) { IndexT src = src_indices[ty]; IndexT dst = dst_indices[ty]; int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; int64_t stride_x = blockDim.x * gridDim.x; - const T* e_off = e_data + ty * slice_size; + const T* out_grad_off = out_grad + ty * slice_size; T* x_grad_off = x_grad + dst * slice_size; while (tx < slice_size) { + paddle::platform::CudaAtomicAdd(x_grad_off + tx, out_grad_off[tx]); + tx += stride_x; } + ty += stride_y; } } @@ -66,25 +69,16 @@ void CalculateGrad(const Context& ctx, bool reduce = ReduceGrad(out_grad_dims, x_grad_dims, reduce_idx); if (compute_type == "ADD") { -#ifdef PADDLE_WITH_HIP - int block = 256; -#else - int block = 1024; -#endif - int64_t n = slice_size * index_size; - int max_grid_dimx = 
ctx.GetCUDAMaxGridDimSize()[0]; - int64_t grid_tmp = (n + block - 1) / block; - int64_t grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx; - GraphSendRecvSumCUDAFunctor functor; if (!reduce) { - GraphSendRecvCUDAKernel> - <<>>(out_grad, - d_index, - s_index, - x_grad, - index_size, - slice_size, - functor); + const int ntx = FindNumThreads(slice_size); + const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int nbx = (slice_size + ntx - 1) / ntx; + const int nby = (index_size + nty - 1) / nty; + const dim3 grid_tmp(nbx, nby); + const dim3 block_tmp(ntx, nty); + GraphSendUVGradCUDAKernel + <<>>( + out_grad, d_index, s_index, index_size, slice_size, x_grad); } else { const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); auto out_grad_dims_1 = phi::vectorize(out_grad_dims); @@ -94,14 +88,21 @@ void CalculateGrad(const Context& ctx, DenseTensor x_grad_v2 = phi::Empty(ctx, out_grad_dims_2); phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); T* x_grad_v2_data = x_grad_v2.data(); - GraphSendRecvCUDAKernel> - <<>>(out_grad, - d_index, - s_index, - x_grad, - index_size, - bcast_info.out_len, - functor); + + const int ntx = FindNumThreads(bcast_info.out_len); + const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int nbx = (bcast_info.out_len + ntx - 1) / ntx; + const int nby = (index_size + nty - 1) / nty; + const dim3 grid_tmp(nbx, nby); + const dim3 block_tmp(ntx, nty); + GraphSendUVGradCUDAKernel + <<>>(out_grad, + d_index, + s_index, + index_size, + bcast_info.out_len, + x_grad_v2_data); + // Run reduce sum DenseTensor x_grad_out = phi::Sum( ctx, @@ -216,7 +217,7 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx, const std::string& compute_type, DenseTensor* x_grad, DenseTensor* y_grad) { - const int& index_size = dst_index.dims()[0]; + const int64_t& index_size = dst_index.dims()[0]; ctx.template Alloc(x_grad); T* x_grad_data = x_grad->data(); diff --git a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu index 2a8bb9eed5750..31e48f1f90704 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu @@ -70,7 +70,7 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx, const DenseTensor& dst_index, const std::string& compute_type, DenseTensor* out) { - const int& index_size = src_index.dims()[0]; + const int64_t& index_size = src_index.dims()[0]; auto out_dims = out->dims(); int64_t memset_size = 1; for (int i = 0; i < out_dims.size(); i++) { From a91b584a6cc7d176a449e72b7f34f8bd3c7b13b9 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Mon, 8 Aug 2022 03:46:14 +0000 Subject: [PATCH 06/12] add paddle.geometric.message_passing --- python/paddle/geometric/__init__.py | 19 +++++++ .../geometric/message_passing/.send.py.swp | Bin 0 -> 12288 bytes .../geometric/message_passing/__init__.py | 15 ++++++ .../paddle/geometric/message_passing/send.py | 13 +++++ .../paddle/geometric/message_passing/utils.py | 47 ++++++++++++++++++ python/setup.py.in | 2 + 6 files changed, 96 insertions(+) create mode 100644 python/paddle/geometric/__init__.py create mode 100644 python/paddle/geometric/message_passing/.send.py.swp create mode 100644 python/paddle/geometric/message_passing/__init__.py create mode 100644 python/paddle/geometric/message_passing/send.py create mode 100644 python/paddle/geometric/message_passing/utils.py diff --git a/python/paddle/geometric/__init__.py b/python/paddle/geometric/__init__.py new file mode 100644 index 
0000000000000..04a4103debdd9 --- /dev/null +++ b/python/paddle/geometric/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .message_passing import send_uv # noqa: F401 + +__all__ = [ + 'send_uv', +] diff --git a/python/paddle/geometric/message_passing/.send.py.swp b/python/paddle/geometric/message_passing/.send.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..7175c240dbb5b37d0b35e02de41dd55b64e35373 GIT binary patch literal 12288 zcmeI2&5j#I5XWcXzyTHz9N|z*4(x(=>~$gq1?6Bii!x#vBYQVdPNVirji;rV?xwqE zy~Blf;0lidyaC@L1a3S5uR!(MmNpPUuApk^@1E|duCDr(FP3XDx|eKZrFgkqFnV}J7+2HV_^<;WqYqi~6 zqHfV!SqxirPAoAu!a1pyLx-wNO#dX0yvGEXz*73Py`FGkTL(r2WM z^epMC-6;Bk^f^hArlgGYHt8+WKIwPz|10Sc=@-&N($A!D?@y#3Nk5RjCw)ia!33B9 z6JP>NfC(@GCcp%k02BB}0ze~<<4c?EbBPs%4&uB!-@6zK0UbZQ=|TvXTWfLM=P$riP_HN2{ox8tA%KQfqZ^DpW~c?x&}Br)Q^de=?g*veWc< z4$~PNO|v`cX*$hl^*&6p2k=3f-5CH%3c> literal 0 HcmV?d00001 diff --git a/python/paddle/geometric/message_passing/__init__.py b/python/paddle/geometric/message_passing/__init__.py new file mode 100644 index 0000000000000..5beaea8aac2e5 --- /dev/null +++ b/python/paddle/geometric/message_passing/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .send import send_uv # noqa: F401 diff --git a/python/paddle/geometric/message_passing/send.py b/python/paddle/geometric/message_passing/send.py new file mode 100644 index 0000000000000..513558501a0eb --- /dev/null +++ b/python/paddle/geometric/message_passing/send.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
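Before the broadcasting helpers in utils.py below, it may help to see what the send_uv primitive is meant to compute. The following is a minimal NumPy sketch of the semantics, mirroring the reference computation used in test_graph_send_uv_op.py; send_uv_reference and the sample data are illustrative only and are not part of the Paddle API (note the attribute is named compute_type at this point in the series and is renamed to message_op by a later patch):

import numpy as np

def send_uv_reference(x, y, src_index, dst_index, compute_type="add"):
    # Illustrative reference only; not part of the Paddle API.
    # For each edge i, gather the source-node feature x[src_index[i]]
    # and the destination-node feature y[dst_index[i]], then combine
    # them elementwise to produce that edge's feature vector.
    ops = {
        "add": np.add,
        "sub": np.subtract,
        "mul": np.multiply,
        "div": np.divide,
    }
    gather_x = x[src_index]  # shape: [num_edges] + x.shape[1:]
    gather_y = y[dst_index]  # shape: [num_edges] + y.shape[1:]
    return ops[compute_type](gather_x, gather_y)

x = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
y = np.array([[1, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32")
src_index = np.array([0, 1, 2, 0])
dst_index = np.array([1, 2, 1, 0])
print(send_uv_reference(x, y, src_index, dst_index, "add"))
# [[ 2.  5.  7.]
#  [ 5.  9. 11.]
#  [ 4.  9. 11.]
#  [ 1.  3.  5.]]

A later patch in this series rewrites sub as add with a negated y and div as mul with a reciprocal y at the Python layer, which is why the reference above supports four ops while the C++/CUDA kernels only dispatch on ADD and MUL.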
diff --git a/python/paddle/geometric/message_passing/utils.py b/python/paddle/geometric/message_passing/utils.py new file mode 100644 index 0000000000000..a0d54360f27a1 --- /dev/null +++ b/python/paddle/geometric/message_passing/utils.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + + +def reshape_lhs_rhs(x, y): + """ + Expand dims to ensure there will be no broadcasting issues with different + number of dimensions. + """ + if len(x.shape) == 1: + x = paddle.reshape(x, [-1, 1]) + if len(y.shape) == 1: + y = paddle.reshape(y, [-1, 1]) + + x_shape = paddle.shape(x) + y_shape = paddle.shape(y) + if len(x.shape) != len(y.shape): + max_ndims = max(len(x.shape), len(y.shape)) + x_pad_ndims = max_ndims - len(x.shape) + y_pad_ndims = max_ndims - len(y.shape) + new_x_shape = [ + x_shape[0], + ] + [ + 1, + ] * x_pad_ndims + list(x_shape[1:]) + new_y_shape = [ + y_shape[0], + ] + [ + 1, + ] * y_pad_ndims + list(y_shape[1:]) + x = paddle.reshape(x, new_x_shape) + y = paddle.reshape(y, new_y_shape) + + return x, y diff --git a/python/setup.py.in b/python/setup.py.in index 1b36b272d0d70..b317da7972214 100755 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -400,6 +400,8 @@ packages=['paddle', 'paddle.device.cuda', 'paddle.version', 'paddle.profiler', + 'paddle.geometric', + 'paddle.geometric.message_passing', ] with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: From 6fdfcb16af1adde163c447aa8c9fed250358f355 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Mon, 8 Aug 2022 07:49:44 +0000 Subject: [PATCH 07/12] add paddle.geometric.send_uv api, add unittest --- python/paddle/__init__.py | 1 + .../tests/unittests/test_graph_send_uv_op.py | 116 ++++++++++++++++- .../paddle/geometric/message_passing/send.py | 121 ++++++++++++++++++ 3 files changed, 236 insertions(+), 2 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 2e4dc53290226..1c013e1ca3494 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -78,6 +78,7 @@ import paddle.reader # noqa: F401 import paddle.static # noqa: F401 import paddle.vision # noqa: F401 +import paddle.geometric # noqa: F401 from .tensor.attribute import is_complex # noqa: F401 from .tensor.attribute import is_integer # noqa: F401 diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py index b5ca6364555c5..aa44b172025ed 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -41,10 +41,17 @@ def compute_graph_send_uv(inputs, attributes): return results +def graph_send_uv_wrapper(x, y, src_index, dst_index, compute_type="add"): + return paddle.geometric.send_uv(x, y, src_index, dst_index, + compute_type.lower()) + + class TestGraphSendUVOp(OpTest): def setUp(self): paddle.enable_static() + self.python_api = graph_send_uv_wrapper + 
self.python_out_sig = ['out'] self.op_type = "graph_send_uv" self.set_config() self.inputs = { @@ -58,10 +65,10 @@ def setUp(self): self.outputs = {'out': out} def test_check_output(self): - self.check_output(check_eager=False) + self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad(['x', 'y'], 'out', check_eager=False) + self.check_grad(['x', 'y'], 'out', check_eager=True) def set_config(self): self.x = np.random.random((10, 20)).astype("float64") @@ -147,3 +154,108 @@ def set_config(self): self.src_index = index[:, 0] self.dst_index = index[:, 1] self.compute_type = 'MUL' + + +class API_GeometricSendUVTest(unittest.TestCase): + + def test_compute_all_dygraph(self): + paddle.disable_static() + x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32") + y = paddle.to_tensor([[1, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32") + src_index = paddle.to_tensor(np.array([0, 1, 2, 0]), dtype="int32") + dst_index = paddle.to_tensor(np.array([1, 2, 1, 0]), dtype="int32") + + res_add = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="add") + res_sub = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="sub") + res_mul = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="mul") + res_div = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="div") + res = [res_add, res_sub, res_mul, res_div] + + np_add = np.array([[2, 5, 7], [5, 9, 11], [4, 9, 11], [1, 3, 5]], + dtype="float32") + np_sub = np.array([[-2, -1, -1], [-3, -1, -1], [0, 3, 3], [-1, 1, 1]], + dtype="float32") + np_mul = np.array([[0, 6, 12], [4, 20, 30], [4, 18, 28], [0, 2, 6]], + dtype="float32") + np_div = np.array( + [[0, 2 / 3, 0.75], [0.25, 0.8, 5 / 6], [1, 2, 7 / 4], [0, 2, 1.5]], + dtype="float32") + + for np_res, paddle_res in zip([np_add, np_sub, np_mul, np_div], res): + self.assertTrue( + np.allclose(np_res, paddle_res, atol=1e-6), "two value is\ + {}\n{}, check diff!".format(np_res, paddle_res)) + + def test_compute_all_static(self): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name="x", shape=[3, 3], dtype="float32") + y = paddle.static.data(name="y", shape=[3, 3], dtype="float32") + src_index = paddle.static.data(name="src", shape=[4], dtype="int32") + dst_index = paddle.static.data(name="dst", shape=[4], dtype="int32") + res_add = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="add") + res_sub = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="sub") + res_mul = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="mul") + res_div = paddle.geometric.send_uv(x, + y, + src_index, + dst_index, + compute_type="div") + + exe = paddle.static.Executor(paddle.CPUPlace()) + data1 = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32") + data2 = np.array([[1, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32") + data3 = np.array([0, 1, 2, 0], dtype="int32") + data4 = np.array([1, 2, 1, 0], dtype="int32") + + np_add = np.array([[2, 5, 7], [5, 9, 11], [4, 9, 11], [1, 3, 5]], + dtype="float32") + np_sub = np.array( + [[-2, -1, -1], [-3, -1, -1], [0, 3, 3], [-1, 1, 1]], + dtype="float32") + np_mul = np.array([[0, 6, 12], [4, 20, 30], [4, 18, 28], [0, 2, 6]], + dtype="float32") + np_div = np.array([[0, 2 / 3, 0.75], [0.25, 0.8, 5 / 6], + [1, 2, 7 / 4], [0, 2, 1.5]], + dtype="float32") + + ret = exe.run(feed={ + 'x': data1, + 'y': data2, + 'src': data3, + 'dst': data4, + 
}, + fetch_list=[res_add, res_sub, res_mul, res_div]) + for np_res, paddle_res in zip([np_add, np_sub, np_mul, np_div], + ret): + self.assertTrue( + np.allclose(np_res, paddle_res, atol=1e-6), "two value is\ + {}\n{}, check diff!".format(np_res, paddle_res)) diff --git a/python/paddle/geometric/message_passing/send.py b/python/paddle/geometric/message_passing/send.py index 513558501a0eb..64363cb8656f8 100644 --- a/python/paddle/geometric/message_passing/send.py +++ b/python/paddle/geometric/message_passing/send.py @@ -11,3 +11,124 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import numpy as np +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle import _C_ops + +from .utils import reshape_lhs_rhs + + +def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): + """ + + Graph Learning message passing api. + + This api is mainly used in Graph Learning domain, and the main purpose is to reduce intermediate memory + consumption in the process of message passing. Take `x` as the source node feature tensor, take `y` as + the destination node feature tensor. Then we use `src_index` and `dst_index` to gather the corresponding data, + and then compute the edge features in different compute_types like `add`, `sub`, `mul`, `div`. + + .. code-block:: text + + Given: + + X = [[0, 2, 3], + [1, 4, 5], + [2, 6, 7]] + + Y = [[0, 1, 2], + [2, 3, 4], + [4, 5, 6]] + + src_index = [0, 1, 2, 0] + + dst_index = [1, 2, 1, 0] + + compute_type = "add" + + Then: + + Out = [[2, 5, 7], + [5, 9, 11], + [4, 9, 11], + [0, 3, 5]] + + Args: + x (Tensor): The source node feature tensor, and the available data type is float32, float64, int32, int64. And we support float16 in gpu version. + y (Tensor): The destination node feature tensor, and the available data type is float32, float64, int32, int64. And we support float16 in gpu version. + src_index (Tensor): An 1-D tensor, and the available data type is int32, int64. + dst_index (Tensor): An 1-D tensor, and should have the same shape as `src_index`. + The available data type is int32, int64. + compute_type (Tensor): Different compute types for x and y, including `add`, `sub`, `mul` and `div`. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + out (Tensor): The output tensor. + + Examples: + + .. code-block:: python + + import paddle + + x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32") + y = paddle.to_tensor([[0, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32") + indexes = paddle.to_tensor([[0, 1], [1, 2], [2, 1], [0, 0]], dtype="int32") + src_index = indexes[:, 0] + dst_index = indexes[:, 1] + out = paddle.geometric.send_uv(x, y, src_index, dst_index, compute_type="add") + # Outputs: [[2., 5., 7.], [5., 9., 11.], [4., 9., 11.], [0., 3., 5.]] + + """ + + if compute_type not in ['add', 'sub', 'mul', 'div']: + raise ValueError( + "compute_type should be `add`, `sub`, `mul`, `div`, but received %s" + % compute_type) + + x, y = reshape_lhs_rhs(x, y) + + if compute_type == 'sub': + compute_type = 'add' + y = -y + if compute_type == 'div': + compute_type = 'mul' + y = 1. 
/ y + + if in_dygraph_mode(): + return _C_ops.final_state_graph_send_uv(x, y, src_index, dst_index, + compute_type.upper()) + else: + if _in_legacy_dygraph(): + return _C_ops.graph_send_uv(x, y, src_index, dst_index, + "compute_type", compute_type.upper()) + else: + helper = LayerHelper("send_uv", **locals()) + check_variable_and_dtype(x, 'x', + ['int32', 'int64', 'float32', 'float64'], + 'graph_send_uv') + check_variable_and_dtype(y, 'y', + ['int32', 'int64', 'float32', 'float64'], + 'graph_send_uv') + check_variable_and_dtype(src_index, 'src_index', ['int32', 'int64'], + 'graph_send_uv') + check_variable_and_dtype(dst_index, 'dst_index', ['int32', 'int64'], + 'graph_send_uv') + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + inputs = { + 'x': x, + 'y': y, + 'src_index': src_index, + 'dst_index': dst_index + } + attrs = {'compute_type': compute_type.upper()} + helper.append_op(type="graph_send_uv", + inputs=inputs, + attrs=attrs, + outputs={"out": out}) + return out From c21919984beb617a37b18dc88ed528ae010feae4 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Mon, 8 Aug 2022 13:06:21 +0000 Subject: [PATCH 08/12] add fp16 judgement --- python/paddle/geometric/message_passing/send.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/geometric/message_passing/send.py b/python/paddle/geometric/message_passing/send.py index 64363cb8656f8..1d26c45c5f9ef 100644 --- a/python/paddle/geometric/message_passing/send.py +++ b/python/paddle/geometric/message_passing/send.py @@ -108,12 +108,12 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): "compute_type", compute_type.upper()) else: helper = LayerHelper("send_uv", **locals()) - check_variable_and_dtype(x, 'x', - ['int32', 'int64', 'float32', 'float64'], - 'graph_send_uv') - check_variable_and_dtype(y, 'y', - ['int32', 'int64', 'float32', 'float64'], - 'graph_send_uv') + check_variable_and_dtype( + x, 'x', ['int32', 'int64', 'float32', 'float64', 'float16'], + 'graph_send_uv') + check_variable_and_dtype( + y, 'y', ['int32', 'int64', 'float32', 'float64', 'float16'], + 'graph_send_uv') check_variable_and_dtype(src_index, 'src_index', ['int32', 'int64'], 'graph_send_uv') check_variable_and_dtype(dst_index, 'dst_index', ['int32', 'int64'], From 6ceb41668ef9b81d70ca6bf75cd6c8d92f7a23a5 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Fri, 12 Aug 2022 04:52:33 +0000 Subject: [PATCH 09/12] fix file typo, move compute_type to message_op --- paddle/phi/api/yaml/api.yaml | 2 +- paddle/phi/api/yaml/backward.yaml | 4 +- paddle/phi/infermeta/multiary.cc | 2 +- paddle/phi/infermeta/multiary.h | 2 +- .../cpu/graph_send_ue_recv_grad_kernel.cc | 2 +- .../kernels/cpu/graph_send_ue_recv_kernel.cc | 2 +- .../kernels/cpu/graph_send_uv_grad_kernel.cc | 38 ++--- .../phi/kernels/cpu/graph_send_uv_kernel.cc | 14 +- .../gpu/graph_send_ue_recv_grad_kernel.cu | 2 +- .../kernels/gpu/graph_send_ue_recv_kernel.cu | 2 +- .../kernels/gpu/graph_send_uv_grad_kernel.cu | 51 +++---- .../phi/kernels/gpu/graph_send_uv_kernel.cu | 18 +-- .../phi/kernels/graph_send_uv_grad_kernel.h | 2 +- paddle/phi/kernels/graph_send_uv_kernel.h | 2 +- .../impl/graph_messaage_passing_impl.h | 140 ------------------ .../impl/graph_send_ue_recv_kernel_impl.h | 139 ----------------- .../tests/unittests/test_graph_send_uv_op.py | 44 +++--- .../geometric/message_passing/.send.py.swp | Bin 12288 -> 0 bytes .../paddle/geometric/message_passing/send.py | 36 ++--- 19 files changed, 98 
insertions(+), 404 deletions(-) delete mode 100644 paddle/phi/kernels/impl/graph_messaage_passing_impl.h delete mode 100644 paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h delete mode 100644 python/paddle/geometric/message_passing/.send.py.swp diff --git a/paddle/phi/api/yaml/api.yaml b/paddle/phi/api/yaml/api.yaml index 3454fe7e7d925..dafc15646e4b1 100644 --- a/paddle/phi/api/yaml/api.yaml +++ b/paddle/phi/api/yaml/api.yaml @@ -136,7 +136,7 @@ backward : fft_r2c_grad - api : graph_send_uv - args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str compute_type = "ADD") + args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") output : Tensor(out) infer_meta : func : GraphSendUVInferMeta diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 574f554bd0203..d2ed2533ae03e 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -148,8 +148,8 @@ no_need_buffer: x - backward_api : graph_send_uv_grad - forward : graph_send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str compute_type = "ADD") -> Tensor(out) - args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str compute_type = "ADD") + forward : graph_send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") -> Tensor(out) + args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str message_op = "ADD") output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 41e4acc323e4c..27c3d20d853c3 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -2691,7 +2691,7 @@ void GraphSendUVInferMeta(const MetaTensor& x, const MetaTensor& y, const MetaTensor& src_index, const MetaTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, MetaTensor* out) { auto src_index_dims = src_index.dims(); if (src_index_dims.size() == 2) { diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 3d0ec5fa22c4d..05c834cebf361 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -480,7 +480,7 @@ void GraphSendUVInferMeta(const MetaTensor& x, const MetaTensor& y, const MetaTensor& src_index, const MetaTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, MetaTensor* out); } // namespace phi diff --git a/paddle/phi/kernels/cpu/graph_send_ue_recv_grad_kernel.cc b/paddle/phi/kernels/cpu/graph_send_ue_recv_grad_kernel.cc index 95fdc6ff0a9cc..c7b1e3e51853b 100644 --- a/paddle/phi/kernels/cpu/graph_send_ue_recv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_send_ue_recv_grad_kernel.cc @@ -24,7 +24,7 @@ #include "paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/graph_messaage_passing_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/graph_send_ue_recv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_ue_recv_kernel.cc index 74fca002294db..ab9adc3897170 100644 --- a/paddle/phi/kernels/cpu/graph_send_ue_recv_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_send_ue_recv_kernel.cc @@ -22,7 +22,7 @@ #include "paddle/phi/core/hostdevice.h" #include 
"paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_messaage_passing_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc index 4b224726b6ffb..bd92875461da7 100644 --- a/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc @@ -19,7 +19,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" namespace phi { @@ -31,7 +31,7 @@ void CalculateGrad(const Context& ctx, const IndexT* d_index, const phi::DDim& out_grad_dims, const phi::DDim& x_grad_dims, - const std::string& compute_type, + const std::string& message_op, int64_t index_size, int64_t slice_size, T* x_grad, @@ -40,7 +40,7 @@ void CalculateGrad(const Context& ctx, std::vector reduce_idx; bool reduce = ReduceGrad(out_grad_dims, x_grad_dims, reduce_idx); - if (compute_type == "ADD") { + if (message_op == "ADD") { if (!reduce) { #ifdef PADDLE_WITH_MKLML #pragma omp parallel for @@ -91,7 +91,7 @@ void CalculateGrad(const Context& ctx, true); memcpy(x_grad, x_grad_out.data(), x_grad_out.numel() * sizeof(T)); } - } else if (compute_type == "MUL") { + } else if (message_op == "MUL") { const auto& bcast = phi::CalcBCastInfo(y.dims(), out_grad_dims); const T* y_data = y.data(); if (!reduce) { @@ -163,7 +163,7 @@ void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx, const DenseTensor& out_grad, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* x_grad, DenseTensor* y_grad) { const int64_t& index_size = dst_index.dims()[0]; @@ -201,7 +201,7 @@ void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx, s_index, out_grad_dims, x_grad_dims, - compute_type, + message_op, index_size, slice_size_x, x_grad_data, @@ -214,7 +214,7 @@ void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx, d_index, out_grad_dims, y_grad_dims, - compute_type, + message_op, index_size, slice_size_y, y_grad_data, @@ -229,30 +229,16 @@ void GraphSendUVGradKernel(const Context& ctx, const DenseTensor& src_index, const DenseTensor& dst_index, const DenseTensor& out_grad, - const std::string& compute_type, + const std::string& message_op, DenseTensor* x_grad, DenseTensor* y_grad) { auto index_type = src_index.dtype(); if (index_type == phi::DataType::INT32) { - GraphSendUVGradOpKernelLaunchHelper(ctx, - x, - y, - out_grad, - src_index, - dst_index, - compute_type, - x_grad, - y_grad); + GraphSendUVGradOpKernelLaunchHelper( + ctx, x, y, out_grad, src_index, dst_index, message_op, x_grad, y_grad); } else if (index_type == phi::DataType::INT64) { - GraphSendUVGradOpKernelLaunchHelper(ctx, - x, - y, - out_grad, - src_index, - dst_index, - compute_type, - x_grad, - y_grad); + GraphSendUVGradOpKernelLaunchHelper( + ctx, x, y, out_grad, src_index, dst_index, message_op, x_grad, y_grad); } } diff --git a/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc index 3689f51199777..6404999ba23df 100644 --- a/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc +++ 
b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc @@ -18,7 +18,7 @@ #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" namespace phi { @@ -55,7 +55,7 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx, const DenseTensor& y, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* out) { const int& index_size = src_index.dims()[0]; auto out_dims = out->dims(); @@ -72,7 +72,7 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx, const T* y_data = y.data(); const IndexT* s_index = src_index.data(); const IndexT* d_index = dst_index.data(); - if (compute_type == "ADD") { + if (message_op == "ADD") { GraphAddFunctor add_functor; GraphSendUVCpuKernel>(bcast_info, x_data, @@ -82,7 +82,7 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx, out_data, index_size, add_functor); - } else if (compute_type == "MUL") { + } else if (message_op == "MUL") { GraphMulFunctor mul_functor; GraphSendUVCpuKernel>(bcast_info, x_data, @@ -101,15 +101,15 @@ void GraphSendUVKernel(const Context& ctx, const DenseTensor& y, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* out) { auto index_type = src_index.dtype(); if (index_type == phi::DataType::INT32) { GraphSendUVOpKernelLaunchHelper( - ctx, x, y, src_index, dst_index, compute_type, out); + ctx, x, y, src_index, dst_index, message_op, out); } else if (index_type == phi::DataType::INT64) { GraphSendUVOpKernelLaunchHelper( - ctx, x, y, src_index, dst_index, compute_type, out); + ctx, x, y, src_index, dst_index, message_op, out); } } diff --git a/paddle/phi/kernels/gpu/graph_send_ue_recv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_ue_recv_grad_kernel.cu index cb3d5591a7be6..c5d5fb7196fb2 100644 --- a/paddle/phi/kernels/gpu/graph_send_ue_recv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_ue_recv_grad_kernel.cu @@ -21,7 +21,7 @@ #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/graph_send_recv_funcs.h" #include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_messaage_passing_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/graph_send_ue_recv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_ue_recv_kernel.cu index f339387f0bbfc..7351c562dff9d 100644 --- a/paddle/phi/kernels/gpu/graph_send_ue_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_ue_recv_kernel.cu @@ -15,7 +15,7 @@ #include "paddle/phi/kernels/graph_send_ue_recv_kernel.h" #include "paddle/phi/kernels/gpu/graph_send_recv_funcs.h" #include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_messaage_passing_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include #include diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu index ae3c6eea19a39..4fb0335241d01 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu @@ -22,7 +22,7 @@ #include 
"paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/graph_send_recv_funcs.h" #include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" namespace phi { @@ -59,7 +59,7 @@ void CalculateGrad(const Context& ctx, const IndexT* d_index, const phi::DDim& out_grad_dims, const phi::DDim& x_grad_dims, - const std::string& compute_type, + const std::string& message_op, int64_t index_size, int64_t slice_size, T* x_grad, @@ -68,10 +68,10 @@ void CalculateGrad(const Context& ctx, std::vector reduce_idx; bool reduce = ReduceGrad(out_grad_dims, x_grad_dims, reduce_idx); - if (compute_type == "ADD") { + if (message_op == "ADD") { if (!reduce) { - const int ntx = FindNumThreads(slice_size); - const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int ntx = FindNumThreads(slice_size, ctx.GetMaxThreadsPerBlock()); + const int nty = ctx.GetMaxThreadsPerBlock() / ntx; const int nbx = (slice_size + ntx - 1) / ntx; const int nby = (index_size + nty - 1) / nty; const dim3 grid_tmp(nbx, nby); @@ -89,8 +89,9 @@ void CalculateGrad(const Context& ctx, phi::funcs::SetConstant()(ctx, &x_grad_v2, T(0)); T* x_grad_v2_data = x_grad_v2.data(); - const int ntx = FindNumThreads(bcast_info.out_len); - const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int ntx = + FindNumThreads(bcast_info.out_len, ctx.GetMaxThreadsPerBlock()); + const int nty = ctx.GetMaxThreadsPerBlock() / ntx; const int nbx = (bcast_info.out_len + ntx - 1) / ntx; const int nby = (index_size + nty - 1) / nty; const dim3 grid_tmp(nbx, nby); @@ -122,15 +123,15 @@ void CalculateGrad(const Context& ctx, cudaMemcpyDeviceToDevice); #endif } - } else if (compute_type == "MUL") { + } else if (message_op == "MUL") { const auto& bcast_info = phi::CalcBCastInfo(y.dims(), out_grad_dims); thrust::device_vector l_bcastoff, r_bcastoff; if (bcast_info.use_bcast) { CopyBCastOff(bcast_info, l_bcastoff, r_bcastoff); } int64_t out_len = bcast_info.out_len; - const int ntx = FindNumThreads(out_len); - const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int ntx = FindNumThreads(out_len, ctx.GetMaxThreadsPerBlock()); + const int nty = ctx.GetMaxThreadsPerBlock() / ntx; const int nbx = (out_len + ntx - 1) / ntx; const int nby = (index_size + nty - 1) / nty; const dim3 grid_(nbx, nby); @@ -214,7 +215,7 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx, const DenseTensor& out_grad, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* x_grad, DenseTensor* y_grad) { const int64_t& index_size = dst_index.dims()[0]; @@ -258,7 +259,7 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx, d_index, out_grad_dims, x_grad_dims, - compute_type, + message_op, index_size, slice_size_x, x_grad_data, @@ -271,7 +272,7 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx, s_index, out_grad_dims, y_grad_dims, - compute_type, + message_op, index_size, slice_size_y, y_grad_data, @@ -286,30 +287,16 @@ void GraphSendUVGradKernel(const Context& ctx, const DenseTensor& src_index, const DenseTensor& dst_index, const DenseTensor& out_grad, - const std::string& compute_type, + const std::string& message_op, DenseTensor* x_grad, DenseTensor* y_grad) { auto index_type = src_index.dtype(); if (index_type == phi::DataType::INT32) { - GraphSendUVGradOpCUDAKernelLaunchHelper(ctx, - 
x, - y, - out_grad, - src_index, - dst_index, - compute_type, - x_grad, - y_grad); + GraphSendUVGradOpCUDAKernelLaunchHelper( + ctx, x, y, out_grad, src_index, dst_index, message_op, x_grad, y_grad); } else if (index_type == phi::DataType::INT64) { - GraphSendUVGradOpCUDAKernelLaunchHelper(ctx, - x, - y, - out_grad, - src_index, - dst_index, - compute_type, - x_grad, - y_grad); + GraphSendUVGradOpCUDAKernelLaunchHelper( + ctx, x, y, out_grad, src_index, dst_index, message_op, x_grad, y_grad); } } diff --git a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu index 31e48f1f90704..05f38a5dd3cda 100644 --- a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu @@ -14,7 +14,7 @@ #include "paddle/phi/kernels/graph_send_uv_kernel.h" #include "paddle/phi/kernels/gpu/graph_send_ue_recv_funcs.h" -#include "paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h" +#include "paddle/phi/kernels/impl/graph_message_passing_impl.h" #include @@ -68,7 +68,7 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx, const DenseTensor& y, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* out) { const int64_t& index_size = src_index.dims()[0]; auto out_dims = out->dims(); @@ -92,13 +92,13 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx, } int64_t out_len = bcast_info.out_len; - const int ntx = FindNumThreads(out_len); - const int nty = CUDA_MAX_NUM_THREADS / ntx; + const int ntx = FindNumThreads(out_len, ctx.GetMaxThreadsPerBlock()); + const int nty = ctx.GetMaxThreadsPerBlock() / ntx; const int nbx = (out_len + ntx - 1) / ntx; const int nby = (index_size + nty - 1) / nty; const dim3 grid(nbx, nby); const dim3 block(ntx, nty); - if (compute_type == "ADD") { + if (message_op == "ADD") { funcs::AddFunctor add_functor; GraphSendUVCUDAKernel> <<>>( @@ -115,7 +115,7 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx, out_len, bcast_info.use_bcast, add_functor); - } else if (compute_type == "MUL") { + } else if (message_op == "MUL") { funcs::MultiplyFunctor mul_functor; GraphSendUVCUDAKernel> <<>>( @@ -141,15 +141,15 @@ void GraphSendUVKernel(const Context& ctx, const DenseTensor& y, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* out) { auto index_type = src_index.dtype(); if (index_type == phi::DataType::INT32) { GraphSendUVOpCUDAKernelLaunchHelper( - ctx, x, y, src_index, dst_index, compute_type, out); + ctx, x, y, src_index, dst_index, message_op, out); } else if (index_type == phi::DataType::INT64) { GraphSendUVOpCUDAKernelLaunchHelper( - ctx, x, y, src_index, dst_index, compute_type, out); + ctx, x, y, src_index, dst_index, message_op, out); } } diff --git a/paddle/phi/kernels/graph_send_uv_grad_kernel.h b/paddle/phi/kernels/graph_send_uv_grad_kernel.h index 329e4e61e9e74..fa2285627a4b7 100644 --- a/paddle/phi/kernels/graph_send_uv_grad_kernel.h +++ b/paddle/phi/kernels/graph_send_uv_grad_kernel.h @@ -26,7 +26,7 @@ void GraphSendUVGradKernel(const Context& ctx, const DenseTensor& src_index, const DenseTensor& dst_index, const DenseTensor& out_grad, - const std::string& compute_type, + const std::string& message_op, DenseTensor* x_grad, DenseTensor* y_grad); diff --git a/paddle/phi/kernels/graph_send_uv_kernel.h b/paddle/phi/kernels/graph_send_uv_kernel.h index 159a27fc30a3b..7b723122c1a7f 100644 --- 
a/paddle/phi/kernels/graph_send_uv_kernel.h +++ b/paddle/phi/kernels/graph_send_uv_kernel.h @@ -25,7 +25,7 @@ void GraphSendUVKernel(const Context& ctx, const DenseTensor& y, const DenseTensor& src_index, const DenseTensor& dst_index, - const std::string& compute_type, + const std::string& message_op, DenseTensor* out); } // namespace phi diff --git a/paddle/phi/kernels/impl/graph_messaage_passing_impl.h b/paddle/phi/kernels/impl/graph_messaage_passing_impl.h deleted file mode 100644 index dc1477e77227b..0000000000000 --- a/paddle/phi/kernels/impl/graph_messaage_passing_impl.h +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// Copyright The DGL team. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/phi/kernels/funcs/common_shape.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" - -namespace phi { - -struct BroadCastInfo { - bool use_bcast; - // l_offset[i] indicates the start position of tensor lhs that required to - // compute the i-th element in output, so as r_offset[i]. - std::vector l_offset, r_offset; - int64_t l_len, r_len, out_len, reduce_size; -}; - -inline bool UseBroadCast(const phi::DDim& l_dims, const phi::DDim& r_dims) { - if (l_dims.size() != r_dims.size()) { - return true; - } - for (int i = 1; i < l_dims.size(); i++) { - if (l_dims[i] != r_dims[i]) { - return true; - } - } - return false; -} - -inline BroadCastInfo CalcBCastInfo(const phi::DDim& l_dims, - const phi::DDim& r_dims) { - BroadCastInfo binfo; - binfo.use_bcast = UseBroadCast(l_dims, r_dims); - binfo.l_len = 1; - binfo.r_len = 1; - for (int i = 1; i < l_dims.size(); i++) { - binfo.l_len *= l_dims[i]; - } - for (int i = 1; i < r_dims.size(); i++) { - binfo.r_len *= r_dims[i]; - } - // TODO(daisiming): Whether to add dot. - binfo.reduce_size = 1; - if (binfo.use_bcast) { - const int max_dim = std::max(l_dims.size(), r_dims.size()) - 1; - int stride_l = 1, stride_r = 1; - binfo.l_offset.emplace_back(0); - binfo.r_offset.emplace_back(0); - int out_len = 1; - for (int i = 0; i < max_dim; i++) { - // Iterate the axis from back to front. - const int dl = - (l_dims.size() - 1 - i < 1) ? 1 : l_dims[l_dims.size() - 1 - i]; - const int dr = - (r_dims.size() - 1 - i < 1) ? 
1 : r_dims[r_dims.size() - 1 - i]; - for (int j = 1; j < std::max(dl, dr); j++) { - for (int k = 0; k < out_len; k++) { - binfo.l_offset.emplace_back(binfo.l_offset[k] + - j * (j < dl) * stride_l); - binfo.r_offset.emplace_back(binfo.r_offset[k] + - j * (j < dr) * stride_r); - } - } - out_len *= std::max(dl, dr); - stride_l *= dl; - stride_r *= dr; - } - binfo.out_len = out_len; - } else { - binfo.out_len = binfo.l_len; - } - return binfo; -} - -inline std::vector InferBroadcastShape(const phi::DDim& x_dims, - const phi::DDim& e_dims, - const std::string& type = "x") { - auto x_dims1 = phi::vectorize(x_dims); - auto e_dims1 = phi::vectorize(e_dims); - std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); - std::vector e_dims2(e_dims1.begin() + 1, e_dims1.end()); - int max_dim = std::max(x_dims2.size(), e_dims2.size()); - int axis = std::abs(static_cast(x_dims2.size() - e_dims2.size())); - std::vector x_dims_array(max_dim); - std::vector e_dims_array(max_dim); - std::vector out_dims_array(max_dim); - // Only need to broadcast dimensions other than the 0th dimension. - phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), - phi::make_ddim(e_dims2), - x_dims_array.data(), - e_dims_array.data(), - out_dims_array.data(), - max_dim, - axis); - if (type == "x") { - out_dims_array.insert(out_dims_array.begin(), x_dims[0]); - } else { - out_dims_array.insert(out_dims_array.begin(), e_dims[0]); - } - return out_dims_array; -} - -inline bool ReduceGrad(const phi::DDim& out_grad_dims, - const phi::DDim& x_dims, - std::vector& axis) { - // We must ensure the ndim of out_grad and x are the same. - bool reduce = false; - for (int i = 1; i < out_grad_dims.size(); i++) { - if (out_grad_dims[i] != x_dims[i]) { - reduce = true; - break; - } - } - if (!reduce) return false; - - // Get reduce axis. - for (int i = 1; i < out_grad_dims.size(); i++) { - if (out_grad_dims[i] - x_dims[i] != 0) { - axis.emplace_back(i); - } - } - return true; -} - -} // namespace phi diff --git a/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h b/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h deleted file mode 100644 index 35e51fb930c8d..0000000000000 --- a/paddle/phi/kernels/impl/graph_send_ue_recv_kernel_impl.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/phi/kernels/funcs/common_shape.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" - -namespace phi { - -struct BroadCastInfo { - bool use_bcast; - // l_offset[i] indicates the start position of tensor lhs that required to - // compute the i-th element in output, so as r_offset[i]. 
- std::vector l_offset, r_offset; - int64_t l_len, r_len, out_len, reduce_size; -}; - -inline bool UseBroadCast(const phi::DDim& l_dims, const phi::DDim& r_dims) { - if (l_dims.size() != r_dims.size()) { - return true; - } - for (int i = 1; i < l_dims.size(); i++) { - if (l_dims[i] != r_dims[i]) { - return true; - } - } - return false; -} - -inline BroadCastInfo CalcBCastInfo(const phi::DDim& l_dims, - const phi::DDim& r_dims) { - BroadCastInfo binfo; - binfo.use_bcast = UseBroadCast(l_dims, r_dims); - binfo.l_len = 1; - binfo.r_len = 1; - for (int i = 1; i < l_dims.size(); i++) { - binfo.l_len *= l_dims[i]; - } - for (int i = 1; i < r_dims.size(); i++) { - binfo.r_len *= r_dims[i]; - } - // TODO(daisiming): Whether to add dot. - binfo.reduce_size = 1; - if (binfo.use_bcast) { - const int max_dim = std::max(l_dims.size(), r_dims.size()) - 1; - int stride_l = 1, stride_r = 1; - binfo.l_offset.emplace_back(0); - binfo.r_offset.emplace_back(0); - int out_len = 1; - for (int i = 0; i < max_dim; i++) { - // Iterate the axis from back to front. - const int dl = - (l_dims.size() - 1 - i < 1) ? 1 : l_dims[l_dims.size() - 1 - i]; - const int dr = - (r_dims.size() - 1 - i < 1) ? 1 : r_dims[r_dims.size() - 1 - i]; - for (int j = 1; j < std::max(dl, dr); j++) { - for (int k = 0; k < out_len; k++) { - binfo.l_offset.emplace_back(binfo.l_offset[k] + - j * (j < dl) * stride_l); - binfo.r_offset.emplace_back(binfo.r_offset[k] + - j * (j < dr) * stride_r); - } - } - out_len *= std::max(dl, dr); - stride_l *= dl; - stride_r *= dr; - } - binfo.out_len = out_len; - } else { - binfo.out_len = binfo.l_len; - } - return binfo; -} - -inline std::vector InferBroadcastShape(const phi::DDim& x_dims, - const phi::DDim& e_dims, - const std::string& type = "x") { - auto x_dims1 = phi::vectorize(x_dims); - auto e_dims1 = phi::vectorize(e_dims); - std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); - std::vector e_dims2(e_dims1.begin() + 1, e_dims1.end()); - int max_dim = std::max(x_dims2.size(), e_dims2.size()); - int axis = std::abs(static_cast(x_dims2.size() - e_dims2.size())); - std::vector x_dims_array(max_dim); - std::vector e_dims_array(max_dim); - std::vector out_dims_array(max_dim); - // Only need to broadcast dimensions other than the 0th dimension. - phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), - phi::make_ddim(e_dims2), - x_dims_array.data(), - e_dims_array.data(), - out_dims_array.data(), - max_dim, - axis); - if (type == "x") { - out_dims_array.insert(out_dims_array.begin(), x_dims[0]); - } else { - out_dims_array.insert(out_dims_array.begin(), e_dims[0]); - } - return out_dims_array; -} - -inline bool ReduceGrad(const phi::DDim& out_grad_dims, - const phi::DDim& x_dims, - std::vector& axis) { - // We must ensure the ndim of out_grad and x are the same. - bool reduce = false; - for (int i = 1; i < out_grad_dims.size(); i++) { - if (out_grad_dims[i] != x_dims[i]) { - reduce = true; - break; - } - } - if (!reduce) return false; - - // Get reduce axis. 
- for (int i = 1; i < out_grad_dims.size(); i++) { - if (out_grad_dims[i] - x_dims[i] != 0) { - axis.emplace_back(i); - } - } - return true; -} - -} // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py index aa44b172025ed..ee74016999c68 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -27,23 +27,23 @@ def compute_graph_send_uv(inputs, attributes): y = inputs['y'] src_index = inputs['src_index'] dst_index = inputs['dst_index'] - compute_type = attributes['compute_type'] + message_op = attributes['message_op'] gather_x = x[src_index] gather_y = y[dst_index] # Calculate forward output. - if compute_type == "ADD": + if message_op == "ADD": results = gather_x + gather_y - elif compute_type == "MUL": + elif message_op == "MUL": results = gather_x * gather_y return results -def graph_send_uv_wrapper(x, y, src_index, dst_index, compute_type="add"): +def graph_send_uv_wrapper(x, y, src_index, dst_index, message_op="add"): return paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type.lower()) + message_op.lower()) class TestGraphSendUVOp(OpTest): @@ -60,7 +60,7 @@ def setUp(self): 'src_index': self.src_index, 'dst_index': self.dst_index } - self.attrs = {'compute_type': self.compute_type} + self.attrs = {'message_op': self.message_op} out = compute_graph_send_uv(self.inputs, self.attrs) self.outputs = {'out': out} @@ -76,7 +76,7 @@ def set_config(self): index = np.random.randint(0, 10, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'ADD' + self.message_op = 'ADD' class TestCase1(TestGraphSendUVOp): @@ -87,7 +87,7 @@ def set_config(self): index = np.random.randint(0, 10, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'MUL' + self.message_op = 'MUL' class TestCase2(TestGraphSendUVOp): @@ -98,7 +98,7 @@ def set_config(self): index = np.random.randint(0, 100, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'ADD' + self.message_op = 'ADD' class TestCase3(TestGraphSendUVOp): @@ -109,7 +109,7 @@ def set_config(self): index = np.random.randint(0, 100, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'ADD' + self.message_op = 'ADD' class TestCase4(TestGraphSendUVOp): @@ -120,7 +120,7 @@ def set_config(self): index = np.random.randint(0, 100, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'MUL' + self.message_op = 'MUL' class TestCase5(TestGraphSendUVOp): @@ -131,7 +131,7 @@ def set_config(self): index = np.random.randint(0, 100, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'MUL' + self.message_op = 'MUL' class TestCase6(TestGraphSendUVOp): @@ -142,7 +142,7 @@ def set_config(self): index = np.random.randint(0, 10, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'ADD' + self.message_op = 'ADD' class TestCase7(TestGraphSendUVOp): @@ -153,7 +153,7 @@ def set_config(self): index = np.random.randint(0, 10, (15, 2)).astype(np.int64) self.src_index = index[:, 0] self.dst_index = index[:, 1] - self.compute_type = 'MUL' + self.message_op = 'MUL' class API_GeometricSendUVTest(unittest.TestCase): @@ -169,22 
+169,22 @@ def test_compute_all_dygraph(self): y, src_index, dst_index, - compute_type="add") + message_op="add") res_sub = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="sub") + message_op="sub") res_mul = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="mul") + message_op="mul") res_div = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="div") + message_op="div") res = [res_add, res_sub, res_mul, res_div] np_add = np.array([[2, 5, 7], [5, 9, 11], [4, 9, 11], [1, 3, 5]], @@ -213,22 +213,22 @@ def test_compute_all_static(self): y, src_index, dst_index, - compute_type="add") + message_op="add") res_sub = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="sub") + message_op="sub") res_mul = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="mul") + message_op="mul") res_div = paddle.geometric.send_uv(x, y, src_index, dst_index, - compute_type="div") + message_op="div") exe = paddle.static.Executor(paddle.CPUPlace()) data1 = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32") diff --git a/python/paddle/geometric/message_passing/.send.py.swp b/python/paddle/geometric/message_passing/.send.py.swp deleted file mode 100644 index 7175c240dbb5b37d0b35e02de41dd55b64e35373..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&5j#I5XWcXzyTHz9N|z*4(x(=>~$gq1?6Bii!x#vBYQVdPNVirji;rV?xwqE zy~Blf;0lidyaC@L1a3S5uR!(MmNpPUuApk^@1E|duCDr(FP3XDx|eKZrFgkqFnV}J7+2HV_^<;WqYqi~6 zqHfV!SqxirPAoAu!a1pyLx-wNO#dX0yvGEXz*73Py`FGkTL(r2WM z^epMC-6;Bk^f^hArlgGYHt8+WKIwPz|10Sc=@-&N($A!D?@y#3Nk5RjCw)ia!33B9 z6JP>NfC(@GCcp%k02BB}0ze~<<4c?EbBPs%4&uB!-@6zK0UbZQ=|TvXTWfLM=P$riP_HN2{ox8tA%KQfqZ^DpW~c?x&}Br)Q^de=?g*veWc< z4$~PNO|v`cX*$hl^*&6p2k=3f-5CH%3c> diff --git a/python/paddle/geometric/message_passing/send.py b/python/paddle/geometric/message_passing/send.py index 1d26c45c5f9ef..27c57ecfeb9ca 100644 --- a/python/paddle/geometric/message_passing/send.py +++ b/python/paddle/geometric/message_passing/send.py @@ -21,7 +21,7 @@ from .utils import reshape_lhs_rhs -def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): +def send_uv(x, y, src_index, dst_index, message_op="add", name=None): """ Graph Learning message passing api. @@ -29,17 +29,17 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): This api is mainly used in Graph Learning domain, and the main purpose is to reduce intermediate memory consumption in the process of message passing. Take `x` as the source node feature tensor, take `y` as the destination node feature tensor. Then we use `src_index` and `dst_index` to gather the corresponding data, - and then compute the edge features in different compute_types like `add`, `sub`, `mul`, `div`. + and then compute the edge features in different message_ops like `add`, `sub`, `mul`, `div`. .. code-block:: text Given: - X = [[0, 2, 3], + x = [[0, 2, 3], [1, 4, 5], [2, 6, 7]] - Y = [[0, 1, 2], + y = [[0, 1, 2], [2, 3, 4], [4, 5, 6]] @@ -47,11 +47,11 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): dst_index = [1, 2, 1, 0] - compute_type = "add" + message_op = "add" Then: - Out = [[2, 5, 7], + out = [[2, 5, 7], [5, 9, 11], [4, 9, 11], [0, 3, 5]] @@ -62,7 +62,7 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): src_index (Tensor): An 1-D tensor, and the available data type is int32, int64. dst_index (Tensor): An 1-D tensor, and should have the same shape as `src_index`. 
The available data type is int32, int64. - compute_type (Tensor): Different compute types for x and y, including `add`, `sub`, `mul` and `div`. + message_op (Tensor): Different message ops for x and y, including `add`, `sub`, `mul` and `div`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -80,32 +80,32 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): indexes = paddle.to_tensor([[0, 1], [1, 2], [2, 1], [0, 0]], dtype="int32") src_index = indexes[:, 0] dst_index = indexes[:, 1] - out = paddle.geometric.send_uv(x, y, src_index, dst_index, compute_type="add") + out = paddle.geometric.send_uv(x, y, src_index, dst_index, message_op="add") # Outputs: [[2., 5., 7.], [5., 9., 11.], [4., 9., 11.], [0., 3., 5.]] """ - if compute_type not in ['add', 'sub', 'mul', 'div']: + if message_op not in ['add', 'sub', 'mul', 'div']: raise ValueError( - "compute_type should be `add`, `sub`, `mul`, `div`, but received %s" - % compute_type) + "message_op should be `add`, `sub`, `mul`, `div`, but received %s" % + message_op) x, y = reshape_lhs_rhs(x, y) - if compute_type == 'sub': - compute_type = 'add' + if message_op == 'sub': + message_op = 'add' y = -y - if compute_type == 'div': - compute_type = 'mul' + if message_op == 'div': + message_op = 'mul' y = 1. / y if in_dygraph_mode(): return _C_ops.final_state_graph_send_uv(x, y, src_index, dst_index, - compute_type.upper()) + message_op.upper()) else: if _in_legacy_dygraph(): return _C_ops.graph_send_uv(x, y, src_index, dst_index, - "compute_type", compute_type.upper()) + "message_op", message_op.upper()) else: helper = LayerHelper("send_uv", **locals()) check_variable_and_dtype( @@ -126,7 +126,7 @@ def send_uv(x, y, src_index, dst_index, compute_type="add", name=None): 'src_index': src_index, 'dst_index': dst_index } - attrs = {'compute_type': compute_type.upper()} + attrs = {'message_op': message_op.upper()} helper.append_op(type="graph_send_uv", inputs=inputs, attrs=attrs, From ffc2e3e418299cf585176a5ad3b6229f1a667f29 Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Fri, 12 Aug 2022 04:54:10 +0000 Subject: [PATCH 10/12] add impl file --- .../kernels/impl/graph_message_passing_impl.h | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 paddle/phi/kernels/impl/graph_message_passing_impl.h diff --git a/paddle/phi/kernels/impl/graph_message_passing_impl.h b/paddle/phi/kernels/impl/graph_message_passing_impl.h new file mode 100644 index 0000000000000..dc1477e77227b --- /dev/null +++ b/paddle/phi/kernels/impl/graph_message_passing_impl.h @@ -0,0 +1,140 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright The DGL team. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include <vector>
+
+#include "paddle/phi/kernels/funcs/common_shape.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+
+namespace phi {
+
+struct BroadCastInfo {
+  bool use_bcast;
+  // l_offset[i] indicates the start position in tensor lhs that is required
+  // to compute the i-th element of the output; likewise for r_offset[i].
+  std::vector<int64_t> l_offset, r_offset;
+  int64_t l_len, r_len, out_len, reduce_size;
+};
+
+inline bool UseBroadCast(const phi::DDim& l_dims, const phi::DDim& r_dims) {
+  if (l_dims.size() != r_dims.size()) {
+    return true;
+  }
+  for (int i = 1; i < l_dims.size(); i++) {
+    if (l_dims[i] != r_dims[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+inline BroadCastInfo CalcBCastInfo(const phi::DDim& l_dims,
+                                   const phi::DDim& r_dims) {
+  BroadCastInfo binfo;
+  binfo.use_bcast = UseBroadCast(l_dims, r_dims);
+  binfo.l_len = 1;
+  binfo.r_len = 1;
+  for (int i = 1; i < l_dims.size(); i++) {
+    binfo.l_len *= l_dims[i];
+  }
+  for (int i = 1; i < r_dims.size(); i++) {
+    binfo.r_len *= r_dims[i];
+  }
+  // TODO(daisiming): Whether to add dot.
+  binfo.reduce_size = 1;
+  if (binfo.use_bcast) {
+    const int max_dim = std::max(l_dims.size(), r_dims.size()) - 1;
+    int stride_l = 1, stride_r = 1;
+    binfo.l_offset.emplace_back(0);
+    binfo.r_offset.emplace_back(0);
+    int out_len = 1;
+    for (int i = 0; i < max_dim; i++) {
+      // Iterate the axis from back to front.
+      const int dl =
+          (l_dims.size() - 1 - i < 1) ? 1 : l_dims[l_dims.size() - 1 - i];
+      const int dr =
+          (r_dims.size() - 1 - i < 1) ? 1 : r_dims[r_dims.size() - 1 - i];
+      for (int j = 1; j < std::max(dl, dr); j++) {
+        for (int k = 0; k < out_len; k++) {
+          binfo.l_offset.emplace_back(binfo.l_offset[k] +
+                                      j * (j < dl) * stride_l);
+          binfo.r_offset.emplace_back(binfo.r_offset[k] +
+                                      j * (j < dr) * stride_r);
+        }
+      }
+      out_len *= std::max(dl, dr);
+      stride_l *= dl;
+      stride_r *= dr;
+    }
+    binfo.out_len = out_len;
+  } else {
+    binfo.out_len = binfo.l_len;
+  }
+  return binfo;
+}
+
+inline std::vector<int> InferBroadcastShape(const phi::DDim& x_dims,
+                                            const phi::DDim& e_dims,
+                                            const std::string& type = "x") {
+  auto x_dims1 = phi::vectorize<int>(x_dims);
+  auto e_dims1 = phi::vectorize<int>(e_dims);
+  std::vector<int> x_dims2(x_dims1.begin() + 1, x_dims1.end());
+  std::vector<int> e_dims2(e_dims1.begin() + 1, e_dims1.end());
+  int max_dim = std::max(x_dims2.size(), e_dims2.size());
+  int axis = std::abs(static_cast<int>(x_dims2.size() - e_dims2.size()));
+  std::vector<int> x_dims_array(max_dim);
+  std::vector<int> e_dims_array(max_dim);
+  std::vector<int> out_dims_array(max_dim);
+  // Only need to broadcast dimensions other than the 0th dimension.
+  phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2),
+                                     phi::make_ddim(e_dims2),
+                                     x_dims_array.data(),
+                                     e_dims_array.data(),
+                                     out_dims_array.data(),
+                                     max_dim,
+                                     axis);
+  if (type == "x") {
+    out_dims_array.insert(out_dims_array.begin(), x_dims[0]);
+  } else {
+    out_dims_array.insert(out_dims_array.begin(), e_dims[0]);
+  }
+  return out_dims_array;
+}
+
+inline bool ReduceGrad(const phi::DDim& out_grad_dims,
+                       const phi::DDim& x_dims,
+                       std::vector<int64_t>& axis) {
+  // We must ensure the ndim of out_grad and x are the same.
+  bool reduce = false;
+  for (int i = 1; i < out_grad_dims.size(); i++) {
+    if (out_grad_dims[i] != x_dims[i]) {
+      reduce = true;
+      break;
+    }
+  }
+  if (!reduce) return false;
+
+  // Get reduce axis.
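+  // For example (purely illustrative): with out_grad dims [E, 8, 4] and
+  // x dims [E, 8, 1], only axis 2 differs, so axis = {2}; the caller then
+  // sums out_grad over axis 2 before scattering it back into x_grad.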
+  for (int i = 1; i < out_grad_dims.size(); i++) {
+    if (out_grad_dims[i] - x_dims[i] != 0) {
+      axis.emplace_back(i);
+    }
+  }
+  return true;
+}
+
+}  // namespace phi

From 0752fdf6989fb4d97343f29b68f826ef2d7fd1e6 Mon Sep 17 00:00:00 2001
From: DesmonDay <908660116@qq.com>
Date: Fri, 12 Aug 2022 06:27:21 +0000
Subject: [PATCH 11/12] fix unittest timeout time

---
 python/paddle/fluid/tests/unittests/CMakeLists.txt           | 1 +
 python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 9f899552e6987..32e89576a914b 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1563,6 +1563,7 @@ set_tests_properties(test_pool3d_api PROPERTIES TIMEOUT 120)
 set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_split_program PROPERTIES TIMEOUT 120)
 set_tests_properties(test_graph_send_ue_recv_op PROPERTIES TIMEOUT 60)
+set_tests_properties(test_graph_send_uv_op PROPERTIES TIMEOUT 60)

 if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py
index ee74016999c68..3d0cc3a57c6b3 100644
--- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py
@@ -259,3 +259,7 @@ def test_compute_all_static(self):
         self.assertTrue(
             np.allclose(np_res, paddle_res, atol=1e-6), "two value is\
 {}\n{}, check diff!".format(np_res, paddle_res))
+
+    def test_api_eager_dygraph(self):
+        with _test_eager_guard():
+            self.test_compute_all_dygraph()

From 9cb1b16ee2bca999760f58072d3591665ee90da5 Mon Sep 17 00:00:00 2001
From: DesmonDay <908660116@qq.com>
Date: Mon, 15 Aug 2022 07:13:54 +0000
Subject: [PATCH 12/12] add review revise

---
 .../kernels/cpu/graph_send_uv_grad_kernel.cc  |  12 +-
 .../phi/kernels/cpu/graph_send_uv_kernel.cc   |   8 +-
 .../kernels/gpu/graph_send_uv_grad_kernel.cu  |   8 +-
 .../phi/kernels/gpu/graph_send_uv_kernel.cu   |   8 +-
 .../geometric/message_passing/__init__.py     |   8 +-
 .../paddle/geometric/message_passing/send.py  | 134 ------------------
 .../geometric/message_passing/send_recv.py    | 115 +++++++++++++++
 7 files changed, 151 insertions(+), 142 deletions(-)
 delete mode 100644 python/paddle/geometric/message_passing/send.py

diff --git a/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc
index bd92875461da7..4e28acdad3db4 100644
--- a/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/graph_send_uv_grad_kernel.cc
@@ -63,7 +63,7 @@ void CalculateGrad(const Context& ctx,
     auto out_grad_dims_1 = phi::vectorize(out_grad_dims);
     std::vector<int64_t> out_grad_dims_2(out_grad_dims_1.begin() + 1,
                                          out_grad_dims_1.end());
-    out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]);
+    out_grad_dims_2.emplace(out_grad_dims_2.begin(), x_grad_dims[0]);
     DenseTensor x_grad_v2 = phi::Empty<T, Context>(ctx, out_grad_dims_2);
     phi::funcs::SetConstant<Context, T>()(ctx, &x_grad_v2, T(0));
     T* x_grad_v2_data = x_grad_v2.data<T>();
@@ -120,7 +120,7 @@ void CalculateGrad(const Context& ctx,
       auto out_grad_dims_1 = phi::vectorize(out_grad_dims);
       std::vector<int64_t> out_grad_dims_2(out_grad_dims_1.begin() + 1,
                                            out_grad_dims_1.end());
-      out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]);
+      out_grad_dims_2.emplace(out_grad_dims_2.begin(), x_grad_dims[0]);
      DenseTensor x_grad_v2 = phi::Empty<T, Context>(ctx, out_grad_dims_2);
      phi::funcs::SetConstant<Context, T>()(ctx, &x_grad_v2, T(0));
      T* x_grad_v2_data = x_grad_v2.data<T>();
@@ -168,6 +168,13 @@ void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx,
                                          DenseTensor* y_grad) {
   const int64_t& index_size = dst_index.dims()[0];

+  PADDLE_ENFORCE_GT(
+      index_size,
+      0,
+      errors::InvalidArgument("The first dimension of src_index or dst_index "
+                              "should be greater than 0, but received %d.",
+                              index_size));
+
   ctx.template Alloc<T>(x_grad);
   T* x_grad_data = x_grad->data<T>();
   ctx.template Alloc<T>(y_grad);
@@ -189,7 +196,6 @@ void GraphSendUVGradOpKernelLaunchHelper(const Context& ctx,
   memset(x_grad_data, 0, memset_bytes_x);
   memset(y_grad_data, 0, memset_bytes_y);

-  if (index_size == 0) return;
   const T* out_grad_data = out_grad.data<T>();
   const IndexT* s_index = src_index.data<IndexT>();
   const IndexT* d_index = dst_index.data<IndexT>();
diff --git a/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc
index 6404999ba23df..2183eb2a4c593 100644
--- a/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc
+++ b/paddle/phi/kernels/cpu/graph_send_uv_kernel.cc
@@ -58,6 +58,13 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx,
                                      const std::string& message_op,
                                      DenseTensor* out) {
   const int& index_size = src_index.dims()[0];
+  PADDLE_ENFORCE_GT(
+      index_size,
+      0,
+      errors::InvalidArgument("The first dimension of src_index or dst_index "
+                              "should be greater than 0, but received %d.",
+                              index_size));
+
   auto out_dims = out->dims();
   int64_t memset_size = 1;
   for (int i = 0; i < out_dims.size(); i++) {
@@ -65,7 +72,6 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx,
   }
   ctx.template Alloc<T>(out);
   T* out_data = out->data<T>();
-  if (index_size == 0) return;

   const auto& bcast_info = phi::CalcBCastInfo(x.dims(), y.dims());
   const T* x_data = x.data<T>();
diff --git a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu
index 4fb0335241d01..5b8d7b28dcc29 100644
--- a/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/graph_send_uv_grad_kernel.cu
@@ -219,6 +219,12 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx,
                                              DenseTensor* x_grad,
                                              DenseTensor* y_grad) {
   const int64_t& index_size = dst_index.dims()[0];
+  PADDLE_ENFORCE_GT(
+      index_size,
+      0,
+      errors::InvalidArgument("The first dimension of src_index or dst_index "
+                              "should be greater than 0, but received %d.",
+                              index_size));

   ctx.template Alloc<T>(x_grad);
   T* x_grad_data = x_grad->data<T>();
@@ -246,8 +252,6 @@ void GraphSendUVGradOpCUDAKernelLaunchHelper(const Context& ctx,
   cudaMemset(y_grad_data, 0, memset_bytes_y);
 #endif

-  if (index_size == 0) return;
-
   const T* out_grad_data = out_grad.data<T>();
   const IndexT* s_index = src_index.data<IndexT>();
   const IndexT* d_index = dst_index.data<IndexT>();
diff --git a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu
index 05f38a5dd3cda..f1e4581773f54 100644
--- a/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu
+++ b/paddle/phi/kernels/gpu/graph_send_uv_kernel.cu
@@ -71,6 +71,13 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx,
                                          const std::string& message_op,
                                          DenseTensor* out) {
   const int64_t& index_size = src_index.dims()[0];
+  PADDLE_ENFORCE_GT(
+      index_size,
+      0,
+      errors::InvalidArgument("The first dimension of src_index or dst_index "
+                              "should be greater than 0, but received %d.",
+                              index_size));
+
   auto out_dims = out->dims();
   int64_t memset_size = 1;
   for (int i = 0; i < out_dims.size(); i++) {
@@ -78,7 +85,6 @@ void GraphSendUVOpCUDAKernelLaunchHelper(const Context& ctx,
   }
   ctx.template Alloc<T>(out);
   T* out_data = out->data<T>();
-  if (index_size == 0) return;

   const auto& bcast_info = phi::CalcBCastInfo(x.dims(), y.dims());
   const T* x_data = x.data<T>();
diff --git a/python/paddle/geometric/message_passing/__init__.py b/python/paddle/geometric/message_passing/__init__.py
index dd195e45e3d14..f215e5be74a48 100644
--- a/python/paddle/geometric/message_passing/__init__.py
+++ b/python/paddle/geometric/message_passing/__init__.py
@@ -14,4 +14,10 @@

 from .send_recv import send_u_recv  # noqa: F401
 from .send_recv import send_ue_recv  # noqa: F401
-from .send import send_uv  # noqa: F401
+from .send_recv import send_uv  # noqa: F401
+
+__all__ = [
+    'send_u_recv',
+    'send_ue_recv',
+    'send_uv',
+]
diff --git a/python/paddle/geometric/message_passing/send.py b/python/paddle/geometric/message_passing/send.py
deleted file mode 100644
index 27c57ecfeb9ca..0000000000000
--- a/python/paddle/geometric/message_passing/send.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
-from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle import _C_ops
-
-from .utils import reshape_lhs_rhs
-
-
-def send_uv(x, y, src_index, dst_index, message_op="add", name=None):
-    """
-
-    Graph Learning message passing api.
-
-    This api is mainly used in Graph Learning domain, and the main purpose is to reduce intermediate memory
-    consumption in the process of message passing. Take `x` as the source node feature tensor, take `y` as
-    the destination node feature tensor. Then we use `src_index` and `dst_index` to gather the corresponding data,
-    and then compute the edge features in different message_ops like `add`, `sub`, `mul`, `div`.
-
-    .. code-block:: text
-
-        Given:
-
-        x = [[0, 2, 3],
-             [1, 4, 5],
-             [2, 6, 7]]
-
-        y = [[0, 1, 2],
-             [2, 3, 4],
-             [4, 5, 6]]
-
-        src_index = [0, 1, 2, 0]
-
-        dst_index = [1, 2, 1, 0]
-
-        message_op = "add"
-
-        Then:
-
-        out = [[2, 5, 7],
-               [5, 9, 11],
-               [4, 9, 11],
-               [0, 3, 5]]
-
-    Args:
-        x (Tensor): The source node feature tensor, and the available data type is float32, float64, int32, int64. And we support float16 in gpu version.
-        y (Tensor): The destination node feature tensor, and the available data type is float32, float64, int32, int64. And we support float16 in gpu version.
-        src_index (Tensor): An 1-D tensor, and the available data type is int32, int64.
-        dst_index (Tensor): An 1-D tensor, and should have the same shape as `src_index`.
-                            The available data type is int32, int64.
-        message_op (Tensor): Different message ops for x and y, including `add`, `sub`, `mul` and `div`.
-        name (str, optional): Name for the operation (optional, default is None).
-                              For more information, please refer to :ref:`api_guide_Name`.
-
-    Returns:
-        out (Tensor): The output tensor.
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle
-
-            x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
-            y = paddle.to_tensor([[0, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32")
-            indexes = paddle.to_tensor([[0, 1], [1, 2], [2, 1], [0, 0]], dtype="int32")
-            src_index = indexes[:, 0]
-            dst_index = indexes[:, 1]
-            out = paddle.geometric.send_uv(x, y, src_index, dst_index, message_op="add")
-            # Outputs: [[2., 5., 7.], [5., 9., 11.], [4., 9., 11.], [0., 3., 5.]]
-
-    """
-
-    if message_op not in ['add', 'sub', 'mul', 'div']:
-        raise ValueError(
-            "message_op should be `add`, `sub`, `mul`, `div`, but received %s" %
-            message_op)
-
-    x, y = reshape_lhs_rhs(x, y)
-
-    if message_op == 'sub':
-        message_op = 'add'
-        y = -y
-    if message_op == 'div':
-        message_op = 'mul'
-        y = 1. / y
-
-    if in_dygraph_mode():
-        return _C_ops.final_state_graph_send_uv(x, y, src_index, dst_index,
-                                                message_op.upper())
-    else:
-        if _in_legacy_dygraph():
-            return _C_ops.graph_send_uv(x, y, src_index, dst_index,
-                                        "message_op", message_op.upper())
-        else:
-            helper = LayerHelper("send_uv", **locals())
-            check_variable_and_dtype(
-                x, 'x', ['int32', 'int64', 'float32', 'float64', 'float16'],
-                'graph_send_uv')
-            check_variable_and_dtype(
-                y, 'y', ['int32', 'int64', 'float32', 'float64', 'float16'],
-                'graph_send_uv')
-            check_variable_and_dtype(src_index, 'src_index', ['int32', 'int64'],
-                                     'graph_send_uv')
-            check_variable_and_dtype(dst_index, 'dst_index', ['int32', 'int64'],
-                                     'graph_send_uv')
-            out = helper.create_variable_for_type_inference(dtype=x.dtype)
-
-            inputs = {
-                'x': x,
-                'y': y,
-                'src_index': src_index,
-                'dst_index': dst_index
-            }
-            attrs = {'message_op': message_op.upper()}
-            helper.append_op(type="graph_send_uv",
-                             inputs=inputs,
-                             attrs=attrs,
-                             outputs={"out": out})
-            return out
diff --git a/python/paddle/geometric/message_passing/send_recv.py b/python/paddle/geometric/message_passing/send_recv.py
index bfe63f1f04d73..de8fd3b005e29 100644
--- a/python/paddle/geometric/message_passing/send_recv.py
+++ b/python/paddle/geometric/message_passing/send_recv.py
@@ -21,6 +21,8 @@

 from .utils import convert_out_size_to_list, get_out_size_tensor_inputs, reshape_lhs_rhs

+__all__ = []
+

 def send_u_recv(x,
                 src_index,
@@ -336,3 +338,116 @@ def send_ue_recv(x,
         },
         attrs=attrs)
     return out
+
+
+def send_uv(x, y, src_index, dst_index, message_op="add", name=None):
+    """
+
+    Graph Learning message passing API.
+
+    This API is mainly used in the Graph Learning domain, and its main purpose is to reduce
+    intermediate memory consumption in the process of message passing. Take `x` as the source
+    node feature tensor and `y` as the destination node feature tensor; `src_index` and
+    `dst_index` are then used to gather the corresponding data, and the edge features are
+    computed with the chosen `message_op`, one of `add`, `sub`, `mul`, `div`.
+
+    .. code-block:: text
+
+        Given:
+
+        x = [[0, 2, 3],
+             [1, 4, 5],
+             [2, 6, 7]]
+
+        y = [[0, 1, 2],
+             [2, 3, 4],
+             [4, 5, 6]]
+
+        src_index = [0, 1, 2, 0]
+
+        dst_index = [1, 2, 1, 0]
+
+        message_op = "add"
+
+        Then:
+
+        out = [[2, 5, 7],
+               [5, 9, 11],
+               [4, 9, 11],
+               [0, 3, 5]]
+
+    Args:
+        x (Tensor): The source node feature tensor, and the available data type is float32, float64, int32, int64.
+                    Float16 is also supported in the GPU version.
+        y (Tensor): The destination node feature tensor, and the available data type is float32, float64, int32, int64.
+                    Float16 is also supported in the GPU version.
+        src_index (Tensor): A 1-D tensor, and the available data type is int32, int64.
+        dst_index (Tensor): A 1-D tensor, and should have the same shape as `src_index`.
+                            The available data type is int32, int64.
+        message_op (str): Different message ops for x and y, including `add`, `sub`, `mul` and `div`.
+        name (str, optional): Name for the operation (optional, default is None).
+                              For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        out (Tensor): The output tensor.
+
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+
+            x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
+            y = paddle.to_tensor([[0, 1, 2], [2, 3, 4], [4, 5, 6]], dtype="float32")
+            indexes = paddle.to_tensor([[0, 1], [1, 2], [2, 1], [0, 0]], dtype="int32")
+            src_index = indexes[:, 0]
+            dst_index = indexes[:, 1]
+            out = paddle.geometric.send_uv(x, y, src_index, dst_index, message_op="add")
+            # Outputs: [[2., 5., 7.], [5., 9., 11.], [4., 9., 11.], [0., 3., 5.]]
+
+    """
+
+    if message_op not in ['add', 'sub', 'mul', 'div']:
+        raise ValueError(
+            "message_op should be `add`, `sub`, `mul`, `div`, but received %s" %
+            message_op)
+
+    x, y = reshape_lhs_rhs(x, y)
+
+    if message_op == 'sub':
+        message_op = 'add'
+        y = -y
+    if message_op == 'div':
+        message_op = 'mul'
+        y = 1. / y
+
+    if in_dygraph_mode():
+        return _C_ops.final_state_graph_send_uv(x, y, src_index, dst_index,
+                                                message_op.upper())
+    else:
+        if _in_legacy_dygraph():
+            return _C_ops.graph_send_uv(x, y, src_index, dst_index,
+                                        "message_op", message_op.upper())
+        else:
+            helper = LayerHelper("send_uv", **locals())
+            check_variable_and_dtype(
+                x, 'x', ['int32', 'int64', 'float32', 'float64', 'float16'],
+                'graph_send_uv')
+            check_variable_and_dtype(
+                y, 'y', ['int32', 'int64', 'float32', 'float64', 'float16'],
+                'graph_send_uv')
+            check_variable_and_dtype(src_index, 'src_index', ['int32', 'int64'],
+                                     'graph_send_uv')
+            check_variable_and_dtype(dst_index, 'dst_index', ['int32', 'int64'],
+                                     'graph_send_uv')
+            out = helper.create_variable_for_type_inference(dtype=x.dtype)
+
+            inputs = {
+                'x': x,
+                'y': y,
+                'src_index': src_index,
+                'dst_index': dst_index
+            }
+            attrs = {'message_op': message_op.upper()}
+            helper.append_op(type="graph_send_uv",
+                             inputs=inputs,
+                             attrs=attrs,
+                             outputs={"out": out})
+            return out
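As a quick reference for the semantics added above: for 1-D index tensors and
broadcastable feature shapes, send_uv behaves like the following NumPy sketch
(the helper name send_uv_ref is illustrative only and is not part of the patch;
inputs are assumed to be NumPy arrays).

    import numpy as np

    def send_uv_ref(x, y, src_index, dst_index, message_op="add"):
        # Gather the per-edge source and destination features, then combine
        # them elementwise; NumPy broadcasting plays the role of CalcBCastInfo.
        x, y = np.asarray(x), np.asarray(y)
        ops = {"add": np.add, "sub": np.subtract,
               "mul": np.multiply, "div": np.divide}
        return ops[message_op](x[src_index], y[dst_index])

    # Reproduces the docstring example:
    # x = [[0, 2, 3], [1, 4, 5], [2, 6, 7]], y = [[0, 1, 2], [2, 3, 4], [4, 5, 6]]
    # src_index = [0, 1, 2, 0], dst_index = [1, 2, 1, 0], message_op = "add"
    # -> [[2, 5, 7], [5, 9, 11], [4, 9, 11], [0, 3, 5]]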