From d04f2e5ab06f83af5b6a26fccfde742dcc0903cf Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 7 Apr 2022 09:13:51 +0000 Subject: [PATCH 01/23] batch_norm --- .../fluid/tests/unittests/test_sparse_norm.py | 32 +++++++++ python/paddle/sparse/__init__.py | 3 +- python/paddle/sparse/layer/__init__.py | 1 + python/paddle/sparse/layer/norm.py | 70 +++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 python/paddle/fluid/tests/unittests/test_sparse_norm.py create mode 100644 python/paddle/sparse/layer/norm.py diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm.py b/python/paddle/fluid/tests/unittests/test_sparse_norm.py new file mode 100644 index 0000000000000..ac505e3b7b42e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import paddle +from paddle.fluid.framework import _test_eager_guard + + +class TestSparseBatchNorm(unittest.TestCase): + def test_sparse_batch_norm(self): + with _test_eager_guard(): + shape = [2, 6, 6, 6, 4] + dense_x = paddle.randn(shape) + print(dense_x) + batch_norm = paddle.nn.BatchNorm3D(4, data_format="NDHWC") + dense_y = batch_norm(dense_x) + sparse_dim = 4 + sparse_x = dense_x.to_sparse_coo(sparse_dim) + batch_norm = paddle.sparse.BatchNorm(4) diff --git a/python/paddle/sparse/__init__.py b/python/paddle/sparse/__init__.py index aff9625469ef2..1bcd87efe6956 100644 --- a/python/paddle/sparse/__init__.py +++ b/python/paddle/sparse/__init__.py @@ -15,5 +15,6 @@ from .creation import sparse_coo_tensor from .creation import sparse_csr_tensor from .layer.activation import ReLU +from .layer.norm import BatchNorm -__all__ = ['sparse_coo_tensor', 'sparse_csr_tensor', 'ReLU'] +__all__ = ['sparse_coo_tensor', 'sparse_csr_tensor', 'ReLU', 'BatchNorm'] diff --git a/python/paddle/sparse/layer/__init__.py b/python/paddle/sparse/layer/__init__.py index 66abce260b6f7..5a5f59183f12e 100644 --- a/python/paddle/sparse/layer/__init__.py +++ b/python/paddle/sparse/layer/__init__.py @@ -13,5 +13,6 @@ # limitations under the License. from .activation import ReLU +from .norm import BatchNorm __all__ = [] diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py new file mode 100644 index 0000000000000..4f2818cd2dcb2 --- /dev/null +++ b/python/paddle/sparse/layer/norm.py @@ -0,0 +1,70 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BatchNorm(paddle.fluid.dygraph.BatchNorm): + def __init__(self, + num_channels, + act=None, + is_test=False, + momentum=0.9, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + dtype='float32', + data_layout='NCHW', + in_place=False, + moving_mean_name=None, + moving_variance_name=None, + do_model_average_for_mean_and_var=True, + use_global_stats=False, + trainable_statistics=False): + super(BatchNorm, self).__init__( + num_channels, + act=act, + is_test=is_test, + momentum=momentum, + epsilon=epsilon, + param_attr=param_attr, + bias_attr=bias_attr, + dtype=dtype, + data_layout, + in_place=in_place, + moving_mean_name=moving_mean_name, + moving_variance_name=moving_variance_name, + do_model_average_for_mean_and_var=do_model_average_for_mean_and_var, + use_global_stats=use_global_stats, + trainable_statistics=tranable_statistics) + + def forward(self, input): + values = input.values() + out = super(BatchNorm, self).forward(values) + return paddle.sparse.sparse_coo_tensor( + input.indices(), + out, + shape=input.shape, + stop_gradient=input.stop_gradient) From bfe56c73aebdcdd86c768225e26b458fc73a65c6 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Sat, 9 Apr 2022 14:21:50 +0000 Subject: [PATCH 02/23] add kernel sparse_mask_helper; sparse_coo_tensor_grad --- .../phi/kernels/funcs/sparse/common_shape.h | 27 +++ .../kernels/sparse/cpu/sparse_mask_kernel.cc | 103 ++++++++++-- .../kernels/sparse/cpu/sparse_utils_kernel.cc | 12 ++ .../kernels/sparse/gpu/sparse_mask_kernel.cu | 156 +++++++++++++++++- .../kernels/sparse/gpu/sparse_utils_kernel.cu | 12 ++ .../phi/kernels/sparse/sparse_mask_kernel.h | 6 + .../sparse/sparse_utils_grad_kernel.cc | 25 +++ .../kernels/sparse/sparse_utils_grad_kernel.h | 9 + .../phi/kernels/sparse/sparse_utils_kernel.h | 12 ++ .../tests/unittests/test_sparse_utils_op.py | 21 +++ python/paddle/sparse/creation.py | 4 +- python/paddle/utils/code_gen/sparse_api.yaml | 8 + .../paddle/utils/code_gen/sparse_bw_api.yaml | 7 + 13 files changed, 389 insertions(+), 13 deletions(-) diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index 3617e3cd2f406..ea7583a16f9e2 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -40,6 +40,33 @@ inline const DDim InferDenseDims(const DDim& x_dims, return values_dims; } +template +inline IntT HOSTDEVICE IndicesToIndex(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int i) { + IntT index = 0; + 
for (IntT j = 0; j < sparse_dim; j++) { + index += indices[j * non_zero_num + i] * sparse_offsets[j]; + } + return index; +} + +// 1. indices.dims().size() == 2 +template +inline const void CalcOffsetsPerDim(const DenseTensor& indices, + const DDim& dims, + std::vector* offsets) { + const DDim& indices_dims = indices.dims(); + const IntT sparse_dim = indices_dims[0]; + IntT offset = 1; + for (IntT i = sparse_dim - 1; i >= 0; i--) { + (*offsets)[i] = offset; + offset *= dims[i]; + } +} + } // namespace sparse } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 0a5e145312e0e..8d9f0c37df18c 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/sparse/common_shape.h" #include "paddle/phi/api/ext/dispatch.h" @@ -38,12 +39,6 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx, const DenseTensor& indices = mask.non_zero_indices(); const DenseTensor& values = mask.non_zero_elements(); int sparse_dim = indices.dims().size(); - std::vector sparse_offsets(sparse_dim); - int64_t offset = 1; - for (int i = sparse_dim - 1; i >= 0; i--) { - sparse_offsets[i] = offset; - offset *= dims[i]; - } DenseTensor out_indices = phi::EmptyLike(dev_ctx, indices); DenseTensor out_values = phi::EmptyLike(dev_ctx, values); @@ -51,19 +46,23 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx, // the out_indices is same as indices of mask phi::Copy(dev_ctx, indices, dev_ctx.GetPlace(), false, &out_indices); - const IntT* indices_ptr = indices.data(); T* out_values_ptr = out_values.data(); const T* x_ptr = x.data(); const int64_t non_zero_num = mask.nnz(); auto dims_2d = flatten_to_2d(dims, sparse_dim); const int cols = dims_2d[1]; + const IntT* indices_ptr = indices.data(); + std::vector out_indexs(non_zero_num), sparse_offsets(sparse_dim); + // phi::funcs::sparse::FlattenIndices(indices, x.dims(), &out_indexs); for (int64_t i = 0; i < non_zero_num; i++) { - int64_t index = 0; - for (int j = 0; j < sparse_dim; j++) { - index += indices_ptr[j * non_zero_num + i] * sparse_offsets[j]; - } + out_indexs[i] = phi::funcs::sparse::IndicesToIndex( + indices_ptr, sparse_offsets.data(), non_zero_num, sparse_dim, i); + } + + for (int64_t i = 0; i < non_zero_num; i++) { + int64_t index = out_indexs[i]; memcpy(out_values_ptr + i * cols, x_ptr + index * cols, cols * sizeof(T)); } out->SetMember(out_indices, out_values, dims, true); @@ -85,6 +84,75 @@ void SparseMaskKernel(const Context& dev_ctx, })); } +template +void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mask_indices, + DenseTensor* out) { + PADDLE_ENFORCE_EQ( + mask_indices.dims().size(), + 2, + phi::errors::InvalidArgument("the mask_indices must be 2-D tensor")); + + const int64_t sparse_dim = x.non_zero_indices().dims()[0]; + + std::vector sparse_offsets(sparse_dim), x_indexs(x.nnz()), + mask_indexs(mask_indices.dims()[1]); + phi::funcs::sparse::CalcOffsetsPerDim( + x.non_zero_indices(), x.dims(), &sparse_offsets); + + auto FlattenIndices = [](const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + std::vector* out) { + for (int64_t i = 0; i < non_zero_num; 
i++) { + (*out)[i] = phi::funcs::sparse::IndicesToIndex( + indices, sparse_offsets, non_zero_num, sparse_dim, i); + } + }; + + FlattenIndices(x.non_zero_indices().data(), + sparse_offsets.data(), + x.nnz(), + sparse_dim, + &x_indexs); + FlattenIndices(mask_indices.data(), + sparse_offsets.data(), + x.nnz(), + sparse_dim, + &mask_indexs); + + std::set x_indexs_set(x_indexs.begin(), x_indexs.end()); + *out = phi::EmptyLike(dev_ctx, x.non_zero_elements()); + T* out_ptr = out->data(); + memset(out_ptr, static_cast(0), out->numel() * sizeof(T)); + const int stride = x.non_zero_elements().dims()[1]; + const T* in_ptr = x.non_zero_elements().data(); + for (uint64_t i = 0; i < mask_indexs.size(); i++) { + auto iter = x_indexs_set.find(mask_indexs[i]); + if (iter != x_indexs_set.end()) { + memcpy(out_ptr + i * stride, + in_ptr + mask_indexs[i] * stride, + stride * sizeof(T)); + } + } +} + +/** + * @brief filter values from x.values() using mask_indices + */ +template +void SparseMaskHelperKernel(const Context& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mask_indices, + DenseTensor* out) { + PD_DISPATCH_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] { + SparseMaskHelperCPUKernel(dev_ctx, x, mask_indices, out); + })); +} + } // namespace sparse } // namespace phi @@ -101,3 +169,16 @@ PD_REGISTER_KERNEL(sparse_mask, int64_t) { kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); } + +PD_REGISTER_KERNEL(sparse_mask_helper, + CPU, + ALL_LAYOUT, + phi::sparse::SparseMaskHelperKernel, + float, + double, + uint8_t, + int16_t, + int, + int64_t) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index acc834269663d..0499371a4dd17 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -394,3 +394,15 @@ PD_REGISTER_KERNEL(csr_values, int64_t) { kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); } + +PD_REGISTER_KERNEL(sparse_coo_tensor, + CPU, + ALL_LAYOUT, + phi::sparse::SparseCooTensorKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t) {} diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index d206d6bbc195c..1f6b32d90b14e 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/ddim.h" @@ -20,6 +22,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/sparse/common_shape.h" #include "paddle/phi/kernels/sparse/sparse_mask_kernel.h" #include "paddle/phi/api/ext/dispatch.h" @@ -59,7 +62,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx, const DenseTensor& indices = mask.non_zero_indices(); const DenseTensor& values = mask.non_zero_elements(); int sparse_dim = indices.dims().size(); - DenseTensor sparse_offsets = phi::Empty( + DenseTensor sparse_offsets = phi::Empty( dev_ctx, DenseTensorMeta(DataType::INT64, {sparse_dim}, DataLayout::NCHW)); std::vector h_sparse_offsets(sparse_dim); @@ -121,6 +124,143 @@ void SparseMaskKernel(const Context& dev_ctx, })); } +template +__global__ void FlattenIndicesKernel(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + IntT* out) { + CUDA_KERNEL_LOOP_TYPE(i, non_zero_num, int64_t) { + out[i] = phi::funcs::sparse::IndicesToIndex( + indices, sparse_offsets, non_zero_num, sparse_dim, i); + } +} + +template +__global__ void SparseMaskCopyKernel(const IntT* x_indexs, + const IntT* mask_indexs, + const IntT* bound_out, + const T* x_values, + const int64_t n, + const int64_t stride, + T* out_values) { + CUDA_KERNEL_LOOP_TYPE(i, n, int64_t) { + const IntT j = bound_out[i]; + if (j >= 0 && j < n && mask_indexs[i] == x_indexs[j]) { + for (int k = 0; k < stride; k++) { + out_values[i * stride + k] = x_values[j * stride + k]; + } + } + } +} + +template +void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mask_indices, + DenseTensor* out) { + PADDLE_ENFORCE_EQ( + mask_indices.dims().size(), + 2, + phi::errors::InvalidArgument("the mask_indices must be 2-D tensor")); + + const int64_t sparse_dim = x.non_zero_indices().dims()[0]; + auto indices_dtype = paddle::experimental::CppTypeToDataType::Type(); + + std::vector sparse_offsets( + sparse_dim); //, x_indexs(x.nnz()), mask_indexs(mask_indices.dims()[1]); + + DenseTensorMeta x_indices_meta(indices_dtype, {x.nnz()}, DataLayout::NCHW); + DenseTensorMeta mask_indices_meta( + indices_dtype, {mask_indices.dims()[1]}, DataLayout::NCHW); + DenseTensorMeta sparse_offset_meta( + indices_dtype, {sparse_dim}, DataLayout::NCHW); + + DenseTensor x_indexs = + phi::Empty(dev_ctx, std::move(x_indices_meta)); + DenseTensor mask_indexs = + phi::Empty(dev_ctx, std::move(mask_indices_meta)); + DenseTensor bound_out = + phi::Empty(dev_ctx, std::move(mask_indices_meta)); + DenseTensor d_sparse_offsets = + phi::Empty(dev_ctx, std::move(sparse_offset_meta)); + IntT* x_indexs_ptr = x_indexs.data(); + IntT* mask_indexs_ptr = mask_indexs.data(); + IntT* bound_out_ptr = bound_out.data(); + + // 1. calc the offsets of per dim + phi::funcs::sparse::CalcOffsetsPerDim( + x.non_zero_indices(), x.dims(), &sparse_offsets); + // 2. copy sparse_offsets to device + phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data(), + sparse_offsets.data(), + sizeof(IntT) * sparse_dim, +#ifdef PADDLE_WITH_HIP + hipMemcpyHostToDevice, +#else + cudaMemcpyHostToDevice, +#endif + dev_ctx.stream()); + + // 3. 
flatten x indices and mask indices + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, x_indexs.numel(), 1); + FlattenIndicesKernel<<>>(x.non_zero_indices().data(), + d_sparse_offsets.data(), + x_indexs.numel(), + sparse_dim, + x_indexs_ptr); + + config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, mask_indexs.numel(), 1); + FlattenIndicesKernel<<>>(mask_indices.data(), + d_sparse_offsets.data(), + mask_indices.numel(), + sparse_dim, + mask_indexs_ptr); + // 4. call thrust::lower_bound + thrust::lower_bound(thrust::cuda::par.on(dev_ctx.stream()), + x_indexs_ptr, + x_indexs_ptr + x_indexs.numel(), + mask_indexs_ptr, + mask_indexs_ptr + mask_indexs.numel(), + bound_out_ptr); + + // 5. copy value to out + *out = phi::EmptyLike(dev_ctx, x.non_zero_elements()); + phi::funcs::SetConstant set_zero; + set_zero(dev_ctx, out, static_cast(0)); + T* out_ptr = out->data(); + + SparseMaskCopyKernel<<>>(x_indexs_ptr, + mask_indexs_ptr, + bound_out_ptr, + x.non_zero_elements().data(), + mask_indexs.numel(), + x.non_zero_elements().dims()[1], + out_ptr); +} + +template +void SparseMaskHelperKernel(const Context& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mask_indices, + DenseTensor* out) { + PD_DISPATCH_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] { + SparseMaskHelperGPUKernel(dev_ctx, x, mask_indices, out); + })); +} + } // namespace sparse } // namespace phi @@ -138,3 +278,17 @@ PD_REGISTER_KERNEL(sparse_mask, int64_t) { kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); } + +PD_REGISTER_KERNEL(sparse_mask_helper, + GPU, + ALL_LAYOUT, + phi::sparse::SparseMaskHelperKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} diff --git a/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu index 1109baf92e302..0b6ac1aed0147 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu @@ -665,3 +665,15 @@ PD_REGISTER_KERNEL(csr_values, int64_t) { kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); } + +PD_REGISTER_KERNEL(sparse_coo_tensor, + GPU, + ALL_LAYOUT, + phi::sparse::SparseCooTensorKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t) {} diff --git a/paddle/phi/kernels/sparse/sparse_mask_kernel.h b/paddle/phi/kernels/sparse/sparse_mask_kernel.h index 210412abd8620..88899e3dc672e 100644 --- a/paddle/phi/kernels/sparse/sparse_mask_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_mask_kernel.h @@ -26,5 +26,11 @@ void SparseMaskKernel(const Context& dev_ctx, const SparseCooTensor& mask, SparseCooTensor* out); +template +void SparseMaskHelperKernel(const Context& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mask_indices, + DenseTensor* out); + } // namespace sparse } // namespace phi diff --git a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc index 35329807e7798..71958db1d3fd8 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc @@ -66,6 +66,19 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad, kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); } +PD_REGISTER_KERNEL(sparse_coo_tensor_grad, + CPU, + ALL_LAYOUT, + phi::sparse::SparseCooTensorGradKernel, + float, + double, + uint8_t, + int16_t, + int, + 
int64_t) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_REGISTER_KERNEL(coo_values_grad, GPU, @@ -95,4 +108,16 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad, int64_t) { kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); } +PD_REGISTER_KERNEL(sparse_coo_tensor_grad, + GPU, + ALL_LAYOUT, + phi::sparse::SparseCooTensorGradKernel, + float, + double, + uint8_t, + int16_t, + int, + int64_t) { + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); +} #endif diff --git a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h index 0775582bf1fb8..a00b9c275c292 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/kernels/sparse/sparse_mask_kernel.h" namespace phi { namespace sparse { @@ -32,5 +33,13 @@ void SparseCooToDenseGradKernel(const Context& dev_ctx, const DenseTensor& out_grad, SparseCooTensor* x_grad); +template +void SparseCooTensorGradKernel(const Context& dev_ctx, + const DenseTensor& indices, + const SparseCooTensor& out_grad, + DenseTensor* values_grad) { + SparseMaskHelperKernel(dev_ctx, out_grad, indices, values_grad); +} + } // namespace sparse } // namespace phi diff --git a/paddle/phi/kernels/sparse/sparse_utils_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_kernel.h index 961cd9f829eb2..8ead6034b589b 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/phi/api/lib/utils/storage.h" +#include "paddle/phi/common/int_array.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" @@ -147,5 +148,16 @@ void CsrValuesKernel(const Context& dev_ctx, *out = x.non_zero_elements(); } +template +void SparseCooTensorKernel(const Context& dev_ctx, + const DenseTensor& indices, + const DenseTensor& values, + const IntArray& dense_shape, + SparseCooTensor* out) { + *out = + SparseCooTensor(indices, values, phi::make_ddim(dense_shape.GetData())); + // TODO(zhangkaihuo): sort and merge the dumplicate indices +} + } // namespace sparse } // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 04488ac58c5fb..f486f6469e599 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -186,6 +186,27 @@ def test_coo_values_grad(self): values_tensor.backward(paddle.to_tensor(out_grad)) assert np.array_equal(out_grad, sparse_x.grad.values().numpy()) + def test_sparse_coo_tensor_grad(self): + with _test_eager_guard(): + indices = [[0, 1], [0, 1]] + values = [1, 2] + indices = paddle.to_tensor(indices, dtype='int32') + values = paddle.to_tensor( + values, dtype='float32', stop_gradient=False) + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=[2, 2], stop_gradient=False) + print(sparse_x) + grad_indices = [[0, 1], [1, 0]] + grad_values = [2, 3] + grad_indices = paddle.to_tensor(grad_indices, dtype='int32') + grad_values = paddle.to_tensor(grad_values, dtype='float32') + sparse_out_grad = paddle.sparse.sparse_coo_tensor( + grad_indices, grad_values, shape=[2, 2]) + sparse_x.backward(sparse_out_grad) + print(sparse_x.grad) + print(values.grad) + print(indices.grad) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py index e29351e3d179c..34a62b947ec62 100644 --- a/python/paddle/sparse/creation.py +++ b/python/paddle/sparse/creation.py @@ -107,7 +107,9 @@ def sparse_coo_tensor(indices, values = _handle_dtype(values, dtype) if shape is None: shape = _infer_dense_shape(indices) - return core.eager.sparse_coo_tensor(indices, values, shape, stop_gradient) +#return core.eager.sparse_coo_tensor(indices, values, shape, stop_gradient) + return _C_ops.final_state_sparse_create_sparse_coo_tensor(indices, values, + shape) #TODO: need to support shape is None diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index 7bdd77e27bcef..4f30e966e9eb7 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -21,6 +21,14 @@ layout : x backward : coo_values_grad +- api : create_sparse_coo_tensor + args : (Tensor indices, Tensor values, IntArray dense_shape) + output : Tensor(out@SparseCooTensor) + kernel : + func : sparse_coo_tensor + layout : values + backward : create_sparse_coo_tensor_grad + - api : csr_values args : (Tensor x) output : Tensor(out@DenseTensor) diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml index 800145b06e0b6..2280ed6ad61a4 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api.yaml +++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml @@ -19,6 +19,13 @@ kernel : func : coo_values_grad +- backward_api : 
create_sparse_coo_tensor_grad + forward : create_sparse_coo_tensor(Tensor indices, Tensor values, IntArray dense_shape) -> Tensor(out@SparseCooTensor) + args : (Tensor indices, Tensor out_grad) + output : Tensor(values_grad@DenseTensor) + kernel : + func : sparse_coo_tensor_grad + - backward_api : dense_to_coo_grad forward : dense_to_coo(Tensor x, int64_t sparse_dim) -> Tensor(out@SparseCooTensor) args : (Tensor out_grad) From 311700fd9a2329b23985abd4dfe8fcc9491343eb Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Mon, 11 Apr 2022 09:28:31 +0000 Subject: [PATCH 03/23] fix bug --- .../phi/kernels/funcs/sparse/common_shape.h | 16 ++++----- .../kernels/sparse/cpu/sparse_mask_kernel.cc | 14 +++++--- .../kernels/sparse/gpu/sparse_mask_kernel.cu | 26 ++++++++------ .../sparse/sparse_utils_grad_kernel.cc | 2 +- .../phi/kernels/sparse/sparse_utils_kernel.h | 2 +- .../tests/unittests/test_sparse_utils_op.py | 25 ++++++++++--- python/paddle/sparse/creation.py | 36 ++++++++++++++++--- python/paddle/utils/code_gen/sparse_api.yaml | 2 +- .../paddle/utils/code_gen/sparse_bw_api.yaml | 2 +- 9 files changed, 89 insertions(+), 36 deletions(-) diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index ea7583a16f9e2..b28bc30e1ffcf 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -41,11 +41,11 @@ inline const DDim InferDenseDims(const DDim& x_dims, } template -inline IntT HOSTDEVICE IndicesToIndex(const IntT* indices, - const IntT* sparse_offsets, - const int64_t non_zero_num, - const int64_t sparse_dim, - const int i) { +inline const IntT HOSTDEVICE IndicesToIndex(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int i) { IntT index = 0; for (IntT j = 0; j < sparse_dim; j++) { index += indices[j * non_zero_num + i] * sparse_offsets[j]; @@ -55,9 +55,9 @@ inline IntT HOSTDEVICE IndicesToIndex(const IntT* indices, // 1. indices.dims().size() == 2 template -inline const void CalcOffsetsPerDim(const DenseTensor& indices, - const DDim& dims, - std::vector* offsets) { +inline void CalcOffsetsPerDim(const DenseTensor& indices, + const DDim& dims, + std::vector* offsets) { const DDim& indices_dims = indices.dims(); const IntT sparse_dim = indices_dims[0]; IntT offset = 1; diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 8d9f0c37df18c..7f61ab7765a12 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -123,17 +123,21 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, sparse_dim, &mask_indexs); - std::set x_indexs_set(x_indexs.begin(), x_indexs.end()); + std::unordered_map x_indexs_map; + for (uint64_t i = 0; i < x_indexs.size(); i++) { + x_indexs_map[x_indexs[i]] = i; + } *out = phi::EmptyLike(dev_ctx, x.non_zero_elements()); T* out_ptr = out->data(); memset(out_ptr, static_cast(0), out->numel() * sizeof(T)); - const int stride = x.non_zero_elements().dims()[1]; + const int64_t stride = + x.dims().size() == sparse_dim ? 
1 : x.dims().size() - sparse_dim; const T* in_ptr = x.non_zero_elements().data(); for (uint64_t i = 0; i < mask_indexs.size(); i++) { - auto iter = x_indexs_set.find(mask_indexs[i]); - if (iter != x_indexs_set.end()) { + auto iter = x_indexs_map.find(mask_indexs[i]); + if (iter != x_indexs_map.end()) { memcpy(out_ptr + i * stride, - in_ptr + mask_indexs[i] * stride, + in_ptr + iter->second * stride, stride * sizeof(T)); } } diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index 1f6b32d90b14e..a9f0408cd2a8d 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -167,21 +167,20 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, const int64_t sparse_dim = x.non_zero_indices().dims()[0]; auto indices_dtype = paddle::experimental::CppTypeToDataType::Type(); - std::vector sparse_offsets( - sparse_dim); //, x_indexs(x.nnz()), mask_indexs(mask_indices.dims()[1]); + std::vector sparse_offsets(sparse_dim); - DenseTensorMeta x_indices_meta(indices_dtype, {x.nnz()}, DataLayout::NCHW); - DenseTensorMeta mask_indices_meta( + DenseTensorMeta x_indexs_meta(indices_dtype, {x.nnz()}, DataLayout::NCHW); + DenseTensorMeta mask_indexs_meta( indices_dtype, {mask_indices.dims()[1]}, DataLayout::NCHW); DenseTensorMeta sparse_offset_meta( indices_dtype, {sparse_dim}, DataLayout::NCHW); DenseTensor x_indexs = - phi::Empty(dev_ctx, std::move(x_indices_meta)); + phi::Empty(dev_ctx, std::move(x_indexs_meta)); DenseTensor mask_indexs = - phi::Empty(dev_ctx, std::move(mask_indices_meta)); + phi::Empty(dev_ctx, std::move(mask_indexs_meta)); DenseTensor bound_out = - phi::Empty(dev_ctx, std::move(mask_indices_meta)); + phi::Empty(dev_ctx, std::move(mask_indexs_meta)); DenseTensor d_sparse_offsets = phi::Empty(dev_ctx, std::move(sparse_offset_meta)); IntT* x_indexs_ptr = x_indexs.data(); @@ -221,11 +220,15 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, 0, dev_ctx.stream()>>>(mask_indices.data(), d_sparse_offsets.data(), - mask_indices.numel(), + mask_indexs.numel(), sparse_dim, mask_indexs_ptr); - // 4. call thrust::lower_bound +// 4. call thrust::lower_bound +#ifdef PADDLE_WITH_HIP + thrust::lower_bound(thrust::hip::par.on(dev_ctx.stream()), +#else thrust::lower_bound(thrust::cuda::par.on(dev_ctx.stream()), +#endif x_indexs_ptr, x_indexs_ptr + x_indexs.numel(), mask_indexs_ptr, @@ -238,6 +241,9 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, set_zero(dev_ctx, out, static_cast(0)); T* out_ptr = out->data(); + const int64_t stride = + x.dims().size() == sparse_dim ? 
1 : x.dims().size() - sparse_dim; + SparseMaskCopyKernel<<(), mask_indexs.numel(), - x.non_zero_elements().dims()[1], + stride, out_ptr); } diff --git a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc index 71958db1d3fd8..15d78692f4f35 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc @@ -76,7 +76,7 @@ PD_REGISTER_KERNEL(sparse_coo_tensor_grad, int16_t, int, int64_t) { - kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/phi/kernels/sparse/sparse_utils_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_kernel.h index 8ead6034b589b..8cf9c0a28648a 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h @@ -150,8 +150,8 @@ void CsrValuesKernel(const Context& dev_ctx, template void SparseCooTensorKernel(const Context& dev_ctx, - const DenseTensor& indices, const DenseTensor& values, + const DenseTensor& indices, const IntArray& dense_shape, SparseCooTensor* out) { *out = diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index f486f6469e599..933e8b952d972 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -18,6 +18,7 @@ import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard +from paddle import _C_ops class TestSparseCreate(unittest.TestCase): @@ -195,17 +196,31 @@ def test_sparse_coo_tensor_grad(self): values, dtype='float32', stop_gradient=False) sparse_x = paddle.sparse.sparse_coo_tensor( indices, values, shape=[2, 2], stop_gradient=False) - print(sparse_x) - grad_indices = [[0, 1], [1, 0]] + grad_indices = [[0, 1], [1, 1]] grad_values = [2, 3] grad_indices = paddle.to_tensor(grad_indices, dtype='int32') grad_values = paddle.to_tensor(grad_values, dtype='float32') sparse_out_grad = paddle.sparse.sparse_coo_tensor( grad_indices, grad_values, shape=[2, 2]) sparse_x.backward(sparse_out_grad) - print(sparse_x.grad) - print(values.grad) - print(indices.grad) + correct_values_grad = [0, 3] + assert np.array_equal(correct_values_grad, values.grad.numpy()) + + place = core.CPUPlace() + indices_cpu = paddle.to_tensor(indices, dtype='int32', place=place) + values_cpu = paddle.to_tensor( + values, dtype='float32', place=place, stop_gradient=False) + sparse_x_cpu = paddle.sparse.sparse_coo_tensor( + indices_cpu, + values_cpu, + shape=[2, 2], + place=place, + stop_gradient=False) + + sparse_out_grad_cpu = paddle.sparse.sparse_coo_tensor( + grad_indices, grad_values, shape=[2, 2], place=place) + sparse_x_cpu.backward(sparse_out_grad_cpu) + assert np.array_equal(correct_values_grad, values_cpu.grad.numpy()) if __name__ == "__main__": diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py index 34a62b947ec62..ac9276f3142c0 100644 --- a/python/paddle/sparse/creation.py +++ b/python/paddle/sparse/creation.py @@ -14,6 +14,7 @@ from paddle import _C_ops from ..framework import core, dygraph_only +from ..framework import _current_expected_place, _get_paddle_place from ..tensor import to_tensor from ..tensor import max from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype @@ -38,6 
+39,18 @@ def _infer_dense_shape(indices): return list(lens.numpy()) +def _get_place(place): + place = _get_paddle_place(place) + if place is None: + place = _current_expected_place() + elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace, + core.CUDAPlace)): + raise ValueError( + "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace" + ) + return place + + @dygraph_only def sparse_coo_tensor(indices, values, @@ -94,6 +107,8 @@ def sparse_coo_tensor(indices, # values=[1., 2., 3.]) """ + place = _get_place(place) + if not isinstance(indices, core.eager.Tensor): indices = to_tensor( indices, dtype=None, place=place, stop_gradient=True) @@ -101,14 +116,19 @@ def sparse_coo_tensor(indices, values = to_tensor(values, dtype, place, stop_gradient) if len(indices.shape) != 2: raise ValueError("'indices' must be 2-D.") - if place is not None: + + if not indices.place._equals(place): indices = indices._copy_to(place, False) + + if not values.place._equals(place): values = values._copy_to(place, False) values = _handle_dtype(values, dtype) + values.stop_gradient = stop_gradient + if shape is None: shape = _infer_dense_shape(indices) -#return core.eager.sparse_coo_tensor(indices, values, shape, stop_gradient) - return _C_ops.final_state_sparse_create_sparse_coo_tensor(indices, values, + + return _C_ops.final_state_sparse_create_sparse_coo_tensor(values, indices, shape) @@ -173,6 +193,9 @@ def sparse_csr_tensor(crows, # cols=[1, 3, 2, 0, 1], # values=[1, 2, 3, 4, 5]) """ + + place = _get_place(place) + if not isinstance(crows, core.eager.Tensor): crows = to_tensor(crows, dtype=None, place=place, stop_gradient=True) if not isinstance(cols, core.eager.Tensor): @@ -184,10 +207,15 @@ def sparse_csr_tensor(crows, "SparseCsrTensor only support 2-D or 3-D matrix. The 'crows', 'cols' and 'values' must be 1-D." 
) - if place is not None: + if not crows.place._equals(place): crows = crows._copy_to(place, False) + + if not cols.place._equals(place): cols = cols._copy_to(place, False) + + if not values.place._equals(place): values = values._copy_to(place, False) values = _handle_dtype(values, dtype) + values.stop_gradient = stop_gradient return core.eager.sparse_csr_tensor(crows, cols, values, shape, stop_gradient) diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index 4f30e966e9eb7..2187d4abb2d63 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -22,7 +22,7 @@ backward : coo_values_grad - api : create_sparse_coo_tensor - args : (Tensor indices, Tensor values, IntArray dense_shape) + args : (Tensor values, Tensor indices, IntArray dense_shape) output : Tensor(out@SparseCooTensor) kernel : func : sparse_coo_tensor diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml index 2280ed6ad61a4..e3946cbf72bc2 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api.yaml +++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml @@ -20,7 +20,7 @@ func : coo_values_grad - backward_api : create_sparse_coo_tensor_grad - forward : create_sparse_coo_tensor(Tensor indices, Tensor values, IntArray dense_shape) -> Tensor(out@SparseCooTensor) + forward : create_sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out@SparseCooTensor) args : (Tensor indices, Tensor out_grad) output : Tensor(values_grad@DenseTensor) kernel : From 8e7ae8b2c670f28ace7633b036035057c3c56465 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Mon, 11 Apr 2022 11:45:55 +0000 Subject: [PATCH 04/23] opt code --- .../phi/kernels/funcs/sparse/common_shape.h | 14 ++++++ .../kernels/sparse/cpu/sparse_mask_kernel.cc | 44 +++++++------------ .../kernels/sparse/gpu/sparse_mask_kernel.cu | 12 +++-- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index b28bc30e1ffcf..722d3e5e8b2ce 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -53,6 +53,20 @@ inline const IntT HOSTDEVICE IndicesToIndex(const IntT* indices, return index; } +template +inline void HOSTDEVICE FlattenIndices(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int start, + const int stride, + IntT* out) { + for (int i = start; i < non_zero_num; i += stride) { + out[i] = + IndicesToIndex(indices, sparse_offsets, non_zero_num, sparse_dim, i); + } +} + // 1. 
indices.dims().size() == 2 template inline void CalcOffsetsPerDim(const DenseTensor& indices, diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 7f61ab7765a12..9dc20ed8b95c8 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -55,16 +55,12 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx, const IntT* indices_ptr = indices.data(); std::vector out_indexs(non_zero_num), sparse_offsets(sparse_dim); - // phi::funcs::sparse::FlattenIndices(indices, x.dims(), &out_indexs); for (int64_t i = 0; i < non_zero_num; i++) { - out_indexs[i] = phi::funcs::sparse::IndicesToIndex( + int64_t index = phi::funcs::sparse::IndicesToIndex( indices_ptr, sparse_offsets.data(), non_zero_num, sparse_dim, i); - } - - for (int64_t i = 0; i < non_zero_num; i++) { - int64_t index = out_indexs[i]; memcpy(out_values_ptr + i * cols, x_ptr + index * cols, cols * sizeof(T)); } + out->SetMember(out_indices, out_values, dims, true); } @@ -101,27 +97,20 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, phi::funcs::sparse::CalcOffsetsPerDim( x.non_zero_indices(), x.dims(), &sparse_offsets); - auto FlattenIndices = [](const IntT* indices, - const IntT* sparse_offsets, - const int64_t non_zero_num, - const int64_t sparse_dim, - std::vector* out) { - for (int64_t i = 0; i < non_zero_num; i++) { - (*out)[i] = phi::funcs::sparse::IndicesToIndex( - indices, sparse_offsets, non_zero_num, sparse_dim, i); - } - }; - - FlattenIndices(x.non_zero_indices().data(), - sparse_offsets.data(), - x.nnz(), - sparse_dim, - &x_indexs); - FlattenIndices(mask_indices.data(), - sparse_offsets.data(), - x.nnz(), - sparse_dim, - &mask_indexs); + phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data(), + sparse_offsets.data(), + x.nnz(), + sparse_dim, + 0, + 1, + x_indexs.data()); + phi::funcs::sparse::FlattenIndices(mask_indices.data(), + sparse_offsets.data(), + x.nnz(), + sparse_dim, + 0, + 1, + mask_indexs.data()); std::unordered_map x_indexs_map; for (uint64_t i = 0; i < x_indexs.size(); i++) { @@ -133,6 +122,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, const int64_t stride = x.dims().size() == sparse_dim ? 
1 : x.dims().size() - sparse_dim; const T* in_ptr = x.non_zero_elements().data(); + // TODO(zhangkaihuo): multithreading can be used for acceleration for (uint64_t i = 0; i < mask_indexs.size(); i++) { auto iter = x_indexs_map.find(mask_indexs[i]); if (iter != x_indexs_map.end()) { diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index a9f0408cd2a8d..624f2b468e3cc 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -130,10 +130,14 @@ __global__ void FlattenIndicesKernel(const IntT* indices, const int64_t non_zero_num, const int64_t sparse_dim, IntT* out) { - CUDA_KERNEL_LOOP_TYPE(i, non_zero_num, int64_t) { - out[i] = phi::funcs::sparse::IndicesToIndex( - indices, sparse_offsets, non_zero_num, sparse_dim, i); - } + int tid = threadIdx.x + blockIdx.x * blockDim.x; + phi::funcs::sparse::FlattenIndices(indices, + sparse_offsets, + non_zero_num, + sparse_dim, + tid, + gridDim.x * blockDim.x, + out); } template From 4622c74c6b06130ce6880b2851f846356e62f80d Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Mon, 11 Apr 2022 11:52:40 +0000 Subject: [PATCH 05/23] add todo --- paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index 624f2b468e3cc..ca5097d0ea872 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -124,6 +124,7 @@ void SparseMaskKernel(const Context& dev_ctx, })); } +// TODO(zhangkaihuo): Use an op to realize the function of FlattenIndices template __global__ void FlattenIndicesKernel(const IntT* indices, const IntT* sparse_offsets, From 760262a6f4635d6ef6d142d3ec8e946ea87a42a0 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Tue, 12 Apr 2022 02:17:28 +0000 Subject: [PATCH 06/23] fix sparse_mask_cpu --- .../phi/kernels/funcs/sparse/common_shape.h | 6 ++-- .../kernels/sparse/cpu/sparse_mask_kernel.cc | 6 +++- .../kernels/sparse/gpu/sparse_mask_kernel.cu | 3 +- .../tests/unittests/test_sparse_utils_op.py | 30 ++++++++++++++----- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index 722d3e5e8b2ce..e4c836d116252 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -69,11 +69,9 @@ inline void HOSTDEVICE FlattenIndices(const IntT* indices, // 1. 
indices.dims().size() == 2 template -inline void CalcOffsetsPerDim(const DenseTensor& indices, - const DDim& dims, +inline void CalcOffsetsPerDim(const DDim& dims, + const int64_t sparse_dim, std::vector* offsets) { - const DDim& indices_dims = indices.dims(); - const IntT sparse_dim = indices_dims[0]; IntT offset = 1; for (IntT i = sparse_dim - 1; i >= 0; i--) { (*offsets)[i] = offset; diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 9dc20ed8b95c8..a07a7fb2ecf44 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -55,6 +55,10 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx, const IntT* indices_ptr = indices.data(); std::vector out_indexs(non_zero_num), sparse_offsets(sparse_dim); + + phi::funcs::sparse::CalcOffsetsPerDim( + dims, sparse_dim, &sparse_offsets); + for (int64_t i = 0; i < non_zero_num; i++) { int64_t index = phi::funcs::sparse::IndicesToIndex( indices_ptr, sparse_offsets.data(), non_zero_num, sparse_dim, i); @@ -95,7 +99,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, std::vector sparse_offsets(sparse_dim), x_indexs(x.nnz()), mask_indexs(mask_indices.dims()[1]); phi::funcs::sparse::CalcOffsetsPerDim( - x.non_zero_indices(), x.dims(), &sparse_offsets); + x.dims(), sparse_dim, &sparse_offsets); phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data(), sparse_offsets.data(), diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index ca5097d0ea872..96ab56697b9b0 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -193,8 +193,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, IntT* bound_out_ptr = bound_out.data(); // 1. calc the offsets of per dim - phi::funcs::sparse::CalcOffsetsPerDim( - x.non_zero_indices(), x.dims(), &sparse_offsets); + phi::funcs::sparse::CalcOffsetsPerDim(x.dims(), sparse_dim, &sparse_offsets); // 2. 
copy sparse_offsets to device phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data(), sparse_offsets.data(), diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 933e8b952d972..89cfc711910ce 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -from paddle import _C_ops class TestSparseCreate(unittest.TestCase): @@ -135,9 +134,11 @@ def test_to_sparse_coo(self): #test to_sparse_coo_grad backward out_grad_indices = [[0, 1], [0, 1]] out_grad_values = [2.0, 3.0] - out_grad = core.eager.sparse_coo_tensor( + out_grad = paddle.sparse.sparse_coo_tensor( paddle.to_tensor(out_grad_indices), - paddle.to_tensor(out_grad_values), out.shape, True) + paddle.to_tensor(out_grad_values), + shape=out.shape, + stop_gradient=True) out.backward(out_grad) assert np.array_equal(dense_x.grad.numpy(), out_grad.to_dense().numpy()) @@ -146,9 +147,11 @@ def test_coo_to_dense(self): with _test_eager_guard(): indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] values = [1.0, 2.0, 3.0, 4.0, 5.0] - sparse_x = core.eager.sparse_coo_tensor( + sparse_x = paddle.sparse.sparse_coo_tensor( paddle.to_tensor(indices), - paddle.to_tensor(values), [3, 4], False) + paddle.to_tensor(values), + shape=[3, 4], + stop_gradient=False) dense_tensor = sparse_x.to_dense() #test to_dense_grad backward out_grad = [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], @@ -159,6 +162,17 @@ def test_coo_to_dense(self): assert np.array_equal(correct_x_grad, sparse_x.grad.values().numpy()) + paddle.device.set_device("cpu") + sparse_x_cpu = paddle.sparse.sparse_coo_tensor( + paddle.to_tensor(indices), + paddle.to_tensor(values), + shape=[3, 4], + stop_gradient=False) + dense_tensor_cpu = sparse_x_cpu.to_dense() + dense_tensor_cpu.backward(paddle.to_tensor(out_grad)) + assert np.array_equal(correct_x_grad, + sparse_x_cpu.grad.values().numpy()) + def test_to_sparse_csr(self): with _test_eager_guard(): x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]] @@ -178,9 +192,11 @@ def test_coo_values_grad(self): with _test_eager_guard(): indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] values = [1.0, 2.0, 3.0, 4.0, 5.0] - sparse_x = core.eager.sparse_coo_tensor( + sparse_x = paddle.sparse.sparse_coo_tensor( paddle.to_tensor(indices), - paddle.to_tensor(values), [3, 4], False) + paddle.to_tensor(values), + shape=[3, 4], + stop_gradient=False) values_tensor = sparse_x.values() out_grad = [2.0, 3.0, 5.0, 8.0, 9.0] # test coo_values_grad From b6bfb2a48a0a59211d8afbaccfed60042102f3e7 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 14 Apr 2022 02:52:21 +0000 Subject: [PATCH 07/23] sparse sort --- .../phi/kernels/funcs/sparse/common_shape.h | 39 ----- .../kernels/funcs/sparse/flatten_indices.cu.h | 57 +++++++ .../kernels/funcs/sparse/flatten_indices.h | 93 ++++++++++ paddle/phi/kernels/sparse/cpu/sort_kernel.cc | 98 +++++++++++ .../kernels/sparse/cpu/sparse_mask_kernel.cc | 8 +- .../kernels/sparse/cpu/sparse_utils_kernel.cc | 1 - paddle/phi/kernels/sparse/gpu/sort_kernel.cu | 161 ++++++++++++++++++ .../kernels/sparse/gpu/sparse_mask_kernel.cu | 56 +++--- paddle/phi/kernels/sparse/sort_kernel.h | 30 ++++ .../phi/kernels/sparse/sparse_utils_kernel.h | 9 +- .../tests/unittests/test_sparse_utils_op.py | 68 ++++---- python/paddle/utils/code_gen/sparse_api.yaml | 1 + 12 files changed, 507 insertions(+), 
114 deletions(-) create mode 100644 paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h create mode 100644 paddle/phi/kernels/funcs/sparse/flatten_indices.h create mode 100644 paddle/phi/kernels/sparse/cpu/sort_kernel.cc create mode 100644 paddle/phi/kernels/sparse/gpu/sort_kernel.cu create mode 100644 paddle/phi/kernels/sparse/sort_kernel.h diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index e4c836d116252..3617e3cd2f406 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -40,45 +40,6 @@ inline const DDim InferDenseDims(const DDim& x_dims, return values_dims; } -template -inline const IntT HOSTDEVICE IndicesToIndex(const IntT* indices, - const IntT* sparse_offsets, - const int64_t non_zero_num, - const int64_t sparse_dim, - const int i) { - IntT index = 0; - for (IntT j = 0; j < sparse_dim; j++) { - index += indices[j * non_zero_num + i] * sparse_offsets[j]; - } - return index; -} - -template -inline void HOSTDEVICE FlattenIndices(const IntT* indices, - const IntT* sparse_offsets, - const int64_t non_zero_num, - const int64_t sparse_dim, - const int start, - const int stride, - IntT* out) { - for (int i = start; i < non_zero_num; i += stride) { - out[i] = - IndicesToIndex(indices, sparse_offsets, non_zero_num, sparse_dim, i); - } -} - -// 1. indices.dims().size() == 2 -template -inline void CalcOffsetsPerDim(const DDim& dims, - const int64_t sparse_dim, - std::vector* offsets) { - IntT offset = 1; - for (IntT i = sparse_dim - 1; i >= 0; i--) { - (*offsets)[i] = offset; - offset *= dims[i]; - } -} - } // namespace sparse } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h b/paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h new file mode 100644 index 0000000000000..26b8549aaafdc --- /dev/null +++ b/paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" + +namespace phi { +namespace funcs { +namespace sparse { + +template +__global__ void FlattenIndicesKernel(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + IntT* out) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + phi::funcs::sparse::FlattenIndices(indices, + sparse_offsets, + non_zero_num, + sparse_dim, + tid, + gridDim.x * blockDim.x, + out); +} + +template +__global__ void IndexToCoordinateKernel(const IntT* indexs, + const Dim dims, + const int64_t non_zero_num, + const int64_t sparse_dim, + IntT* indices) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + IndexToCoordinate(indexs, + dims, + non_zero_num, + sparse_dim, + tid, + gridDim.x * blockDim.x, + indices); +} + +} // namespace sparse +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/funcs/sparse/flatten_indices.h b/paddle/phi/kernels/funcs/sparse/flatten_indices.h new file mode 100644 index 0000000000000..ca212e4366ec4 --- /dev/null +++ b/paddle/phi/kernels/funcs/sparse/flatten_indices.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/phi/core/ddim.h" + +namespace phi { +namespace funcs { +namespace sparse { + +template +inline const IntT HOSTDEVICE CoordinateToIndex(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int i) { + IntT index = 0; + for (IntT j = 0; j < sparse_dim; j++) { + index += indices[j * non_zero_num + i] * sparse_offsets[j]; + } + return index; +} + +template +inline void HOSTDEVICE FlattenIndices(const IntT* indices, + const IntT* sparse_offsets, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int64_t start, + const int64_t stride, + IntT* out) { + for (int64_t i = start; i < non_zero_num; i += stride) { + out[i] = + CoordinateToIndex(indices, sparse_offsets, non_zero_num, sparse_dim, i); + } +} + +// 1. 
indices.dims().size() == 2 +template +inline void CalcOffsetsPerDim(const DDim& dims, + const int64_t sparse_dim, + IntT* offsets) { + IntT offset = 1; + for (IntT i = sparse_dim - 1; i >= 0; i--) { + offsets[i] = offset; + offset *= dims[i]; + } +} + +template +inline void HOSTDEVICE IndexToCoordinate(const IntT index, + const Dim& dims, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int indices_offset, + IntT* indices) { + IntT tmp_index = index; + for (int j = sparse_dim - 1; j >= 0; j--) { + indices[j * non_zero_num + indices_offset] = tmp_index % dims[j]; + tmp_index /= dims[j]; + } +} + +template +inline void HOSTDEVICE IndexToCoordinate(const IntT* indexs, + const Dim& dims, + const int64_t non_zero_num, + const int64_t sparse_dim, + const int64_t start, + const int64_t stride, + IntT* indices) { + for (int64_t i = start; i < non_zero_num; i += stride) { + IntT tmp_index = indexs[i]; + IndexToCoordinate(tmp_index, dims, non_zero_num, sparse_dim, i, indices); + } +} + +} // namespace sparse +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/sparse/cpu/sort_kernel.cc b/paddle/phi/kernels/sparse/cpu/sort_kernel.cc new file mode 100644 index 0000000000000..eee44ebda7f6f --- /dev/null +++ b/paddle/phi/kernels/sparse/cpu/sort_kernel.cc @@ -0,0 +1,98 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/sparse/sort_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" + +#include "paddle/phi/api/ext/dispatch.h" + +namespace phi { +namespace sparse { + +template +void SortCPUKernel(const CPUContext& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + const DenseTensor& x_indices = x.non_zero_indices(); + const DenseTensor& x_values = x.non_zero_elements(); + DenseTensor out_indices = phi::EmptyLike(dev_ctx, x_indices); + DenseTensor out_values = phi::EmptyLike(dev_ctx, x_values); + + const int64_t sparse_dim = x.non_zero_indices().dims()[0]; + std::vector sparse_offsets(sparse_dim), x_indexs(x.nnz()); + phi::funcs::sparse::CalcOffsetsPerDim( + x.dims(), sparse_dim, sparse_offsets.data()); + + phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data(), + sparse_offsets.data(), + x.nnz(), + sparse_dim, + 0, + 1, + x_indexs.data()); + + const T* x_values_ptr = x_values.data(); + const int64_t stride = + x.dims().size() == sparse_dim ? 
1 : x.dims().size() - sparse_dim; + + std::map indices_to_index; + for (uint64_t i = 0; i < x_indexs.size(); i++) { + indices_to_index[x_indexs[i]] = i; + } + + IntT* out_indices_ptr = out_indices.data(); + T* out_values_ptr = out_values.data(); + auto iter = indices_to_index.begin(); + + Dim const_dims; + for (int i = 0; i < x.dims().size(); i++) { + const_dims[i] = x.dims()[i]; + } + + for (int i = 0; iter != indices_to_index.end(); iter++, i++) { + phi::funcs::sparse::IndexToCoordinate( + iter->first, const_dims, x.nnz(), sparse_dim, i, out_indices_ptr); + memcpy(out_values_ptr + i * stride, + x_values_ptr + iter->second * stride, + stride * sizeof(T)); + } + out->SetMember(out_indices, out_values, x.dims(), true); +} + +template +void SortKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + PD_DISPATCH_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "SortCPUKernel", ([&] { + SortCPUKernel(dev_ctx, x, out); + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(sort, + CPU, + ALL_LAYOUT, + phi::sparse::SortKernel, + float, + double, + uint8_t, + int16_t, + int, + int64_t) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index a07a7fb2ecf44..33e62c5bfe88a 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/sparse/common_shape.h" +#include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" #include "paddle/phi/api/ext/dispatch.h" @@ -57,10 +57,10 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx, std::vector out_indexs(non_zero_num), sparse_offsets(sparse_dim); phi::funcs::sparse::CalcOffsetsPerDim( - dims, sparse_dim, &sparse_offsets); + dims, sparse_dim, sparse_offsets.data()); for (int64_t i = 0; i < non_zero_num; i++) { - int64_t index = phi::funcs::sparse::IndicesToIndex( + int64_t index = phi::funcs::sparse::CoordinateToIndex( indices_ptr, sparse_offsets.data(), non_zero_num, sparse_dim, i); memcpy(out_values_ptr + i * cols, x_ptr + index * cols, cols * sizeof(T)); } @@ -99,7 +99,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, std::vector sparse_offsets(sparse_dim), x_indexs(x.nnz()), mask_indexs(mask_indices.dims()[1]); phi::funcs::sparse::CalcOffsetsPerDim( - x.dims(), sparse_dim, &sparse_offsets); + x.dims(), sparse_dim, sparse_offsets.data()); phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data(), sparse_offsets.data(), diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index 0499371a4dd17..081981a0fb8ec 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -401,7 +401,6 @@ PD_REGISTER_KERNEL(sparse_coo_tensor, phi::sparse::SparseCooTensorKernel, float, double, - phi::dtype::float16, uint8_t, int16_t, int, diff --git a/paddle/phi/kernels/sparse/gpu/sort_kernel.cu b/paddle/phi/kernels/sparse/gpu/sort_kernel.cu new file mode 100644 index 0000000000000..f1cf91425cb36 --- /dev/null +++ b/paddle/phi/kernels/sparse/gpu/sort_kernel.cu @@ -0,0 +1,161 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/index_impl.cu.h" +#include "paddle/phi/kernels/funcs/scatter.cu.h" +#include "paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h" +#include "paddle/phi/kernels/sparse/sort_kernel.h" + +#include "paddle/phi/api/ext/dispatch.h" + +namespace phi { +namespace sparse { + +template +__global__ void CopyValuesKernel(const T* x_values, + const IntT* indexs, + const int64_t n, + const int64_t stride, + T* out_values) { + CUDA_KERNEL_LOOP_TYPE(i, n * stride, int64_t) { + int64_t indices_i = i / stride; + int64_t slice_i = i - indices_i * stride; // offset inside the slice + IntT in_i = indexs[indices_i]; + out_values[i * stride + slice_i] = x_values[in_i * stride + slice_i]; + } +} + +template +void SortGPUKernel(const GPUContext& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + const DenseTensor& x_indices = x.non_zero_indices(); + const DenseTensor& x_values = x.non_zero_elements(); + DenseTensor out_indices = phi::EmptyLike(dev_ctx, x_indices); + DenseTensor out_values = phi::EmptyLike(dev_ctx, x_values); + + const int64_t nnz = x.nnz(); + const int64_t sparse_dim = x.non_zero_indices().dims()[0]; + std::vector sparse_offsets(sparse_dim); + + phi::funcs::sparse::CalcOffsetsPerDim( + x.dims(), sparse_dim, sparse_offsets.data()); + + DenseTensorMeta sparse_offset_meta( + paddle::experimental::CppTypeToDataType::Type(), + {sparse_dim}, + DataLayout::NCHW); + DenseTensor d_sparse_offsets = + phi::Empty(dev_ctx, std::move(sparse_offset_meta)); + DenseTensor indexs = phi::Empty( + dev_ctx, DenseTensorMeta(x_indices.dtype(), {nnz}, x_indices.layout())); + IntT* indexs_ptr = indexs.data(); + + phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data(), + sparse_offsets.data(), + sizeof(IntT) * sparse_dim, +#ifdef PADDLE_WITH_HIP + hipMemcpyHostToDevice, +#else + cudaMemcpyHostToDevice, +#endif + dev_ctx.stream()); + + // 1. flatten indices + auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, nnz, 1); + phi::funcs::sparse::FlattenIndicesKernel<<>>( + x.non_zero_indices().data(), + d_sparse_offsets.data(), + indexs.numel(), + sparse_dim, + indexs_ptr); + + // 2. get the address of each non-zero values + const T* x_values_ptr = x_values.data(); + const int64_t stride = + x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim; + DenseTensor values_indexs = phi::Empty( + dev_ctx, DenseTensorMeta(DataType::INT32, {nnz}, DataLayout::NCHW)); + int* values_indexs_ptr = values_indexs.data(); + + // values_indexs = [0,1,2,,,nnz-1] + phi::IndexKernel>( + dev_ctx, &values_indexs, kps::IdentityFunctor()); + +// 3. 
sort (indices, values index) +#ifdef PADDLE_WITH_HIP + thrust::sort_by_key(thrust::hip::par.on(dev_ctx.stream()), +#else + thrust::sort_by_key(thrust::cuda::par.on(dev_ctx.stream()), +#endif + indexs_ptr, + indexs_ptr + nnz, + values_indexs_ptr); + + config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, nnz * stride, 1); + // 4. scatter the values + CopyValuesKernel<<>>( + x_values_ptr, values_indexs_ptr, nnz, stride, out_values.data()); + + // 6. convert index to coordinate + Dim const_dims; + for (int i = 0; i < x.dims().size(); i++) { + const_dims[i] = x.dims()[i]; + } + + config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, nnz, 1); + phi::funcs::sparse::IndexToCoordinateKernel<<>>( + indexs_ptr, const_dims, nnz, sparse_dim, out_indices.data()); + + out->SetMember(out_indices, out_values, x.dims(), true); +} + +template +void SortKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + PD_DISPATCH_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "SortGPUKernel", ([&] { + SortGPUKernel(dev_ctx, x, out); + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(sort, + GPU, + ALL_LAYOUT, + phi::sparse::SortKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index 96ab56697b9b0..c20234897bb93 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/sparse/common_shape.h" +#include "paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h" #include "paddle/phi/kernels/sparse/sparse_mask_kernel.h" #include "paddle/phi/api/ext/dispatch.h" @@ -124,23 +124,6 @@ void SparseMaskKernel(const Context& dev_ctx, })); } -// TODO(zhangkaihuo): Use an op to realize the function of FlattenIndices -template -__global__ void FlattenIndicesKernel(const IntT* indices, - const IntT* sparse_offsets, - const int64_t non_zero_num, - const int64_t sparse_dim, - IntT* out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - phi::funcs::sparse::FlattenIndices(indices, - sparse_offsets, - non_zero_num, - sparse_dim, - tid, - gridDim.x * blockDim.x, - out); -} - template __global__ void SparseMaskCopyKernel(const IntT* x_indexs, const IntT* mask_indexs, @@ -193,7 +176,8 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, IntT* bound_out_ptr = bound_out.data(); // 1. calc the offsets of per dim - phi::funcs::sparse::CalcOffsetsPerDim(x.dims(), sparse_dim, &sparse_offsets); + phi::funcs::sparse::CalcOffsetsPerDim( + x.dims(), sparse_dim, sparse_offsets.data()); // 2. copy sparse_offsets to device phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data(), sparse_offsets.data(), @@ -208,25 +192,27 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, // 3. 
flatten x indices and mask indices auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, x_indexs.numel(), 1); - FlattenIndicesKernel<<>>(x.non_zero_indices().data(), - d_sparse_offsets.data(), - x_indexs.numel(), - sparse_dim, - x_indexs_ptr); + phi::funcs::sparse::FlattenIndicesKernel<<>>( + x.non_zero_indices().data(), + d_sparse_offsets.data(), + x_indexs.numel(), + sparse_dim, + x_indexs_ptr); config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, mask_indexs.numel(), 1); - FlattenIndicesKernel<<>>(mask_indices.data(), - d_sparse_offsets.data(), - mask_indexs.numel(), - sparse_dim, - mask_indexs_ptr); + phi::funcs::sparse::FlattenIndicesKernel<<>>( + mask_indices.data(), + d_sparse_offsets.data(), + mask_indexs.numel(), + sparse_dim, + mask_indexs_ptr); // 4. call thrust::lower_bound #ifdef PADDLE_WITH_HIP thrust::lower_bound(thrust::hip::par.on(dev_ctx.stream()), diff --git a/paddle/phi/kernels/sparse/sort_kernel.h b/paddle/phi/kernels/sparse/sort_kernel.h new file mode 100644 index 0000000000000..4fb7714d5bfab --- /dev/null +++ b/paddle/phi/kernels/sparse/sort_kernel.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/kernels/empty_kernel.h" + +namespace phi { +namespace sparse { + +template +void SortKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out); + +} // namespace sparse +} // namespace phi diff --git a/paddle/phi/kernels/sparse/sparse_utils_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_kernel.h index 8cf9c0a28648a..83de8bfec5d1b 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h @@ -20,6 +20,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/sparse/sort_kernel.h" namespace phi { namespace sparse { @@ -154,9 +155,11 @@ void SparseCooTensorKernel(const Context& dev_ctx, const DenseTensor& indices, const IntArray& dense_shape, SparseCooTensor* out) { - *out = - SparseCooTensor(indices, values, phi::make_ddim(dense_shape.GetData())); - // TODO(zhangkaihuo): sort and merge the dumplicate indices + SparseCooTensor before_sorted( + indices, values, phi::make_ddim(dense_shape.GetData())); + // sort + SortKernel(dev_ctx, before_sorted, out); + // TODO(zhangkaihuo): merge the dumplicate indices } } // namespace sparse diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 89cfc711910ce..2ebd0edb20f77 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -19,6 +19,8 @@ import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard +devices = ['cpu', 'gpu'] + class TestSparseCreate(unittest.TestCase): def test_create_coo_by_tensor(self): @@ -39,6 +41,7 @@ def test_create_coo_by_np(self): values = [1.0, 2.0, 3.0] dense_shape = [2, 3] coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape) + print(coo) assert np.array_equal(indices, coo.indices().numpy()) assert np.array_equal(values, coo.values().numpy()) @@ -205,38 +208,39 @@ def test_coo_values_grad(self): def test_sparse_coo_tensor_grad(self): with _test_eager_guard(): - indices = [[0, 1], [0, 1]] - values = [1, 2] - indices = paddle.to_tensor(indices, dtype='int32') - values = paddle.to_tensor( - values, dtype='float32', stop_gradient=False) - sparse_x = paddle.sparse.sparse_coo_tensor( - indices, values, shape=[2, 2], stop_gradient=False) - grad_indices = [[0, 1], [1, 1]] - grad_values = [2, 3] - grad_indices = paddle.to_tensor(grad_indices, dtype='int32') - grad_values = paddle.to_tensor(grad_values, dtype='float32') - sparse_out_grad = paddle.sparse.sparse_coo_tensor( - grad_indices, grad_values, shape=[2, 2]) - sparse_x.backward(sparse_out_grad) - correct_values_grad = [0, 3] - assert np.array_equal(correct_values_grad, values.grad.numpy()) - - place = core.CPUPlace() - indices_cpu = paddle.to_tensor(indices, dtype='int32', place=place) - values_cpu = paddle.to_tensor( - values, dtype='float32', place=place, stop_gradient=False) - sparse_x_cpu = paddle.sparse.sparse_coo_tensor( - indices_cpu, - values_cpu, - shape=[2, 2], - place=place, - stop_gradient=False) - - sparse_out_grad_cpu = paddle.sparse.sparse_coo_tensor( - grad_indices, grad_values, shape=[2, 2], place=place) - sparse_x_cpu.backward(sparse_out_grad_cpu) - assert np.array_equal(correct_values_grad, values_cpu.grad.numpy()) + for device in devices: + paddle.device.set_device(device) + indices = [[0, 1], [0, 1]] + values = [1, 2] + indices = paddle.to_tensor(indices, dtype='int32') + values = paddle.to_tensor( + values, dtype='float32', stop_gradient=False) + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=[2, 2], stop_gradient=False) + grad_indices = [[0, 1], [1, 1]] + grad_values = [2, 3] + grad_indices = paddle.to_tensor(grad_indices, dtype='int32') + grad_values = paddle.to_tensor(grad_values, dtype='float32') + sparse_out_grad = paddle.sparse.sparse_coo_tensor( + grad_indices, grad_values, shape=[2, 2]) + 
sparse_x.backward(sparse_out_grad) + correct_values_grad = [0, 3] + assert np.array_equal(correct_values_grad, values.grad.numpy()) + + def test_sparse_coo_tensor_sorted(self): + with _test_eager_guard(): + for device in devices: + paddle.device.set_device(device) + indices = [[1, 0], [0, 1]] + values = [1.0, 2.0] + indices = paddle.to_tensor(indices, dtype='int32') + values = paddle.to_tensor(values, dtype='float32') + sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) + indices_sorted = [[0, 1], [1, 0]] + values_sorted = [2.0, 1.0] + assert np.array_equal(indices_sorted, + sparse_x.indices().numpy()) + assert np.array_equal(values_sorted, sparse_x.values().numpy()) if __name__ == "__main__": diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index 2187d4abb2d63..100d7ad78319b 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -27,6 +27,7 @@ kernel : func : sparse_coo_tensor layout : values + data_type : values backward : create_sparse_coo_tensor_grad - api : csr_values From 8689f7ae3465eec860423c1c7817c547ab182cc1 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 14 Apr 2022 03:38:54 +0000 Subject: [PATCH 08/23] check shape --- .../fluid/tests/unittests/test_sparse_utils_op.py | 14 ++++++++++++-- python/paddle/sparse/creation.py | 7 ++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 2ebd0edb20f77..157da73ad07de 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -39,9 +39,8 @@ def test_create_coo_by_np(self): with _test_eager_guard(): indices = [[0, 1, 2], [1, 2, 0]] values = [1.0, 2.0, 3.0] - dense_shape = [2, 3] + dense_shape = [3, 3] coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape) - print(coo) assert np.array_equal(indices, coo.indices().numpy()) assert np.array_equal(values, coo.values().numpy()) @@ -243,5 +242,16 @@ def test_sparse_coo_tensor_sorted(self): assert np.array_equal(values_sorted, sparse_x.values().numpy()) +class TestError(unittest.TestCase): + def test_sparse_coo_tensor(self): + with self.assertRaises(ValueError): + indices = [[2, 3], [0, 2]] + values = [1, 2] + #the shape too small + dense_shape = [2, 2] + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=dense_shape) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py index ac9276f3142c0..b48cc60851c99 100644 --- a/python/paddle/sparse/creation.py +++ b/python/paddle/sparse/creation.py @@ -125,8 +125,13 @@ def sparse_coo_tensor(indices, values = _handle_dtype(values, dtype) values.stop_gradient = stop_gradient + min_shape = _infer_dense_shape(indices) if shape is None: - shape = _infer_dense_shape(indices) + shape = min_shape + else: + if shape < min_shape: + raise ValueError("the minimun shape required is {}, but get {}". 
+ format(min_shape, shape)) return _C_ops.final_state_sparse_create_sparse_coo_tensor(values, indices, shape) From 30f85d8c0d056288275d86aa766481b85a3c2756 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 14 Apr 2022 05:02:57 +0000 Subject: [PATCH 09/23] fix float16 --- paddle/phi/kernels/sparse/cpu/sort_kernel.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/phi/kernels/sparse/cpu/sort_kernel.cc b/paddle/phi/kernels/sparse/cpu/sort_kernel.cc index 46e6acfd3a9de..6cbd3ba59a597 100644 --- a/paddle/phi/kernels/sparse/cpu/sort_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sort_kernel.cc @@ -88,6 +88,7 @@ PD_REGISTER_KERNEL(sort, phi::sparse::SortKernel, float, double, + phi::dtype::float16, uint8_t, int16_t, int, From 09b94482cc7045d3aeaf0cd83033ff2c6447d9ce Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Fri, 15 Apr 2022 05:48:43 +0000 Subject: [PATCH 10/23] add coalesced --- paddle/phi/kernels/funcs/sparse/scatter.cu.h | 63 ++++++++++++ paddle/phi/kernels/funcs/sparse/utils.cu.h | 31 ++++++ .../{sort_kernel.h => coalesced_kernel.h} | 6 +- .../{sort_kernel.cc => coalesced_kernel.cc} | 54 +++++++--- .../{sort_kernel.cu => coalesced_kernel.cu} | 98 ++++++++++++------- .../phi/kernels/sparse/gpu/convolution.cu.h | 57 +---------- .../sparse/gpu/convolution_grad_kernel.cu | 24 ++--- .../kernels/sparse/gpu/convolution_kernel.cu | 22 +++-- .../phi/kernels/sparse/sparse_utils_kernel.h | 8 +- .../tests/unittests/test_sparse_utils_op.py | 7 +- 10 files changed, 237 insertions(+), 133 deletions(-) create mode 100644 paddle/phi/kernels/funcs/sparse/scatter.cu.h create mode 100644 paddle/phi/kernels/funcs/sparse/utils.cu.h rename paddle/phi/kernels/sparse/{sort_kernel.h => coalesced_kernel.h} (86%) rename paddle/phi/kernels/sparse/cpu/{sort_kernel.cc => coalesced_kernel.cc} (65%) rename paddle/phi/kernels/sparse/gpu/{sort_kernel.cu => coalesced_kernel.cu} (64%) diff --git a/paddle/phi/kernels/funcs/sparse/scatter.cu.h b/paddle/phi/kernels/funcs/sparse/scatter.cu.h new file mode 100644 index 0000000000000..9ed7cef12a148 --- /dev/null +++ b/paddle/phi/kernels/funcs/sparse/scatter.cu.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +namespace phi { +namespace funcs { +namespace sparse { + +/** + * brief: scatter add + * input: the inputs + * unique_value: refer to UpdateIndexKernel notes + * out_index: the output feature index + * non_zero_num: the number of output features + * rulebook_len: the length of rulebook + * channels: the output channel size + * out: the outputs +**/ +template +__global__ void ScatterKernel(const T* input, + const int* unique_value, + const int* out_index, + const int non_zero_num, + const int rulebook_len, + const int channels, + T* out, + const bool subm = false) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < non_zero_num * channels; i += gridDim.x * blockDim.x) { + int indices_i = i / channels; + int channels_i = i - indices_i * channels; + + int start = unique_value[indices_i]; + int end = indices_i == non_zero_num - 1 ? rulebook_len + : unique_value[indices_i + 1]; + // max(end-start) = kernel_size + T sum = static_cast(0); + if (subm) { + sum = out[indices_i * channels + channels_i]; + } + for (int j = start; j < end; j++) { + const int out_feature_i = out_index[j]; + sum += input[out_feature_i * channels + channels_i]; + } + out[indices_i * channels + channels_i] = sum; + } +} + +} // namespace sparse +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/funcs/sparse/utils.cu.h b/paddle/phi/kernels/funcs/sparse/utils.cu.h new file mode 100644 index 0000000000000..074fe1ca42049 --- /dev/null +++ b/paddle/phi/kernels/funcs/sparse/utils.cu.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +namespace phi { +namespace funcs { +namespace sparse { + +// brief: calculation the distance between start and end +template +__global__ void DistanceKernel(const T* start, const T* end, T* distance) { + if (threadIdx.x == 0) { + *distance = end - start; + } +} + +} // namespace sparse +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/sparse/sort_kernel.h b/paddle/phi/kernels/sparse/coalesced_kernel.h similarity index 86% rename from paddle/phi/kernels/sparse/sort_kernel.h rename to paddle/phi/kernels/sparse/coalesced_kernel.h index 4fb7714d5bfab..0755579a57ade 100644 --- a/paddle/phi/kernels/sparse/sort_kernel.h +++ b/paddle/phi/kernels/sparse/coalesced_kernel.h @@ -22,9 +22,9 @@ namespace phi { namespace sparse { template -void SortKernel(const Context& dev_ctx, - const SparseCooTensor& x, - SparseCooTensor* out); +void CoalescedKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out); } // namespace sparse } // namespace phi diff --git a/paddle/phi/kernels/sparse/cpu/sort_kernel.cc b/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc similarity index 65% rename from paddle/phi/kernels/sparse/cpu/sort_kernel.cc rename to paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc index 6cbd3ba59a597..0ebddf9b683f0 100644 --- a/paddle/phi/kernels/sparse/cpu/sort_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/kernels/sparse/sort_kernel.h" +#include "paddle/phi/kernels/sparse/coalesced_kernel.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" @@ -21,9 +21,9 @@ namespace phi { namespace sparse { template -void SortCPUKernel(const CPUContext& dev_ctx, - const SparseCooTensor& x, - SparseCooTensor* out) { +void CoalescedCPUKernel(const CPUContext& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { const DenseTensor& x_indices = x.non_zero_indices(); const DenseTensor& x_values = x.non_zero_elements(); DenseTensor out_indices = phi::EmptyLike(dev_ctx, x_indices); @@ -46,9 +46,25 @@ void SortCPUKernel(const CPUContext& dev_ctx, const int64_t stride = x.dims().size() == sparse_dim ? 
1 : x.dims().size() - sparse_dim; - std::map indices_to_index; + std::map> indices_to_index; for (uint64_t i = 0; i < x_indexs.size(); i++) { - indices_to_index[x_indexs[i]] = i; + IntT index = x_indexs[i]; + if (indices_to_index.find(index) == indices_to_index.end()) { + std::vector indexs; + indexs.push_back(i); + indices_to_index[index] = indexs; + } else { + indices_to_index[index].push_back(i); + } + } + + const int64_t out_nnz = indices_to_index.size(); + + out_indices.Resize({x_indices.dims()[0], out_nnz}); + if (out_values.dims().size() == 1) { + out_values.Resize(phi::make_ddim({out_nnz})); + } else { + out_values.Resize(phi::make_ddim({out_nnz, x_values.dims()[1]})); } IntT* out_indices_ptr = out_indices.data(); @@ -62,21 +78,29 @@ void SortCPUKernel(const CPUContext& dev_ctx, for (int i = 0; iter != indices_to_index.end(); iter++, i++) { phi::funcs::sparse::IndexToCoordinate( - iter->first, const_dims, x.nnz(), sparse_dim, i, out_indices_ptr); + iter->first, const_dims, out_nnz, sparse_dim, i, out_indices_ptr); memcpy(out_values_ptr + i * stride, - x_values_ptr + iter->second * stride, + x_values_ptr + iter->second[0] * stride, stride * sizeof(T)); + for (uint64_t j = 1; j < iter->second.size(); j++) { + for (int k = 0; k < stride; k++) { + out_values_ptr[i * stride + k] += + x_values_ptr[iter->second[j] * stride + k]; + } + } } + out->SetMember(out_indices, out_values, x.dims(), true); } template -void SortKernel(const Context& dev_ctx, - const SparseCooTensor& x, - SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), "SortCPUKernel", ([&] { - SortCPUKernel(dev_ctx, x, out); - })); +void CoalescedKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + PD_VISIT_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "CoalescedCPUKernel", ([&] { + CoalescedCPUKernel(dev_ctx, x, out); + })); } } // namespace sparse @@ -85,7 +109,7 @@ void SortKernel(const Context& dev_ctx, PD_REGISTER_KERNEL(sort, CPU, ALL_LAYOUT, - phi::sparse::SortKernel, + phi::sparse::CoalescedKernel, float, double, phi::dtype::float16, diff --git a/paddle/phi/kernels/sparse/gpu/sort_kernel.cu b/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu similarity index 64% rename from paddle/phi/kernels/sparse/gpu/sort_kernel.cu rename to paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu index 39369616eca7e..3ffcd28955a53 100644 --- a/paddle/phi/kernels/sparse/gpu/sort_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu @@ -17,31 +17,18 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" -#include "paddle/phi/kernels/funcs/scatter.cu.h" #include "paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h" -#include "paddle/phi/kernels/sparse/sort_kernel.h" +#include "paddle/phi/kernels/funcs/sparse/scatter.cu.h" +#include "paddle/phi/kernels/funcs/sparse/utils.cu.h" +#include "paddle/phi/kernels/sparse/coalesced_kernel.h" namespace phi { namespace sparse { template -__global__ void CopyValuesKernel(const T* x_values, - const IntT* indexs, - const int64_t n, - const int64_t stride, - T* out_values) { - CUDA_KERNEL_LOOP_TYPE(i, n * stride, int64_t) { - int64_t indices_i = i / stride; - int64_t slice_i = i - indices_i * stride; // offset inside the slice - IntT in_i = indexs[indices_i]; - out_values[i * stride + slice_i] = x_values[in_i * stride + slice_i]; - } -} - -template -void SortGPUKernel(const GPUContext& dev_ctx, - const SparseCooTensor& x, - SparseCooTensor* out) { +void CoalescedGPUKernel(const GPUContext& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { const DenseTensor& x_indices = x.non_zero_indices(); const DenseTensor& x_values = x.non_zero_elements(); DenseTensor out_indices = phi::EmptyLike(dev_ctx, x_indices); @@ -93,10 +80,13 @@ void SortGPUKernel(const GPUContext& dev_ctx, DenseTensor values_indexs = phi::Empty( dev_ctx, DenseTensorMeta(DataType::INT32, {nnz}, DataLayout::NCHW)); int* values_indexs_ptr = values_indexs.data(); + DenseTensor public_indexs = phi::EmptyLike(dev_ctx, values_indexs); // values_indexs = [0,1,2,,,nnz-1] phi::IndexKernel>( dev_ctx, &values_indexs, kps::IdentityFunctor()); + phi::IndexKernel>( + dev_ctx, &public_indexs, kps::IdentityFunctor()); // 3. sort (indices, values index) #ifdef PADDLE_WITH_HIP @@ -108,13 +98,52 @@ void SortGPUKernel(const GPUContext& dev_ctx, indexs_ptr + nnz, values_indexs_ptr); + // 4. unique index + thrust::pair new_end = +#ifdef PADDLE_WITH_HIP + thrust::unique_by_key(thrust::hip::par.on(dev_ctx.stream()), +#else + thrust::unique_by_key(thrust::cuda::par.on(dev_ctx.stream()), +#endif + indexs_ptr, + indexs_ptr + nnz, + public_indexs.data()); + + phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( + indexs_ptr, new_end.first, out_indices.data()); + + IntT out_nnz = 0; + phi::backends::gpu::GpuMemcpyAsync(&out_nnz, + out_indices.data(), + sizeof(IntT), +#ifdef PADDLE_WITH_HIP + hipMemcpyDeviceToHost, +#else + cudaMemcpyDeviceToHost, +#endif + dev_ctx.stream()); + dev_ctx.Wait(); + + out_indices.Resize({x_indices.dims()[0], out_nnz}); + if (out_values.dims().size() == 1) { + out_values.Resize(phi::make_ddim({out_nnz})); + } else { + out_values.Resize(phi::make_ddim({out_nnz, x_values.dims()[1]})); + } + + // 5. scatter the values config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, nnz * stride, 1); - // 4. scatter the values - CopyValuesKernel<<>>( - x_values_ptr, values_indexs_ptr, nnz, stride, out_values.data()); + phi::funcs::sparse::ScatterKernel<<>>( + x_values_ptr, + public_indexs.data(), + values_indexs_ptr, + out_nnz, + nnz, + stride, + out_values.data()); // 6. 
convert index to coordinate Dim const_dims; @@ -122,23 +151,24 @@ void SortGPUKernel(const GPUContext& dev_ctx, const_dims[i] = x.dims()[i]; } - config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, nnz, 1); + config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, out_nnz, 1); phi::funcs::sparse::IndexToCoordinateKernel<<>>( - indexs_ptr, const_dims, nnz, sparse_dim, out_indices.data()); + indexs_ptr, const_dims, out_nnz, sparse_dim, out_indices.data()); out->SetMember(out_indices, out_values, x.dims(), true); } template -void SortKernel(const Context& dev_ctx, - const SparseCooTensor& x, - SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), "SortGPUKernel", ([&] { - SortGPUKernel(dev_ctx, x, out); - })); +void CoalescedKernel(const Context& dev_ctx, + const SparseCooTensor& x, + SparseCooTensor* out) { + PD_VISIT_INTEGRAL_TYPES( + x.non_zero_indices().dtype(), "CoalescedGPUKernel", ([&] { + CoalescedGPUKernel(dev_ctx, x, out); + })); } } // namespace sparse @@ -147,7 +177,7 @@ void SortKernel(const Context& dev_ctx, PD_REGISTER_KERNEL(sort, GPU, ALL_LAYOUT, - phi::sparse::SortKernel, + phi::sparse::CoalescedKernel, float, double, phi::dtype::float16, diff --git a/paddle/phi/kernels/sparse/gpu/convolution.cu.h b/paddle/phi/kernels/sparse/gpu/convolution.cu.h index 1bceb767b6708..c5f6f2746f681 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution.cu.h +++ b/paddle/phi/kernels/sparse/gpu/convolution.cu.h @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/sparse/utils.cu.h" #include "paddle/phi/kernels/primitive/compute_primitives.h" #include "paddle/phi/kernels/sparse/convolution_kernel.h" @@ -58,46 +59,6 @@ __global__ void GatherKernel(const T* params, } } -/** - * brief: scatter add - * input: the inputs - * unique_value: refer to UpdateIndexKernel notes - * out_index: the output feature index - * non_zero_num: the number of output features - * rulebook_len: the length of rulebook - * channels: the output channel size - * out: the outputs -**/ -template -__global__ void ScatterKernel(const T* input, - const int* unique_value, - const int* out_index, - const int non_zero_num, - const int rulebook_len, - const int channels, - T* out, - const bool subm = false) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - for (int i = tid; i < non_zero_num * channels; i += gridDim.x * blockDim.x) { - int indices_i = i / channels; - int channels_i = i - indices_i * channels; - - int start = unique_value[indices_i]; - int end = indices_i == non_zero_num - 1 ? 
rulebook_len - : unique_value[indices_i + 1]; - // max(end-start) = kernel_size - T sum = static_cast(0); - if (subm) { - sum = out[indices_i * channels + channels_i]; - } - for (int j = start; j < end; j++) { - const int out_feature_i = out_index[j]; - sum += input[out_feature_i * channels + channels_i]; - } - out[indices_i * channels + channels_i] = sum; - } -} - template inline IntT* SortedAndUniqueIndex(const Context& dev_ctx, const IntT* rulebook_ptr, @@ -213,14 +174,6 @@ __global__ void UpdateIndexKernel(const T* unique_keys, } } -// brief: calculation the distance between start and end -template -__global__ void DistanceKernel(const T* start, const T* end, T* distance) { - if (threadIdx.x == 0) { - *distance = end - start; - } -} - /** * @brief product rulebook * for input_i in x_indices: @@ -395,7 +348,7 @@ int ProductRuleBook(const Context& dev_ctx, rulebook_ptr + rulebook_rows * rulebook_cols, -1); - DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( + phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( rulebook_ptr, last, rulebook_ptr + 3 * kernel_size * non_zero_num - 1); IntT rulebook_len = 0; phi::backends::gpu::GpuMemcpyAsync( @@ -480,7 +433,7 @@ int ProductRuleBook(const Context& dev_ctx, val_result_start); } - DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( + phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( key_result.data(), end.first, key_result.data() + rulebook_len); @@ -516,7 +469,7 @@ int ProductRuleBook(const Context& dev_ctx, rulebook_ptr, rulebook_ptr + 3 * rulebook_len, -1); - DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( + phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( rulebook_ptr, last, key_result.data() + rulebook_len); phi::backends::gpu::GpuMemcpyAsync(&rulebook_len, key_result.data() + rulebook_len, @@ -587,7 +540,7 @@ int ProductRuleBook(const Context& dev_ctx, // thrust::distance doesn't support stream parameters // const int out_non_zero_num = thrust::distance(unique_key_ptr, // new_end.first); - DistanceKernel<<<1, 1>>>( + phi::funcs::sparse::DistanceKernel<<<1, 1>>>( unique_key_ptr, new_end, rulebook_ptr + rulebook_rows * rulebook_cols - 1); diff --git a/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu index 6c37f759923c3..5e7de7518e298 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu @@ -22,6 +22,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/sparse/scatter.cu.h" #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h" #include "paddle/phi/kernels/sparse/gpu/convolution.cu.h" @@ -222,17 +223,18 @@ void Conv3dGradGPUKernel(const GPUContext& dev_ctx, config = phi::backends::gpu::GetGpuLaunchConfig1D( dev_ctx, rulebook_len * in_channels, 1); - ScatterKernel<<>>(d_x_features_ptr, - unique_value.data(), - out_index.data(), - x.nnz(), - rulebook_len, - in_channels, - x_grad_values_ptr, - subm); + phi::funcs::sparse::ScatterKernel<<>>( + d_x_features_ptr, + unique_value.data(), + out_index.data(), + x.nnz(), + rulebook_len, + in_channels, + x_grad_values_ptr, + subm); } template diff --git a/paddle/phi/kernels/sparse/gpu/convolution_kernel.cu b/paddle/phi/kernels/sparse/gpu/convolution_kernel.cu index 83f19ce5785df..abf84dc7d739b 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/convolution_kernel.cu @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/funcs/blas/blas.h" +#include "paddle/phi/kernels/funcs/sparse/scatter.cu.h" #include "paddle/phi/kernels/sparse/convolution_kernel.h" #include "paddle/phi/kernels/sparse/gpu/convolution.cu.h" @@ -149,16 +150,17 @@ void Conv3dGPUKernel(const GPUContext& dev_ctx, // 4. scatter config = phi::backends::gpu::GetGpuLaunchConfig1D( dev_ctx, out->nnz() * out_channels, 1); - ScatterKernel<<>>(out_features_ptr, - unique_value.data(), - out_index.data(), - out->nnz(), - n, - out_channels, - out_values_ptr); + phi::funcs::sparse::ScatterKernel<<>>( + out_features_ptr, + unique_value.data(), + out_index.data(), + out->nnz(), + n, + out_channels, + out_values_ptr); } /** * x: (N, D, H, W, C) diff --git a/paddle/phi/kernels/sparse/sparse_utils_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_kernel.h index 83de8bfec5d1b..072e6f141f8f1 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h @@ -20,7 +20,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/kernels/empty_kernel.h" -#include "paddle/phi/kernels/sparse/sort_kernel.h" +#include "paddle/phi/kernels/sparse/coalesced_kernel.h" namespace phi { namespace sparse { @@ -155,11 +155,9 @@ void SparseCooTensorKernel(const Context& dev_ctx, const DenseTensor& indices, const IntArray& dense_shape, SparseCooTensor* out) { - SparseCooTensor before_sorted( + SparseCooTensor before_coalesced( indices, values, phi::make_ddim(dense_shape.GetData())); - // sort - SortKernel(dev_ctx, before_sorted, out); - // TODO(zhangkaihuo): merge the dumplicate indices + CoalescedKernel(dev_ctx, before_coalesced, out); } } // namespace sparse diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 157da73ad07de..a13cb58520d9d 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -230,13 +230,14 @@ def test_sparse_coo_tensor_sorted(self): with _test_eager_guard(): for device in devices: paddle.device.set_device(device) - indices = [[1, 0], [0, 1]] - values = [1.0, 2.0] + #test unsorted and duplicate indices + indices = [[1, 0, 0], [0, 1, 1]] + values = [1.0, 2.0, 3.0] indices = paddle.to_tensor(indices, dtype='int32') values = paddle.to_tensor(values, dtype='float32') sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) indices_sorted = [[0, 1], [1, 0]] - values_sorted = [2.0, 1.0] + values_sorted = [5.0, 1.0] assert np.array_equal(indices_sorted, sparse_x.indices().numpy()) assert np.array_equal(values_sorted, sparse_x.values().numpy()) From 1385e1cb81c9c83a4546909bb9103cfbb8b607ef Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Fri, 15 Apr 2022 08:21:37 +0000 Subject: [PATCH 11/23] fix conflict --- paddle/phi/kernels/sparse/gpu/convolution.cu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/sparse/gpu/convolution.cu.h b/paddle/phi/kernels/sparse/gpu/convolution.cu.h index 0acfa41c4ef9b..fcbb3c60183eb 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution.cu.h +++ b/paddle/phi/kernels/sparse/gpu/convolution.cu.h @@ -422,7 +422,7 @@ int ProductRuleBook(const Context& dev_ctx, rulebook_ptr + 3 * rulebook_len, -1); phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( - rulebook_ptr, last, key_result.data() + rulebook_len); + rulebook_ptr, last, bound_ptr); phi::backends::gpu::GpuMemcpyAsync(&rulebook_len, bound_ptr, sizeof(IntT), @@ -489,7 +489,7 @@ int ProductRuleBook(const Context& dev_ctx, // thrust::distance doesn't support stream parameters // const int out_non_zero_num = thrust::distance(unique_key_ptr, // new_end.first); - DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( + phi::funcs::sparse::DistanceKernel<<<1, 1, 0, dev_ctx.stream()>>>( unique_key_ptr, new_end, rulebook_ptr + rulebook_rows * rulebook_cols - 1); From ed97c98577e16f3cea867643e06565fa4e343a96 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Fri, 15 Apr 2022 09:23:00 +0000 Subject: [PATCH 12/23] Add parameter verification --- .../tests/unittests/test_sparse_utils_op.py | 119 ++++++++++++++++-- python/paddle/sparse/creation.py | 60 ++++++++- 2 files changed, 167 insertions(+), 12 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index a13cb58520d9d..d56556720e9be 100644 --- 
a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -32,6 +32,8 @@ def test_create_coo_by_tensor(self): dense_elements = paddle.to_tensor(values, dtype='float32') coo = paddle.sparse.sparse_coo_tensor( dense_indices, dense_elements, dense_shape, stop_gradient=False) + # test the to_string.py + print(coo) assert np.array_equal(indices, coo.indices().numpy()) assert np.array_equal(values, coo.values().numpy()) @@ -69,6 +71,8 @@ def test_create_csr_by_np(self): dense_shape = [3, 4] csr = paddle.sparse.sparse_csr_tensor(crows, cols, values, dense_shape) + # test the to_string.py + print(csr) assert np.array_equal(crows, csr.crows().numpy()) assert np.array_equal(cols, csr.cols().numpy()) assert np.array_equal(values, csr.values().numpy()) @@ -243,15 +247,112 @@ def test_sparse_coo_tensor_sorted(self): assert np.array_equal(values_sorted, sparse_x.values().numpy()) -class TestError(unittest.TestCase): - def test_sparse_coo_tensor(self): - with self.assertRaises(ValueError): - indices = [[2, 3], [0, 2]] - values = [1, 2] - #the shape too small - dense_shape = [2, 2] - sparse_x = paddle.sparse.sparse_coo_tensor( - indices, values, shape=dense_shape) +class TestCooError(unittest.TestCase): + def test_small_shape(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + indices = [[2, 3], [0, 2]] + values = [1, 2] + # 1. the shape too small + dense_shape = [2, 2] + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=dense_shape) + + def test_same_nnz(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + # 2. test the nnz of indices must same as nnz of values + indices = [[1, 2], [1, 0]] + values = [1, 2, 3] + sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) + + def test_same_dimensions(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + indices = [[1, 2], [1, 0]] + values = [1, 2, 3] + shape = [2, 3, 4] + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=shape) + + def test_indices_dtype(self): + with _test_eager_guard(): + with self.assertRaises(TypeError): + indices = [[1.0, 2.0], [0, 1]] + values = [1, 2] + sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) + + +class TestCsrError(unittest.TestCase): + def test_dimension1(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + crows = [0, 1, 2, 3] + cols = [0, 1, 2] + values = [1, 2, 3] + shape = [3] + sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values, + shape) + + def test_dimension2(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + crows = [0, 1, 2, 3] + cols = [0, 1, 2] + values = [1, 2, 3] + shape = [3, 3, 3, 3] + sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values, + shape) + + def test_same_shape1(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + crows = [0, 1, 2, 3] + cols = [0, 1, 2, 3] + values = [1, 2, 3] + shape = [3, 4] + sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values, + shape) + + def test_same_shape2(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + crows = [0, 1, 2, 3] + cols = [0, 1, 2, 3] + values = [1, 2, 3, 4] + shape = [3, 4] + sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values, + shape) + + def test_same_shape3(self): + with _test_eager_guard(): + with self.assertRaises(ValueError): + crows = [0, 1, 2, 3, 0, 1, 2] + cols = [0, 1, 2, 3, 0, 1, 2] + values = [1, 2, 3, 4, 0, 1, 2] + shape = 
[2, 3, 4]
+                sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values,
+                                                            shape)
+
+    def test_crows_first_value(self):
+        with _test_eager_guard():
+            with self.assertRaises(ValueError):
+                crows = [1, 1, 2, 3]
+                cols = [0, 1, 2]
+                values = [1, 2, 3]
+                shape = [3, 4]
+                sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values,
+                                                            shape)
+
+    def test_dtype(self):
+        with _test_eager_guard():
+            with self.assertRaises(TypeError):
+                crows = [0, 1, 2, 3.0]
+                cols = [0, 1, 2]
+                values = [1, 2, 3]
+                shape = [3]
+                sparse_x = paddle.sparse.sparse_csr_tensor(crows, cols, values,
+                                                            shape)
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py
index b48cc60851c99..d494336e1ff50 100644
--- a/python/paddle/sparse/creation.py
+++ b/python/paddle/sparse/creation.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import paddle
 from paddle import _C_ops
 from ..framework import core, dygraph_only
 from ..framework import _current_expected_place, _get_paddle_place
@@ -51,6 +52,13 @@ def _get_place(place):
     return place
 
 
+def _check_indices_dtype(dtype):
+    if dtype not in [paddle.int8, paddle.int16, paddle.int32, paddle.int64]:
+        raise TypeError(
+            "the dtype of indices must be 'int8' or 'int16' or 'int32' or 'int64'"
+        )
+
+
 @dygraph_only
 def sparse_coo_tensor(indices,
                       values,
@@ -117,6 +125,18 @@ def sparse_coo_tensor(indices,
     if len(indices.shape) != 2:
         raise ValueError("'indices' must be 2-D.")
 
+    nnz = indices.shape[1]
+    sparse_dim = indices.shape[0]
+
+    _check_indices_dtype(indices.dtype)
+
+    if nnz != values.shape[0]:
+        raise ValueError(
+            "the indices and values must have the same number of non-zero elements, but got {} and {}".
+            format(nnz, values.shape[0]))
+
+    dense_dim = len(values.shape) - 1
+
     if not indices.place._equals(place):
         indices = indices._copy_to(place, False)
 
@@ -132,6 +152,10 @@ def sparse_coo_tensor(indices,
         if shape < min_shape:
             raise ValueError("the minimun shape required is {}, but get {}".
                              format(min_shape, shape))
+        if len(shape) != sparse_dim + dense_dim:
+            raise ValueError(
+                "the number of dimensions (len(shape)) must be sparse_dim({}) + dense_dim({}), but got {}".
+                format(sparse_dim, dense_dim, len(shape)))
 
     return _C_ops.final_state_sparse_create_sparse_coo_tensor(values, indices,
                                                                shape)
@@ -149,6 +173,7 @@ def sparse_csr_tensor(crows,
     r"""
     Constructs a sparse ``paddle.Tensor`` in CSR(Compressed Sparse Row) format according to the
     ``crows``, ``cols`` and ``values``.
+    Currently, the crows and cols of each batch must be in ascending order.
 
     Args:
         crows(list|tuple|ndarray|Tensor): 1-D array, each element in the rows represents the
@@ -207,10 +232,14 @@ def sparse_csr_tensor(crows,
         cols = to_tensor(cols, dtype=None, place=place, stop_gradient=True)
     if not isinstance(values, core.eager.Tensor):
         values = to_tensor(values, dtype, place, stop_gradient)
-    if len(crows.shape) != 1 or len(cols.shape) != 1 or len(values.shape) != 1:
+
+    _check_indices_dtype(crows.dtype)
+    _check_indices_dtype(cols.dtype)
+
+    if len(shape) != 2 and len(shape) != 3:
         raise ValueError(
-            "SparseCsrTensor only support 2-D or 3-D matrix. The 'crows', 'cols' and 'values' must be 1-D."
-        )
+            "SparseCsrTensor only supports 2-D or 3-D matrices, but got shape {}". 
+            format(shape))
 
     if not crows.place._equals(place):
         crows = crows._copy_to(place, False)
@@ -222,5 +251,30 @@ def sparse_csr_tensor(crows,
         values = values._copy_to(place, False)
         values = _handle_dtype(values, dtype)
     values.stop_gradient = stop_gradient
+
+    if len(crows.shape) != 1 or len(cols.shape) != 1 or len(values.shape) != 1:
+        raise ValueError("The 'crows', 'cols' and 'values' must be 1-D.")
+
+    if len(cols) != len(values):
+        raise ValueError(
+            "the length of cols must be the same as the length of values")
+
+    if len(shape) == 2:
+        if crows.shape[0] != shape[0] + 1:
+            raise ValueError(
+                "The length({}) of crows must be equal to the rows({})+1 of the matrix.".
+                format(crows.shape[0], shape[0]))
+        if crows[0] != 0:
+            raise ValueError("the 0th value of crows must be 0")
+
+        if crows[-1] != values.shape[0]:
+            raise ValueError(
+                "the last value of crows must be equal to the number of non-zero elements")
+    else:
+        if crows.shape[0] % (shape[0] + 1) != 0:
+            raise ValueError(
+                "The length({}) of crows must be divisible by the rows({})+1 of the matrix.".
+                format(crows.shape[0], shape[0]))
+    # TODO(zkh2016): check whether the values in crows and cols are legal
+
     return core.eager.sparse_csr_tensor(crows, cols, values, shape,
                                         stop_gradient)

From 591e3c1470393a9c9391d9fb33211be9f0a618ce Mon Sep 17 00:00:00 2001
From: zkh2016 
Date: Fri, 15 Apr 2022 12:58:04 +0000
Subject: [PATCH 13/23] fix device

---
 .../tests/unittests/test_sparse_utils_op.py   | 64 ++++++++++---------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
index d56556720e9be..c87626a10c631 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
@@ -212,39 +212,45 @@ def test_coo_values_grad(self):
     def test_sparse_coo_tensor_grad(self):
         with _test_eager_guard():
             for device in devices:
-                paddle.device.set_device(device)
-                indices = [[0, 1], [0, 1]]
-                values = [1, 2]
-                indices = paddle.to_tensor(indices, dtype='int32')
-                values = paddle.to_tensor(
-                    values, dtype='float32', stop_gradient=False)
-                sparse_x = paddle.sparse.sparse_coo_tensor(
-                    indices, values, shape=[2, 2], stop_gradient=False)
-                grad_indices = [[0, 1], [1, 1]]
-                grad_values = [2, 3]
-                grad_indices = paddle.to_tensor(grad_indices, dtype='int32')
-                grad_values = paddle.to_tensor(grad_values, dtype='float32')
-                sparse_out_grad = paddle.sparse.sparse_coo_tensor(
-                    grad_indices, grad_values, shape=[2, 2])
-                sparse_x.backward(sparse_out_grad)
-                correct_values_grad = [0, 3]
-                assert np.array_equal(correct_values_grad, values.grad.numpy())
+                if device == 'cpu' or (device == 'gpu' and
+                                       paddle.is_compiled_with_cuda()):
+                    paddle.device.set_device(device)
+                    indices = [[0, 1], [0, 1]]
+                    values = [1, 2]
+                    indices = paddle.to_tensor(indices, dtype='int32')
+                    values = paddle.to_tensor(
+                        values, dtype='float32', stop_gradient=False)
+                    sparse_x = paddle.sparse.sparse_coo_tensor(
+                        indices, values, shape=[2, 2], stop_gradient=False)
+                    grad_indices = [[0, 1], [1, 1]]
+                    grad_values = [2, 3]
+                    grad_indices = paddle.to_tensor(grad_indices, dtype='int32')
+                    grad_values = paddle.to_tensor(grad_values, dtype='float32')
+                    sparse_out_grad = paddle.sparse.sparse_coo_tensor(
+                        grad_indices, grad_values, shape=[2, 2])
+                    sparse_x.backward(sparse_out_grad)
+                    correct_values_grad = [0, 3]
+                    assert np.array_equal(correct_values_grad,
+                                           values.grad.numpy())
 
     def test_sparse_coo_tensor_sorted(self):
         with 
_test_eager_guard(): for device in devices: - paddle.device.set_device(device) - #test unsorted and duplicate indices - indices = [[1, 0, 0], [0, 1, 1]] - values = [1.0, 2.0, 3.0] - indices = paddle.to_tensor(indices, dtype='int32') - values = paddle.to_tensor(values, dtype='float32') - sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) - indices_sorted = [[0, 1], [1, 0]] - values_sorted = [5.0, 1.0] - assert np.array_equal(indices_sorted, - sparse_x.indices().numpy()) - assert np.array_equal(values_sorted, sparse_x.values().numpy()) + if device == 'cpu' or (device == 'gpu' and + paddle.is_compiled_with_cuda()): + paddle.device.set_device(device) + #test unsorted and duplicate indices + indices = [[1, 0, 0], [0, 1, 1]] + values = [1.0, 2.0, 3.0] + indices = paddle.to_tensor(indices, dtype='int32') + values = paddle.to_tensor(values, dtype='float32') + sparse_x = paddle.sparse.sparse_coo_tensor(indices, values) + indices_sorted = [[0, 1], [1, 0]] + values_sorted = [5.0, 1.0] + assert np.array_equal(indices_sorted, + sparse_x.indices().numpy()) + assert np.array_equal(values_sorted, + sparse_x.values().numpy()) class TestCooError(unittest.TestCase): From 8ecfa706c5f09bc553f6f1921915d2d1e4e0dca3 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Mon, 18 Apr 2022 08:53:10 +0000 Subject: [PATCH 14/23] add sparse norm and pool --- .../fluid/tests/unittests/test_sparse_norm.py | 15 +++- .../tests/unittests/test_sparse_pool_op.py | 36 +++++++++ python/paddle/sparse/__init__.py | 4 +- python/paddle/sparse/layer/__init__.py | 2 +- python/paddle/sparse/layer/norm.py | 75 +++++++++++-------- python/paddle/utils/code_gen/sparse_api.yaml | 9 +++ .../paddle/utils/code_gen/sparse_bw_api.yaml | 7 ++ 7 files changed, 108 insertions(+), 40 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_sparse_pool_op.py diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm.py b/python/paddle/fluid/tests/unittests/test_sparse_norm.py index ac505e3b7b42e..e1081b03270af 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm.py @@ -22,11 +22,18 @@ class TestSparseBatchNorm(unittest.TestCase): def test_sparse_batch_norm(self): with _test_eager_guard(): - shape = [2, 6, 6, 6, 4] + channels = 1 + shape = [1, 1, 6, 6, channels] dense_x = paddle.randn(shape) - print(dense_x) - batch_norm = paddle.nn.BatchNorm3D(4, data_format="NDHWC") + batch_norm = paddle.nn.BatchNorm3D(channels, data_format="NDHWC") dense_y = batch_norm(dense_x) + sparse_dim = 4 sparse_x = dense_x.to_sparse_coo(sparse_dim) - batch_norm = paddle.sparse.BatchNorm(4) + sparse_batch_norm = paddle.sparse.BatchNorm1D(channels) + sparse_y = sparse_batch_norm(sparse_x) + assert np.allclose( + dense_y.flatten().numpy(), + sparse_y.values().flatten().numpy(), + atol=1e-5, + rtol=1e-5) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py b/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py new file mode 100644 index 0000000000000..c8c8d0637eb05 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py @@ -0,0 +1,36 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import paddle +import paddle.fluid.core as core +from paddle import _C_ops +from paddle.fluid.framework import _test_eager_guard + + +class TestSparseMaxPool3D(unittest.TestCase): + def test1(self): + with _test_eager_guard(): + dense_x = paddle.randn((1, 1, 4, 4, 1)) + print(dense_x) + sparse_x = dense_x.to_sparse_coo(4) + kernel_sizes = [1, 3, 3] + paddings = [0, 0, 0] + strides = [1, 1, 1] + dilations = [1, 1, 1] + out = _C_ops.final_state_sparse_max_pool3d( + sparse_x, kernel_sizes, paddings, dilations, strides) + print(out) diff --git a/python/paddle/sparse/__init__.py b/python/paddle/sparse/__init__.py index 23ee0c5014aed..680d2d5109857 100644 --- a/python/paddle/sparse/__init__.py +++ b/python/paddle/sparse/__init__.py @@ -15,12 +15,12 @@ from .creation import sparse_coo_tensor from .creation import sparse_csr_tensor from .layer.activation import ReLU -from .layer.norm import BatchNorm +from .layer.norm import BatchNorm1D from .layer.conv import Conv3D from .layer.conv import SubmConv3D __all__ = [ 'sparse_coo_tensor', 'sparse_csr_tensor', 'ReLU', 'Conv3D', 'SubmConv3D', - 'BatchNorm' + 'BatchNorm1D' ] diff --git a/python/paddle/sparse/layer/__init__.py b/python/paddle/sparse/layer/__init__.py index ee32e5027b50f..f065e6c8bb3c1 100644 --- a/python/paddle/sparse/layer/__init__.py +++ b/python/paddle/sparse/layer/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from .activation import ReLU -from .norm import BatchNorm +from .norm import BatchNorm1D from .conv import Conv3D from .conv import SubmConv3D diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py index 4f2818cd2dcb2..71d1b55eeb26a 100644 --- a/python/paddle/sparse/layer/norm.py +++ b/python/paddle/sparse/layer/norm.py @@ -25,46 +25,55 @@ # See the License for the specific language governing permissions and # limitations under the License. 
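+# The layer added below normalizes a SparseCooTensor by running a dense
+# batch_norm over input.values(), which is laid out as [nnz, channels], and
+# then rebuilding the sparse tensor with the original indices, so the batch
+# statistics are computed only over the stored non-zero entries.
+#
+# A minimal usage sketch (illustrative only; it mirrors the unit test above
+# and assumes `dense_x` is a 5-D dense tensor in NDHWC layout):
+#
+#     sparse_x = dense_x.to_sparse_coo(4)
+#     bn = paddle.sparse.BatchNorm1D(num_features=dense_x.shape[-1])
+#     sparse_y = bn(sparse_x)  # SparseCooTensor with normalized values
+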
+import paddle +import warnings -class BatchNorm(paddle.fluid.dygraph.BatchNorm): + +class BatchNorm1D(paddle.nn.BatchNorm1D): def __init__(self, - num_channels, - act=None, - is_test=False, + num_features, momentum=0.9, epsilon=1e-05, - param_attr=None, + weight_attr=None, bias_attr=None, - dtype='float32', - data_layout='NCHW', - in_place=False, - moving_mean_name=None, - moving_variance_name=None, - do_model_average_for_mean_and_var=True, - use_global_stats=False, - trainable_statistics=False): - super(BatchNorm, self).__init__( - num_channels, - act=act, - is_test=is_test, + data_format='NCL', + use_global_stats=None, + name=None): + super(BatchNorm1D, self).__init__( + num_features, momentum=momentum, epsilon=epsilon, - param_attr=param_attr, + weight_attr=weight_attr, bias_attr=bias_attr, - dtype=dtype, - data_layout, - in_place=in_place, - moving_mean_name=moving_mean_name, - moving_variance_name=moving_variance_name, - do_model_average_for_mean_and_var=do_model_average_for_mean_and_var, + data_format=data_format, use_global_stats=use_global_stats, - trainable_statistics=tranable_statistics) + name=name) + + def forward(self, input): + values = input.values() + #out = super(BatchNorm1D, self).forward(values) + self._check_data_format(self._data_format) + + self._check_input_dim(values) + + if self.training: + warnings.warn( + "When training, we now always track global mean and variance.") + + out = paddle.nn.functional.batch_norm( + values, + self._mean, + self._variance, + weight=self.weight, + bias=self.bias, + training=self.training, + momentum=self._momentum, + epsilon=self._epsilon, + data_format=self._data_format, + use_global_stats=self._use_global_stats) - def forward(self, input): - values = input.values() - out = super(BatchNorm, self).forward(values) - return paddle.sparse.sparse_coo_tensor( - input.indices(), - out, - shape=input.shape, - stop_gradient=input.stop_gradient) + return paddle.sparse.sparse_coo_tensor( + input.indices(), + out, + shape=input.shape, + stop_gradient=input.stop_gradient) diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index 100d7ad78319b..f1a2f427b2af0 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -65,3 +65,12 @@ args : (Tensor x) output : Tensor(out@SparseCsrTensor) invoke : to_sparse_csr_impl(x) + +- api: max_pool3d + args : (Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) + output : Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor) + kernel : + func : sparse_maxpool + layout : x + intermediate : rulebook + backward : max_pool3d_grad diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml index e3946cbf72bc2..e0e1675db398c 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api.yaml +++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml @@ -32,6 +32,13 @@ output : Tensor(x_grad@DenseTensor) invoke : to_dense_impl(out_grad) +- backward_api : max_pool3d_grad + forward : max_pool3d (Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor) + args : (Tensor x, Tensor rulebook, Tensor out, Tensor out_grad, int[] kernel_sizes) + output : Tensor(x_grad@SparseCooTensor) + kernel : + func : sparse_maxpool_grad + - backward_api : sparse_relu_grad forward : sparse_relu(Tensor x) -> Tensor(out@SparseCooTensor) args : (Tensor x, Tensor out_grad) From 
6153a5a5d82ecb4576926c460772900f128a8009 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Mon, 18 Apr 2022 09:12:41 +0000 Subject: [PATCH 15/23] batch norm --- .../fluid/tests/unittests/test_sparse_norm.py | 17 +++- python/paddle/sparse/layer/norm.py | 78 ++++++++++++++++++- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm.py b/python/paddle/fluid/tests/unittests/test_sparse_norm.py index e1081b03270af..30fb6a3bccbe0 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm.py @@ -30,10 +30,25 @@ def test_sparse_batch_norm(self): sparse_dim = 4 sparse_x = dense_x.to_sparse_coo(sparse_dim) - sparse_batch_norm = paddle.sparse.BatchNorm1D(channels) + sparse_batch_norm = paddle.sparse.BatchNorm(channels) sparse_y = sparse_batch_norm(sparse_x) assert np.allclose( dense_y.flatten().numpy(), sparse_y.values().flatten().numpy(), atol=1e-5, rtol=1e-5) + + def test(self): + with _test_eager_guard(): + np.random.seed(123) + channels = 3 + x_data = np.random.random(size=(1, 6, 6, 6, + channels)).astype('float32') + dense_x = paddle.to_tensor(x_data) + sparse_x = dense_x.to_sparse_coo(4) + batch_norm = paddle.sparse.BatchNorm(channels) + batch_norm_out = batch_norm(sparse_x) + print(batch_norm_out) + + +#TODO(zkh2016): add more test diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py index 71d1b55eeb26a..63b0d3dca23e1 100644 --- a/python/paddle/sparse/layer/norm.py +++ b/python/paddle/sparse/layer/norm.py @@ -29,7 +29,83 @@ import warnings -class BatchNorm1D(paddle.nn.BatchNorm1D): +class BatchNorm(paddle.nn.BatchNorm1D): + r""" + Applies Batch Normalization over a SparseCooTensor as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . + + When use_global_stats = False, the :math:`\mu_{\beta}` + and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch. + Calculated as follows: + + .. math:: + + \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &//\ + \ mini-batch\ mean \\ + \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \ + \mu_{\beta})^2 \qquad &//\ mini-batch\ variance \\ + + When use_global_stats = True, the :math:`\mu_{\beta}` + and :math:`\sigma_{\beta}^{2}` are not the statistics of one mini-batch. + They are global or running statistics (moving_mean and moving_variance). It usually got from the + pre-trained model. Calculated as follows: + + .. math:: + moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global \ mean \\ + moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global \ variance \\ + + The normalization function formula is as follows: + + .. math:: + + \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ + y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift + + - :math:`\epsilon` : add a smaller value to the variance to prevent division by zero + - :math:`\gamma` : trainable proportional parameter + - :math:`\beta` : trainable deviation parameter + + Parameters: + num_features(int): Indicate the number of channels of the input ``Tensor``. + epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. + momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. 
+ weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as weight_attr. If it is set to False, the weight is not learnable. + If the Initializer of the weight_attr is not set, the parameter is initialized with Xavier. Default: None. + bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr. If it is set to False, the bias is not learnable. + If the Initializer of the bias_attr is not set, the bias is initialized to zero. Default: None. + data_format(str, optional): Specify the input data format, may be "NC", "NCL" or "NLC". Default: "NCL". + use_global_stats(bool|None, optional): Whether to use global mean and variance. If set to False, use the statistics of one mini-batch, if set to True, use the global statistics, if set to None, use global statistics in the test phase and use the statistics of one mini-batch in the training phase. Default: None. + name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - x: A SparseCooTensor with layout = 'NDHWC'. + - output: SparseCooTensor with same shape as input x. + + Returns: + None. + + + Examples: + .. code-block:: python + + import paddle + import numpy as np + from paddle.fluid.framework import _test_eager_guard + + with _test_eager_guard(): + np.random.seed(123) + channels = 3 + x_data = np.random.random(size=(1, 6, 6, 6, channels)).astype('float32') + dense_x = paddle.to_tensor(x_data) + sparse_x = dense_x.to_sparse_coo(4) + batch_norm = paddle.sparse.BatchNorm(channels) + batch_norm_out = batch_norm(sparse_x) + print(batch_norm_out) + """ def __init__(self, num_features, momentum=0.9, From 4667790e44fc104b266d194654c54a1ea331375f Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 02:56:51 +0000 Subject: [PATCH 16/23] fix getting stride, fix infer dense shape --- paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc | 2 +- paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc | 2 +- paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu | 2 +- paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu | 2 +- python/paddle/sparse/creation.py | 12 +++++++++--- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc b/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc index 0ebddf9b683f0..22c5e14b35f56 100644 --- a/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc @@ -44,7 +44,7 @@ void CoalescedCPUKernel(const CPUContext& dev_ctx, const T* x_values_ptr = x_values.data(); const int64_t stride = - x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim; + x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1]; std::map> indices_to_index; for (uint64_t i = 0; i < x_indexs.size(); i++) { diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 1508de407caa7..0ec8b808ba838 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -125,7 +125,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx, T* out_ptr = out->data(); memset(out_ptr, static_cast(0), out->numel() * sizeof(T)); const int64_t stride = - x.dims().size() == sparse_dim ?
1 : x.dims().size() - sparse_dim; + x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1]; const T* in_ptr = x.non_zero_elements().data(); // TODO(zhangkaihuo): multithreading can be used for acceleration for (uint64_t i = 0; i < mask_indexs.size(); i++) { diff --git a/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu b/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu index 3ffcd28955a53..b2e7884580c74 100644 --- a/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu @@ -76,7 +76,7 @@ void CoalescedGPUKernel(const GPUContext& dev_ctx, // 2. get the address of each non-zero values const T* x_values_ptr = x_values.data(); const int64_t stride = - x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim; + x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1]; DenseTensor values_indexs = phi::Empty( dev_ctx, DenseTensorMeta(DataType::INT32, {nnz}, DataLayout::NCHW)); int* values_indexs_ptr = values_indexs.data(); diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index 4e2d12f33955e..4253845956ea7 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -231,7 +231,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx, T* out_ptr = out->data(); const int64_t stride = - x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim; + x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1]; SparseMaskCopyKernel<< 1: + lens = np.append(lens, values.shape[1:]) + return list(lens) def _get_place(place): @@ -145,7 +150,8 @@ def sparse_coo_tensor(indices, values = _handle_dtype(values, dtype) values.stop_gradient = stop_gradient - min_shape = _infer_dense_shape(indices) + min_shape = _infer_dense_shape(indices, values) + if shape is None: shape = min_shape else: From 378eb60fe68a1172d6b71e5290f3dc5a7c947657 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 03:18:53 +0000 Subject: [PATCH 17/23] sparse batch_norm --- .../fluid/tests/unittests/test_sparse_norm.py | 51 ++++++++++++------- .../tests/unittests/test_sparse_utils_op.py | 41 ++++++++++++++- python/paddle/sparse/__init__.py | 4 +- python/paddle/sparse/layer/__init__.py | 2 +- python/paddle/sparse/layer/norm.py | 19 ++++--- 5 files changed, 89 insertions(+), 28 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm.py b/python/paddle/fluid/tests/unittests/test_sparse_norm.py index 30fb6a3bccbe0..937f24c26f29d 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm.py @@ -17,38 +17,55 @@ import numpy as np import paddle from paddle.fluid.framework import _test_eager_guard +import copy class TestSparseBatchNorm(unittest.TestCase): - def test_sparse_batch_norm(self): + def test(self): with _test_eager_guard(): - channels = 1 - shape = [1, 1, 6, 6, channels] + paddle.seed(0) + channels = 4 + shape = [2, 3, 6, 6, channels] + #there is no zero in dense_x dense_x = paddle.randn(shape) + dense_x.stop_gradient = False + batch_norm = paddle.nn.BatchNorm3D(channels, data_format="NDHWC") dense_y = batch_norm(dense_x) + dense_y.backward(dense_y) sparse_dim = 4 - sparse_x = dense_x.to_sparse_coo(sparse_dim) + dense_x2 = copy.deepcopy(dense_x) + dense_x2.stop_gradient = False + sparse_x = dense_x2.to_sparse_coo(sparse_dim) sparse_batch_norm = paddle.sparse.BatchNorm(channels) + # set same params + 
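A small worked example (not part of the patch) of what the creation.py shape-inference change above enables: when each non-zero entry carries a trailing dense vector, the inferred minimum shape is the per-dimension maximum index plus one, extended by the value vector's shape.

import numpy as np

indices = np.array([[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]])   # 2 sparse dims, 5 entries
values = np.random.rand(5, 2)                            # each entry is a length-2 vector

sparse_part = [int(m) + 1 for m in indices.max(axis=1)]  # [3, 4] from the largest index per dim
dense_part = list(values.shape[1:])                      # [2], contributed by the value vectors
print(sparse_part + dense_part)                          # [3, 4, 2], the minimum dense shape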
sparse_batch_norm._mean.set_value(batch_norm._mean) + sparse_batch_norm._variance.set_value(batch_norm._variance) + sparse_batch_norm.weight.set_value(batch_norm.weight) + sparse_y = sparse_batch_norm(sparse_x) + # compare the result with dense batch_norm assert np.allclose( dense_y.flatten().numpy(), sparse_y.values().flatten().numpy(), atol=1e-5, rtol=1e-5) - def test(self): + # test backward + sparse_y.backward(sparse_y) + assert np.allclose( + dense_x.grad.flatten().numpy(), + sparse_x.grad.values().flatten().numpy(), + atol=1e-5, + rtol=1e-5) + + def test_error_layout(self): with _test_eager_guard(): - np.random.seed(123) - channels = 3 - x_data = np.random.random(size=(1, 6, 6, 6, - channels)).astype('float32') - dense_x = paddle.to_tensor(x_data) - sparse_x = dense_x.to_sparse_coo(4) - batch_norm = paddle.sparse.BatchNorm(channels) - batch_norm_out = batch_norm(sparse_x) - print(batch_norm_out) - - -#TODO(zkh2016): add more test + with self.assertRaises(ValueError): + shape = [2, 3, 6, 6, 3] + x = paddle.randn(shape) + sparse_x = x.to_sparse_coo(4) + sparse_batch_norm = paddle.sparse.BatchNorm( + 3, data_format='NCDHW') + sparse_batch_norm(sparse_x) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index c87626a10c631..ad282e2fd2418 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -19,7 +19,7 @@ import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -devices = ['cpu', 'gpu'] +devices = ['cpu'] class TestSparseCreate(unittest.TestCase): @@ -208,6 +208,20 @@ def test_coo_values_grad(self): # test coo_values_grad values_tensor.backward(paddle.to_tensor(out_grad)) assert np.array_equal(out_grad, sparse_x.grad.values().numpy()) + indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] + values = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], + [5.0, 5.0]] + sparse_x = paddle.sparse.sparse_coo_tensor( + paddle.to_tensor(indices), + paddle.to_tensor(values), + shape=[3, 4, 2], + stop_gradient=False) + values_tensor = sparse_x.values() + out_grad = [[2.0, 2.0], [3.0, 3.0], [5.0, 5.0], [8.0, 8.0], + [9.0, 9.0]] + # test coo_values_grad + values_tensor.backward(paddle.to_tensor(out_grad)) + assert np.array_equal(out_grad, sparse_x.grad.values().numpy()) def test_sparse_coo_tensor_grad(self): with _test_eager_guard(): @@ -233,6 +247,21 @@ def test_sparse_coo_tensor_grad(self): assert np.array_equal(correct_values_grad, values.grad.numpy()) + # test the non-zero values is a vector + values = [[1, 1], [2, 2]] + values = paddle.to_tensor( + values, dtype='float32', stop_gradient=False) + sparse_x = paddle.sparse.sparse_coo_tensor( + indices, values, shape=[2, 2, 2], stop_gradient=False) + grad_values = [[2, 2], [3, 3]] + grad_values = paddle.to_tensor(grad_values, dtype='float32') + sparse_out_grad = paddle.sparse.sparse_coo_tensor( + grad_indices, grad_values, shape=[2, 2, 2]) + sparse_x.backward(sparse_out_grad) + correct_values_grad = [[0, 0], [3, 3]] + assert np.array_equal(correct_values_grad, + values.grad.numpy()) + def test_sparse_coo_tensor_sorted(self): with _test_eager_guard(): for device in devices: @@ -252,6 +281,16 @@ def test_sparse_coo_tensor_sorted(self): assert np.array_equal(values_sorted, sparse_x.values().numpy()) + # test the non-zero values is a vector + values = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]] + values = paddle.to_tensor(values, dtype='float32') + sparse_x = 
paddle.sparse.sparse_coo_tensor(indices, values) + values_sorted = [[5.0, 5.0], [1.0, 1.0]] + assert np.array_equal(indices_sorted, + sparse_x.indices().numpy()) + assert np.array_equal(values_sorted, + sparse_x.values().numpy()) + class TestCooError(unittest.TestCase): def test_small_shape(self): diff --git a/python/paddle/sparse/__init__.py b/python/paddle/sparse/__init__.py index 680d2d5109857..23ee0c5014aed 100644 --- a/python/paddle/sparse/__init__.py +++ b/python/paddle/sparse/__init__.py @@ -15,12 +15,12 @@ from .creation import sparse_coo_tensor from .creation import sparse_csr_tensor from .layer.activation import ReLU -from .layer.norm import BatchNorm1D +from .layer.norm import BatchNorm from .layer.conv import Conv3D from .layer.conv import SubmConv3D __all__ = [ 'sparse_coo_tensor', 'sparse_csr_tensor', 'ReLU', 'Conv3D', 'SubmConv3D', - 'BatchNorm1D' + 'BatchNorm' ] diff --git a/python/paddle/sparse/layer/__init__.py b/python/paddle/sparse/layer/__init__.py index f065e6c8bb3c1..ee32e5027b50f 100644 --- a/python/paddle/sparse/layer/__init__.py +++ b/python/paddle/sparse/layer/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from .activation import ReLU -from .norm import BatchNorm1D +from .norm import BatchNorm from .conv import Conv3D from .conv import SubmConv3D diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py index 63b0d3dca23e1..1cd8cd07f85af 100644 --- a/python/paddle/sparse/layer/norm.py +++ b/python/paddle/sparse/layer/norm.py @@ -112,10 +112,10 @@ def __init__(self, epsilon=1e-05, weight_attr=None, bias_attr=None, - data_format='NCL', + data_format='NDHWC', use_global_stats=None, name=None): - super(BatchNorm1D, self).__init__( + super(BatchNorm, self).__init__( num_features, momentum=momentum, epsilon=epsilon, @@ -125,18 +125,23 @@ def __init__(self, use_global_stats=use_global_stats, name=name) + def _check_data_format(self, input): + if input != "NDHWC": + raise ValueError('sparse BatchNorm only support layout of "NDHWC"') + def forward(self, input): values = input.values() - #out = super(BatchNorm1D, self).forward(values) self._check_data_format(self._data_format) - self._check_input_dim(values) + if len(values.shape) != 2: + raise ValueError('expected 2D input.values() (got {}D)'.format( + len(values.shape))) if self.training: warnings.warn( "When training, we now always track global mean and variance.") - out = paddle.nn.functional.batch_norm( + batch_norm_out = paddle.nn.functional.batch_norm( values, self._mean, self._variance, @@ -145,11 +150,11 @@ def forward(self, input): training=self.training, momentum=self._momentum, epsilon=self._epsilon, - data_format=self._data_format, + data_format='NC', use_global_stats=self._use_global_stats) return paddle.sparse.sparse_coo_tensor( input.indices(), - out, + batch_norm_out, shape=input.shape, stop_gradient=input.stop_gradient) From 5467b9c814a6c1f412b6aba8c949da5190ea7ee6 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 06:10:51 +0000 Subject: [PATCH 18/23] remove pool3d --- python/paddle/utils/code_gen/sparse_api.yaml | 9 --------- python/paddle/utils/code_gen/sparse_bw_api.yaml | 7 ------- 2 files changed, 16 deletions(-) diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index f1a2f427b2af0..100d7ad78319b 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -65,12 +65,3 @@ args : (Tensor x) output : Tensor(out@SparseCsrTensor) invoke : 
to_sparse_csr_impl(x) - -- api: max_pool3d - args : (Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) - output : Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor) - kernel : - func : sparse_maxpool - layout : x - intermediate : rulebook - backward : max_pool3d_grad diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml index e0e1675db398c..e3946cbf72bc2 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api.yaml +++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml @@ -32,13 +32,6 @@ output : Tensor(x_grad@DenseTensor) invoke : to_dense_impl(out_grad) -- backward_api : max_pool3d_grad - forward : max_pool3d (Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor) - args : (Tensor x, Tensor rulebook, Tensor out, Tensor out_grad, int[] kernel_sizes) - output : Tensor(x_grad@SparseCooTensor) - kernel : - func : sparse_maxpool_grad - - backward_api : sparse_relu_grad forward : sparse_relu(Tensor x) -> Tensor(out@SparseCooTensor) args : (Tensor x, Tensor out_grad) From 3a4572542a0d98fb24e7c5e69eb7024b87c80a88 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 08:19:33 +0000 Subject: [PATCH 19/23] remove pool --- ..._sparse_norm.py => test_sparse_norm_op.py} | 0 .../tests/unittests/test_sparse_pool_op.py | 36 ------------------- 2 files changed, 36 deletions(-) rename python/paddle/fluid/tests/unittests/{test_sparse_norm.py => test_sparse_norm_op.py} (100%) delete mode 100644 python/paddle/fluid/tests/unittests/test_sparse_pool_op.py diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py similarity index 100% rename from python/paddle/fluid/tests/unittests/test_sparse_norm.py rename to python/paddle/fluid/tests/unittests/test_sparse_norm_op.py diff --git a/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py b/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py deleted file mode 100644 index c8c8d0637eb05..0000000000000 --- a/python/paddle/fluid/tests/unittests/test_sparse_pool_op.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
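Setting aside the pool-op removal above: the batch-norm layer built up earlier in this series boils down to running an ordinary dense normalization over the [nnz, channels] values matrix and re-wrapping the result with the original indices. A rough sketch of that equivalence (paddle.sparse.BatchNorm is the layer added by these patches; the rest is the stock dense API, and the comparison is a sanity check rather than anything taken from the diff):

import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    channels = 3
    dense_x = paddle.randn((1, 4, 4, 4, channels))   # NDHWC input, random so nothing is zero
    sparse_x = dense_x.to_sparse_coo(4)              # 4 sparse dims, channels stay dense

    values = sparse_x.values()                       # shape [nnz, channels]
    ref = paddle.nn.BatchNorm1D(channels)(values)    # dense reference in 'NC' layout

    out = paddle.sparse.BatchNorm(channels)(sparse_x)
    # out keeps the indices of sparse_x; out.values() should match `ref` channel by channel.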
- -from __future__ import print_function -import unittest -import numpy as np -import paddle -import paddle.fluid.core as core -from paddle import _C_ops -from paddle.fluid.framework import _test_eager_guard - - -class TestSparseMaxPool3D(unittest.TestCase): - def test1(self): - with _test_eager_guard(): - dense_x = paddle.randn((1, 1, 4, 4, 1)) - print(dense_x) - sparse_x = dense_x.to_sparse_coo(4) - kernel_sizes = [1, 3, 3] - paddings = [0, 0, 0] - strides = [1, 1, 1] - dilations = [1, 1, 1] - out = _C_ops.final_state_sparse_max_pool3d( - sparse_x, kernel_sizes, paddings, dilations, strides) - print(out) From e20c1d9989335c3f6ed71b882fef1d38567e6200 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 11:59:03 +0000 Subject: [PATCH 20/23] fix examples --- python/paddle/sparse/layer/norm.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py index 1cd8cd07f85af..39b0028fe8ece 100644 --- a/python/paddle/sparse/layer/norm.py +++ b/python/paddle/sparse/layer/norm.py @@ -96,14 +96,15 @@ class BatchNorm(paddle.nn.BatchNorm1D): from paddle.fluid.framework import _test_eager_guard with _test_eager_guard(): - np.random.seed(123) - channels = 3 - x_data = np.random.random(size=(1, 6, 6, 6, channels)).astype('float32') - dense_x = paddle.to_tensor(x_data) - sparse_x = dense_x.to_sparse_coo(4) - batch_norm = paddle.sparse.BatchNorm(channels) - batch_norm_out = batch_norm(sparse_x) - print(batch_norm_out) + np.random.seed(123) + channels = 3 + x_data = np.random.random(size=(1, 6, 6, 6, channels)).astype('float32') + dense_x = paddle.to_tensor(x_data) + sparse_x = dense_x.to_sparse_coo(4) + batch_norm = paddle.sparse.BatchNorm(channels) + batch_norm_out = batch_norm(sparse_x) + print(batch_norm_out.shape) + # [1, 6, 6, 6, 3] """ def __init__(self, From b609c2b3cb409d8d0f8e953ea215ca5d0f8e06a6 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Wed, 20 Apr 2022 12:31:36 +0000 Subject: [PATCH 21/23] conv3d support bias --- .../tests/unittests/test_sparse_conv_op.py | 6 ++++-- python/paddle/sparse/functional/conv.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py index 42f628c8fb1fd..1677051ee9db4 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py @@ -31,19 +31,21 @@ def test_conv3d(self): paddings = [0, 0, 0] strides = [1, 1, 1] dilations = [1, 1, 1] + bias = [1] indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]] values = [1, 2, 3, 4] indices = paddle.to_tensor(indices, dtype='int32') values = paddle.to_tensor(values, dtype='float32') dense_shape = [1, 1, 3, 4, 1] - correct_out_values = [[4], [10]] + correct_out_values = [[5], [11]] sparse_input = core.eager.sparse_coo_tensor(indices, values, dense_shape, False) out = paddle.sparse.functional.conv3d( sparse_input, dense_kernel, - bias=None, + bias=paddle.to_tensor( + bias, dtype='float32'), stride=strides, padding=paddings, dilation=dilations, diff --git a/python/paddle/sparse/functional/conv.py b/python/paddle/sparse/functional/conv.py index d8c0e5c914ccb..42b7b49835cf0 100644 --- a/python/paddle/sparse/functional/conv.py +++ b/python/paddle/sparse/functional/conv.py @@ -16,6 +16,8 @@ from paddle import _C_ops, in_dynamic_mode from ...fluid.layers.utils import convert_to_list +from 
...fluid.layers.nn import elementwise_add +from .. import sparse_coo_tensor from paddle.nn.functional.conv import _update_padding_nd @@ -30,7 +32,6 @@ def _conv3d(x, data_format="NDHWC", name=None): assert in_dynamic_mode(), "Currently, only support dynamic mode" - assert bias == None, "Currently, sparse_conv3d does not support bias" assert groups == 1, "Currently, only support groups=1" dims = 3 @@ -61,8 +62,18 @@ def _conv3d(x, dilation = convert_to_list(dilation, dims, 'dilation') op_type = "conv3d" - return _C_ops.final_state_sparse_conv3d(x, weight, padding, dilation, - stride, groups, subm) + pre_bias = _C_ops.final_state_sparse_conv3d(x, weight, padding, dilation, + stride, groups, subm) + if bias is not None: + values = pre_bias.values() + add_bias = elementwise_add(values, bias, axis=1) + return sparse_coo_tensor( + pre_bias.indices(), + add_bias, + shape=pre_bias.shape, + stop_gradient=pre_bias.stop_gradient) + else: + return pre_bias def conv3d(x, From 422e8019d7b93a1b2cad1ce355f1e29a48d0a16b Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 21 Apr 2022 09:09:07 +0000 Subject: [PATCH 22/23] fix docs --- .../fluid/tests/unittests/test_sparse_norm_op.py | 16 ++++++++++++++++ python/paddle/sparse/layer/norm.py | 7 +++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py index 937f24c26f29d..3c3085ec8be69 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py @@ -69,3 +69,19 @@ def test_error_layout(self): sparse_batch_norm = paddle.sparse.BatchNorm( 3, data_format='NCDHW') sparse_batch_norm(sparse_x) + + def test2(self): + with _test_eager_guard(): + paddle.seed(123) + channels = 3 + x_data = paddle.randn((1, 6, 6, 6, channels)).astype('float32') + dense_x = paddle.to_tensor(x_data) + sparse_x = dense_x.to_sparse_coo(4) + batch_norm = paddle.sparse.BatchNorm(channels) + batch_norm_out = batch_norm(sparse_x) + print(batch_norm_out.shape) + # [1, 6, 6, 6, 3] + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/sparse/layer/norm.py b/python/paddle/sparse/layer/norm.py index 39b0028fe8ece..83b738a5dc354 100644 --- a/python/paddle/sparse/layer/norm.py +++ b/python/paddle/sparse/layer/norm.py @@ -66,8 +66,8 @@ class BatchNorm(paddle.nn.BatchNorm1D): Parameters: num_features(int): Indicate the number of channels of the input ``Tensor``. - epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. + epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm will create ParamAttr as weight_attr. If it is set to Fasle, the weight is not learnable. @@ -92,13 +92,12 @@ class BatchNorm(paddle.nn.BatchNorm1D): .. 
code-block:: python import paddle - import numpy as np from paddle.fluid.framework import _test_eager_guard with _test_eager_guard(): - np.random.seed(123) + paddle.seed(123) channels = 3 - x_data = np.random.random(size=(1, 6, 6, 6, channels)).astype('float32') + x_data = paddle.randn((1, 6, 6, 6, channels)).astype('float32') dense_x = paddle.to_tensor(x_data) sparse_x = dense_x.to_sparse_coo(4) batch_norm = paddle.sparse.BatchNorm(channels) From 56774d9d4760699a44db3a97996088d5a7aa2498 Mon Sep 17 00:00:00 2001 From: zkh2016 Date: Thu, 21 Apr 2022 10:22:47 +0000 Subject: [PATCH 23/23] fix examples --- python/paddle/sparse/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py index 0b3ee29f87bdf..2cfbb3144acc2 100644 --- a/python/paddle/sparse/creation.py +++ b/python/paddle/sparse/creation.py @@ -111,7 +111,7 @@ def sparse_coo_tensor(indices, with _test_eager_guard(): indices = [[0, 1, 2], [1, 2, 0]] values = [1.0, 2.0, 3.0] - dense_shape = [2, 3] + dense_shape = [3, 3] coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape) # print(coo) # Tensor(shape=[2, 3], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,