Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon No.60】refactor unary sparse ops and add sparse sqrt, tanh, sin #41356

Merged
merged 36 commits into from May 12, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
97ac270
refactor unary sparse ops and add relu
tiancaishaonvjituizi Apr 2, 2022
26f4662
add test
tiancaishaonvjituizi Apr 2, 2022
a7f3410
fix the bug in generated api code, tests are passed now
tiancaishaonvjituizi Apr 4, 2022
d4310af
Merge branch 'develop' into sparse_relu
tiancaishaonvjituizi Apr 20, 2022
7e5f102
update relu for new sparse api
tiancaishaonvjituizi Apr 20, 2022
71864fd
update test, implement api, fix sqrt grad
tiancaishaonvjituizi Apr 21, 2022
a99a5ba
manually register relu and relu_grad kernel to bypass the restriction
tiancaishaonvjituizi Apr 21, 2022
95aa0b3
polish sqrt docs
tiancaishaonvjituizi Apr 21, 2022
f706dea
reformat
tiancaishaonvjituizi Apr 21, 2022
d898df7
polish docs
tiancaishaonvjituizi Apr 21, 2022
b770f41
remove csr backward api
tiancaishaonvjituizi Apr 21, 2022
f92e8cd
fix test compile error
tiancaishaonvjituizi Apr 21, 2022
394ce5e
use allclose instead of array_equal
tiancaishaonvjituizi Apr 21, 2022
c577f46
move sqrt to math_kernel.cc, implement sin and tanh
tiancaishaonvjituizi Apr 21, 2022
3ad6fba
polish header file
tiancaishaonvjituizi Apr 21, 2022
56fc5da
reformat
tiancaishaonvjituizi Apr 21, 2022
1f18c59
refine
tiancaishaonvjituizi Apr 21, 2022
c606825
fix typo
tiancaishaonvjituizi Apr 22, 2022
5dd4507
fix typo
tiancaishaonvjituizi Apr 22, 2022
f59fa26
add test about error, reformat
tiancaishaonvjituizi Apr 23, 2022
dea61c7
fix test error
tiancaishaonvjituizi Apr 23, 2022
60c7359
fix format
tiancaishaonvjituizi Apr 23, 2022
ad8ceda
fix false positive warning in gcc>=9
tiancaishaonvjituizi Apr 26, 2022
178dd27
use more aggressive way
tiancaishaonvjituizi Apr 26, 2022
1ace46f
Merge remote-tracking branch 'origin/develop' into sparse_relu
tiancaishaonvjituizi Apr 26, 2022
7bb41d7
add api in paddle.sparse namespace
tiancaishaonvjituizi Apr 26, 2022
790cb0d
Merge remote-tracking branch 'tiancaishaonv/variant_fix_gcc9_fp_warni…
tiancaishaonvjituizi Apr 26, 2022
c44ac74
address reviews
tiancaishaonvjituizi Apr 27, 2022
d35e923
Merge remote-tracking branch 'origin/develop' into sparse_relu
tiancaishaonvjituizi Apr 27, 2022
b04ab6c
fix ci error
tiancaishaonvjituizi Apr 29, 2022
fa93d7d
rename to unary_kernel, update name
tiancaishaonvjituizi May 6, 2022
a6d2cd0
Merge remote-tracking branch 'origin/develop' into sparse_relu
tiancaishaonvjituizi May 6, 2022
67d14b4
remove unused files
tiancaishaonvjituizi May 6, 2022
268ac34
rename python files
tiancaishaonvjituizi May 6, 2022
39c9750
fix import path
tiancaishaonvjituizi May 7, 2022
06787c0
reformat
tiancaishaonvjituizi May 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/phi/kernels/activation_grad_kernel.h
Expand Up @@ -187,6 +187,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log1p);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

之前 dense tensor 的 SqrtGrad kernel 没有在头文件中声明 (Translation: previously, the dense-tensor SqrtGrad kernel was not declared in the header file.)


DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Round);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor);
Expand Down
57 changes: 4 additions & 53 deletions paddle/phi/kernels/sparse/activation_grad_kernel.cc
Expand Up @@ -13,58 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/sparse/activation_grad_kernel.h"
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const SparseCooTensor& out_grad,
SparseCooTensor* x_grad) {
DenseTensor non_zero_indices =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices());
DenseTensor non_zero_elements =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());
phi::Copy(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&non_zero_indices);
phi::ReluGradKernel<T, Context>(dev_ctx,
x.non_zero_elements(),
out_grad.non_zero_elements(),
&non_zero_elements);
x_grad->SetMember(non_zero_indices, non_zero_elements, x.dims(), true);
}

} // namespace sparse
} // namespace phi

// Register the COO sparse ReLU backward kernel for CPU (float/double);
// input 0 is tagged with the SPARSE_COO layout.
PD_REGISTER_KERNEL(sparse_relu_grad,
CPU,
ALL_LAYOUT,
phi::sparse::SparseReluGradKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/kernels/sparse/utils.h"

// GPU registration of the COO sparse ReLU backward kernel; additionally
// covers float16. Compiled only for CUDA/HIP builds.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(sparse_relu_grad,
GPU,
ALL_LAYOUT,
phi::sparse::SparseReluGradKernel,
float,
double,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#endif
DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sparse_relu_grad, ReluGradKernel)
DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sparse_sqrt_grad, SqrtGradKernel)
23 changes: 18 additions & 5 deletions paddle/phi/kernels/sparse/activation_grad_kernel.h
Expand Up @@ -15,15 +15,28 @@ limitations under the License. */
#pragma once

#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

namespace phi {
namespace sparse {

// Backward kernel for the COO sparse ReLU op: computes x_grad from x and
// out_grad, all in SPARSE_COO layout. (NOTE(review): this pre-macro
// declaration appears alongside the macro form in this diff revision.)
template <typename T, typename Context>
void SparseReluGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const SparseCooTensor& out_grad,
SparseCooTensor* x_grad);
// Declares the COO and CSR backward-kernel pair for activation `name`;
// e.g. DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(Relu) declares
// SparseCooReluGradKernel and SparseCsrReluGradKernel.
#define DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(name) \
template <typename T, typename Context> \
void SparseCoo##name##GradKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& out_grad, \
SparseCooTensor* x_grad); \
\
template <typename T, typename Context> \
void SparseCsr##name##GradKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& out_grad, \
SparseCsrTensor* x_grad);

DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(Relu)
DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(Sqrt)

// The helper macro is declaration-only; keep it out of the header's
// public surface.
#undef DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL

} // namespace sparse
} // namespace phi
52 changes: 3 additions & 49 deletions paddle/phi/kernels/sparse/activation_kernel.cc
Expand Up @@ -13,54 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/sparse/activation_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/sparse/utils.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
DenseTensor non_zero_indices =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices());
DenseTensor non_zero_elements =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());
phi::Copy(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&non_zero_indices);
phi::ReluKernel<T, Context>(
dev_ctx, x.non_zero_elements(), &non_zero_elements);
out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true);
}

} // namespace sparse
} // namespace phi

// Register the COO sparse ReLU forward kernel for CPU (float/double);
// input 0 is tagged with the SPARSE_COO layout.
PD_REGISTER_KERNEL(sparse_relu,
CPU,
ALL_LAYOUT,
phi::sparse::SparseReluKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}

// GPU registration additionally covers float16; CUDA/HIP builds only.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(sparse_relu,
GPU,
ALL_LAYOUT,
phi::sparse::SparseReluKernel,
float,
double,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#endif
DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sparse_relu, ReluKernel)
DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sparse_sqrt, SqrtKernel)
28 changes: 23 additions & 5 deletions paddle/phi/kernels/sparse/activation_kernel.h
Expand Up @@ -16,22 +16,40 @@ limitations under the License. */

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

namespace phi {
namespace sparse {

// Forward COO sparse ReLU declaration. (NOTE(review): this pre-macro
// declaration appears alongside the macro form in this diff revision.)
template <typename T, typename Context>
void SparseReluKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out);
// Declares the COO and CSR forward-kernel pair for activation `name`;
// e.g. DECLARE_SPARSE_ACTIVATION_KERNEL(Relu) declares
// SparseCooReluKernel and SparseCsrReluKernel.
#define DECLARE_SPARSE_ACTIVATION_KERNEL(name) \
template <typename T, typename Context> \
void SparseCoo##name##Kernel( \
const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out); \
\
template <typename T, typename Context> \
void SparseCsr##name##Kernel( \
const Context& dev_ctx, const SparseCsrTensor& x, SparseCsrTensor* out);

DECLARE_SPARSE_ACTIVATION_KERNEL(Relu)
DECLARE_SPARSE_ACTIVATION_KERNEL(Sqrt)

// Declaration-only helper; do not export from the header.
#undef DECLARE_SPARSE_ACTIVATION_KERNEL

// Convenience wrapper: applies element-wise ReLU to a COO sparse tensor
// and returns the result by value.
template <typename T, typename Context>
SparseCooTensor SparseRelu(const Context& dev_ctx, const SparseCooTensor& x) {
  // Seed the result with empty components; the kernel fills them in.
  DenseTensor out_indices;
  DenseTensor out_values;
  SparseCooTensor result(out_indices, out_values, x.dims());
  SparseCooReluKernel<T, Context>(dev_ctx, x, &result);
  return result;
}

// Convenience wrapper: applies element-wise sqrt to a COO sparse tensor
// and returns the result by value. sqrt(0) == 0, so operating on the
// stored (non-zero) elements only preserves the implicit zeros.
// Fix: stray GitHub review-UI text ("marked this conversation as
// resolved / Show resolved Hide resolved") was embedded inside the
// function body in the scraped source; it has been removed so the
// function parses again.
template <typename T, typename Context>
SparseCooTensor SparseSqrt(const Context& dev_ctx, const SparseCooTensor& x) {
  // Seed the result with empty components; the kernel fills them in.
  DenseTensor indices, values;
  SparseCooTensor coo(indices, values, x.dims());
  SparseCooSqrtKernel<T, Context>(dev_ctx, x, &coo);
  return coo;
}

Expand Down
170 changes: 170 additions & 0 deletions paddle/phi/kernels/sparse/utils.h
@@ -0,0 +1,170 @@
#pragma once

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

// Defines the COO and CSR sparse wrappers around an element-wise dense
// unary kernel `dense_kernel_func` (e.g. ReluKernel): the sparsity
// pattern tensors (indices / crows+cols) are copied from the input and
// the dense kernel is applied to the stored non-zero elements only.
// NOTE(review): this is only correct for unary ops with f(0) == 0 —
// confirm each op added here satisfies that.
// The macro opens/closes namespace phi::sparse itself so it can be
// invoked at file scope, consistent with PD_REGISTER_KERNEL.
// Fix: three GitHub review-comment threads were spliced into the middle
// of this macro in the scraped source, breaking it; the macro is
// reconstructed from the surviving code lines.
#define DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func)                    \
  namespace phi {                                                        \
  namespace sparse {                                                     \
                                                                         \
  template <typename T, typename Context>                                \
  void SparseCoo##dense_kernel_func(const Context& dev_ctx,              \
                                    const SparseCooTensor& x,            \
                                    SparseCooTensor* out) {              \
    DenseTensor non_zero_indices =                                       \
        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices());       \
    DenseTensor non_zero_elements =                                      \
        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());      \
    phi::Copy(dev_ctx,                                                   \
              x.non_zero_indices(),                                      \
              dev_ctx.GetPlace(),                                        \
              false,                                                     \
              &non_zero_indices);                                        \
    phi::dense_kernel_func<T, Context>(                                  \
        dev_ctx, x.non_zero_elements(), &non_zero_elements);             \
    out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
  }                                                                      \
                                                                         \
  template <typename T, typename Context>                                \
  void SparseCsr##dense_kernel_func(const Context& dev_ctx,              \
                                    const SparseCsrTensor& x,            \
                                    SparseCsrTensor* out) {              \
    DenseTensor non_zero_crows =                                         \
        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows());         \
    DenseTensor non_zero_cols =                                          \
        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols());          \
    DenseTensor non_zero_elements =                                      \
        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());      \
    phi::Copy(dev_ctx,                                                   \
              x.non_zero_crows(),                                        \
              dev_ctx.GetPlace(),                                        \
              false,                                                     \
              &non_zero_crows);                                          \
    phi::Copy(dev_ctx,                                                   \
              x.non_zero_cols(),                                         \
              dev_ctx.GetPlace(),                                        \
              false,                                                     \
              &non_zero_cols);                                           \
    phi::dense_kernel_func<T, Context>(                                  \
        dev_ctx, x.non_zero_elements(), &non_zero_elements);             \
    out->SetMember(                                                      \
        non_zero_crows, non_zero_cols, non_zero_elements, x.dims());     \
  }                                                                      \
  }                                                                      \
  }

// Defines the COO and CSR backward wrappers around an element-wise dense
// unary grad kernel `dense_kernel_func` (e.g. ReluGradKernel): the
// gradient inherits x's sparsity pattern, and the dense grad kernel runs
// on the stored elements of x and out_grad. Opens/closes namespace
// phi::sparse itself so it can be invoked at file scope.
#define DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
namespace phi { \
namespace sparse { \
\
template <typename T, typename Context> \
void SparseCoo##dense_kernel_func(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& out_grad, \
SparseCooTensor* x_grad) { \
DenseTensor non_zero_indices = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_indices(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_indices); \
phi::dense_kernel_func<T, Context>(dev_ctx, \
x.non_zero_elements(), \
out_grad.non_zero_elements(), \
&non_zero_elements); \
x_grad->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
} \
\
template <typename T, typename Context> \
void SparseCsr##dense_kernel_func(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& out_grad, \
SparseCsrTensor* out) { \
DenseTensor non_zero_crows = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows()); \
DenseTensor non_zero_cols = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_crows(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_crows); \
phi::Copy(dev_ctx, \
x.non_zero_cols(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_cols); \
phi::dense_kernel_func<T, Context>(dev_ctx, \
x.non_zero_elements(), \
out_grad.non_zero_elements(), \
&non_zero_elements); \
out->SetMember( \
non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
} \
} \
}

// Registers the CPU COO and CSR kernels (float/double) under the names
// sparse_coo_<kernel_name> / sparse_csr_<kernel_name>, tagging input 0
// with the matching sparse layout.
#define REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
CPU, \
ALL_LAYOUT, \
phi::sparse::SparseCoo##dense_kernel_func, \
float, \
double) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
} \
PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
CPU, \
ALL_LAYOUT, \
phi::sparse::SparseCsr##dense_kernel_func, \
float, \
double) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
}

// GPU counterpart of REGISTER_CPU_SPARSE_UNARY_KERNEL; additionally
// registers float16. Expands to nothing in CPU-only builds.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#define REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
GPU, \
ALL_LAYOUT, \
phi::sparse::SparseCoo##dense_kernel_func, \
float, \
double, \
phi::dtype::float16) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
} \
\
PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
GPU, \
ALL_LAYOUT, \
phi::sparse::SparseCsr##dense_kernel_func, \
float, \
double, \
phi::dtype::float16) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
}
#else
// This macro definition is empty when GPU is disabled
#define REGISTER_GPU_SPARSE_UNARY_KERNEL(sparse_kernel_name, dense_kernel_func)
#endif

// Expands to both CPU and GPU registrations (the GPU variant is a no-op
// when built without CUDA/HIP).
#define REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)

// One-stop macro: defines the COO/CSR forward kernels and registers them.
#define DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(kernel_name, \
dense_kernel_func) \
DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func) \
REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)

// One-stop macro for the backward kernels. NOTE(review): callers pass
// kernel_name values such as `sparse_relu_grad`, which the register
// macro prefixes again with `sparse_coo_` / `sparse_csr_`, yielding
// registered names like `sparse_coo_sparse_relu_grad` — confirm this
// double "sparse_" is the intended kernel name.
#define DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(kernel_name, \
dense_kernel_func) \
DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)