Skip to content

Commit

Permalink
refactor unary sparse ops and add relu
Browse files Browse the repository at this point in the history
  • Loading branch information
tiancaishaonvjituizi committed Apr 2, 2022
1 parent 50714d5 commit 97ac270
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 118 deletions.
1 change: 1 addition & 0 deletions paddle/phi/kernels/activation_grad_kernel.h
Expand Up @@ -187,6 +187,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log1p);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt);

DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Round);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor);
Expand Down
57 changes: 4 additions & 53 deletions paddle/phi/kernels/sparse/activation_grad_kernel.cc
Expand Up @@ -13,58 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/sparse/activation_grad_kernel.h"
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const SparseCooTensor& out_grad,
SparseCooTensor* x_grad) {
DenseTensor non_zero_indices =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices());
DenseTensor non_zero_elements =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());
phi::Copy(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&non_zero_indices);
phi::ReluGradKernel<T, Context>(dev_ctx,
x.non_zero_elements(),
out_grad.non_zero_elements(),
&non_zero_elements);
x_grad->SetMember(non_zero_indices, non_zero_elements, x.dims(), true);
}

} // namespace sparse
} // namespace phi

PD_REGISTER_KERNEL(sparse_relu_grad,
CPU,
ALL_LAYOUT,
phi::sparse::SparseReluGradKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/kernels/sparse/utils.h"

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(sparse_relu_grad,
GPU,
ALL_LAYOUT,
phi::sparse::SparseReluGradKernel,
float,
double,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#endif
DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sparse_relu_grad, ReluGradKernel)
DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sparse_sqrt_grad, SqrtGradKernel)
23 changes: 18 additions & 5 deletions paddle/phi/kernels/sparse/activation_grad_kernel.h
Expand Up @@ -15,15 +15,28 @@ limitations under the License. */
#pragma once

#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const SparseCooTensor& out_grad,
SparseCooTensor* x_grad);
#define DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(name) \
template <typename T, typename Context> \
void SparseCoo##name##GradKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& out_grad, \
SparseCooTensor* x_grad); \
\
template <typename T, typename Context> \
void SparseCsr##name##GradKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& out_grad, \
SparseCsrTensor* x_grad);

DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(Relu)
DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL(Sqrt)

#undef DECLARE_SPARSE_ACTIVATION_GRAD_KERNEL

} // namespace sparse
} // namespace phi
52 changes: 3 additions & 49 deletions paddle/phi/kernels/sparse/activation_kernel.cc
Expand Up @@ -13,54 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/sparse/activation_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/sparse/utils.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
DenseTensor non_zero_indices =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices());
DenseTensor non_zero_elements =
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements());
phi::Copy(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&non_zero_indices);
phi::ReluKernel<T, Context>(
dev_ctx, x.non_zero_elements(), &non_zero_elements);
out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true);
}

} // namespace sparse
} // namespace phi

PD_REGISTER_KERNEL(sparse_relu,
CPU,
ALL_LAYOUT,
phi::sparse::SparseReluKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(sparse_relu,
GPU,
ALL_LAYOUT,
phi::sparse::SparseReluKernel,
float,
double,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#endif
DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sparse_relu, ReluKernel)
DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sparse_sqrt, SqrtKernel)
28 changes: 23 additions & 5 deletions paddle/phi/kernels/sparse/activation_kernel.h
Expand Up @@ -16,22 +16,40 @@ limitations under the License. */

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void SparseReluKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out);
#define DECLARE_SPARSE_ACTIVATION_KERNEL(name) \
template <typename T, typename Context> \
void SparseCoo##name##Kernel( \
const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out); \
\
template <typename T, typename Context> \
void SparseCsr##name##Kernel( \
const Context& dev_ctx, const SparseCsrTensor& x, SparseCsrTensor* out);

DECLARE_SPARSE_ACTIVATION_KERNEL(Relu)
DECLARE_SPARSE_ACTIVATION_KERNEL(Sqrt)

#undef DECLARE_SPARSE_ACTIVATION_KERNEL

template <typename T, typename Context>
SparseCooTensor SparseRelu(const Context& dev_ctx, const SparseCooTensor& x) {
DenseTensor indices, values;
SparseCooTensor coo(indices, values, x.dims());
SparseReluKernel<T, Context>(dev_ctx, x, &coo);
SparseCooReluKernel<T, Context>(dev_ctx, x, &coo);
return coo;
}

template <typename T, typename Context>
SparseCooTensor SparseSqrt(const Context& dev_ctx, const SparseCooTensor& x) {
DenseTensor indices, values;
SparseCooTensor coo(indices, values, x.dims());
SparseCooSqrtKernel<T, Context>(dev_ctx, x, &coo);
return coo;
}

Expand Down
170 changes: 170 additions & 0 deletions paddle/phi/kernels/sparse/utils.h
@@ -0,0 +1,170 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"

#define DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func) \
namespace phi { \
namespace sparse { \
\
template <typename T, typename Context> \
void SparseCoo##dense_kernel_func(const Context& dev_ctx, \
const SparseCooTensor& x, \
SparseCooTensor* out) { \
DenseTensor non_zero_indices = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_indices(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_indices); \
phi::dense_kernel_func<T, Context>( \
dev_ctx, x.non_zero_elements(), &non_zero_elements); \
out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
} \
\
template <typename T, typename Context> \
void SparseCsr##dense_kernel_func(const Context& dev_ctx, \
const SparseCsrTensor& x, \
SparseCsrTensor* out) { \
DenseTensor non_zero_crows = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows()); \
DenseTensor non_zero_cols = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_crows(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_crows); \
phi::Copy(dev_ctx, \
x.non_zero_cols(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_cols); \
phi::dense_kernel_func<T, Context>( \
dev_ctx, x.non_zero_elements(), &non_zero_elements); \
out->SetMember( \
non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
} \
} \
}

#define DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
namespace phi { \
namespace sparse { \
\
template <typename T, typename Context> \
void SparseCoo##dense_kernel_func(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& out_grad, \
SparseCooTensor* x_grad) { \
DenseTensor non_zero_indices = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_indices(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_indices); \
phi::dense_kernel_func<T, Context>(dev_ctx, \
x.non_zero_elements(), \
out_grad.non_zero_elements(), \
&non_zero_elements); \
x_grad->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
} \
\
template <typename T, typename Context> \
void SparseCsr##dense_kernel_func(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& out_grad, \
SparseCsrTensor* out) { \
DenseTensor non_zero_crows = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows()); \
DenseTensor non_zero_cols = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols()); \
DenseTensor non_zero_elements = \
phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
phi::Copy(dev_ctx, \
x.non_zero_crows(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_crows); \
phi::Copy(dev_ctx, \
x.non_zero_cols(), \
dev_ctx.GetPlace(), \
false, \
&non_zero_cols); \
phi::dense_kernel_func<T, Context>(dev_ctx, \
x.non_zero_elements(), \
out_grad.non_zero_elements(), \
&non_zero_elements); \
out->SetMember( \
non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
} \
} \
}

#define REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
CPU, \
ALL_LAYOUT, \
phi::sparse::SparseCoo##dense_kernel_func, \
float, \
double) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
} \
PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
CPU, \
ALL_LAYOUT, \
phi::sparse::SparseCsr##dense_kernel_func, \
float, \
double) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#define REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
GPU, \
ALL_LAYOUT, \
phi::sparse::SparseCoo##dense_kernel_func, \
float, \
double, \
phi::dtype::float16) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
} \
\
PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
GPU, \
ALL_LAYOUT, \
phi::sparse::SparseCsr##dense_kernel_func, \
float, \
double, \
phi::dtype::float16) { \
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
}
#else
// This macro definition is empty when GPU is disabled
#define REGISTER_GPU_SPARSE_UNARY_KERNEL(sparse_kernel_name, dense_kernel_func)
#endif

#define REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)

#define DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(kernel_name, \
dense_kernel_func) \
DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func) \
REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)

#define DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(kernel_name, \
dense_kernel_func) \
DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)

0 comments on commit 97ac270

Please sign in to comment.