Merge branch 'develop' into multilabelmarginloss
yangguohao committed Jun 1, 2022
2 parents 6c60290 + 126248a commit 4b7aee7
Showing 243 changed files with 12,573 additions and 2,208 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: v1.0.1
rev: v1.1.14
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
3 changes: 2 additions & 1 deletion cmake/cblas.cmake
@@ -52,6 +52,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
set(OPENBLAS_INCLUDE_SEARCH_PATHS
${OPENBLAS_ROOT}/include
/usr/include
/usr/include/lapacke
/usr/include/openblas
/usr/local/opt/openblas/include)
set(OPENBLAS_LIB_SEARCH_PATHS
@@ -75,7 +76,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file})
string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp})

if (${ver} VERSION_GREATER_EQUAL "0.3.7")
if (${ver} VERSION_GREATER_EQUAL "0.3.5")
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
4 changes: 2 additions & 2 deletions cmake/external/xpu.cmake
@@ -9,15 +9,15 @@ SET(XPU_RT_LIB_NAME "libxpurt.so")

if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220520")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220601")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()

# ubuntu and centos: use the output from the XDNN API team
if(NOT DEFINED XPU_XDNN_BASE_URL)
SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220520")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220601")
else()
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
276 changes: 203 additions & 73 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc

Large diffs are not rendered by default.

@@ -30,7 +30,9 @@
"divide_double_grad", "log_double_grad", "elu_double_grad",
"leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad",
"square_double_grad", "celu_double_grad", "pad_double_grad",
"pad3d_double_grad", "squeeze_double_grad", "unsqueeze_double_grad"
"pad3d_double_grad", "squeeze_double_grad", "unsqueeze_double_grad",
"instance_norm_double_grad", "conv3d_double_grad",
"depthwise_conv2d_grad_grad"
])

# For API dispatch used at python-level
@@ -1404,7 +1404,7 @@ def GenerateNodeDefinition(self, next_grad_node_creation_str,
const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});
for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{
returns[i].resize(out_metas[i].size());
out_metas[i].size() == 0 ? returns[i].resize(1) : returns[i].resize(out_metas[i].size());
}}
"""

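The hunk above changes the code emitted for backward node definitions so that a backward output slot whose `out_metas` entry is empty still gets one placeholder gradient instead of an empty vector. A minimal standalone sketch of that guard, using toy stand-in types rather than Paddle's real generated code:

```cpp
#include <iostream>
#include <vector>

// Toy stand-ins (illustration only): Tensor for paddle::experimental::Tensor,
// SlotMeta for the per-slot list of GradSlotMeta entries.
using Tensor = int;
using SlotMeta = std::vector<int>;

int main() {
  const int slot_num_bwd_outputs = 2;
  std::vector<SlotMeta> out_metas = {{1, 2, 3}, {}};  // second slot has no metas recorded
  std::vector<std::vector<Tensor>> returns(slot_num_bwd_outputs);

  for (int i = 0; i < slot_num_bwd_outputs; ++i) {
    // Mirrors the changed template: an empty meta slot still yields one element,
    // so returns[i][0] exists for every slot.
    out_metas[i].size() == 0 ? returns[i].resize(1)
                             : returns[i].resize(out_metas[i].size());
  }

  for (int i = 0; i < slot_num_bwd_outputs; ++i)
    std::cout << "slot " << i << " -> " << returns[i].size() << " grads\n";
  return 0;
}
```

The resize-to-1 branch keeps `returns[i][0]` indexable even when no meta was recorded for that slot.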
4 changes: 2 additions & 2 deletions paddle/fluid/eager/tests/task_tests/eager_utils_test.cc
@@ -250,7 +250,7 @@ TEST(EagerUtils, GetGradAccumulationNode) {
ASSERT_ANY_THROW(egr::EagerUtils::GetGradAccumulationNode(t0));
}

TEST(EagerUtils, FillZeroForEmptyGradInputs) {
TEST(EagerUtils, FillZeroForEmptyOptionalGradInput) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
grads = {std::vector<paddle::experimental::Tensor>(1)};
@@ -263,7 +263,7 @@ TEST(EagerUtils, FillZeroForEmptyGradInputs) {
slot_metas[0][0].SetTensorMeta(tensor_meta);
slot_metas[0][0].SetPlace(phi::CPUPlace());

EagerUtils::FillZeroForEmptyGradInputs(&grads, slot_metas);
EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0], slot_metas[0]);
eager_test::CompareTensorWithValue<float>(grads[0][0], 0.0);
}

4 changes: 2 additions & 2 deletions paddle/fluid/eager/to_static/run_program_op_node.h
@@ -379,8 +379,8 @@ class GradNodeRunProgram : public egr::GradNodeBase {
"The hooked_grads.size() of RunProgramGradOp should "
"be equal to 1."));

egr::EagerUtils::FillZeroForEmptyGradInputs(&hooked_grads,
this->InputMeta());
egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&hooked_grads[0],
this->InputMeta()[0]);
VLOG(3) << "hooked_grads[0].size() : " << hooked_grads[0].size();
std::vector<paddle::experimental::Tensor> x_grad;
std::vector<paddle::experimental::Tensor> params_grad;
28 changes: 9 additions & 19 deletions paddle/fluid/eager/utils.cc
@@ -467,26 +467,16 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode(
}
}

void EagerUtils::FillZeroForEmptyGradInputs(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* in_grads,
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
grad_in_metas) {
void EagerUtils::FillZeroForEmptyOptionalGradInput(
std::vector<paddle::experimental::Tensor>* in_grads,
const std::vector<GradSlotMeta>& grad_in_metas) {
for (size_t i = 0; i < in_grads->size(); i++) {
for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
paddle::experimental::Tensor& grad = (*in_grads)[i][j];
if (!grad.initialized()) {
const GradSlotMeta& grad_in_meta = grad_in_metas[i][j];
PADDLE_ENFORCE(
grad_in_meta.HasTensorMeta(),
paddle::platform::errors::Fatal(
"Unable to fill empty grad inputs due to empty GradSlotMeta"));
const auto& tensor_meta = grad_in_meta.GetTensorMeta();
auto tensor_with_zero = paddle::experimental::full(
phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype,
grad_in_meta.GetPlace());
grad.set_impl(tensor_with_zero.impl());
}
paddle::experimental::Tensor& grad = (*in_grads)[i];
if (!grad.initialized() && grad_in_metas[i].HasTensorMeta()) {
auto tensor_with_zero = paddle::experimental::full(
phi::vectorize(grad_in_metas[i].GetTensorMeta().dims), 0.0,
grad_in_metas[i].GetTensorMeta().dtype, grad_in_metas[i].GetPlace());
grad.set_impl(tensor_with_zero.impl());
}
}
}
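The utils.cc hunk above replaces the nested, all-slots `FillZeroForEmptyGradInputs` loop with a per-slot `FillZeroForEmptyOptionalGradInput` that skips, rather than throws on, entries without tensor meta. A self-contained sketch of that per-slot pattern, with toy tensor/meta types standing in for `paddle::experimental::Tensor` and `GradSlotMeta`:

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

// Toy stand-ins, for illustration only.
struct Meta {
  bool has_meta;
  std::vector<int> dims;
};
struct Tensor {
  std::optional<std::vector<float>> data;  // "initialized" iff data holds a value
  bool initialized() const { return data.has_value(); }
};

// Per-slot variant: operates on one flat vector of grads, not on the whole
// small_vector-of-slots as the old FillZeroForEmptyGradInputs did.
void FillZeroForEmptyOptionalGradInput(std::vector<Tensor>* in_grads,
                                       const std::vector<Meta>& metas) {
  for (size_t i = 0; i < in_grads->size(); ++i) {
    Tensor& grad = (*in_grads)[i];
    if (!grad.initialized() && metas[i].has_meta) {
      size_t numel = 1;
      for (int d : metas[i].dims) numel *= static_cast<size_t>(d);
      // Zero-filled tensor, like the real experimental::full(dims, 0.0, dtype, place).
      grad.data = std::vector<float>(numel, 0.0f);
    }
    // No meta: the optional grad input is simply left uninitialized.
  }
}

int main() {
  std::vector<Tensor> grads(2);
  std::vector<Meta> metas = {{true, {2, 3}}, {false, {}}};
  FillZeroForEmptyOptionalGradInput(&grads, metas);
  std::cout << grads[0].initialized() << " " << grads[1].initialized() << "\n";  // 1 0
  return 0;
}
```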
8 changes: 3 additions & 5 deletions paddle/fluid/eager/utils.h
@@ -236,11 +236,9 @@ class EagerUtils {
/**
* Fill Zero
* **/
static void FillZeroForEmptyGradInputs(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* out_grads,
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& grad_out_metas);
static void FillZeroForEmptyOptionalGradInput(
std::vector<paddle::experimental::Tensor>* in_grads,
const std::vector<GradSlotMeta>& grad_in_metas);
static void FillZeroForEmptyGradInput(paddle::experimental::Tensor* in_grad,
const GradSlotMeta& grad_in_meta);
static void FillZeroForEmptyOptionalGradInput(
1 change: 1 addition & 0 deletions paddle/fluid/framework/distributed_strategy.proto
@@ -120,6 +120,7 @@ message BuildStrategy {
optional bool fix_op_run_order = 13 [ default = false ];
optional bool allow_cuda_graph_capture = 14 [ default = false ];
optional int32 reduce_strategy = 15 [ default = 0 ];
optional bool fuse_gemm_epilogue = 16 [ default = false ];
}

message ExecutionStrategy {
20 changes: 13 additions & 7 deletions paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc
@@ -30,13 +30,19 @@ void FillConstData(LoDTensor* out_t, T value) {
void DeleteFillConstantOpPass::ApplyImpl(ir::Graph* graph) const {
FusePassBase::Init("delete_fill_constant_op_pass", graph);
GraphPatternDetector detector;
auto fill_constant_op = detector.mutable_pattern()
->NewNode("fill_constant")
->assert_is_op("fill_constant")
->assert_is_not_op_input("ValueTensor")
->assert_is_not_op_input("str_value")
->assert_is_not_op_input("ShapeTensor")
->assert_is_not_op_input("ShapeTensorList");
auto fill_constant_op =
detector.mutable_pattern()
->NewNode("fill_constant")
->assert_is_op("fill_constant")
->assert_is_not_op_input("ValueTensor")
->assert_is_not_op_input("str_value")
->assert_is_not_op_input("ShapeTensor")
->assert_is_not_op_input("ShapeTensorList")
->assert_more([&](Node* node) {
return node->Op()
->GetAttrIfExists<std::vector<int64_t>>("shape")
.size() == 1;
});
auto fill_constant_out =
detector.mutable_pattern()
->NewNode("fill_constant_out")
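The delete_fill_constant_op_pass hunk adds an `assert_more` predicate so the pattern only matches fill_constant ops whose `shape` attribute has exactly one element. A toy sketch of that predicate, with a hypothetical `Op` type in place of Paddle's `ir::Node`/`OpDesc` API:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical op stand-in (illustration only; not Paddle's real descriptor class).
struct Op {
  std::map<std::string, std::vector<int64_t>> int_vec_attrs;
  std::vector<int64_t> GetAttrIfExists(const std::string& name) const {
    auto it = int_vec_attrs.find(name);
    return it == int_vec_attrs.end() ? std::vector<int64_t>{} : it->second;
  }
};

int main() {
  // The added assert_more predicate in spirit: only match rank-1 shape attributes.
  std::function<bool(const Op&)> only_rank1_shape = [](const Op& op) {
    return op.GetAttrIfExists("shape").size() == 1;
  };

  Op vector_like{{{"shape", {64}}}};       // shape = [64]      -> pass matches
  Op matrix_like{{{"shape", {64, 128}}}};  // shape = [64, 128] -> pass skips it
  std::cout << only_rank1_shape(vector_like) << " "
            << only_rank1_shape(matrix_like) << "\n";  // 1 0
  return 0;
}
```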
29 changes: 25 additions & 4 deletions paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc
@@ -22,6 +22,12 @@ namespace paddle {
namespace framework {
namespace ir {

static void GetTransposeAttrsFromOp(const OpDesc &op, bool *trans_x,
bool *trans_y) {
*trans_x = BOOST_GET_CONST(bool, op.GetAttr("trans_x"));
*trans_y = BOOST_GET_CONST(bool, op.GetAttr("trans_y"));
}

void FuseGemmEpiloguePass::ApplyImpl(ir::Graph *graph) const {
EpiloguePassActivationCache cache;

@@ -75,6 +81,9 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearFwd(ir::Graph *graph,
if (!IsGemmFromLinear_(matmul_x_shape, matmul_w_shape, matmul_op_desc))
return;

bool trans_x, trans_y;
GetTransposeAttrsFromOp(*matmul_op_desc, &trans_x, &trans_y);

OpDesc fused_gemm_epilogue_op_desc(matmul_op->Op()->Block());
std::string activation = "none";
fused_gemm_epilogue_op_desc.SetType("fused_gemm_epilogue");
@@ -85,6 +94,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearFwd(ir::Graph *graph,
fused_gemm_epilogue_op_desc.SetAttr("activation", activation);
fused_gemm_epilogue_op_desc.SetAttr("op_role",
matmul_op_desc->GetAttr("op_role"));
fused_gemm_epilogue_op_desc.SetAttr("trans_x", trans_x);
fused_gemm_epilogue_op_desc.SetAttr("trans_y", trans_y);
auto gemm_epilogue_node = g->CreateOpNode(&fused_gemm_epilogue_op_desc);

IR_NODE_LINK_TO(subgraph.at(x), gemm_epilogue_node);
@@ -154,6 +165,9 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActFwd(

auto activation = act_op->Op()->Type();

bool trans_x, trans_y;
GetTransposeAttrsFromOp(*matmul_op_desc, &trans_x, &trans_y);

OpDesc fused_gemm_epilogue_op_desc(matmul_op->Op()->Block());
fused_gemm_epilogue_op_desc.SetType("fused_gemm_epilogue");
fused_gemm_epilogue_op_desc.SetInput("X", {subgraph.at(x)->Name()});
@@ -163,6 +177,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActFwd(
fused_gemm_epilogue_op_desc.SetAttr("activation", activation);
fused_gemm_epilogue_op_desc.SetAttr("op_role",
matmul_op_desc->GetAttr("op_role"));
fused_gemm_epilogue_op_desc.SetAttr("trans_x", trans_x);
fused_gemm_epilogue_op_desc.SetAttr("trans_y", trans_y);

auto gemm_epilogue_node = g->CreateOpNode(&fused_gemm_epilogue_op_desc);

@@ -274,6 +290,9 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearBwd(ir::Graph *graph,
matmul_grad_op_desc))
return;

bool trans_x, trans_y;
GetTransposeAttrsFromOp(*matmul_grad_op_desc, &trans_x, &trans_y);

OpDesc fused_gemm_epilogue_grad_op_desc(ele_add_grad_op->Op()->Block());
std::string activation_grad = "none";
fused_gemm_epilogue_grad_op_desc.SetType("fused_gemm_epilogue_grad");
@@ -292,6 +311,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearBwd(ir::Graph *graph,
activation_grad);
fused_gemm_epilogue_grad_op_desc.SetAttr(
"op_role", matmul_grad_op_desc->GetAttr("op_role"));
fused_gemm_epilogue_grad_op_desc.SetAttr("trans_x", trans_x);
fused_gemm_epilogue_grad_op_desc.SetAttr("trans_y", trans_y);

auto gemm_epilogue_grad_node =
g->CreateOpNode(&fused_gemm_epilogue_grad_op_desc);
@@ -394,6 +415,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActBwd(

auto activation_grad = act_grad_op->Op()->Type();

bool trans_x, trans_y;
GetTransposeAttrsFromOp(*matmul_grad_op_desc, &trans_x, &trans_y);
OpDesc fused_gemm_epilogue_grad_op_desc(ele_add_grad_op->Op()->Block());
fused_gemm_epilogue_grad_op_desc.SetType("fused_gemm_epilogue_grad");
fused_gemm_epilogue_grad_op_desc.SetInput("DOut",
@@ -410,6 +433,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActBwd(
activation_grad);
fused_gemm_epilogue_grad_op_desc.SetAttr(
"op_role", matmul_grad_op_desc->GetAttr("op_role"));
fused_gemm_epilogue_grad_op_desc.SetAttr("trans_x", trans_x);
fused_gemm_epilogue_grad_op_desc.SetAttr("trans_y", trans_y);

auto gemm_epilogue_grad_node =
g->CreateOpNode(&fused_gemm_epilogue_grad_op_desc);
@@ -456,10 +481,6 @@ bool FuseGemmEpiloguePass::IsGemmFromLinear_(
if (tmp_vec.size() > 0) return false;
}
}
if (BOOST_GET_CONST(bool, matmul_v2_op->GetAttr("trans_x")) ||
BOOST_GET_CONST(bool, matmul_v2_op->GetAttr("trans_y")))
return false;

return true;
}

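Taken together, the fuse_gemm_epilogue_pass hunks stop rejecting transposed matmul_v2 ops in `IsGemmFromLinear_` and instead read `trans_x`/`trans_y` and forward them onto the fused op. A toy sketch of that flow, with a hypothetical `OpDesc` stand-in rather than Paddle's real descriptor class:

```cpp
#include <iostream>
#include <map>
#include <string>

// Hypothetical OpDesc stand-in (illustration only).
struct OpDesc {
  std::map<std::string, bool> bool_attrs;
  bool GetAttr(const std::string& n) const { return bool_attrs.at(n); }
  void SetAttr(const std::string& n, bool v) { bool_attrs[n] = v; }
};

// Mirrors the new helper: read matmul_v2's transpose flags...
static void GetTransposeAttrsFromOp(const OpDesc& op, bool* trans_x, bool* trans_y) {
  *trans_x = op.GetAttr("trans_x");
  *trans_y = op.GetAttr("trans_y");
}

int main() {
  OpDesc matmul_v2{{{"trans_x", false}, {"trans_y", true}}};
  OpDesc fused_gemm_epilogue;

  // ...and forward them onto the fused op instead of bailing out of the fusion,
  // which is what the removed trans_x/trans_y check in IsGemmFromLinear_ used to do.
  bool trans_x, trans_y;
  GetTransposeAttrsFromOp(matmul_v2, &trans_x, &trans_y);
  fused_gemm_epilogue.SetAttr("trans_x", trans_x);
  fused_gemm_epilogue.SetAttr("trans_y", trans_y);

  std::cout << fused_gemm_epilogue.GetAttr("trans_x") << " "
            << fused_gemm_epilogue.GetAttr("trans_y") << "\n";  // 0 1
  return 0;
}
```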
8 changes: 0 additions & 8 deletions paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
@@ -489,14 +489,6 @@ void QuantDequantMkldnnPass::UpdateActivations(ir::Graph* graph) const {
std::string activation;
if (op_desc->GetAttrIfExists<bool>("fuse_relu")) {
activation = "relu";
} else if (op_desc->GetAttrIfExists<bool>("fuse_brelu")) {
activation = "relu6";
float alpha = 6.0;
if (op_desc->HasAttr("fuse_brelu_threshold")) {
alpha = BOOST_GET_CONST(float,
op_desc->GetAttr("fuse_brelu_threshold"));
}
op_node->Op()->SetAttr("fuse_alpha", alpha);
}
op_node->Op()->SetAttr("fuse_activation", activation);
}
4 changes: 4 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
@@ -91,6 +91,10 @@ void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(scale_out, scale_out, scale_matmul_pattern);
GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, scale_matmul_pattern);

if ((scale_out->outputs).size() != 1) {
return;
}

if (scale_op->Op()->GetAttrIfExists<float>("bias") == 0.0) {
auto matmul_alpha = matmul_op->Op()->GetAttrIfExists<float>("alpha");
auto scale_scale = scale_op->Op()->GetAttrIfExists<float>("scale");
17 changes: 17 additions & 0 deletions paddle/fluid/framework/operator.cc
@@ -1116,6 +1116,21 @@ class RuntimeInferShapeContext : public InferShapeContext {
const RuntimeContext& ctx_;
};

struct OperatorWithKernel::CacheImpl {
explicit CacheImpl(phi::KernelContext* kernel_ctx,
RuntimeInferShapeContext* infer_shape_ctx)
: kernel_ctx_(kernel_ctx), infer_shape_ctx_(infer_shape_ctx) {}

phi::KernelContext* getKernelContext() { return kernel_ctx_.get(); }
RuntimeInferShapeContext* getRuntimeInferShapeContext() {
return infer_shape_ctx_.get();
}

private:
std::unique_ptr<phi::KernelContext> kernel_ctx_;
std::unique_ptr<RuntimeInferShapeContext> infer_shape_ctx_;
};

static void CheckTensorNANOrInf(const std::string& op_type,
const std::string& name,
const framework::Tensor& tensor) {
@@ -2323,6 +2338,8 @@ Scope* OperatorWithKernel::PreparePhiData(
Tensor out;
framework::TensorCopySync(*tensor_in, expected_place, &out);
SetTensorToVariable(*var, out, trans_var);

need_prepare_phi_data_ = true;
}
}

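operator.cc introduces `OperatorWithKernel::CacheImpl`, which owns a `phi::KernelContext` and a `RuntimeInferShapeContext` and hands out raw pointers so later runs can reuse them; operator.h below adds the matching forward declaration and `impl_` member. A compilable sketch of the same ownership pattern with toy context types (illustration only):

```cpp
#include <iostream>
#include <memory>
#include <string>

// Toy stand-ins for phi::KernelContext / RuntimeInferShapeContext.
struct KernelContext { std::string tag = "kernel_ctx"; };
struct InferShapeContext { std::string tag = "infer_shape_ctx"; };

// Same shape as the new CacheImpl: take ownership of the two contexts built on
// the first run and expose raw pointers for reuse on later runs.
class CacheImpl {
 public:
  CacheImpl(KernelContext* kernel_ctx, InferShapeContext* infer_shape_ctx)
      : kernel_ctx_(kernel_ctx), infer_shape_ctx_(infer_shape_ctx) {}

  KernelContext* getKernelContext() { return kernel_ctx_.get(); }
  InferShapeContext* getRuntimeInferShapeContext() { return infer_shape_ctx_.get(); }

 private:
  std::unique_ptr<KernelContext> kernel_ctx_;
  std::unique_ptr<InferShapeContext> infer_shape_ctx_;
};

int main() {
  CacheImpl cache(new KernelContext, new InferShapeContext);
  std::cout << cache.getKernelContext()->tag << " "
            << cache.getRuntimeInferShapeContext()->tag << "\n";
  return 0;
}
```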
4 changes: 4 additions & 0 deletions paddle/fluid/framework/operator.h
@@ -698,6 +698,7 @@ class OperatorWithKernel : public OperatorBase {
mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
mutable const Scope* pre_scope_ = nullptr;
mutable bool need_prepare_data_ = true;
mutable bool need_prepare_phi_data_ = false;
mutable bool enable_cache_runtime_context_ = false;
mutable bool all_kernels_must_compute_runtime_shape_ = false;
mutable std::mutex cache_update_mutex_;
@@ -710,6 +711,9 @@
mutable std::unique_ptr<phi::KernelSignature> kernel_signature_;
mutable std::unique_ptr<phi::Kernel> pt_kernel_;
mutable std::unique_ptr<phi::ArgumentMappingFn> arg_map_fn_;

struct CacheImpl;
mutable CacheImpl* impl_{nullptr};
};

extern bool OpSupportGPU(const std::string& op_type);
5 changes: 5 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -273,6 +273,11 @@ std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
}
// delete_fill_constant_op_pass is not applied under trt dynamic shape
if (pass->Type() == "delete_fill_constant_op_pass") {
bool use_dynamic = pass->Get<bool>("with_dynamic_shape");
if (use_dynamic) continue;
}
graph.reset(pass->Apply(graph.release()));
}
return graph;
5 changes: 0 additions & 5 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -633,11 +633,6 @@ void AnalysisConfig::Update() {
(pass == "conv_bn_fuse_pass")) {
continue;
}
// delete_fill_constant_op_pass is not used under trt dynamic shape
if ((!min_input_shape_.empty() || trt_tuned_dynamic_shape_) &&
pass == "delete_fill_constant_op_pass") {
continue;
}
pass_builder()->AppendPass(pass);
}
}
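The last two hunks move the dynamic-shape guard for delete_fill_constant_op_pass from `AnalysisConfig::Update` (where the pass was dropped from the list) into `IRPassManager::Apply` (where it is skipped at run time). A toy sketch of that apply-time skip, using a hypothetical `Pass` struct instead of Paddle's `framework::ir::Pass`:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical pass stand-in (illustration only).
struct Pass {
  std::string type;
  bool with_dynamic_shape = false;
  const std::string& Type() const { return type; }
};

int main() {
  std::vector<Pass> passes = {
      {"delete_fill_constant_op_pass", /*with_dynamic_shape=*/true},
      {"conv_bn_fuse_pass", /*with_dynamic_shape=*/true},
  };

  // Mirrors the relocated check: instead of removing the pass while building the
  // pass list, skip it at apply time whenever TensorRT dynamic shape is enabled.
  for (const auto& pass : passes) {
    if (pass.Type() == "delete_fill_constant_op_pass" && pass.with_dynamic_shape) {
      std::cout << "skip " << pass.Type() << " under dynamic shape\n";
      continue;
    }
    std::cout << "run  " << pass.Type() << "\n";
  }
  return 0;
}
```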
