
Commit

Merge branch 'PaddlePaddle:develop' into SoftMarginLoss
yangguohao committed Jun 20, 2022
2 parents cc269a9 + b6bc6f7 commit bfde404
Showing 152 changed files with 5,852 additions and 1,796 deletions.
7 changes: 4 additions & 3 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -136,7 +136,7 @@ int32_t BrpcPsClient::CreateClient2ClientConnection(
server_ip_port.append(":");
server_ip_port.append(std::to_string(client_list[i].port));
_client_channels[i].reset(new brpc::Channel());
- if (_client_channels[i]->Init(server_ip_port.c_str(), "", &options) != 0) {
+ if (_client_channels[i]->Init(server_ip_port.c_str(), "", &options)) {
VLOG(0) << "BrpcPSClient connect to Client:" << server_ip_port
<< " Failed! Try again.";
std::string int_ip_port =
@@ -1195,7 +1195,8 @@ std::future<int32_t> BrpcPsClient::SendClient2ClientMsg(
int msg_type, int to_client_id, const std::string &msg) {
auto promise = std::make_shared<std::promise<int32_t>>();
std::future<int> fut = promise->get_future();
- if (to_client_id >= _client_channels.size()) {
+ if (to_client_id >= 0 &&
+     static_cast<size_t>(to_client_id) >= _client_channels.size()) {
VLOG(0) << "to_client_id is out of range clients, which size is "
<< _client_channels.size();
promise->set_value(-1);
@@ -1778,7 +1779,7 @@ void BrpcPsClient::PushDenseTaskConsume() {
});
++merge_count;
}
- for (uint32_t i = 0; i < merge_count; ++i) {
+ for (size_t i = 0; i < merge_count; ++i) {
merge_status[i].wait();
}

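Most of the hunks in this commit are the same kind of fix: a loop index or bounds check mixed signed and unsigned integer types (`int` vs `size_t`, or `uint32_t` vs a `size_t` counter), which trips `-Wsign-compare` and can misbehave once a negative value is converted to unsigned. A minimal, self-contained sketch of the pitfall and the check-sign-then-cast fix — illustrative only, not code taken from this repository:

```cpp
// sign_compare.cc -- illustrative sketch only, not code from this repository.
#include <cstddef>
#include <iostream>
#include <vector>

// Validate a signed id against an unsigned container size: check the sign
// first, then cast, so the comparison never mixes signed and unsigned types.
bool out_of_range(int id, const std::vector<int>& channels) {
  return id < 0 || static_cast<std::size_t>(id) >= channels.size();
}

int main() {
  std::vector<int> channels(4);
  std::cout << out_of_range(3, channels) << "\n";   // 0: valid index
  std::cout << out_of_range(7, channels) << "\n";   // 1: past the end
  std::cout << out_of_range(-1, channels) << "\n";  // 1: negative id

  // Loop counters follow the same rule: use size_t for STL containers and
  // int for protobuf repeated fields, matching what size()/..._size() return.
  for (std::size_t i = 0; i < channels.size(); ++i) {
    channels[i] = static_cast<int>(i);
  }
  return 0;
}
```
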
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/brpc_ps_server.cc
@@ -713,7 +713,7 @@ int32_t BrpcPsService::CacheShuffle(Table *table,
};

std::vector<Table *> table_ptrs;
- for (size_t i = 3; i < request.params_size(); ++i) {
+ for (int i = 3; i < request.params_size(); ++i) {
int table_id = std::stoi(request.params(i));
Table *table_ptr = _server->GetTable(table_id);
table_ptrs.push_back(table_ptr);
@@ -681,7 +681,7 @@ void AsyncCommunicator::PushSparseFromTensorAsync(

if (tensor->lod().size() > 0) {
for (size_t i = 0; i < tensor->lod()[0].size() - 1; ++i) {
- for (int j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
+ for (size_t j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
++j, output_len += fea_dim) {
uint64_t real_id = static_cast<uint64_t>(ids[j]);
if (real_id == padding_id) {
@@ -727,7 +727,7 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
++input_idx;
}
}
- CHECK(output_len == g_tensor->numel());
+ CHECK(static_cast<size_t>(output_len) == g_tensor->numel());
}

std::vector<float *> push_g_vec(input_idx, nullptr);
7 changes: 4 additions & 3 deletions paddle/fluid/distributed/ps/service/graph_brpc_server.cc
@@ -547,7 +547,8 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
seq.push_back(request_idx);
}
size_t remote_call_num = request_call_num;
- if (request2server.size() != 0 && request2server.back() == rank) {
+ if (request2server.size() != 0 &&
+     static_cast<size_t>(request2server.back()) == rank) {
remote_call_num--;
local_buffers.resize(node_id_buckets.back().size());
local_actual_sizes.resize(node_id_buckets.back().size());
@@ -582,7 +583,7 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
for (size_t i = 0; i < node_num; i++) {
if (fail_num > 0 && failed[seq[i]]) {
size = 0;
- } else if (request2server[seq[i]] != rank) {
+ } else if (static_cast<size_t>(request2server[seq[i]]) != rank) {
res[seq[i]]->copy_and_forward(&size, sizeof(int));
} else {
size = local_actual_sizes[local_index++];
@@ -596,7 +597,7 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
for (size_t i = 0; i < node_num; i++) {
if (fail_num > 0 && failed[seq[i]]) {
continue;
- } else if (request2server[seq[i]] != rank) {
+ } else if (static_cast<size_t>(request2server[seq[i]]) != rank) {
char temp[actual_size[i] + 1];
res[seq[i]]->copy_and_forward(temp, actual_size[i]);
cntl->response_attachment().append(temp, actual_size[i]);
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/ps_client.cc
@@ -43,7 +43,7 @@ int32_t PSClient::Configure(

const auto &work_param = _config.worker_param().downpour_worker_param();

- for (size_t i = 0; i < work_param.downpour_table_param_size(); ++i) {
+ for (int i = 0; i < work_param.downpour_table_param_size(); ++i) {
auto *accessor = CREATE_PSCORE_CLASS(
ValueAccessor,
work_param.downpour_table_param(i).accessor().accessor_class());
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/ps_local_client.cc
@@ -23,7 +23,7 @@ namespace distributed {
int32_t PsLocalClient::Initialize() {
const auto& downpour_param = _config.server_param().downpour_server_param();
TableManager::Instance().Initialize();
- for (size_t i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
+ for (int i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
auto* table = CREATE_PSCORE_CLASS(
Table, downpour_param.downpour_table_param(i).table_class());
table->SetShard(0, 1);
@@ -51,7 +51,7 @@ void GraphPyService::add_table_feat_conf(std::string table_name,
int feat_idx = table_feat_mapping[idx][feat_name];
VLOG(0) << "table_name " << table_name << " mapping id " << idx;
VLOG(0) << " feat name " << feat_name << " feat id" << feat_idx;
- if (feat_idx < table_feat_conf_feat_name[idx].size()) {
+ if (static_cast<size_t>(feat_idx) < table_feat_conf_feat_name[idx].size()) {
// overide
table_feat_conf_feat_name[idx][feat_idx] = feat_name;
table_feat_conf_feat_dtype[idx][feat_idx] = feat_dtype;
@@ -81,14 +81,14 @@ class GraphPyService {

graph_proto->set_table_name("cpu_graph_table");
graph_proto->set_use_cache(false);
- for (int i = 0; i < id_to_edge.size(); i++)
+ for (size_t i = 0; i < id_to_edge.size(); i++)
graph_proto->add_edge_types(id_to_edge[i]);
- for (int i = 0; i < id_to_feature.size(); i++) {
+ for (size_t i = 0; i < id_to_feature.size(); i++) {
graph_proto->add_node_types(id_to_feature[i]);
auto feat_node = id_to_feature[i];
::paddle::distributed::GraphFeature* g_f =
graph_proto->add_graph_feature();
- for (int x = 0; x < table_feat_conf_feat_name[i].size(); x++) {
+ for (size_t x = 0; x < table_feat_conf_feat_name[i].size(); x++) {
g_f->add_name(table_feat_conf_feat_name[i][x]);
g_f->add_dtype(table_feat_conf_feat_dtype[i][x]);
g_f->add_shape(table_feat_conf_feat_shape[i][x]);
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/server.cc
@@ -76,7 +76,7 @@ int32_t PSServer::Configure(
uint32_t barrier_table = UINT32_MAX;
uint32_t global_step_table = UINT32_MAX;

- for (size_t i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
+ for (int i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
auto *table = CREATE_PSCORE_CLASS(
Table, downpour_param.downpour_table_param(i).table_class());

4 changes: 2 additions & 2 deletions paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -1205,7 +1205,7 @@ uint32_t GraphTable::get_thread_pool_index_by_shard_index(int64_t shard_index) {

int32_t GraphTable::clear_nodes(int type_id, int idx) {
auto &search_shards = type_id == 0 ? edge_shards[idx] : feature_shards[idx];
- for (int i = 0; i < search_shards.size(); i++) {
+ for (size_t i = 0; i < search_shards.size(); i++) {
search_shards[i]->clear();
}
return 0;
@@ -1478,7 +1478,7 @@ std::vector<std::vector<int64_t>> GraphTable::get_all_id(int type_id, int idx,
std::vector<std::vector<int64_t>> res(slice_num);
auto &search_shards = type_id == 0 ? edge_shards[idx] : feature_shards[idx];
std::vector<std::future<std::vector<int64_t>>> tasks;
- for (int i = 0; i < search_shards.size(); i++) {
+ for (size_t i = 0; i < search_shards.size(); i++) {
tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
[&search_shards, i]() -> std::vector<int64_t> {
return search_shards[i]->get_all_id();
18 changes: 12 additions & 6 deletions paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc
@@ -288,20 +288,26 @@ std::string CtrDymfAccessor::ParseToString(const float* v, int param) {
os << v[0] << " " << v[1] << " " << v[2] << " " << v[3] << " " << v[4];
// << v[5] << " " << v[6];
for (int i = common_feature_value.EmbedG2SumIndex();
- i < common_feature_value.EmbedxWIndex(); i++) {
+ i < common_feature_value.EmbedxG2SumIndex(); i++) {
os << " " << v[i];
}
os << " " << common_feature_value.Slot(const_cast<float*>(v)) << " "
<< common_feature_value.MfDim(const_cast<float*>(v));
// os << " " << common_feature_value.Slot(const_cast<float*>(v)) << " "
// << common_feature_value.MfDim(const_cast<float*>(v));
auto show = common_feature_value.Show(const_cast<float*>(v));
auto click = common_feature_value.Click(const_cast<float*>(v));
auto score = ShowClickScore(show, click);
if (score >= _config.embedx_threshold() &&
param > common_feature_value.EmbedxG2SumIndex()) {
VLOG(0) << "common_feature_value.EmbedxG2SumIndex():"
<< common_feature_value.EmbedxG2SumIndex();
// VLOG(1) << "common_feature_value.EmbedxG2SumIndex():"
// << common_feature_value.EmbedxG2SumIndex();
// VLOG(1) << "common_feature_value.EmbedxWIndex():"
// << common_feature_value.EmbedxWIndex();
// VLOG(1) << "common_feature_value.MfDim():"
// << common_feature_value.MfDim(const_cast<float*>(v));
for (auto i = common_feature_value.EmbedxG2SumIndex();
- i < common_feature_value.Dim(); ++i) {
+ i < common_feature_value.EmbedxWIndex() +
+     common_feature_value.MfDim(const_cast<float*>(v));
+ ++i) {
os << " " << v[i];
}
}
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/ps/table/memory_dense_table.cc
@@ -81,8 +81,8 @@ int32_t MemoryDenseTable::InitializeValue() {

fixed_len_params_dim_ = 0;
for (int x = 0; x < size; ++x) {
- int dim = common.dims()[x];
- if (dim != param_dim_) {
+ auto& dim = common.dims()[x];
+ if (static_cast<int>(dim) != param_dim_) {
fixed_len_params_dim_ += dim;
} else {
param_col_ids_.push_back(x);
5 changes: 3 additions & 2 deletions paddle/fluid/distributed/ps/table/ssd_sparse_table.cc
@@ -625,7 +625,7 @@ int32_t SSDSparseTable::Load(const std::string& path,
}

// Load the data files under `path` for the index range [start_idx, end_idx)
- int32_t SSDSparseTable::Load(size_t start_idx, int end_idx,
+ int32_t SSDSparseTable::Load(size_t start_idx, size_t end_idx,
const std::vector<std::string>& file_list,
const std::string& param) {
if (start_idx >= file_list.size()) {
@@ -699,7 +699,8 @@ int32_t SSDSparseTable::Load(size_t start_idx, int end_idx,
ssd_values.emplace_back(std::make_pair((char*)data_buffer_ptr,
value_size * sizeof(float)));
data_buffer_ptr += feature_value_size;
- if (ssd_keys.size() == FLAGS_pserver_load_batch_size) {
+ if (static_cast<int>(ssd_keys.size()) ==
+     FLAGS_pserver_load_batch_size) {
_db->put_batch(local_shard_id, ssd_keys, ssd_values,
ssd_keys.size());
ssd_keys.clear();
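
The loading loop above accumulates key/value pairs and writes them out whenever `ssd_keys` reaches `FLAGS_pserver_load_batch_size`. A rough, self-contained sketch of that flush-when-full pattern, using a hypothetical `Sink::put_batch` in place of the real `_db` interface:

```cpp
// batch_flush.cc -- schematic of a flush-when-full write loop; Sink::put_batch
// is a hypothetical stand-in, not the actual SSD table API.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Sink {
  void put_batch(const std::vector<std::pair<uint64_t, std::string>>& batch) {
    std::cout << "flushing " << batch.size() << " records\n";
  }
};

int main() {
  const std::size_t batch_size = 3;  // stand-in for FLAGS_pserver_load_batch_size
  Sink db;
  std::vector<std::pair<uint64_t, std::string>> batch;

  for (uint64_t key = 0; key < 10; ++key) {
    batch.emplace_back(key, "value-" + std::to_string(key));
    if (batch.size() == batch_size) {  // full batch: write it out and reset
      db.put_batch(batch);
      batch.clear();
    }
  }
  if (!batch.empty()) {  // flush the tail that never filled a whole batch
    db.put_batch(batch);
  }
  return 0;
}
```
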
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/table/ssd_sparse_table.h
@@ -79,7 +79,7 @@ class SSDSparseTable : public MemorySparseTable {
virtual int32_t Load(const std::string& path,
const std::string& param) override;
// Load the data files under `path` for the index range [start_idx, end_idx)
- virtual int32_t Load(size_t start_idx, int end_idx,
+ virtual int32_t Load(size_t start_idx, size_t end_idx,
const std::vector<std::string>& file_list,
const std::string& param);
int64_t LocalSize();
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/ps/wrapper/fleet.cc
@@ -536,7 +536,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
output_len = 0;

if (tensor->lod().size() > 0) {
- for (int i = 0; i < tensor->lod()[0].size() - 1; ++i) {
+ for (size_t i = 0; i < tensor->lod()[0].size() - 1; ++i) {
for (size_t j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
++j, output_len += fea_dim) {
uint64_t real_id = static_cast<uint64_t>(ids[j]);
@@ -566,7 +566,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
}
}
} else {
- for (int i = 0; i < len; ++i, output_len += fea_dim) {
+ for (size_t i = 0; i < len; ++i, output_len += fea_dim) {
uint64_t real_id = static_cast<uint64_t>(ids[i]);
if (real_id == padding_id) {
continue;
@@ -592,7 +592,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
++input_idx;
}
}
- CHECK(output_len == g_tensor->numel());
+ CHECK(static_cast<int64_t>(output_len) == g_tensor->numel());
}

std::vector<float*> push_g_vec(input_idx, nullptr);
@@ -295,7 +295,7 @@ void RunBrpcPushSparse() {
fea_temp_value_ptr.data(), 0, fea_keys.data(), fea_keys.size(), true);
pull_update_status.wait();

- for (size_t idx = 0; idx < tensor->numel(); ++idx) {
+ for (int64_t idx = 0; idx < tensor->numel(); ++idx) {
EXPECT_FLOAT_EQ(fea_temp_values[idx], fea_values[idx] - 1.0);
}

8 changes: 4 additions & 4 deletions paddle/fluid/distributed/test/ctr_accessor_test.cc
@@ -222,15 +222,15 @@ TEST(downpour_feature_value_accessor_test, test_update) {
v.embed_w = value[i][5];

int idx = 6;
- for (auto j = 0u; j < acc->common_feature_value.embed_sgd_dim; ++j) {
+ for (int j = 0; j < acc->common_feature_value.embed_sgd_dim; ++j) {
v.embed_g2sum.push_back(value[i][idx + j]);
}
idx += acc->common_feature_value.embed_sgd_dim;
- for (auto j = 0u; j < acc->common_feature_value.embedx_dim; ++j) {
+ for (int j = 0; j < acc->common_feature_value.embedx_dim; ++j) {
v.embedx_w.push_back(value[i][idx + j]);
}
idx += acc->common_feature_value.embedx_dim;
- for (auto j = 0u; j < acc->common_feature_value.embedx_sgd_dim; ++j) {
+ for (int j = 0; j < acc->common_feature_value.embedx_sgd_dim; ++j) {
v.embedx_g2sum.push_back(value[i][idx + j]);
}

@@ -239,7 +239,7 @@ TEST(downpour_feature_value_accessor_test, test_update) {
push_v.show = grad[i][1];
push_v.click = grad[i][2];
push_v.embed_g = grad[i][3];
- for (auto j = 0; j < parameter.embedx_dim(); ++j) {
+ for (int j = 0; j < parameter.embedx_dim(); ++j) {
push_v.embedx_g.push_back(grad[i][4 + j]);
}

2 changes: 1 addition & 1 deletion paddle/fluid/distributed/test/memory_sparse_table_test.cc
@@ -142,7 +142,7 @@ TEST(MemorySparseTable, SGD) {
// table->PullSparse(pull_values.data(), value);

for (size_t i = 0; i < init_keys.size(); ++i) {
- for (size_t j = 2; j < emb_dim + 3; ++j) {
+ for (int j = 2; j < emb_dim + 3; ++j) {
auto update_val = init_values[i * (emb_dim + 1) + j] -
0.1 * total_gradients[3 + i * (emb_dim + 4) + j];
VLOG(3) << total_gradients[i * (emb_dim + 4) + j + 3] << ":"
2 changes: 1 addition & 1 deletion paddle/fluid/framework/heter_pipeline_trainer.cc 100644 → 100755
@@ -333,5 +333,5 @@ Scope* HeterPipelineTrainer::GetWorkerScope(int thread_id) {
}

} // end namespace framework
- } // namespace paddle
+ } // end namespace paddle
#endif
12 changes: 12 additions & 0 deletions paddle/fluid/framework/ir/remove_padding_recover_padding_pass.cc
@@ -130,6 +130,12 @@ void RemovePaddingRecoverPaddingPass::ApplyImpl(ir::Graph* graph) const {
// output
remove_padding.SetOutput("Out", {remove_padding_out_name});

+ // set out_threshold for int8
+ if (op_node->Op()->HasAttr("out_threshold")) {
+   remove_padding.SetAttr("out_threshold",
+                          op_node->Op()->GetAttr("out_threshold"));
+ }

auto remove_padding_op_node = graph->CreateOpNode(&remove_padding);
auto remove_padding_out_node = graph->CreateVarNode(remove_padding_out);

@@ -184,6 +190,12 @@ void RemovePaddingRecoverPaddingPass::ApplyImpl(ir::Graph* graph) const {
// output
recover_padding.SetOutput("Out", {out_node->Name()});

+ // set out_threshold for int8
+ if (op_node->Op()->HasAttr("out_threshold")) {
+   recover_padding.SetAttr("out_threshold",
+                           op_node->Op()->GetAttr("out_threshold"));
+ }

auto recover_padding_op_node = graph->CreateOpNode(&recover_padding);
auto recover_padding_input_node =
graph->CreateVarNode(recover_padding_input);
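
Both hunks add the same guard: if the op being replaced carries an `out_threshold` attribute (used for int8), it is copied onto the newly created `remove_padding` / `recover_padding` op so the value survives the rewrite. A schematic of that copy-if-present pattern, with a hypothetical `OpStub` standing in for Paddle's `OpDesc`:

```cpp
// attr_copy.cc -- minimal sketch of the copy-if-present pattern above, using a
// hypothetical OpStub in place of Paddle's OpDesc.
#include <iostream>
#include <map>
#include <string>

struct OpStub {
  std::map<std::string, float> attrs;

  bool HasAttr(const std::string& name) const { return attrs.count(name) > 0; }
  float GetAttr(const std::string& name) const { return attrs.at(name); }
  void SetAttr(const std::string& name, float value) { attrs[name] = value; }
};

int main() {
  OpStub original;
  original.SetAttr("out_threshold", 0.5f);  // e.g. written by an earlier pass

  OpStub rewritten;  // the op that replaces `original` in the graph
  if (original.HasAttr("out_threshold")) {
    rewritten.SetAttr("out_threshold", original.GetAttr("out_threshold"));
  }

  std::cout << "out_threshold on rewritten op: "
            << (rewritten.HasAttr("out_threshold")
                    ? std::to_string(rewritten.GetAttr("out_threshold"))
                    : std::string("missing"))
            << "\n";
  return 0;
}
```
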
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -1943,6 +1943,7 @@ USE_TRT_CONVERTER(multiclass_nms);
USE_TRT_CONVERTER(multiclass_nms3);
USE_TRT_CONVERTER(nearest_interp);
USE_TRT_CONVERTER(nearest_interp_v2);
+ USE_TRT_CONVERTER(bilinear_interp_v2);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(gather_nd);
1 change: 1 addition & 0 deletions paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -52,6 +52,7 @@ list(
conv3d_op.cc
mish_op.cc
nearest_interp_v2_op.cc
+ bilinear_interp_v2_op.cc
pool3d_op.cc
deformable_conv_op.cc
preln_emb_eltwise_layernorm.cc
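
Together, the last two additions enable a new bilinear_interp_v2 TensorRT converter: the `USE_TRT_CONVERTER(bilinear_interp_v2)` line registers it with the analysis predictor, and the new `bilinear_interp_v2_op.cc` entry adds its implementation to the converter build list.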

1 comment on commit bfde404

paddle-bot-old (bot) commented on bfde404, Jun 20, 2022


🕵️ CI failures summary

🔍 PR: #42364 Commit ID: bfde404 contains failed CI.

🔹 Failed: PR-CI-Static-Check

Unknown Failed
Unknown Failed
