diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt
index 75966399148d4..47e53e64f592b 100644
--- a/paddle/fluid/CMakeLists.txt
+++ b/paddle/fluid/CMakeLists.txt
@@ -6,5 +6,6 @@ add_subdirectory(imperative)
 add_subdirectory(operators)
 add_subdirectory(pybind)
 add_subdirectory(eager)
+add_subdirectory(jit)
 # NOTE: please add the subdirectory inference last.
 add_subdirectory(inference)
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
index 47e3476036d7e..f43493b10fe99 100644
--- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -136,7 +136,7 @@ int32_t BrpcPsClient::CreateClient2ClientConnection(
     server_ip_port.append(":");
     server_ip_port.append(std::to_string(client_list[i].port));
     _client_channels[i].reset(new brpc::Channel());
-    if (_client_channels[i]->Init(server_ip_port.c_str(), "", &options) != 0) {
+    if (_client_channels[i]->Init(server_ip_port.c_str(), "", &options)) {
       VLOG(0) << "BrpcPSClient connect to Client:" << server_ip_port
               << " Failed! Try again.";
       std::string int_ip_port =
@@ -1195,7 +1195,8 @@ std::future<int32_t> BrpcPsClient::SendClient2ClientMsg(
     int msg_type, int to_client_id, const std::string &msg) {
   auto promise = std::make_shared<std::promise<int32_t>>();
   std::future<int32_t> fut = promise->get_future();
-  if (to_client_id >= _client_channels.size()) {
+  if (to_client_id >= 0 &&
+      static_cast<size_t>(to_client_id) >= _client_channels.size()) {
     VLOG(0) << "to_client_id is out of range clients, which size is "
             << _client_channels.size();
     promise->set_value(-1);
@@ -1778,7 +1779,7 @@ void BrpcPsClient::PushDenseTaskConsume() {
         });
         ++merge_count;
       }
-      for (uint32_t i = 0; i < merge_count; ++i) {
+      for (size_t i = 0; i < merge_count; ++i) {
         merge_status[i].wait();
       }
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
index d859acbb42e44..4ca5f9c8207fe 100644
--- a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
@@ -713,7 +713,7 @@ int32_t BrpcPsService::CacheShuffle(Table *table,
   };
   std::vector<Table *> table_ptrs;
-  for (size_t i = 3; i < request.params_size(); ++i) {
+  for (int i = 3; i < request.params_size(); ++i) {
     int table_id = std::stoi(request.params(i));
     Table *table_ptr = _server->GetTable(table_id);
     table_ptrs.push_back(table_ptr);
diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.cc b/paddle/fluid/distributed/ps/service/communicator/communicator.cc
index c50f1d909cd95..edbfd06d55a54 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.cc
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.cc
@@ -681,7 +681,7 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
   if (tensor->lod().size() > 0) {
     for (size_t i = 0; i < tensor->lod()[0].size() - 1; ++i) {
-      for (int j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
+      for (size_t j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
            ++j, output_len += fea_dim) {
         uint64_t real_id = static_cast<uint64_t>(ids[j]);
         if (real_id == padding_id) {
@@ -727,7 +727,7 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
       ++input_idx;
     }
   }
-  CHECK(output_len == g_tensor->numel());
+  CHECK(static_cast<int64_t>(output_len) == g_tensor->numel());
 }
 std::vector<float *> push_g_vec(input_idx, nullptr);
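Most of the hunks above (and several more below) are `-Wsign-compare` cleanups: they align loop-index and comparison types so a signed value is never compared against an unsigned `size()`. A minimal standalone sketch of the pitfall and of the guard-then-cast pattern the diff adopts (illustrative, not Paddle code):

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> channels(4);
  int id = -1;
  // A bare `id >= channels.size()` promotes `id` to unsigned, so -1 becomes
  // a huge value and the bounds check "passes" for the wrong reason; the
  // compiler warns under -Wsign-compare. The diff's pattern: check the sign
  // first, then cast explicitly.
  if (id >= 0 && static_cast<size_t>(id) >= channels.size()) {
    std::puts("id out of range");
  }
  // For loops, the fix is simply to iterate with a matching index type.
  for (size_t i = 0; i < channels.size(); ++i) {
    channels[i] = static_cast<int>(i);
  }
  return 0;
}
```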
diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
index ce9397e511eb0..8128f2b2adbd9 100644
--- a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
+++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
@@ -547,7 +547,8 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
     seq.push_back(request_idx);
   }
   size_t remote_call_num = request_call_num;
-  if (request2server.size() != 0 && request2server.back() == rank) {
+  if (request2server.size() != 0 &&
+      static_cast<size_t>(request2server.back()) == rank) {
     remote_call_num--;
     local_buffers.resize(node_id_buckets.back().size());
     local_actual_sizes.resize(node_id_buckets.back().size());
@@ -582,7 +583,7 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
     for (size_t i = 0; i < node_num; i++) {
       if (fail_num > 0 && failed[seq[i]]) {
         size = 0;
-      } else if (request2server[seq[i]] != rank) {
+      } else if (static_cast<size_t>(request2server[seq[i]]) != rank) {
         res[seq[i]]->copy_and_forward(&size, sizeof(int));
       } else {
         size = local_actual_sizes[local_index++];
@@ -596,7 +597,7 @@ int32_t GraphBrpcService::sample_neighbors_across_multi_servers(
     for (size_t i = 0; i < node_num; i++) {
       if (fail_num > 0 && failed[seq[i]]) {
         continue;
-      } else if (request2server[seq[i]] != rank) {
+      } else if (static_cast<size_t>(request2server[seq[i]]) != rank) {
         char temp[actual_size[i] + 1];
         res[seq[i]]->copy_and_forward(temp, actual_size[i]);
         cntl->response_attachment().append(temp, actual_size[i]);
diff --git a/paddle/fluid/distributed/ps/service/ps_client.cc b/paddle/fluid/distributed/ps/service/ps_client.cc
index a0216f2a7953a..2d02771a2cf8e 100644
--- a/paddle/fluid/distributed/ps/service/ps_client.cc
+++ b/paddle/fluid/distributed/ps/service/ps_client.cc
@@ -43,7 +43,7 @@ int32_t PSClient::Configure(
   const auto &work_param = _config.worker_param().downpour_worker_param();
-  for (size_t i = 0; i < work_param.downpour_table_param_size(); ++i) {
+  for (int i = 0; i < work_param.downpour_table_param_size(); ++i) {
     auto *accessor = CREATE_PSCORE_CLASS(
         ValueAccessor,
         work_param.downpour_table_param(i).accessor().accessor_class());
diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.cc b/paddle/fluid/distributed/ps/service/ps_local_client.cc
index b6407ccebe52b..a52ed1996fff7 100644
--- a/paddle/fluid/distributed/ps/service/ps_local_client.cc
+++ b/paddle/fluid/distributed/ps/service/ps_local_client.cc
@@ -23,7 +23,7 @@ namespace distributed {
 int32_t PsLocalClient::Initialize() {
   const auto& downpour_param = _config.server_param().downpour_server_param();
   TableManager::Instance().Initialize();
-  for (size_t i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
+  for (int i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
     auto* table = CREATE_PSCORE_CLASS(
         Table, downpour_param.downpour_table_param(i).table_class());
     table->SetShard(0, 1);
diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc
index 255c0d3d655aa..fb65e74b62f6f 100644
--- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc
+++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc
@@ -51,7 +51,7 @@ void GraphPyService::add_table_feat_conf(std::string table_name,
   int feat_idx = table_feat_mapping[idx][feat_name];
   VLOG(0) << "table_name " << table_name << " mapping id " << idx;
   VLOG(0) << " feat name " << feat_name << " feat id" << feat_idx;
-  if (feat_idx < table_feat_conf_feat_name[idx].size()) {
+  if (static_cast<size_t>(feat_idx) < table_feat_conf_feat_name[idx].size()) {
     // override
     table_feat_conf_feat_name[idx][feat_idx] = feat_name;
     table_feat_conf_feat_dtype[idx][feat_idx] = feat_dtype;
diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
index 7dd0340125693..877214121e5a0 100644
--- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
+++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
@@ -81,14 +81,14 @@ class GraphPyService {
     graph_proto->set_table_name("cpu_graph_table");
     graph_proto->set_use_cache(false);
-    for (int i = 0; i < id_to_edge.size(); i++)
+    for (size_t i = 0; i < id_to_edge.size(); i++)
       graph_proto->add_edge_types(id_to_edge[i]);
-    for (int i = 0; i < id_to_feature.size(); i++) {
+    for (size_t i = 0; i < id_to_feature.size(); i++) {
       graph_proto->add_node_types(id_to_feature[i]);
       auto feat_node = id_to_feature[i];
       ::paddle::distributed::GraphFeature* g_f =
           graph_proto->add_graph_feature();
-      for (int x = 0; x < table_feat_conf_feat_name[i].size(); x++) {
+      for (size_t x = 0; x < table_feat_conf_feat_name[i].size(); x++) {
         g_f->add_name(table_feat_conf_feat_name[i][x]);
         g_f->add_dtype(table_feat_conf_feat_dtype[i][x]);
         g_f->add_shape(table_feat_conf_feat_shape[i][x]);
diff --git a/paddle/fluid/distributed/ps/service/server.cc b/paddle/fluid/distributed/ps/service/server.cc
index a6e0f39474b06..e7b3271171ea4 100644
--- a/paddle/fluid/distributed/ps/service/server.cc
+++ b/paddle/fluid/distributed/ps/service/server.cc
@@ -76,7 +76,7 @@ int32_t PSServer::Configure(
   uint32_t barrier_table = UINT32_MAX;
   uint32_t global_step_table = UINT32_MAX;
-  for (size_t i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
+  for (int i = 0; i < downpour_param.downpour_table_param_size(); ++i) {
     auto *table = CREATE_PSCORE_CLASS(
         Table, downpour_param.downpour_table_param(i).table_class());
diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc b/paddle/fluid/distributed/ps/table/common_graph_table.cc
index 55a9c794e8ead..d3af468482bfe 100644
--- a/paddle/fluid/distributed/ps/table/common_graph_table.cc
+++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -1205,7 +1205,7 @@ uint32_t GraphTable::get_thread_pool_index_by_shard_index(int64_t shard_index) {
 int32_t GraphTable::clear_nodes(int type_id, int idx) {
   auto &search_shards = type_id == 0 ? edge_shards[idx] : feature_shards[idx];
-  for (int i = 0; i < search_shards.size(); i++) {
+  for (size_t i = 0; i < search_shards.size(); i++) {
     search_shards[i]->clear();
   }
   return 0;
@@ -1478,7 +1478,7 @@ std::vector<std::vector<int64_t>> GraphTable::get_all_id(int type_id, int idx,
   std::vector<std::vector<int64_t>> res(slice_num);
   auto &search_shards = type_id == 0 ? edge_shards[idx] : feature_shards[idx];
   std::vector<std::future<std::vector<int64_t>>> tasks;
-  for (int i = 0; i < search_shards.size(); i++) {
+  for (size_t i = 0; i < search_shards.size(); i++) {
     tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
         [&search_shards, i]() -> std::vector<int64_t> {
           return search_shards[i]->get_all_id();
diff --git a/paddle/fluid/distributed/ps/table/memory_dense_table.cc b/paddle/fluid/distributed/ps/table/memory_dense_table.cc
index ab1361eba050f..857850ce50b6a 100644
--- a/paddle/fluid/distributed/ps/table/memory_dense_table.cc
+++ b/paddle/fluid/distributed/ps/table/memory_dense_table.cc
@@ -81,8 +81,8 @@ int32_t MemoryDenseTable::InitializeValue() {
   fixed_len_params_dim_ = 0;
   for (int x = 0; x < size; ++x) {
-    int dim = common.dims()[x];
-    if (dim != param_dim_) {
+    auto& dim = common.dims()[x];
+    if (static_cast<int>(dim) != param_dim_) {
       fixed_len_params_dim_ += dim;
     } else {
       param_col_ids_.push_back(x);
diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc
index 237d0c9424b81..dc77a6c6c51e2 100644
--- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc
+++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc
@@ -625,7 +625,7 @@ int32_t SSDSparseTable::Load(const std::string& path,
 }
 // Load the data files in [start_idx, end_idx) under the path directory
-int32_t SSDSparseTable::Load(size_t start_idx, int end_idx,
+int32_t SSDSparseTable::Load(size_t start_idx, size_t end_idx,
                              const std::vector<std::string>& file_list,
                              const std::string& param) {
   if (start_idx >= file_list.size()) {
@@ -699,7 +699,8 @@ int32_t SSDSparseTable::Load(size_t start_idx, size_t end_idx,
           ssd_values.emplace_back(std::make_pair((char*)data_buffer_ptr,
                                                  value_size * sizeof(float)));
           data_buffer_ptr += feature_value_size;
-          if (ssd_keys.size() == FLAGS_pserver_load_batch_size) {
+          if (static_cast<int>(ssd_keys.size()) ==
+              FLAGS_pserver_load_batch_size) {
            _db->put_batch(local_shard_id, ssd_keys, ssd_values,
                           ssd_keys.size());
            ssd_keys.clear();
diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h
index e6be77a4ba924..3e4d3afe59c3a 100644
--- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h
+++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h
@@ -79,7 +79,7 @@ class SSDSparseTable : public MemorySparseTable {
   virtual int32_t Load(const std::string& path,
                        const std::string& param) override;
   // Load the data files in [start_idx, end_idx) under the path directory
-  virtual int32_t Load(size_t start_idx, int end_idx,
+  virtual int32_t Load(size_t start_idx, size_t end_idx,
                        const std::vector<std::string>& file_list,
                        const std::string& param);
   int64_t LocalSize();
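For context on the Load change above: the loader accumulates key/value pairs and flushes them to the SSD backend whenever the batch reaches FLAGS_pserver_load_batch_size. A condensed sketch of that batch-flush pattern, with illustrative names (`KVStore`/`put_batch` here are stand-ins, not Paddle's API):

```cpp
#include <cstdint>
#include <utility>
#include <vector>

struct KVStore {
  void put_batch(const std::vector<uint64_t>& keys,
                 const std::vector<std::pair<char*, size_t>>& values) {
    // write the batch to storage here
  }
};

void LoadBatched(KVStore* db, const std::vector<uint64_t>& all_keys,
                 const std::vector<std::pair<char*, size_t>>& all_values,
                 int batch_size) {
  std::vector<uint64_t> keys;
  std::vector<std::pair<char*, size_t>> values;
  for (size_t i = 0; i < all_keys.size(); ++i) {
    keys.push_back(all_keys[i]);
    values.push_back(all_values[i]);
    // Flush once the batch is full. Note the int cast, so comparing against
    // an int flag does not trip -Wsign-compare -- the same idiom as the hunk.
    if (static_cast<int>(keys.size()) == batch_size) {
      db->put_batch(keys, values);
      keys.clear();
      values.clear();
    }
  }
  if (!keys.empty()) db->put_batch(keys, values);  // flush the tail
}
```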
diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.cc b/paddle/fluid/distributed/ps/wrapper/fleet.cc
index 8d6276733e0e5..bddda8f8fff8a 100644
--- a/paddle/fluid/distributed/ps/wrapper/fleet.cc
+++ b/paddle/fluid/distributed/ps/wrapper/fleet.cc
@@ -536,7 +536,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
     output_len = 0;
     if (tensor->lod().size() > 0) {
-      for (int i = 0; i < tensor->lod()[0].size() - 1; ++i) {
+      for (size_t i = 0; i < tensor->lod()[0].size() - 1; ++i) {
         for (size_t j = tensor->lod()[0][i]; j < tensor->lod()[0][i + 1];
              ++j, output_len += fea_dim) {
           uint64_t real_id = static_cast<uint64_t>(ids[j]);
@@ -566,7 +566,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
         }
       }
     } else {
-      for (int i = 0; i < len; ++i, output_len += fea_dim) {
+      for (size_t i = 0; i < len; ++i, output_len += fea_dim) {
         uint64_t real_id = static_cast<uint64_t>(ids[i]);
         if (real_id == padding_id) {
           continue;
         }
@@ -592,7 +592,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
       ++input_idx;
     }
   }
-  CHECK(output_len == g_tensor->numel());
+  CHECK(static_cast<int64_t>(output_len) == g_tensor->numel());
 }
 std::vector<float *> push_g_vec(input_idx, nullptr);
diff --git a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
index bade56f239f65..7173c76287096 100644
--- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
@@ -295,7 +295,7 @@ void RunBrpcPushSparse() {
       fea_temp_value_ptr.data(), 0, fea_keys.data(), fea_keys.size(), true);
   pull_update_status.wait();
-  for (size_t idx = 0; idx < tensor->numel(); ++idx) {
+  for (int64_t idx = 0; idx < tensor->numel(); ++idx) {
     EXPECT_FLOAT_EQ(fea_temp_values[idx], fea_values[idx] - 1.0);
   }
diff --git a/paddle/fluid/distributed/test/ctr_accessor_test.cc b/paddle/fluid/distributed/test/ctr_accessor_test.cc
index 51254391a4283..bb25fd6991665 100644
--- a/paddle/fluid/distributed/test/ctr_accessor_test.cc
+++ b/paddle/fluid/distributed/test/ctr_accessor_test.cc
@@ -222,15 +222,15 @@ TEST(downpour_feature_value_accessor_test, test_update) {
     v.embed_w = value[i][5];
     int idx = 6;
-    for (auto j = 0u; j < acc->common_feature_value.embed_sgd_dim; ++j) {
+    for (int j = 0; j < acc->common_feature_value.embed_sgd_dim; ++j) {
       v.embed_g2sum.push_back(value[i][idx + j]);
     }
     idx += acc->common_feature_value.embed_sgd_dim;
-    for (auto j = 0u; j < acc->common_feature_value.embedx_dim; ++j) {
+    for (int j = 0; j < acc->common_feature_value.embedx_dim; ++j) {
       v.embedx_w.push_back(value[i][idx + j]);
     }
     idx += acc->common_feature_value.embedx_dim;
-    for (auto j = 0u; j < acc->common_feature_value.embedx_sgd_dim; ++j) {
+    for (int j = 0; j < acc->common_feature_value.embedx_sgd_dim; ++j) {
       v.embedx_g2sum.push_back(value[i][idx + j]);
     }
@@ -239,7 +239,7 @@ TEST(downpour_feature_value_accessor_test, test_update) {
     push_v.show = grad[i][1];
     push_v.click = grad[i][2];
     push_v.embed_g = grad[i][3];
-    for (auto j = 0; j < parameter.embedx_dim(); ++j) {
+    for (int j = 0; j < parameter.embedx_dim(); ++j) {
       push_v.embedx_g.push_back(grad[i][4 + j]);
     }
diff --git a/paddle/fluid/distributed/test/memory_sparse_table_test.cc b/paddle/fluid/distributed/test/memory_sparse_table_test.cc
index 1689b7716bbc4..485d81a7d6856 100644
--- a/paddle/fluid/distributed/test/memory_sparse_table_test.cc
+++ b/paddle/fluid/distributed/test/memory_sparse_table_test.cc
@@ -142,7 +142,7 @@ TEST(MemorySparseTable, SGD) {
   // table->PullSparse(pull_values.data(), value);
   for (size_t i = 0; i < init_keys.size(); ++i) {
-    for (size_t j = 2; j < emb_dim + 3; ++j) {
+    for (int j = 2; j < emb_dim + 3; ++j) {
       auto update_val = init_values[i * (emb_dim + 1) + j] -
                         0.1 * total_gradients[3 + i * (emb_dim + 4) + j];
       VLOG(3) << total_gradients[i * (emb_dim + 4) + j + 3] << ":"
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index bb495860c90e5..76331bfe7c90f 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1943,6 +1943,7 @@ USE_TRT_CONVERTER(multiclass_nms);
 USE_TRT_CONVERTER(multiclass_nms3);
 USE_TRT_CONVERTER(nearest_interp);
 USE_TRT_CONVERTER(nearest_interp_v2);
+USE_TRT_CONVERTER(bilinear_interp_v2);
 USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 4c52d91fa1259..e6c372e205b41 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -52,6 +52,7 @@ list(
   conv3d_op.cc
   mish_op.cc
   nearest_interp_v2_op.cc
+  bilinear_interp_v2_op.cc
   pool3d_op.cc
   deformable_conv_op.cc
   preln_emb_eltwise_layernorm.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc
index 2ef8ec16c76df..9005557a51f3a 100644
--- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc
@@ -49,14 +49,30 @@ class ActivationOpConverter : public OpConverter {
         << "convert a fluid Activation op to tensorrt activation layer whose "
            "type is "
         << op_type_;
-    const nvinfer1::ITensor* input_tensor =
-        engine_->GetITensor(op_desc.Input("X")[0]);
+    auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
     auto op_pair = ops.find(op_type_);
-
-    nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
-        engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
-        op_pair->second);
+    nvinfer1::IActivationLayer* layer = nullptr;
+    if (op_type_ == "softplus") {
+      const float beta = op_desc.HasAttr("beta")
+                             ? BOOST_GET_CONST(float, op_desc.GetAttr("beta"))
+                             : 1.0f;
+      const float threshold =
+          op_desc.HasAttr("threshold")
+              ? BOOST_GET_CONST(float, op_desc.GetAttr("threshold"))
+              : 20.0f;
+      auto* layer_clip = TRT_ENGINE_ADD_LAYER(
+          engine_, Activation, *input_tensor, nvinfer1::ActivationType::kCLIP);
+      layer_clip->setAlpha(-3.40282e+038);
+      layer_clip->setBeta(threshold / beta);
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Activation,
+                                   *layer_clip->getOutput(0), op_pair->second);
+      layer->setAlpha(1.0f / beta);
+      layer->setBeta(beta);
+    } else {
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Activation, *input_tensor,
+                                   op_pair->second);
+    }
 #if IS_TRT_VERSION_GE(5130)
     // max(alpha, min(beta, x))
@@ -64,6 +80,41 @@ class ActivationOpConverter : public OpConverter {
       layer->setAlpha(0.);
       layer->setBeta(6.);
     }
+    if (op_type_ == "elu") {
+      const float alpha = op_desc.HasAttr("alpha")
+                              ? BOOST_GET_CONST(float, op_desc.GetAttr("alpha"))
+                              : 1.0f;
+      layer->setAlpha(alpha);
+    }
+    if (op_type_ == "selu") {
+      const float alpha = op_desc.HasAttr("alpha")
+                              ? BOOST_GET_CONST(float, op_desc.GetAttr("alpha"))
+                              : 1.0507009873554804934193349852946;
+      const float scale = op_desc.HasAttr("scale")
+                              ? BOOST_GET_CONST(float, op_desc.GetAttr("scale"))
+                              : 1.6732632423543772848170429916717;
+      layer->setAlpha(alpha);
+      layer->setBeta(scale);
+    }
+    if (op_type_ == "stanh") {
+      const float scale_a =
+          op_desc.HasAttr("scale_a")
+              ? BOOST_GET_CONST(float, op_desc.GetAttr("scale_a"))
+              : 0.67f;
+      const float scale_b =
+          op_desc.HasAttr("scale_b")
+              ? BOOST_GET_CONST(float, op_desc.GetAttr("scale_b"))
+              : 1.7159f;
+      layer->setAlpha(scale_b);
+      layer->setBeta(scale_a);
+    }
+    if (op_type_ == "thresholded_relu") {
+      const float threshold =
+          op_desc.HasAttr("threshold")
+              ? BOOST_GET_CONST(float, op_desc.GetAttr("threshold"))
+              : 1.0f;
+      layer->setAlpha(threshold);
+    }
 #endif
     auto output_name = op_desc.Output("Out")[0];
@@ -83,8 +134,13 @@ const std::unordered_map<std::string, nvinfer1::ActivationType>
         {"tanh", nvinfer1::ActivationType::kTANH},
 #if IS_TRT_VERSION_GE(5130)
         {"relu6", nvinfer1::ActivationType::kCLIP},
+        {"elu", nvinfer1::ActivationType::kELU},
+        {"selu", nvinfer1::ActivationType::kSELU},
+        {"softsign", nvinfer1::ActivationType::kSOFTSIGN},
+        {"softplus", nvinfer1::ActivationType::kSOFTPLUS},
+        {"stanh", nvinfer1::ActivationType::kSCALED_TANH},
+        {"thresholded_relu", nvinfer1::ActivationType::kTHRESHOLDED_RELU}};
 #endif
-};

 class ReluOpConverter : public ActivationOpConverter {
  public:
@@ -101,11 +157,43 @@ class TanhOpConverter : public ActivationOpConverter {
   TanhOpConverter() { op_type_ = "tanh"; }
 };

+#if IS_TRT_VERSION_GE(5130)
 class Relu6OpConverter : public ActivationOpConverter {
  public:
   Relu6OpConverter() { op_type_ = "relu6"; }
 };

+class EluOpConverter : public ActivationOpConverter {
+ public:
+  EluOpConverter() { op_type_ = "elu"; }
+};
+
+class SeluOpConverter : public ActivationOpConverter {
+ public:
+  SeluOpConverter() { op_type_ = "selu"; }
+};
+
+class SoftsignOpConverter : public ActivationOpConverter {
+ public:
+  SoftsignOpConverter() { op_type_ = "softsign"; }
+};
+
+class SoftplusOpConverter : public ActivationOpConverter {
+ public:
+  SoftplusOpConverter() { op_type_ = "softplus"; }
+};
+
+class STanhOpConverter : public ActivationOpConverter {
+ public:
+  STanhOpConverter() { op_type_ = "stanh"; }
+};
+
+class ThreasholdedReluOpConverter : public ActivationOpConverter {
+ public:
+  ThreasholdedReluOpConverter() { op_type_ = "thresholded_relu"; }
+};
+#endif
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
@@ -113,4 +201,12 @@ class Relu6OpConverter : public ActivationOpConverter {
 REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
 REGISTER_TRT_OP_CONVERTER(sigmoid, SigmoidOpConverter);
 REGISTER_TRT_OP_CONVERTER(tanh, TanhOpConverter);
+#if IS_TRT_VERSION_GE(5130)
 REGISTER_TRT_OP_CONVERTER(relu6, Relu6OpConverter);
+REGISTER_TRT_OP_CONVERTER(elu, EluOpConverter);
+REGISTER_TRT_OP_CONVERTER(selu, SeluOpConverter);
+REGISTER_TRT_OP_CONVERTER(softsign, SoftsignOpConverter);
+REGISTER_TRT_OP_CONVERTER(softplus, SoftplusOpConverter);
+REGISTER_TRT_OP_CONVERTER(stanh, STanhOpConverter);
+REGISTER_TRT_OP_CONVERTER(thresholded_relu, ThreasholdedReluOpConverter);
+#endif
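The softplus branch above deserves a note: Paddle defines softplus(x) = (1/beta) * log(1 + exp(beta * x)) and switches to the identity once beta * x exceeds `threshold`, while TensorRT's kSOFTPLUS activation computes alpha * log(exp(beta * x) + 1). The converter therefore sets alpha = 1/beta, keeps beta, and prepends a kCLIP layer capping the input at threshold / beta so the exponential cannot overflow. A scalar reference of the Paddle-side semantics, as a sketch for intuition (not part of the PR):

```cpp
#include <cmath>

// Reference semantics of Paddle's softplus attributes (beta, threshold).
float SoftplusRef(float x, float beta = 1.0f, float threshold = 20.0f) {
  if (beta * x > threshold) return x;  // linear regime; exp() would overflow
  return std::log1p(std::exp(beta * x)) / beta;  // (1/beta)*log(1+e^(beta*x))
}
```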
diff --git a/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc
new file mode 100644
index 0000000000000..f0e56082b8f77
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc
@@ -0,0 +1,133 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class BilinearInterpolateV2OpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(3) << "convert a fluid bilinear_interp_v2 op";
+
+    framework::OpDesc op_desc(op, nullptr);
+
+    std::string input_name = op_desc.Input("X").front();
+    std::string output_name = op_desc.Output("Out").front();
+
+    auto input = engine_->GetITensor(input_name);
+
+    auto data_layout = framework::StringToDataLayout(
+        BOOST_GET_CONST(std::string, op_desc.GetAttr("data_layout")));
+    auto interp_method =
+        BOOST_GET_CONST(std::string, op_desc.GetAttr("interp_method"));
+    bool align_corners =
+        BOOST_GET_CONST(bool, op_desc.GetAttr("align_corners"));
+    auto align_mode = BOOST_GET_CONST(int, op_desc.GetAttr("align_mode"));
+
+    auto resize_inputs = op_desc.Inputs();
+    auto input_names = op_desc.Input("X");
+    auto out_h = BOOST_GET_CONST(int, op_desc.GetAttr("out_h"));
+    auto out_w = BOOST_GET_CONST(int, op_desc.GetAttr("out_w"));
+
+    auto layer = TRT_ENGINE_ADD_LAYER(engine_, Resize, *input);
+    if (align_mode == 0 && !align_corners) {
+      layer->setResizeMode(nvinfer1::ResizeMode::kLINEAR);
+    }
+
+    auto in_dim = input->getDimensions();
+    float scale_h = 1.f;
+    float scale_w = 1.f;
+
+    // Scales priority: Scale(tensor) > scale(attr) > out_d/out_h/out_w(attr)
+    bool has_scale_input_attr =
+        (resize_inputs.find("Scale") != resize_inputs.end());
+    bool has_scale_input =
+        has_scale_input_attr && (op_desc.Input("Scale").size() > 0);
+    if (has_scale_input) {
+      auto* scale_var = scope.FindVar(op_desc.Input("Scale")[0]);
+      auto* scale_tensor = scale_var->GetMutable<framework::LoDTensor>();
+      auto* scale_d = scale_tensor->data<float>();
+      scale_h = scale_d[0];
+      scale_w = scale_d[1];
+    } else {
+      const std::vector<float> scale_attr =
+          BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("scale"));
+      if (scale_attr.size() > 1) {
+        scale_h = scale_attr[0];
+        scale_w = scale_attr[1];
+      }
+    }
+
+    // The axes differ between static and dynamic shape mode
+    bool with_dynamic = engine_->with_dynamic_shape();
+    int h_axis = (data_layout == framework::DataLayout::kNCHW) + with_dynamic;
+    int w_axis =
+        (data_layout == framework::DataLayout::kNCHW) + 1 + with_dynamic;
+
+    if (scale_w > 0. && scale_h > 0.) {
+      out_h = static_cast<int>(in_dim.d[h_axis] * scale_h);
+      out_w = static_cast<int>(in_dim.d[w_axis] * scale_w);
+    }
+
+    if (out_h > 0 && out_w > 0) {
+      scale_h =
+          static_cast<float>(out_h) / static_cast<float>(in_dim.d[h_axis]);
+      scale_w =
+          static_cast<float>(out_w) / static_cast<float>(in_dim.d[w_axis]);
+    }
+
+    std::vector<float> scales;
+
+    if (engine_->with_dynamic_shape()) {
+      scales.push_back(1.f);
+    }
+
+    if (data_layout == framework::DataLayout::kNCHW) {
+      scales.push_back(1.f);
+      scales.push_back(scale_h);
+      scales.push_back(scale_w);
+    } else if (data_layout == framework::DataLayout::kNHWC) {
+      scales.push_back(scale_h);
+      scales.push_back(scale_w);
+      scales.push_back(1.f);
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Data layout must be NCHW or NHWC."));
+    }
+
+    layer->setScales(scales.data(), scales.size());
+    RreplenishLayerAndOutput(layer, "bilinear_interp_v2", {output_name},
+                             test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(bilinear_interp_v2, BilinearInterpolateV2OpConverter);
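The scale resolution above follows the priority Scale(tensor) > scale(attr) > out_h/out_w(attr). A condensed sketch of the resulting arithmetic for an NCHW input (NHWC only reorders the last three entries; this mirrors the converter rather than replacing it):

```cpp
#include <vector>

// Derive the per-axis resize scales for an input of shape [N, C, H, W].
std::vector<float> ComputeScales(int in_h, int in_w, float scale_h,
                                 float scale_w, int out_h, int out_w,
                                 bool dynamic_shape) {
  // Explicit scales win; otherwise fall back to the out_h/out_w attributes.
  if (scale_h > 0.f && scale_w > 0.f) {
    out_h = static_cast<int>(in_h * scale_h);
    out_w = static_cast<int>(in_w * scale_w);
  }
  if (out_h > 0 && out_w > 0) {
    scale_h = static_cast<float>(out_h) / static_cast<float>(in_h);
    scale_w = static_cast<float>(out_w) / static_cast<float>(in_w);
  }
  std::vector<float> scales;
  if (dynamic_shape) scales.push_back(1.f);  // leading batch axis
  scales.push_back(1.f);                     // channel axis (NCHW)
  scales.push_back(scale_h);
  scales.push_back(scale_w);
  return scales;
}
```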
diff --git a/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc b/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc
index e6422522e5018..ed113798a7325 100644
--- a/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc
@@ -39,25 +39,60 @@ class ShuffleChannelOpConverter : public OpConverter {
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     auto input_dims = input->getDimensions();
-
-    int c = input_dims.d[0];
-    int h = input_dims.d[1];
-    int w = input_dims.d[2];
+    auto output_name = op_desc.Output("Out")[0];
     int group = BOOST_GET_CONST(int, op_desc.GetAttr("group"));

-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    nvinfer1::Dims4 reshape_dim(group, c / group, h, w);
-    layer->setReshapeDimensions(reshape_dim);
-    layer->setSecondTranspose({1, 0, 2, 3});
-    auto* output = layer->getOutput(0);
+#if IS_TRT_VERSION_GE(8000)
+    if (engine_->with_dynamic_shape()) {
+      auto* input_shape_tensor = Shape(input);
+      auto* channel_shape_tensor = GetEleTensorOfShape(input_shape_tensor, 1);
+      auto* group_tensor =
+          Add1DConstantLayer(group, output_name + "_group_tensor_");
+      auto* new_channel_shape_tensor = Div(channel_shape_tensor, group_tensor);
+      std::vector<int32_t> shape_dim3{0, 2, 3};
+      auto* shape_dim3_tensor = Gather(input_shape_tensor, shape_dim3);

-    auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *output);
-    nvinfer1::Dims3 reshape_dim2(c, h, w);
-    reshape_layer->setReshapeDimensions(reshape_dim2);
+      std::vector<nvinfer1::ITensor*> itensors;
+      itensors.push_back(shape_dim3_tensor);
+      itensors.push_back(group_tensor);
+      itensors.push_back(new_channel_shape_tensor);
+      auto* reshape_tensor = Concat(itensors);

-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(reshape_layer, "shuffle_channel", {output_name},
-                             test_mode);
+      auto* reshape_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *reshape_tensor);
+      nvinfer1::Permutation transpose_new_input{0, 3, 4, 1, 2};
+      reshape_layer->setSecondTranspose(transpose_new_input);
+
+      auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setInput(1, *(reshape_layer->getOutput(0)));
+      nvinfer1::Permutation transpose_embed{0, 2, 1, 3, 4};
+      layer->setSecondTranspose(transpose_embed);
+      auto* output = layer->getOutput(0);
+      auto* output_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *output);
+      output_layer->setInput(1, *input_shape_tensor);
+
+      RreplenishLayerAndOutput(output_layer, "shuffle_channel", {output_name},
+                               test_mode);
+    }
+#endif
+    if (!engine_->with_dynamic_shape()) {
+      int c = input_dims.d[0];
+      int h = input_dims.d[1];
+      int w = input_dims.d[2];
+
+      auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      nvinfer1::Dims4 reshape_dim(group, c / group, h, w);
+      layer->setReshapeDimensions(reshape_dim);
+      layer->setSecondTranspose({1, 0, 2, 3});
+      auto* output = layer->getOutput(0);
+
+      auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *output);
+      nvinfer1::Dims3 reshape_dim2(c, h, w);
+      reshape_layer->setReshapeDimensions(reshape_dim2);
+
+      RreplenishLayerAndOutput(reshape_layer, "shuffle_channel", {output_name},
+                               test_mode);
+    }
   }
 };
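A quick way to sanity-check the dynamic-shape branch above: the two reshapes plus the transpose implement the standard channel-shuffle permutation, [C] -> [g, C/g] -> transpose -> [C/g, g] -> [C]. A standalone check (not TRT code):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int C = 6, g = 2;
  std::vector<int> shuffled(C);
  for (int a = 0; a < g; ++a)        // group index
    for (int b = 0; b < C / g; ++b)  // channel index within the group
      shuffled[b * g + a] = a * (C / g) + b;
  for (int c : shuffled) std::printf("%d ", c);  // prints: 0 3 1 4 2 5
  return 0;
}
```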
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 6ce9b9c0bf85a..d6aa04612d648 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -73,6 +73,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       "conv2d_fusion",
       "pool2d",
       "relu",
+      "elu",
+      "selu",
+      "softsign",
+      "softplus",
+      "stanh",
+      "thresholded_relu",
       "exp",
       "log",
       "sqrt",
@@ -138,6 +144,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "conv3d_transpose",
       "mish",
       "nearest_interp_v2",
+      "bilinear_interp_v2",
       "pool3d",
       "deformable_conv",
       "relu6",
@@ -163,6 +170,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       "conv2d_fusion",
       "pool2d",
       "relu",
+      "elu",
+      "selu",
+      "softsign",
+      "softplus",
+      "stanh",
+      "thresholded_relu",
       "exp",
       "log",
       "sqrt",
@@ -227,6 +240,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "conv3d",
       "conv3d_transpose",
       "mish",
+      "bilinear_interp_v2",
       "nearest_interp_v2",
       "pool3d",
       "deformable_conv",
@@ -261,30 +275,16 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     return false;

   for (auto& teller : tellers_) {
-    std::unordered_set<std::string> act_op_list = {"relu",
-                                                   "elu",
-                                                   "selu",
-                                                   "softsign",
-                                                   "softplus",
-                                                   "stanh",
-                                                   "thresholded_relu",
-                                                   "exp",
-                                                   "log",
-                                                   "sqrt",
-                                                   "abs",
-                                                   "sin",
-                                                   "cos",
-                                                   "tan",
-                                                   "sinh",
-                                                   "cosh",
-                                                   "asin",
-                                                   "acos",
-                                                   "atan",
-                                                   "asinh",
-                                                   "atanh",
-                                                   "ceil",
-                                                   "floor",
-                                                   "erf"};
+    std::unordered_set<std::string> act_op_list = {
+        "relu",     "relu6",  "sigmoid",
+        "elu",      "selu",   "softsign",
+        "softplus", "stanh",  "thresholded_relu",
+        "exp",      "log",    "sqrt",
+        "abs",      "sin",    "cos",
+        "tan",      "tanh",   "sinh",
+        "cosh",     "asin",   "acos",
+        "atan",     "asinh",  "atanh",
+        "ceil",     "floor",  "erf"};
     if (act_op_list.find(op_type) != act_op_list.end()) {
       auto* block = desc.Block();
       if (block == nullptr) {
@@ -877,6 +877,99 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       }
     }

+    if (op_type == "bilinear_interp_v2") {
+      std::vector<std::string> attrs{"data_layout",   "interp_method",
+                                     "align_corners", "scale",
+                                     "out_h",         "out_w"};
+      for (auto const attr : attrs) {
+        if (!desc.HasAttr(attr)) {
+          VLOG(3) << "The op_type " << op_type << " doesn't have the attr "
+                  << attr << " and return false";
+          return false;
+        }
+      }
+
+      auto resize_inputs = desc.Inputs();
+      if (resize_inputs.find("SizeTensor") != resize_inputs.end()) {
+        if (desc.Input("SizeTensor").size() >= 1) {
+          VLOG(3)
+              << "The Paddle-TRT doesn't support the SizeTensor for op_type "
+              << op_type;
+          return false;
+        }
+      }
+
+      if (resize_inputs.find("OutSize") != resize_inputs.end()) {
+        if (desc.Input("OutSize").size() >= 1) {
+          VLOG(3) << "The Paddle-TRT doesn't support the OutSize for op_type "
+                  << op_type;
+          return false;
+        }
+      }
+
+      auto data_layout = framework::StringToDataLayout(
+          BOOST_GET_CONST(std::string, desc.GetAttr("data_layout")));
+      if (data_layout != framework::DataLayout::kNCHW &&
+          data_layout != framework::DataLayout::kNHWC) {
+        VLOG(3) << "The op_type " << op_type
+                << " is not NCHW or NHWC, return false";
+        return false;
+      }
+      auto interp_method =
+          BOOST_GET_CONST(std::string, desc.GetAttr("interp_method"));
+      if (interp_method != "bilinear") {
+        VLOG(3) << "The interp_method of op_type " << op_type
+                << " is not bilinear";
+        return false;
+      }
+
+      auto align_corners = BOOST_GET_CONST(bool, desc.GetAttr("align_corners"));
+      if (align_corners != false) {
+        VLOG(3)
+            << "The bilinear_interp_v2 only supports align_corners with false.";
+        return false;
+      }
+
+      bool has_scale_input_size =
+          (resize_inputs.find("Scale") != resize_inputs.end());
+
+      if (has_scale_input_size && desc.Input("Scale").size() != 1) {
+        const std::vector<float> scale =
+            BOOST_GET_CONST(std::vector<float>, desc.GetAttr("scale"));
+        if (scale.size() <= 1) {
+          if (!desc.HasAttr("out_h") || !desc.HasAttr("out_w")) {
+            VLOG(3) << "The op_type " << op_type
+                    << " doesn't have Scale and the scale size <=1 and without "
+                       "out_h / out_w, it will return false";
+            return false;
+          }
+          auto out_h = BOOST_GET_CONST(int, desc.GetAttr("out_h"));
+          auto out_w = BOOST_GET_CONST(int, desc.GetAttr("out_w"));
+          if (!(out_h <= 0 && out_w <= 0)) {
+            if (out_h <= 0) {
+              VLOG(3) << "The op_type " << op_type
+                      << "'s out_h must be greater than 0 if scale is not set.";
+              return false;
+            }
+            if (out_w <= 0) {
+              VLOG(3) << "The op_type " << op_type
+                      << "'s out_w must be greater than 0 if scale is not set.";
+              return false;
+            }
+          }
+        } else {
+          for (size_t i = 0; i < scale.size(); i++) {
+            if (scale[i] <= 0 && with_dynamic_shape) {
+              VLOG(3) << "dynamic shape not support Attr(scale[" << i << "]) "
+                      << scale[i]
+                      << " less than 1 and Input(Scale) vector not set.";
+              return false;
+            }
+          }
+        }
+      }
+    }
+
     if (op_type == "hard_swish") {
       if (desc.Input("X").size() != 1) {
         VLOG(3) << "HardSwish op has only 1 input, but got "
@@ -1511,11 +1604,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }

     if (op_type == "shuffle_channel") {
+#if !IS_TRT_VERSION_GE(8000)
       if (with_dynamic_shape) {
         VLOG(3) << "You are running the TRT Dynamic Shape mode, "
-                   "the shuffle_channel op does not support dynamic shape yet";
+                   "the shuffle_channel op does not support dynamic shape "
+                   "for TRT versions below 8.0 yet";
         return false;
       }
+#endif
     }

     if (op_type == "skip_layernorm") {
diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
index f27b66b03f544..c53ae6d118470 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
@@ -14,8 +14,6 @@

 #pragma once

-#include <thrust/device_vector.h>
-
 #include
 #include
 #include
diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
index 1cfc9fade7b15..0150564e58206 100644
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include
+#include <thrust/device_vector.h>

 #include

@@ -63,9 +64,7 @@ void SplitPlugin::shareData(const SplitPlugin* another) {
   inner_cols_ = another->inner_cols_;
   same_shape_ = another->same_shape_;
   axis_shape_ = another->axis_shape_;
-  d_segment_offsets_ = another->d_segment_offsets_;
   segment_offsets_ = another->segment_offsets_;
-  d_output_ptrs_.resize(another->d_output_ptrs_.size(), nullptr);
 }

 int SplitPlugin::initialize() TRT_NOEXCEPT {
@@ -93,9 +92,7 @@ int SplitPlugin::initialize() TRT_NOEXCEPT {
     segment_offsets.push_back(segment_offsets.back() + output_length_[i]);
   }
   axis_shape_ = dims.d[axis_];
-  d_segment_offsets_ = segment_offsets;
   segment_offsets_ = std::move(segment_offsets);
-  d_output_ptrs_.resize(this->getNbOutputs(), nullptr);
   return 0;
 }

@@ -133,13 +130,18 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
                          void* const* outputs, void* workspace,
                          cudaStream_t stream) TRT_NOEXCEPT {
 #endif
+  // These two thrust variables are declared here, not in the .h file,
+  // to avoid a compile error with CUDA 11.6.
+  thrust::device_vector<int> d_segment_offsets = segment_offsets_;
+  thrust::device_vector<float*> d_output_ptrs;
+  d_output_ptrs.resize(segment_offsets_.size(), nullptr);
   const int* d_segment_offsets_ptr =
-      thrust::raw_pointer_cast(&d_segment_offsets_[0]);
+      thrust::raw_pointer_cast(&d_segment_offsets[0]);
   float const* input_ptr = reinterpret_cast<float const*>(inputs[0]);
   float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
-  float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs_[0]);
+  float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs[0]);
   PADDLE_ENFORCE_GPU_SUCCESS(cudaMemcpyAsync(
-      output_ptrs, h_odatas, d_output_ptrs_.size() * sizeof(float*),
+      output_ptrs, h_odatas, d_output_ptrs.size() * sizeof(float*),
       cudaMemcpyHostToDevice, stream));

   int outer_rows = outer_rows_ * batchSize;
@@ -150,7 +152,7 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
             std::min((outer_rows_ - 1) / block.z + 1, 65535u));

   split_kernel<<<grid, block, 0, stream>>>(
-      d_segment_offsets_.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,
+      segment_offsets_.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,
       inner_cols_, axis_shape_, outer_rows);
   return cudaGetLastError() != cudaSuccess;
 }
diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
index 49f028493ee87..93dc45215d4ee 100644
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
@@ -14,8 +14,6 @@

 #pragma once

-#include <thrust/device_vector.h>
-
 #include
 #include
 #include
@@ -94,8 +92,6 @@ class SplitPlugin : public PluginTensorRTV2Ext {
   bool same_shape_;
   std::vector<int> output_length_;
   std::vector<int> segment_offsets_;
-  thrust::device_vector<int> d_segment_offsets_;
-  thrust::device_vector<float*> d_output_ptrs_;

  private:
   void shareData(const SplitPlugin* another);
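The pattern behind the split-plugin change, as a sketch: keep only host-side state in the class so headers that include the plugin never need `<thrust/device_vector.h>`, and materialize device buffers inside the call. The trade-off, assumed acceptable here, is one device allocation plus one host-to-device copy per enqueue:

```cpp
#include <thrust/device_vector.h>
#include <vector>

struct PluginState {
  std::vector<int> segment_offsets_;  // host-side only, header-safe
};

void Enqueue(const PluginState& s) {
  // The device copy lives only for the duration of this call, so the class
  // definition (and every translation unit including it) stays thrust-free.
  thrust::device_vector<int> d_offsets = s.segment_offsets_;
  const int* ptr = thrust::raw_pointer_cast(d_offsets.data());
  (void)ptr;  // pass to the kernel launch here
}
```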
diff --git a/paddle/fluid/jit/CMakeLists.txt b/paddle/fluid/jit/CMakeLists.txt
new file mode 100644
index 0000000000000..b44060c0fad52
--- /dev/null
+++ b/paddle/fluid/jit/CMakeLists.txt
@@ -0,0 +1,38 @@
+cc_library(
+  jit_serializer
+  SRCS serializer.cc
+  DEPS lod_tensor device_context)
+
+cc_library(
+  jit_layer
+  SRCS layer.cc
+  DEPS executor parallel_executor executor_cache)
+
+cc_library(
+  jit_base_function
+  SRCS base_function.cc
+  DEPS scope proto_desc)
+
+if(WITH_TESTING AND NOT WIN32)
+  add_custom_target(
+    jit_download_program
+    COMMAND wget -nc https://paddle-ci.gz.bcebos.com/dy2st/Testing.tar.gz
+    COMMAND tar zxvf Testing.tar.gz)
+  set(JIT_DEPS
+      phi
+      elementwise_add_op
+      matmul_v2_op
+      activation_op
+      reduce_mean_op
+      feed_op
+      fetch_op
+      scale_op
+      jit_serializer
+      jit_layer
+      jit_base_function)
+  cc_test(
+    layer_test
+    SRCS layer_test.cc
+    DEPS ${JIT_DEPS})
+  add_dependencies(layer_test jit_download_program)
+endif()
diff --git a/paddle/fluid/jit/ast.h b/paddle/fluid/jit/ast.h
new file mode 100644
index 0000000000000..535b3a89dd60f
--- /dev/null
+++ b/paddle/fluid/jit/ast.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace jit {
+using Variable = paddle::framework::Variable;
+class BaseFunction;
+class CompilationUnit;
+
+class ClassType {
+ public:
+  ClassType(const std::vector<std::string>& names,
+            std::weak_ptr<CompilationUnit> cu)
+      : const_names_(names), compilation_unit_(cu) {}
+
+  static std::shared_ptr<ClassType> Create(
+      const std::vector<std::string>& names,
+      std::weak_ptr<CompilationUnit> cu) {
+    return std::make_shared<ClassType>(names, cu);
+  }
+
+  // const std::vector<BaseFunction*> Methods() const;
+
+  // const Variable& GetAttribute(size_t slot) const;
+  // const Variable& GetAttribute(const std::string& name) const;
+
+  // size_t AddAttribute(const std::string& name, Variable val);
+
+ private:
+  // TODO(dev): distinguish parameter and buffer
+  std::vector<std::string> const_names_;
+  std::vector<Variable> const_value_;
+
+  std::vector<BaseFunction*> methods_;
+  std::vector<BaseFunction*> static_method_;
+  std::weak_ptr<CompilationUnit> compilation_unit_;
+};
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/base_function.cc b/paddle/fluid/jit/base_function.cc
new file mode 100644
index 0000000000000..fcbe64de8d70d
--- /dev/null
+++ b/paddle/fluid/jit/base_function.cc
@@ -0,0 +1,124 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/jit/base_function.h"
+
+namespace paddle {
+namespace jit {
+
+Argument::Argument(const std::string &name, bool is_out)
+    : name_(name), is_output_(is_out) {}
+
+const std::string &Argument::Name() const { return name_; }
+
+std::vector<std::string> FunctionSchema::GetInputArgNames() {
+  std::vector<std::string> input_arg_names;
+  for (auto &arg : input_args) {
+    input_arg_names.emplace_back(arg.Name());
+  }
+  return input_arg_names;
+}
+
+std::vector<std::string> FunctionSchema::GetOutputArgNames() {
+  std::vector<std::string> output_arg_names;
+  for (auto &arg : output_args) {
+    output_arg_names.emplace_back(arg.Name());
+  }
+  return output_arg_names;
+}
+
+void FunctionSchema::AddInputArg(std::string name, bool is_output) {
+  input_args.emplace_back(name, is_output);
+}
+
+void FunctionSchema::AddOutputArg(std::string name, bool is_output) {
+  output_args.emplace_back(name, is_output);
+}
+
+BaseFunction::BaseFunction(
+    const framework::ProgramDesc &program_desc,
+    const std::vector<std::string> param_names_for_program,
+    const VariableNameMap &params_dict)
+    : program_desc_(program_desc) {
+  // Parse FunctionSchema
+  // skip_var_name_ = program_desc_.GetFetchTargetNames();
+  for (auto &in_name : program_desc_.GetFeedTargetNames()) {
+    schema_.AddInputArg(in_name, false);
+  }
+  for (auto &out_name : program_desc_.GetFetchTargetNames()) {
+    schema_.AddOutputArg(out_name, true);
+  }
+  // share params into scope
+  SharePartialIntoScope(param_names_for_program, params_dict);
+  VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
+  // remove feed fetch op
+  RemoveFeedFetch();
+}
+
+void BaseFunction::FetchOutput(std::vector<Variable> *outs) {
+  for (auto &out_name : schema_.GetOutputArgNames()) {
+    VLOG(3) << "fetch out: " << out_name;
+    auto *var = scope_.FindVar(out_name);
+    auto &src_tensor = var->Get<DenseTensor>();
+    Variable v;
+    auto *p = v.GetMutable<DenseTensor>();
+    *p = src_tensor;
+    outs->emplace_back(v);
+  }
+}
+
+void BaseFunction::ShareIntoScope(const VariableNameMap &ivals) {
+  VLOG(3) << "ivals size: " << ivals.size();
+  for (auto it = ivals.begin(); it != ivals.end(); ++it) {
+    VLOG(3) << "share into scope: " << it->first;
+    DenseTensor dense_tensor = it->second.Get<DenseTensor>();
+    auto *var = scope_.Var(it->first);
+    auto *dst_tensor = var->GetMutable<DenseTensor>();
+    *dst_tensor = dense_tensor;
+  }
+}
+
+void BaseFunction::SharePartialIntoScope(
+    const std::vector<std::string> param_names_for_program,
+    const VariableNameMap &params_dict) {
+  VLOG(3) << "ivals size: " << param_names_for_program.size();
+  for (size_t i = 0; i < param_names_for_program.size(); ++i) {
+    std::string name = param_names_for_program[i];
+    Variable val = params_dict.find(name)->second;
+    auto &dense_tensor = val.Get<DenseTensor>();
+    VLOG(3) << "share into scope: " << name;
+    auto *var = scope_.Var(name);
+    auto *dst_tensor = var->GetMutable<DenseTensor>();
+    *dst_tensor = dense_tensor;
+  }
+}
+
+void BaseFunction::RemoveFeedFetch() {
+  for (size_t i = 0; i < program_desc_.Size(); ++i) {
+    auto *block = program_desc_.MutableBlock(i);
+    const auto &all_ops = block->AllOps();
+    size_t op_size = all_ops.size();
+    VLOG(3) << "op_size: " << op_size;
+    for (int i = op_size - 1; i >= 0; i--) {
+      auto op = all_ops[i];
+      if (op->Type() == "feed" || op->Type() == "fetch") {
+        VLOG(3) << "remove op type: " << op->Type() << ", index: " << i;
+        block->RemoveOp(i, i + 1);
+      }
+    }
+  }
+}
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/base_function.h b/paddle/fluid/jit/base_function.h
new file mode 100644
index 0000000000000..3d4f9a29eb6b1
--- /dev/null
+++ b/paddle/fluid/jit/base_function.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/utils/none.h"
+#include "paddle/utils/optional.h"
+
+namespace paddle {
+namespace jit {
+
+using Variable = paddle::framework::Variable;
+using VariableNameMap = std::map<std::string, Variable>;
+using DenseTensor = phi::DenseTensor;
+
+class Argument {
+ public:
+  explicit Argument(const std::string &name, bool is_out = false);
+
+  const std::string &Name() const;
+
+ private:
+  std::string name_;
+  // paddle::optional<Variable> default_val_;
+  bool is_output_;
+};
+
+class FunctionSchema {
+ public:
+  FunctionSchema() = default;
+
+  std::vector<std::string> GetInputArgNames();
+
+  std::vector<std::string> GetOutputArgNames();
+
+  void AddInputArg(std::string name, bool is_output);
+
+  void AddOutputArg(std::string name, bool is_output);
+
+ private:
+  std::vector<Argument> input_args;
+  std::vector<Argument> output_args;
+};
+
+// TODO(dev): make it as abstract class
+class BaseFunction {
+ public:
+  BaseFunction(const framework::ProgramDesc &program_desc,
+               const std::vector<std::string> param_names_for_program,
+               const VariableNameMap &params_dict);
+
+  virtual ~BaseFunction() {}
+
+  virtual std::vector<Variable> operator()(const VariableNameMap &inputs) = 0;
+
+ protected:
+  void FetchOutput(std::vector<Variable> *outs);
+
+  void ShareIntoScope(const VariableNameMap &ivals);
+
+  void SharePartialIntoScope(
+      const std::vector<std::string> param_names_for_program,
+      const VariableNameMap &params_dict);
+
+  void RemoveFeedFetch();
+
+ protected:
+  framework::ProgramDesc program_desc_;
+  // TODO(dev): need a better way to share params
+  // std::vector<Variable> &param_for_program_;
+  // std::vector<std::string> skip_var_name_;
+  FunctionSchema schema_;
+  // global_scope place params
+  framework::Scope scope_;
+  // framework::Executor inner_exe_;
+};
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/compilation_unit.h b/paddle/fluid/jit/compilation_unit.h
new file mode 100644
index 0000000000000..815e9d3f4c090
--- /dev/null
+++ b/paddle/fluid/jit/compilation_unit.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
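To make the flow concrete, this is how a FunctionSchema ends up populated from a program's feed/fetch targets, distilled from the BaseFunction constructor in base_function.cc above (a sketch assuming a loaded `framework::ProgramDesc` named `program_desc`):

```cpp
// Inputs come from feed targets, outputs from fetch targets; the schema's
// name lists then drive ShareIntoScope() and FetchOutput().
paddle::jit::FunctionSchema schema;
for (auto& in_name : program_desc.GetFeedTargetNames()) {
  schema.AddInputArg(in_name, /*is_output=*/false);
}
for (auto& out_name : program_desc.GetFetchTargetNames()) {
  schema.AddOutputArg(out_name, /*is_output=*/true);
}
std::vector<std::string> ins = schema.GetInputArgNames();   // feed names
std::vector<std::string> outs = schema.GetOutputArgNames(); // fetch names
```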
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace paddle {
+namespace jit {
+class BaseFunction;
+
+class CompilationUnit {
+ public:
+  CompilationUnit() = default;
+  ~CompilationUnit() {}
+
+ private:
+  std::vector<std::shared_ptr<BaseFunction>> functions_;
+  std::unordered_map<std::string, size_t> functions_idx_;
+};
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/exector_function.h b/paddle/fluid/jit/exector_function.h
new file mode 100644
index 0000000000000..3217c62fbd797
--- /dev/null
+++ b/paddle/fluid/jit/exector_function.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/jit/base_function.h"
+
+namespace paddle {
+namespace jit {
+
+class ExectorFunction : public BaseFunction {
+ public:
+  ExectorFunction(const framework::ProgramDesc &program_desc,
+                  const std::vector<std::string> param_names_for_program,
+                  const VariableNameMap &params_dict)
+      : BaseFunction(program_desc, param_names_for_program, params_dict),
+        inner_exe_(phi::CPUPlace()) {}
+
+  ~ExectorFunction() {}
+
+  std::vector<Variable> operator()(const VariableNameMap &inputs) {
+    // share input into scope
+    ShareIntoScope(inputs);
+    // run program
+    inner_exe_.Run(program_desc_, &scope_, /*blockID=*/0, false, true,
+                   schema_.GetOutputArgNames());
+    VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
+    // fetch outputs
+    std::vector<Variable> res;
+    FetchOutput(&res);
+    return res;
+  }
+
+ private:
+  // TODO(dev): support other devices exe
+  framework::Executor inner_exe_;
+};
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc
new file mode 100644
index 0000000000000..cb13a003affec
--- /dev/null
+++ b/paddle/fluid/jit/layer.cc
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/jit/layer.h"
+
+namespace paddle {
+namespace jit {
+// TODO(dev): Make vector<string>, num_slot as in argument
+// Layer(const std::shared_ptr<ClassType>& type) : obj_(type, /*num_slot*/ 0U)
+// {}
+Layer::Layer(
+    const std::vector<std::string>& func_names,
+    const std::vector<framework::ProgramDesc>& program_descs,
+    const std::vector<std::vector<std::string>>& param_names_for_each_program,
+    const VariableNameMap& params_dict) {
+  VLOG(3) << "program size: " << program_descs.size();
+  // Layer manages the lifetime of all parameters.
+  for (size_t i = 0; i < func_names.size(); ++i) {
+    // TODO(dev): choose executor or pe by flag
+    function_dict[func_names[i]] = std::make_shared<ExectorFunction>(
+        program_descs[i], param_names_for_each_program[i], params_dict);
+  }
+}
+
+// TODO(dev): make it as const function
+std::shared_ptr<BaseFunction> Layer::GetFunction(const std::string& name) {
+  VLOG(3) << "funcs_ size: " << function_dict.size();
+  return function_dict[name];
+}
+
+std::vector<Variable> Layer::forward(const VariableNameMap& inputs) {
+  auto func = GetFunction("forward");
+  return (*func)(inputs);
+}
+
+}  // namespace jit
+}  // namespace paddle
diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h
new file mode 100644
index 0000000000000..0c2ad49c77197
--- /dev/null
+++ b/paddle/fluid/jit/layer.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/jit/ast.h"
+#include "paddle/fluid/jit/base_function.h"
+#include "paddle/fluid/jit/compilation_unit.h"
+#include "paddle/fluid/jit/exector_function.h"
+#include "paddle/fluid/jit/object.h"
+#include "paddle/fluid/jit/pe_function.h"
+
+namespace paddle {
+namespace jit {
+using Variable = paddle::framework::Variable;
+using VariableNameMap = std::map<std::string, Variable>;
+using DenseTensor = phi::DenseTensor;
+
+class Layer {
+ public:
+  // TODO(dev): Make vector<string>, num_slot as in argument
+  // Layer(const std::shared_ptr<ClassType>& type) : obj_(type, /*num_slot*/ 0U)
+  // {}
+  Layer(
+      const std::vector<std::string>& func_names,
+      const std::vector<framework::ProgramDesc>& program_descs,
+      const std::vector<std::vector<std::string>>& param_names_for_each_program,
+      const VariableNameMap& params_dict);
+
+  // TODO(dev): make it as const function
+  std::shared_ptr<BaseFunction> GetFunction(const std::string& name);
+
+  std::vector<Variable> forward(const VariableNameMap& inputs);
+
+ private:
+  // internal::Object obj_;
+  // std::vector<framework::ProgramDesc> all_program_desc_;
+  // std::vector<std::vector<std::string>> param_name_for_each_program_;
+  // std::vector<Variable> all_param_;
+  std::map<std::string, std::shared_ptr<BaseFunction>> function_dict;
+};
+
+}  // namespace jit
+}  // namespace paddle
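Putting the pieces together, the intended call flow for the new JIT layer looks roughly like this (a sketch based on layer_test.cc below; `jit::Load` is provided by paddle/fluid/jit/serializer.h):

```cpp
#include "paddle/fluid/jit/layer.h"
#include "paddle/fluid/jit/serializer.h"

void RunForward(const paddle::jit::VariableNameMap& inputs) {
  // Deserialize a Layer from a saved program directory.
  auto layer = paddle::jit::Load("./Testing/");
  // "forward" is a convenience wrapper around GetFunction("forward").
  auto outs = layer.forward(inputs);
  // Any other named function can be fetched and called directly.
  auto infer = layer.GetFunction("infer");
  auto infer_outs = (*infer)(inputs);
  (void)outs;
  (void)infer_outs;
}
```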
diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc
new file mode 100644
index 0000000000000..9386569d48d1b
--- /dev/null
+++ b/paddle/fluid/jit/layer_test.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/jit/layer.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/jit/serializer.h"
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include "paddle/phi/api/include/tensor.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(matmul_v2);
+USE_OP_ITSELF(relu);
+USE_OP_ITSELF(reduce_mean);
+USE_OP_ITSELF(feed);
+USE_OP_ITSELF(fetch);
+USE_OP_ITSELF(scale);
+
+PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(relu, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
+
+namespace paddle {
+namespace jit {
+
+VariableNameMap PrepareInputs() {
+  auto temp = DenseTensor();
+  temp.Resize(phi::make_ddim({2, 4}));
+  phi::CPUContext cpu_ctx;
+  cpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                           .GetAllocator(paddle::platform::CPUPlace())
+                           .get());
+  cpu_ctx.Init();
+  cpu_ctx.Alloc<float>(&temp);
+  phi::funcs::set_constant(cpu_ctx, &temp, 2.);
+  Variable v;
+  auto *p = v.GetMutable<DenseTensor>();
+  *p = temp;
+  // TODO(dev): associate the input name
+  return {{"x", v}};
+}
+
+TEST(layer, Construct) {
+  std::string path = "./Testing/";
+  auto layer = jit::Load(path);
+  auto inputs = PrepareInputs();
+
+  auto outs = layer.forward(inputs);
+  auto out_vars = outs[0];
+  auto out_dense_tensor = out_vars.Get<DenseTensor>();
+  auto out_data = out_dense_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+
+  auto func = layer.GetFunction("infer");
+  outs = (*func)(inputs);
+  out_vars = outs[0];
+  out_dense_tensor = out_vars.Get<DenseTensor>();
+  out_data = out_dense_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+}
+
+}  // namespace jit
+}  // namespace paddle
+ +#pragma once + +#include +#include + +#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace jit { +class ClassType; + +namespace internal { + +class Object { + public: + Object(const std::shared_ptr& type, size_t num_slot) + : type_(type) { + slots_.resize(num_slot); + } + + static std::unique_ptr Create(std::shared_ptr type, + size_t num_slot) { + return std::make_unique(type, num_slot); + } + + std::shared_ptr Type() const { return type_; } + + void SetSlot(size_t slot, Variable val) { + if (slot >= slots_.size()) { + // resize past `slot` so the assignment below stays in range + slots_.resize(slot + 1); + } + slots_[slot] = std::move(val); + } + + const Variable& GetSlot(size_t slot) { + // TODO(dev): Add ENFORCE_LT(slot, size()); + return slots_[slot]; + } + + Variable GetAttr(const std::string& name) const; + + void SetAttr(const std::string& name, Variable val); + + private: + std::shared_ptr type_; + // Store Tensors and Attributes + std::vector slots_; +}; + +} // namespace internal +} // namespace jit +} // namespace paddle diff --git a/paddle/fluid/jit/pe_function.h b/paddle/fluid/jit/pe_function.h new file mode 100644 index 0000000000000..a3d7eb33f7103 --- /dev/null +++ b/paddle/fluid/jit/pe_function.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
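Because `GetAttr`/`SetAttr` are only declared here, the slot interface is the usable surface of `Object` for now. A small sketch of the slot lifecycle; the null `ClassType` pointer is purely illustrative, since no concrete class type ships in this patch:

```cpp
#include <utility>

#include "paddle/fluid/jit/object.h"

// Store a Variable in a slot beyond the initial capacity, then read it back.
void SlotRoundTrip(paddle::framework::Variable v) {
  using paddle::jit::internal::Object;
  auto obj = Object::Create(/*type=*/nullptr, /*num_slot=*/1);
  obj->SetSlot(3, std::move(v));         // grows slots_ to cover index 3
  const auto& stored = obj->GetSlot(3);  // no bounds check yet (see TODO)
  (void)stored;
}
```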
+ +#pragma once + +#include +#include + +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/executor_cache.h" +#include "paddle/fluid/jit/base_function.h" + +namespace paddle { +namespace jit { + +class PEFunction : public BaseFunction { + public: + PEFunction(const framework::ProgramDesc &program_desc, + const std::vector param_names_for_program, + const VariableNameMap ¶ms_dict) + : BaseFunction(program_desc, param_names_for_program, params_dict) {} + + ~PEFunction() {} + + std::vector operator()(const VariableNameMap &inputs) { + // bool is_test = true; + std::string prog_string; + std::hash string_hash; + program_desc_.Proto()->SerializePartialToString(&prog_string); + int64_t program_id = static_cast(string_hash(prog_string)); + const framework::BlockDesc &global_block = program_desc_.Block(0); + int64_t start_op_index = 0; + int64_t end_op_index = static_cast(global_block.OpSize()); + + ShareIntoScope(inputs); + std::vector input_var_names = schema_.GetInputArgNames(); + std::vector output_var_names = schema_.GetOutputArgNames(); + std::vector dout_var_names; + if (end_op_index > start_op_index) { + // TODO(dev): support other devices + auto cache_info = framework::GetExecutorInfoFromCache( + program_desc_, phi::CPUPlace(), start_op_index, end_op_index, + /*is_grad=*/false, program_id, &scope_); + auto ¶llel_executor = cache_info.first; + auto &skip_eager_delete_vars = + framework::ExecutorInfoCache::Instance().SkipEagerDeleteVars( + program_id, false); + if (cache_info.second /*is_new_created*/) { + parallel_executor->SkipMemoryReuse(/*scope_idx=*/0, input_var_names); + skip_eager_delete_vars.insert(skip_eager_delete_vars.end(), + output_var_names.begin(), + output_var_names.end()); + skip_eager_delete_vars.insert(skip_eager_delete_vars.end(), + dout_var_names.begin(), + dout_var_names.end()); + framework::details::ParseSafeEagerDeletionSkipVars( + program_desc_, end_op_index, output_var_names, + &skip_eager_delete_vars); + } + parallel_executor->RunWithoutFetch(skip_eager_delete_vars); + } + VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_); + std::vector res; + FetchOutput(&res); + return res; + } +}; + +} // namespace jit +} // namespace paddle diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc new file mode 100644 index 0000000000000..a8bd934d12e5f --- /dev/null +++ b/paddle/fluid/jit/serializer.cc @@ -0,0 +1,131 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
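`PEFunction::operator()` above keys the executor cache on a hash of the serialized `ProgramDesc`, so a given function builds its `ParallelExecutor` once and then reuses it. A hedged call-shape sketch, where the constructor arguments are whatever the deserializer produced:

```cpp
#include "paddle/fluid/jit/pe_function.h"

// First call builds and caches a ParallelExecutor for the hashed program;
// the second call is served from ExecutorInfoCache.
std::vector<paddle::jit::Variable> CallTwice(
    const paddle::framework::ProgramDesc& program_desc,
    const std::vector<std::string>& param_names,
    const paddle::jit::VariableNameMap& params_dict,
    const paddle::jit::VariableNameMap& inputs) {
  paddle::jit::PEFunction func(program_desc, param_names, params_dict);
  auto warmup_outs = func(inputs);  // cache miss: executor is created
  (void)warmup_outs;
  return func(inputs);              // cache hit: executor is reused
}
```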
+ +#include "paddle/fluid/jit/serializer.h" + +namespace paddle { +namespace jit { + +Layer Deserializer::operator()(const std::string& dir_path) { + const auto& file_name_prefixs = GetPdmodelFileNamePrefix(dir_path); + std::vector func_names; + std::vector program_descs; + std::vector> param_names_for_each_program; + // set is ordered + std::set param_names_set; + VariableNameMap params_dict; + for (auto& it : file_name_prefixs) { + func_names.emplace_back(it.first); + + auto program = LoadProgram(dir_path + it.second + PDMODEL_SUFFIX); + program_descs.emplace_back(program); + + // TODO(dev): load int/float params + std::vector persistable_var_names; + auto all_var_desc = program.Block(0).AllVars(); + for (auto* desc_ptr : all_var_desc) { + if (IsPersistable(desc_ptr)) { + persistable_var_names.emplace_back(desc_ptr->Name()); + } + } + + param_names_for_each_program.emplace_back(persistable_var_names); + param_names_set.insert(persistable_var_names.begin(), + persistable_var_names.end()); + } + + // Read from one pdiparams file, refine here + auto params_for_all_program = + ReadTensorData(dir_path + "export.forward.pdiparams", param_names_set); + params_dict.insert(params_for_all_program.begin(), + params_for_all_program.end()); + + return Layer(func_names, program_descs, param_names_for_each_program, + params_dict); +} + +bool Deserializer::IsPersistable(framework::VarDesc* desc_ptr) { + auto type = desc_ptr->GetType(); + if (type == framework::proto::VarType::FEED_MINIBATCH || + type == framework::proto::VarType::FETCH_LIST || + type == framework::proto::VarType::READER || + type == framework::proto::VarType::RAW) { + return false; + } + return desc_ptr->Persistable(); +} + +bool Deserializer::EndsWith(const std::string& str, const std::string& suffix) { + if (str.length() < suffix.length()) { + return false; + } + return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == + 0; +} + +const std::vector> +Deserializer::GetPdmodelFileNamePrefix(const std::string& path) { + std::vector> file_name_prefixs; + DIR* dir = opendir(path.c_str()); + struct dirent* ptr; + while ((ptr = readdir(dir)) != nullptr) { + std::string file_name = ptr->d_name; + if (EndsWith(file_name, PDMODEL_SUFFIX)) { + std::string prefix = file_name.substr( + 0, file_name.length() - std::string(PDMODEL_SUFFIX).length()); + std::string func_name = prefix.substr(prefix.find_first_of(".") + 1); + file_name_prefixs.emplace_back(std::make_pair(func_name, prefix)); + } + } + closedir(dir); + return file_name_prefixs; +} + +VariableNameMap Deserializer::ReadTensorData( + const std::string& file_name, const std::set& var_name) const { + VLOG(3) << "ReadTensorData from: " << file_name; + std::ifstream fin(file_name, std::ios::binary); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + // TODO(dev): Support other devices + auto& dev_ctx = *pool.Get(phi::CPUPlace()); + VariableNameMap res; + for (auto it = var_name.begin(); it != var_name.end(); it++) { + VLOG(3) << "load Tensor: " << *it; + Variable v; + // TODO(dev): Support framework::Vocab + DenseTensor* dense_tesnor = v.GetMutable(); + framework::DeserializeFromStream(fin, dense_tesnor, dev_ctx); + res[*it] = v; + } + return res; +} + +framework::ProgramDesc Deserializer::LoadProgram(const std::string& file_name) { + VLOG(3) << "LoadProgram " << file_name; + std::ifstream fin(file_name, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + std::string buffer(fin.tellg(), ' '); + fin.seekg(0, std::ios::beg); + 
fin.read(&buffer[0], buffer.size()); + fin.close(); + return framework::ProgramDesc(buffer); +} + +Layer Load(const std::string& file_path) { + auto deserializer = Deserializer(); + return deserializer(file_path); +} + +} // namespace jit +} // namespace paddle diff --git a/paddle/fluid/jit/serializer.h b/paddle/fluid/jit/serializer.h new file mode 100644 index 0000000000000..4036c5add7b0b --- /dev/null +++ b/paddle/fluid/jit/serializer.h @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/jit/layer.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace paddle { +namespace jit { +static const char PDMODEL_SUFFIX[] = ".pdmodel"; +static const char PDPARAMS_SUFFIX[] = ".pdiparams"; + +// Export Layer into local disk +class Serializer { + public: + void operator()(const Layer& layer, const std::string& file_dir); + + // private: + // void WriteTensorData(const Layer& layer, const std::string& file_name) + // const; + // void WriteExtraInfo(const Layer& layer, const std::string& file_name) + // const; + // void WriteByteCode(const Layer& layer, const std::string& file_name) + // const; +}; + +class Deserializer { + public: + Layer operator()(const std::string& dir_path); + + private: + bool IsPersistable(framework::VarDesc* desc_ptr); + + bool EndsWith(const std::string& str, const std::string& suffix); + + const std::vector> + GetPdmodelFileNamePrefix(const std::string& path); + + VariableNameMap ReadTensorData(const std::string& file_name, + const std::set& var_name) const; + + // void ReadExtraInfo(const std::string& file_name) const; + // void ReadByteCode(const std::string& file_name) const; + + framework::ProgramDesc LoadProgram(const std::string& file_name); +}; + +void Export(const Layer& layer, const std::string& file_path); + +Layer Load(const std::string& file_path); + +} // namespace jit +} // namespace paddle diff --git a/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h b/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h index 452f388f03dcf..ff44fa0b77201 100644 --- a/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h +++ b/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h @@ -100,19 +100,28 @@ XPUOpMap& get_kp_ops() { {"equal", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace())})}, {"not_equal", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace())})}, // reduce op - {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_min", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, 
XPUPlace())})}, - {"reduce_all", XPUKernelSet({pOpKernelType(vartype::BOOL, XPUPlace())})}, - {"reduce_any", XPUKernelSet({pOpKernelType(vartype::BOOL, XPUPlace())})}, + // {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_min", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_all", XPUKernelSet({pOpKernelType(vartype::BOOL, + // XPUPlace())})}, + // {"reduce_any", XPUKernelSet({pOpKernelType(vartype::BOOL, + // XPUPlace())})}, + // {"reduce_amax", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, + // {"reduce_amin", XPUKernelSet({pOpKernelType(vartype::FP32, + // XPUPlace())})}, {"pull_box_sparse", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"push_box_sparse", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_amax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"reduce_amin", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"c_sync_calc_stream", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"c_sync_comm_stream", diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc index 72d343692df73..1a9ff2e6694ea 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.cc +++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include "glog/logging.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -125,22 +126,26 @@ void ChromeTracingLogger::LogMemTraceEventNode( std::string( R"JSON( { - "name": "[memory]", "pid": %lld, "tid": "%lld", + "name": "[memory]", "pid": %lld, "tid": "%lld(C++)", "ts": %lld, "ph": "i", "cat": "%s", "args": { "place": "%s", "addr": "%llu", + "increase_bytes": %lld, "current_allocated": %llu, "current_reserved": %llu, - "increase_bytes": %lld + "peak_allocated": %llu, + "peak_reserved": %llu } }, )JSON"), - mem_node.ProcessId(), mem_node.ThreadId(), mem_node.TimeStampNs(), + mem_node.ProcessId(), mem_node.ThreadId(), nsToUs(mem_node.TimeStampNs()), StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(), - mem_node.Addr(), mem_node.CurrentAllocated(), mem_node.CurrentReserved(), - mem_node.IncreaseBytes()); + mem_node.Addr(), mem_node.IncreaseBytes(), mem_node.CurrentAllocated(), + mem_node.CurrentReserved(), mem_node.PeakAllocated(), + mem_node.PeakReserved()); + pid_tid_set_.insert({mem_node.ProcessId(), mem_node.ThreadId()}); } void ChromeTracingLogger::LogHostTraceEventNode( @@ -164,6 +169,8 @@ void ChromeTracingLogger::LogHostTraceEventNode( input_shapes = op_supplement_node->InputShapes(); input_dtypes = op_supplement_node->Dtypes(); callstack = op_supplement_node->CallStack(); + callstack = std::regex_replace(callstack, std::regex("\""), "\'"); + callstack = std::regex_replace(callstack, std::regex("\n"), "\\n"); } switch (host_node.Type()) { case TracerEventType::ProfileStep: diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc index 65f5e81238bc8..e5de858e15c76 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc +++ 
b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc @@ -209,6 +209,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode( mem_event.place = mem_event_proto.place(); mem_event.current_allocated = mem_event_proto.current_allocated(); mem_event.current_reserved = mem_event_proto.current_reserved(); + mem_event.peak_allocated = mem_event_proto.peak_allocated(); + mem_event.peak_reserved = mem_event_proto.peak_reserved(); return new MemTraceEventNode(mem_event); } diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto index 0f0c9c92c9c93..4ebfb6e73b331 100644 --- a/paddle/fluid/platform/profiler/dump/nodetree.proto +++ b/paddle/fluid/platform/profiler/dump/nodetree.proto @@ -51,10 +51,14 @@ enum TracerEventTypeProto { }; enum TracerMemEventTypeProto { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0; - // Used to mark memory free + // Used to mark memory free which is managed by paddle Free = 1; + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2; + // Used to mark reserved memory free which is released to device. + ReservedFree = 3; }; message KernelEventInfoProto { @@ -150,6 +154,10 @@ message MemTraceEventProto { required uint64 current_allocated = 8; // current total reserved memory required uint64 current_reserved = 9; + // current peak allocated memory + required uint64 peak_allocated = 10; + // current peak reserved memory + required uint64 peak_reserved = 11; } message OperatorSupplementEventProto { diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.cc b/paddle/fluid/platform/profiler/dump/serialization_logger.cc index eaf1353168ea4..7b1c5bdaa41bc 100644 --- a/paddle/fluid/platform/profiler/dump/serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.cc @@ -130,6 +130,8 @@ void SerializationLogger::LogMemTraceEventNode( mem_trace_event->set_place(mem_node.Place()); mem_trace_event->set_current_allocated(mem_node.CurrentAllocated()); mem_trace_event->set_current_reserved(mem_node.CurrentReserved()); + mem_trace_event->set_peak_allocated(mem_node.PeakAllocated()); + mem_trace_event->set_peak_reserved(mem_node.PeakReserved()); current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event); } diff --git a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc index dc6a6bf32d6e3..0a3bda1c34518 100644 --- a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc @@ -53,9 +53,9 @@ TEST(SerializationLoggerTest, dump_case0) { std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); diff --git a/paddle/fluid/platform/profiler/event_node.h b/paddle/fluid/platform/profiler/event_node.h index acd5a03109f72..3ffa9241e9bfb 100644 --- a/paddle/fluid/platform/profiler/event_node.h +++ b/paddle/fluid/platform/profiler/event_node.h @@ -47,6 +47,8 @@ class MemTraceEventNode { 
std::string Place() const { return mem_event_.place; } uint64_t CurrentAllocated() const { return mem_event_.current_allocated; } uint64_t CurrentReserved() const { return mem_event_.current_reserved; } + uint64_t PeakAllocated() const { return mem_event_.peak_allocated; } + uint64_t PeakReserved() const { return mem_event_.peak_reserved; } // member function void LogMe(BaseLogger* logger) { logger->LogMemTraceEventNode(*this); } diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc index 4e40e87bbbf20..162bf5da642b4 100644 --- a/paddle/fluid/platform/profiler/event_python.cc +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -93,6 +93,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) { mem_python_node->place = (*memnode)->Place(); mem_python_node->current_allocated = (*memnode)->CurrentAllocated(); mem_python_node->current_reserved = (*memnode)->CurrentReserved(); + mem_python_node->peak_allocated = (*memnode)->PeakAllocated(); + mem_python_node->peak_reserved = (*memnode)->PeakReserved(); host_python_node->mem_node_ptrs.push_back(mem_python_node); } // copy OperatorSupplementEventNode's information if exists diff --git a/paddle/fluid/platform/profiler/event_python.h b/paddle/fluid/platform/profiler/event_python.h index 4d1f5ad4f788e..9c5ac28f36f5b 100644 --- a/paddle/fluid/platform/profiler/event_python.h +++ b/paddle/fluid/platform/profiler/event_python.h @@ -66,6 +66,10 @@ struct MemPythonNode { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // peak allocated memory + uint64_t peak_allocated; + // peak reserved memory + uint64_t peak_reserved; }; struct HostPythonNode { diff --git a/paddle/fluid/platform/profiler/test_event_node.cc b/paddle/fluid/platform/profiler/test_event_node.cc index b70034633ae66..3f825ce63cd83 100644 --- a/paddle/fluid/platform/profiler/test_event_node.cc +++ b/paddle/fluid/platform/profiler/test_event_node.cc @@ -50,9 +50,9 @@ TEST(NodeTreesTest, LogMe_case0) { std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); @@ -185,9 +185,9 @@ TEST(NodeTreesTest, HandleTrees_case0) { std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); op_supplement_events.push_back(OperatorSupplementEvent( 11600, "op1", std::map>>(), std::map>(), "op1()", 10, 10)); diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h index bfa000e2683de..b2504a5ec458d 100644 --- a/paddle/fluid/platform/profiler/trace_event.h +++ b/paddle/fluid/platform/profiler/trace_event.h @@ -59,10 +59,14 @@ enum class TracerEventType { }; enum class TracerMemEventType { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0, - // Used to mark memory free + // Used to 
mark memory free which is managed by paddle Free = 1, + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2, + // Used to mark reserved memory free which is released to device. + ReservedFree = 3, // A flag to denote the number of current types NumTypes }; @@ -288,7 +292,8 @@ struct MemTraceEvent { MemTraceEvent(uint64_t timestamp_ns, uint64_t addr, TracerMemEventType type, uint64_t process_id, uint64_t thread_id, int64_t increase_bytes, const std::string& place, uint64_t current_allocated, - uint64_t current_reserved) + uint64_t current_reserved, uint64_t peak_allocated, + uint64_t peak_reserved) : timestamp_ns(timestamp_ns), addr(addr), type(type), @@ -297,7 +302,9 @@ struct MemTraceEvent { increase_bytes(increase_bytes), place(place), current_allocated(current_allocated), - current_reserved(current_reserved) {} + current_reserved(current_reserved), + peak_allocated(peak_allocated), + peak_reserved(peak_reserved) {} // timestamp of the record uint64_t timestamp_ns; @@ -318,6 +325,10 @@ struct MemTraceEvent { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // current peak allocated memory + uint64_t peak_allocated; + // current peak reserved memory + uint64_t peak_reserved; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/utils.cc b/paddle/fluid/platform/profiler/utils.cc index 1f8e113fdd914..446fa49eefbd1 100644 --- a/paddle/fluid/platform/profiler/utils.cc +++ b/paddle/fluid/platform/profiler/utils.cc @@ -83,7 +83,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, uint16_t RegistersPerThread, #endif const char* StringTracerMemEventType(TracerMemEventType type) { - static const char* categary_name_[] = {"Allocate", "Free"}; + static const char* categary_name_[] = {"Allocate", "Free", "ReservedAllocate", + "ReservedFree"}; return categary_name_[static_cast(type)]; } diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 20460c78d2867..e4d4bf1a1c441 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -123,6 +123,10 @@ set(PYBIND_SRCS communication.cc cuda_streams_py.cc) +if(WITH_CUSTOM_DEVICE) + set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi) +endif() + if(NOT ON_INFER) set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) if(WITH_NCCL) @@ -491,7 +495,7 @@ if(WITH_PYTHON) cc_library( paddle_pybind SHARED SRCS ${PYBIND_SRCS} - DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${GLOB_DEV_LIB}) + DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) if(NOT APPLE AND NOT WIN32) target_link_libraries(paddle_pybind rt) diff --git a/paddle/fluid/pybind/fleet_py.cc b/paddle/fluid/pybind/fleet_py.cc index 25f2c91002844..ea404b4f51e78 100644 --- a/paddle/fluid/pybind/fleet_py.cc +++ b/paddle/fluid/pybind/fleet_py.cc @@ -221,8 +221,8 @@ void BindGraphPyClient(py::module* m) { auto feats = self.get_node_feat(node_type, node_ids, feature_names); std::vector> bytes_feats(feats.size()); - for (int i = 0; i < feats.size(); ++i) { - for (int j = 0; j < feats[i].size(); ++j) { + for (size_t i = 0; i < feats.size(); ++i) { + for (size_t j = 0; j < feats[i].size(); ++j) { bytes_feats[i].push_back(py::bytes(feats[i][j])); } } @@ -234,8 +234,8 @@ void BindGraphPyClient(py::module* m) { std::vector feature_names, std::vector> bytes_feats) { std::vector> feats(bytes_feats.size()); - for (int i = 0; i < bytes_feats.size(); ++i) { - for (int j = 0; j < bytes_feats[i].size(); ++j) { + for (size_t i = 0; i < 
bytes_feats.size(); ++i) { + for (size_t j = 0; j < bytes_feats[i].size(); ++j) { feats[i].push_back(std::string(bytes_feats[i][j])); } } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 3de6c64617ddd..354ac0aef9f2d 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -1535,40 +1535,40 @@ void BindImperative(py::module *m_ptr) { "Cannot copy this Tensor to GPU in CPU version Paddle, " "Please recompile or reinstall Paddle with CUDA support.")); #else - int device_count = platform::GetGPUDeviceCount(); - int device_id = 0; - if (handle == py::none()) { - if (platform::is_gpu_place(self->Place())) { - return self; - } - } else { - PyObject *py_obj = handle.ptr(); - PADDLE_ENFORCE_EQ( - PyCheckInteger(py_obj), true, - platform::errors::InvalidArgument( - " 'device_id' must be a positive integer")); - device_id = py::cast(handle); - } - PADDLE_ENFORCE_GE( - device_id, 0, - platform::errors::InvalidArgument( - "Can not copy Tensor to Invalid CUDAPlace(%d), device id " - "must inside [0, %d)", - device_id, device_count)); - PADDLE_ENFORCE_LT( - device_id, device_count, - platform::errors::InvalidArgument( - "Can not copy Tensor to Invalid CUDAPlace(%d), device id " - "must inside [0, %d)", - device_id, device_count)); - platform::CUDAPlace place = platform::CUDAPlace(device_id); - if (platform::is_same_place(self->Place(), place)) { - return self; - } else { - auto new_var = self->NewVarBase(place, blocking); - new_var->SetOverridedStopGradient(self->OverridedStopGradient()); - return new_var; - } + int device_count = platform::GetGPUDeviceCount(); + int device_id = 0; + if (handle == py::none()) { + auto default_place = + imperative::GetCurrentTracer()->ExpectedPlace(); + device_id = default_place.GetDeviceId(); + } else { + PyObject *py_obj = handle.ptr(); + PADDLE_ENFORCE_EQ( + PyCheckInteger(py_obj), true, + platform::errors::InvalidArgument( + " 'device_id' must be a positive integer")); + device_id = py::cast(handle); + } + PADDLE_ENFORCE_GE( + device_id, 0, + platform::errors::InvalidArgument( + "Can not copy Tensor to Invalid CUDAPlace(%d), device id " + "must inside [0, %d)", + device_id, device_count)); + PADDLE_ENFORCE_LT( + device_id, device_count, + platform::errors::InvalidArgument( + "Can not copy Tensor to Invalid CUDAPlace(%d), device id " + "must inside [0, %d)", + device_id, device_count)); + platform::CUDAPlace place = platform::CUDAPlace(device_id); + if (platform::is_same_place(self->Place(), place)) { + return self; + } else { + auto new_var = self->NewVarBase(place, blocking); + new_var->SetOverridedStopGradient(self->OverridedStopGradient()); + return new_var; + } #endif }, py::arg("device_id") = py::none(), py::arg("blocking") = true, R"DOC( @@ -1588,16 +1588,17 @@ void BindImperative(py::module *m_ptr) { # required: gpu import paddle x = paddle.to_tensor(1.0, place=paddle.CPUPlace()) - print(x.place) # CPUPlace + print(x.place) # Place(cpu) y = x.cuda() - print(y.place) # CUDAPlace(0) + print(y.place) # Place(gpu:0) y = x.cuda(None) - print(y.place) # CUDAPlace(0) + print(y.place) # Place(gpu:0) - y = x.cuda(1) - print(y.place) # CUDAPlace(1) + paddle.device.set_device("gpu:1") + y = x.cuda(None) + print(y.place) # Place(gpu:1) )DOC") .def( "_share_memory", @@ -1734,6 +1735,17 @@ void BindImperative(py::module *m_ptr) { return new_var; }, py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::CustomPlace &place, bool blocking) { + auto new_var 
= self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) .def( "_copy_to", [](const std::shared_ptr &self, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index cba7d03623516..b81f494f1a7df 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -147,6 +147,10 @@ limitations under the License. */ #include "paddle/fluid/platform/device/xpu/xpu_op_list.h" #endif +#ifdef PADDLE_WITH_CUSTOM_DEVICE +#include "paddle/phi/capi/capi.h" +#endif + #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #ifdef PADDLE_WITH_IPU diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index e20db18ea3f53..9715fd770422a 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -21,6 +21,10 @@ add_subdirectory(ops) add_subdirectory(tools) # phi tests add_subdirectory(tests) +# phi capi +if(WITH_CUSTOM_DEVICE) + add_subdirectory(capi) +endif() # make an unity target for compile deps set(PHI_DEPS diff --git a/paddle/phi/backends/custom/custom_device.cc b/paddle/phi/backends/custom/custom_device.cc index df757b286a6b1..541acd9ecafd0 100644 --- a/paddle/phi/backends/custom/custom_device.cc +++ b/paddle/phi/backends/custom/custom_device.cc @@ -348,7 +348,8 @@ class CustomDevice : public DeviceInterface { } } else { if (!pimpl_->memory_copy_p2p) { - std::unique_ptr tmp(new uint8_t[size]); + std::unique_ptr tmp( + reinterpret_cast(new uint8_t[size])); MemoryCopyD2H(src_dev_id, tmp.get(), src, size); MemoryCopyH2D(dst_dev_id, dst, tmp.get(), size); } else { @@ -440,7 +441,8 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->device_memory_set(device, ptr, value, size)); } else { - std::unique_ptr tmp(new uint8_t[size]); + std::unique_ptr tmp( + reinterpret_cast(new uint8_t[size])); memset(tmp.get(), value, size); MemoryCopyH2D(dev_id, ptr, tmp.get(), size); } diff --git a/paddle/phi/backends/device_ext.h b/paddle/phi/backends/device_ext.h index ff58f4f35fd32..77c9ee61858c1 100644 --- a/paddle/phi/backends/device_ext.h +++ b/paddle/phi/backends/device_ext.h @@ -25,6 +25,33 @@ extern "C" { #define PADDLE_CUSTOM_RUNTIME_MINOR_VERSION 1 #define PADDLE_CUSTOM_RUNTIME_PATCH_VERSION 1 +typedef enum { + UNDEFINED = 0, + BOOL, + UINT8, + UINT16, + UINT32, + UINT64, + INT8, + INT16, + INT32, + INT64, + FLOAT16, + FLOAT32, + FLOAT64, + BFLOAT16, +} C_DataType; + +typedef enum { + ANY = 0, + NHWC, + NCHW, + NCDHW, + NDHWC, + NUM_DATA_LAYOUTS, + ALL_LAYOUT = ANY, +} C_DataLayout; + typedef enum { C_SUCCESS = 0, // success C_WARNING, // results may not meet expectation (such as an asynchronous diff --git a/paddle/phi/capi/CMakeLists.txt b/paddle/phi/capi/CMakeLists.txt new file mode 100644 index 0000000000000..c00c38cfa3a8a --- /dev/null +++ b/paddle/phi/capi/CMakeLists.txt @@ -0,0 +1,13 @@ +add_subdirectory(lib) +cc_library( + phi_capi + SRCS all.cc + DEPS phi_c_data_type + phi_c_device_context + phi_c_int_array + phi_c_kernel_context + phi_c_kernel_factory + phi_c_kernel_registry + phi_c_place + phi_c_scalar + phi_c_tensor) diff --git a/paddle/phi/capi/all.cc b/paddle/phi/capi/all.cc new file mode 100644 index 0000000000000..3d9c9315b3136 --- /dev/null +++ b/paddle/phi/capi/all.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/capi/all.h" + +namespace paddle { +namespace capi {} // namespace capi +} // namespace paddle diff --git a/paddle/phi/capi/all.h b/paddle/phi/capi/all.h new file mode 100644 index 0000000000000..5bd31cafdf977 --- /dev/null +++ b/paddle/phi/capi/all.h @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" +#include "paddle/phi/capi/include/c_device_context.h" +#include "paddle/phi/capi/include/c_int_array.h" +#include "paddle/phi/capi/include/c_kernel_context.h" +#include "paddle/phi/capi/include/c_kernel_factory.h" +#include "paddle/phi/capi/include/c_kernel_registry.h" +#include "paddle/phi/capi/include/c_place.h" +#include "paddle/phi/capi/include/c_scalar.h" +#include "paddle/phi/capi/include/c_tensor.h" +#include "paddle/phi/capi/include/data_type.h" +#include "paddle/phi/capi/include/kernel_registry.h" + +#endif diff --git a/paddle/phi/capi/capi.h b/paddle/phi/capi/capi.h new file mode 100644 index 0000000000000..f8e5a90ddf883 --- /dev/null +++ b/paddle/phi/capi/capi.h @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/common.h" + +PD_DECLARE_CAPI(data_type); +PD_DECLARE_CAPI(device_context); +PD_DECLARE_CAPI(int_array); +PD_DECLARE_CAPI(kernel_context); +PD_DECLARE_CAPI(kernel_factory); +PD_DECLARE_CAPI(kernel_registry); +PD_DECLARE_CAPI(place); +PD_DECLARE_CAPI(scalar); +PD_DECLARE_CAPI(tensor); + +#endif diff --git a/paddle/phi/capi/include/c_data_type.h b/paddle/phi/capi/include/c_data_type.h new file mode 100644 index 0000000000000..e33d04705206c --- /dev/null +++ b/paddle/phi/capi/include/c_data_type.h @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include + +#include "paddle/phi/backends/device_ext.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef C_Status PD_Status; + +typedef C_DataType PD_DataType; + +typedef C_DataLayout PD_DataLayout; + +typedef struct { + size_t size; + void *data; +} PD_List; + +void PD_DeletePointerList(PD_List list); + +void PD_DeleteUInt8List(PD_List list); + +void PD_DeleteInt64List(PD_List list); + +void PD_DeleteInt32List(PD_List list); + +void PD_DeleteFloat64List(PD_List list); + +void PD_DeleteFloat32List(PD_List list); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_device_context.h b/paddle/phi/capi/include/c_device_context.h new file mode 100644 index 0000000000000..68621d58ad9d5 --- /dev/null +++ b/paddle/phi/capi/include/c_device_context.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
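`PD_List` above is the C API's generic sized buffer, and each `PD_Delete*List` releases a list whose payload has the matching element type. A consumption sketch; which deleter applies depends on the accessor that returned the list:

```cpp
#include <cstdint>
#include <vector>

#include "paddle/phi/capi/include/c_data_type.h"

// Copy an int64 payload out of a PD_List, then free the C-side buffer.
std::vector<int64_t> Int64ListToVector(PD_List list) {
  auto* data = reinterpret_cast<int64_t*>(list.data);
  std::vector<int64_t> out(data, data + list.size);
  PD_DeleteInt64List(list);
  return out;
}
```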
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" +#include "paddle/phi/capi/include/c_tensor.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_DeviceContext PD_DeviceContext; + +typedef C_Stream PD_Stream; + +PD_Stream PD_DeviceContextGetStream(const PD_DeviceContext *ctx, + PD_Status *status); + +void *PD_DeviceContextAllocateTensor(const PD_DeviceContext *ctx, + PD_Tensor *tensor, + size_t size, + PD_DataType dtype, + PD_Status *status); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_int_array.h b/paddle/phi/capi/include/c_int_array.h new file mode 100644 index 0000000000000..dbc13b3abea4f --- /dev/null +++ b/paddle/phi/capi/include/c_int_array.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_IntArray PD_IntArray; + +PD_List PD_IntArrayGetDataPointer(PD_IntArray *int_array); + +size_t PD_IntArrayGetElementCount(PD_IntArray *int_array); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_kernel_context.h b/paddle/phi/capi/include/c_kernel_context.h new file mode 100644 index 0000000000000..c06cb3cd30086 --- /dev/null +++ b/paddle/phi/capi/include/c_kernel_context.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
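`PD_DeviceContextAllocateTensor` above is how plugin code obtains output storage bound to the device context's allocator. A hedged sketch; the FLOAT32 dtype and the unchecked status are illustrative shortcuts, and real code should test the status against `C_SUCCESS`:

```cpp
#include "paddle/phi/capi/include/c_device_context.h"

// Allocate num_elements floats for `out` through the opaque device context.
float* AllocFloatOutput(const PD_DeviceContext* ctx,
                        PD_Tensor* out,
                        size_t num_elements) {
  PD_Status status;
  void* ptr = PD_DeviceContextAllocateTensor(
      ctx, out, num_elements * sizeof(float), FLOAT32, &status);
  return reinterpret_cast<float*>(ptr);
}
```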
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" +#include "paddle/phi/capi/include/c_device_context.h" +#include "paddle/phi/capi/include/c_int_array.h" +#include "paddle/phi/capi/include/c_place.h" +#include "paddle/phi/capi/include/c_scalar.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_KernelContext PD_KernelContext; + +/** + * KernelContext + */ + +PD_DeviceContext *PD_KernelContextGetDeviceContext(PD_KernelContext *ctx); + +PD_Tensor *PD_KernelContextInputAt(PD_KernelContext *ctx, size_t index); + +// PD_Tensor *PD_KernelContextOptionalInputAt(PD_KernelContext *ctx, size_t +// index); + +PD_List PD_KernelContextMultiInputAt(PD_KernelContext *ctx, size_t index); + +PD_Tensor *PD_KernelContextOutputAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextMultiOutputAt(PD_KernelContext *ctx, size_t index); + +/** + * Attribute + */ + +bool PD_KernelContextBoolAttrAt(PD_KernelContext *ctx, size_t index); + +int32_t PD_KernelContextInt32AttrAt(PD_KernelContext *ctx, size_t index); + +int64_t PD_KernelContextInt64AttrAt(PD_KernelContext *ctx, size_t index); + +float PD_KernelContextFloatAttrAt(PD_KernelContext *ctx, size_t index); + +double PD_KernelContextDoubleAttrAt(PD_KernelContext *ctx, size_t index); + +PD_Scalar *PD_KernelContextScalarAttrAt(PD_KernelContext *ctx, size_t index); + +PD_IntArray *PD_KernelContextIntArrayAttrAt(PD_KernelContext *ctx, + size_t index); + +PD_DataType PD_KernelContextDataTypeAttrAt(PD_KernelContext *ctx, size_t index); + +PD_DataLayout PD_KernelContextDataLayoutAttrAt(PD_KernelContext *ctx, + size_t index); + +char *PD_KernelContextStringAttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListBoolAttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListInt32AttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListInt64AttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListFloatAttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListDoubleAttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListStringAttrAt(PD_KernelContext *ctx, size_t index); + +PD_List PD_KernelContextListScalarAttrAt(PD_KernelContext *ctx, size_t index); + +PD_Place *PD_KernelContextPlaceAttrAt(PD_KernelContext *ctx, size_t index); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_kernel_factory.h b/paddle/phi/capi/include/c_kernel_factory.h new file mode 100644 index 0000000000000..f84f16ba52011 --- /dev/null +++ b/paddle/phi/capi/include/c_kernel_factory.h @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
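The accessors above are the raw surface a plugin kernel sees: inputs, outputs, and attributes are all fetched positionally from the `PD_KernelContext`. A sketch of a kernel body; the indices follow whatever signature the kernel was registered with, and the scale example itself is hypothetical:

```cpp
#include "paddle/phi/capi/include/c_kernel_context.h"

// Raw plugin kernel: one input tensor, one float attribute, one output.
void MyScaleKernel(PD_KernelContext* ctx) {
  PD_DeviceContext* dev_ctx = PD_KernelContextGetDeviceContext(ctx);
  PD_Tensor* x = PD_KernelContextInputAt(ctx, 0);
  float scale = PD_KernelContextFloatAttrAt(ctx, 0);
  PD_Tensor* out = PD_KernelContextOutputAt(ctx, 0);
  // ... allocate `out` via dev_ctx, then write scale * x into it ...
  (void)dev_ctx; (void)x; (void)scale; (void)out;
}
```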
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_KernelKey PD_KernelKey; + +typedef struct PD_Kernel PD_Kernel; + +typedef struct PD_KernelArgsDef PD_KernelArgsDef; + +typedef struct PD_TensorArgDef PD_TensorArgDef; + +/** + * TensorArgDef + */ + +void PD_TensorArgDefSetDataLayout(PD_TensorArgDef *def, + PD_DataLayout layout, + PD_Status *status); + +void PD_TensorArgDefSetDataType(PD_TensorArgDef *def, + PD_DataType dtype, + PD_Status *status); + +/** + * KernelArgsDef + */ + +PD_List PD_KernelArgsDefGetInputArgDefs(PD_KernelArgsDef *def, + PD_Status *status); + +PD_List PD_KernelArgsDefGetOutputArgDefs(PD_KernelArgsDef *def, + PD_Status *status); + +/** + * KernelKey + */ + +PD_DataLayout PD_KernelKeyGetLayout(PD_KernelKey *key, PD_Status *status); + +PD_DataType PD_KernelKeyGetDataType(PD_KernelKey *key, PD_Status *status); + +/** + * Kernel + */ + +PD_KernelArgsDef *PD_KernelGetArgsDef(PD_Kernel *kernel, PD_Status *status); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_kernel_registry.h b/paddle/phi/capi/include/c_kernel_registry.h new file mode 100644 index 0000000000000..04990be436be9 --- /dev/null +++ b/paddle/phi/capi/include/c_kernel_registry.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
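These factory hooks let an `args_def_fn` adjust a kernel's argument definitions at registration time. A hedged sketch that forces every output of a kernel to FLOAT32; the assumption that `PD_List.data` holds an array of `PD_TensorArgDef*` mirrors the other list-returning accessors in this API:

```cpp
#include "paddle/phi/capi/include/c_kernel_factory.h"

// args_def_fn-style hook: set the dtype of all output arg defs to FLOAT32.
void ForceFloatOutputs(const PD_KernelKey* key, PD_Kernel* kernel) {
  (void)key;
  PD_Status status;
  PD_KernelArgsDef* def = PD_KernelGetArgsDef(kernel, &status);
  PD_List outs = PD_KernelArgsDefGetOutputArgDefs(def, &status);
  auto** arg_defs = reinterpret_cast<PD_TensorArgDef**>(outs.data);
  for (size_t i = 0; i < outs.size; ++i) {
    PD_TensorArgDefSetDataType(arg_defs[i], FLOAT32, &status);
  }
}
```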
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include + +#include "paddle/phi/capi/include/c_data_type.h" +#include "paddle/phi/capi/include/c_kernel_context.h" +#include "paddle/phi/capi/include/c_kernel_factory.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + PD_ARG_TYPE_CONTEXT = 0, + PD_ARG_TYPE_TENSOR, + PD_ARG_TYPE_BOOL, + PD_ARG_TYPE_BFLOAT16, + PD_ARG_TYPE_FLOAT16, + PD_ARG_TYPE_FLOAT32, + PD_ARG_TYPE_FLOAT64, + PD_ARG_TYPE_INT32, + PD_ARG_TYPE_INT64, + PD_ARG_TYPE_STRING, + PD_ARG_TYPE_SCALAR, + PD_ARG_TYPE_INT_ARRAY, + PD_ARG_TYPE_DATA_TYPE, + PD_ARG_TYPE_DATA_LAYOUT, + PD_ARG_TYPE_PLACE, + PD_ARG_TYPE_LIST_BOOL, + PD_ARG_TYPE_LIST_INT32, + PD_ARG_TYPE_LIST_INT64, + PD_ARG_TYPE_LIST_BFLOAT16, + PD_ARG_TYPE_LIST_FLOAT16, + PD_ARG_TYPE_LIST_FLOAT32, + PD_ARG_TYPE_LIST_FLOAT64, + PD_ARG_TYPE_LIST_STRING, + PD_ARG_TYPE_LIST_SCALAR, + PD_ARG_TYPE_OPTIONAL_TENSOR, + PD_ARG_TYPE_LIST_TENSOR, + PD_ARG_TYPE_OPTIONAL_MULTI_TENSOR, +} PD_KernelArgumentType; + +void PD_RegisterPhiKernel(const char *kernel_name_cstr, + const char *backend_cstr, + PD_DataType pd_dtype, + PD_DataLayout pd_layout, + size_t in_nargs, + PD_KernelArgumentType *in_args_type, + size_t attr_nargs, + PD_KernelArgumentType *attr_args_type, + size_t out_nargs, + PD_KernelArgumentType *out_args_type, + void (*args_def_fn)(const PD_KernelKey *, + PD_Kernel *), + void (*fn)(PD_KernelContext *), + void *variadic_kernel_fn); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_place.h b/paddle/phi/capi/include/c_place.h new file mode 100644 index 0000000000000..bbdc45cbe8d46 --- /dev/null +++ b/paddle/phi/capi/include/c_place.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_Place PD_Place; + +bool PD_PlaceIsHost(PD_Place *place); + +int8_t PD_PlaceGetDeviceId(PD_Place *place); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_scalar.h b/paddle/phi/capi/include/c_scalar.h new file mode 100644 index 0000000000000..3ea3c3fc12c65 --- /dev/null +++ b/paddle/phi/capi/include/c_scalar.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
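`PD_RegisterPhiKernel` is the single C entry point behind kernel registration. A hedged sketch of a direct call; the `my_device` backend name and the null `variadic_kernel_fn` are assumptions, and real plugins would normally go through the C++ helpers in `kernel_registry.h` rather than this raw call:

```cpp
#include "paddle/phi/capi/include/c_kernel_registry.h"

void MyScaleKernel(PD_KernelContext* ctx);                           // kernel body
void ForceFloatOutputs(const PD_KernelKey* key, PD_Kernel* kernel);  // args_def_fn

// Register a float32 "scale" kernel with one input, one attr, one output.
void RegisterMyScale() {
  PD_KernelArgumentType ins[] = {PD_ARG_TYPE_TENSOR};
  PD_KernelArgumentType attrs[] = {PD_ARG_TYPE_FLOAT32};
  PD_KernelArgumentType outs[] = {PD_ARG_TYPE_TENSOR};
  PD_RegisterPhiKernel("scale", "my_device", FLOAT32, ALL_LAYOUT,
                       /*in_nargs=*/1, ins,
                       /*attr_nargs=*/1, attrs,
                       /*out_nargs=*/1, outs,
                       ForceFloatOutputs, MyScaleKernel,
                       /*variadic_kernel_fn=*/nullptr);
}
```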
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_Scalar PD_Scalar; + +bool PD_ScalarGetBoolData(PD_Scalar *scalar); + +int8_t PD_ScalarGetInt8Data(PD_Scalar *scalar); + +int16_t PD_ScalarGetInt16Data(PD_Scalar *scalar); + +int32_t PD_ScalarGetInt32Data(PD_Scalar *scalar); + +int64_t PD_ScalarGetInt64Data(PD_Scalar *scalar); + +uint8_t PD_ScalarGetUInt8Data(PD_Scalar *scalar); + +uint16_t PD_ScalarGetUInt16Data(PD_Scalar *scalar); + +uint32_t PD_ScalarGetUInt32Data(PD_Scalar *scalar); + +uint64_t PD_ScalarGetUInt64Data(PD_Scalar *scalar); + +float PD_ScalarGetFloat32Data(PD_Scalar *scalar); + +double PD_ScalarGetFloat64Data(PD_Scalar *scalar); + +PD_DataType PD_ScalarGetDataType(PD_Scalar *scalar); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git a/paddle/phi/capi/include/c_tensor.h b/paddle/phi/capi/include/c_tensor.h new file mode 100644 index 0000000000000..494346713cf53 --- /dev/null +++ b/paddle/phi/capi/include/c_tensor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PD_Tensor PD_Tensor; + +PD_DataType PD_TensorGetDataType(const PD_Tensor *tensor, PD_Status *status); + +PD_DataLayout PD_TensorGetDataLayout(const PD_Tensor *tensor, + PD_Status *status); + +int64_t PD_TensorGetByteSize(const PD_Tensor *tensor, PD_Status *status); + +void *PD_TensorGetDataPointer(const PD_Tensor *tensor, PD_Status *status); + +int64_t PD_TensorGetElementCount(const PD_Tensor *tensor, PD_Status *status); + +int64_t PD_TensorGetNumDims(const PD_Tensor *tensor, PD_Status *status); + +int64_t PD_TensorGetDim(const PD_Tensor *tensor, + size_t index, + PD_Status *status); + +void PD_TensorGetLoD(const PD_Tensor *tensor, + PD_List *data, + PD_List *offset, + PD_Status *status); + +bool PD_TensorIsInitialized(const PD_Tensor *tensor, PD_Status *status); + +bool PD_TensorIsValid(const PD_Tensor *tensor, PD_Status *status); + +void *PD_TensorGetHolder(const PD_Tensor *tensor, PD_Status *status); + +void PD_TensorSetDims(PD_Tensor *tensor, + int64_t ndims, + const int64_t *dims, + PD_Status *status); + +void PD_TensorSetDataType(PD_Tensor *tensor, + PD_DataType dtype, + PD_Status *status); + +void PD_TensorSetDataLayout(PD_Tensor *tensor, + PD_DataLayout layout, + PD_Status *status); + +void PD_TensorResetLoD(PD_Tensor *tensor, + PD_List data, + PD_List offset, + PD_Status *status); + +PD_Tensor *PD_NewTensor(); + +void PD_DeleteTensor(PD_Tensor *tensor); + +void PD_TensorShareDataWith(PD_Tensor *dst, + const PD_Tensor *src, + PD_Status *status); + +void PD_TensorShareLoDWith(PD_Tensor *dst, + const PD_Tensor *src, + PD_Status *status); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif diff --git 
a/paddle/phi/capi/include/common.h b/paddle/phi/capi/include/common.h new file mode 100644 index 0000000000000..2d2bc231f479b --- /dev/null +++ b/paddle/phi/capi/include/common.h @@ -0,0 +1,41 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#if !defined(_WIN32) && !defined(__APPLE__) + +#define PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ + _PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) + +#define _PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ + struct __test_global_namespace_##uniq_name##__ {}; \ + static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \ + __test_global_namespace_##uniq_name##__>::value, \ + msg) + +#define PD_DECLARE_CAPI(module_name) \ + PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE( \ + PD_DECLARE_tp_kernel_ns_check_##module_name##_, \ + "PD_DECLARE_KERNEL must be called in global namespace."); \ + extern int TouchCAPISymbolFor##module_name##_(); \ + UNUSED static int __declare_capi_symbol_for_##module_name##_ = \ + TouchCAPISymbolFor##module_name##_() + +#define PD_REGISTER_CAPI(module_name) \ + PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE( \ + PD_DECLARE_tp_kernel_ns_check_##module_name##_, \ + "PD_DECLARE_KERNEL must be called in global namespace."); \ + int TouchCAPISymbolFor##module_name##_() { return 0; } + +#endif diff --git a/paddle/phi/capi/include/data_type.h b/paddle/phi/capi/include/data_type.h new file mode 100644 index 0000000000000..6acbf026e8cb6 --- /dev/null +++ b/paddle/phi/capi/include/data_type.h @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
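The two macros above implement a touch-symbol linking trick: `PD_REGISTER_CAPI(m)` defines `TouchCAPISymbolForm_()` in the module's object file, and `PD_DECLARE_CAPI(m)` plants a static initializer that calls it, so the linker cannot drop the module. A sketch of the pairing; the two lines must live in different translation units, since both expand the same name-check struct:

```cpp
// In the module's implementation file (e.g. paddle/phi/capi/lib/c_tensor.cc):
PD_REGISTER_CAPI(tensor);  // defines int TouchCAPISymbolFortensor_()

// In a consumer (capi.h does exactly this for every module):
PD_DECLARE_CAPI(tensor);   // extern declaration + static initializer that
                           // calls the touch symbol, forcing the link
```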
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/c_data_type.h" + +namespace phi { + +namespace capi { + +#define CPP_TYPE_TO_PD_DTYPE_REGISTER(_) \ + _(bool, PD_DataType::BOOL) \ + _(phi::dtype::bfloat16, PD_DataType::BFLOAT16) \ + _(phi::dtype::float16, PD_DataType::FLOAT16) \ + _(float, PD_DataType::FLOAT32) \ + _(double, PD_DataType::FLOAT64) \ + _(uint8_t, PD_DataType::UINT8) \ + _(uint16_t, PD_DataType::UINT16) \ + _(uint32_t, PD_DataType::UINT32) \ + _(uint64_t, PD_DataType::UINT64) \ + _(int8_t, PD_DataType::INT8) \ + _(int16_t, PD_DataType::INT16) \ + _(int32_t, PD_DataType::INT32) \ + _(int64_t, PD_DataType::INT64) + +template +struct CppTypeToPDType; + +#define CPP_TYPE_TO_PD_DTYPE(x, y) \ + template <> \ + struct CppTypeToPDType { \ + constexpr static PD_DataType Type() { return y; } \ + }; + +template +struct PDTypeToCppType; + +#define PD_DTYPE_TO_CPP_TYPE(x, y) \ + template <> \ + struct PDTypeToCppType { \ + using type = x; \ + }; + +CPP_TYPE_TO_PD_DTYPE_REGISTER(CPP_TYPE_TO_PD_DTYPE) +CPP_TYPE_TO_PD_DTYPE_REGISTER(PD_DTYPE_TO_CPP_TYPE) + +} // namespace capi +} // namespace phi + +#endif diff --git a/paddle/phi/capi/include/kernel_registry.h b/paddle/phi/capi/include/kernel_registry.h new file mode 100644 index 0000000000000..37b045a60658b --- /dev/null +++ b/paddle/phi/capi/include/kernel_registry.h @@ -0,0 +1,338 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "paddle/phi/capi/include/wrapper_base.h" + +namespace phi { +namespace capi { + +inline phi::capi::DeviceContext PD_GetDeviceContext(PD_KernelContext *ctx) { + return phi::capi::DeviceContext(PD_KernelContextGetDeviceContext(ctx)); +} + +inline phi::capi::DenseTensor PD_InputAt(PD_KernelContext *ctx, size_t index) { + return phi::capi::DenseTensor(PD_KernelContextInputAt(ctx, index)); +} + +inline paddle::optional PD_OptionalInputAt( + PD_KernelContext *ctx, size_t index) { + auto tensor = PD_KernelContextInputAt(ctx, index); + return tensor + ? 
+             ? paddle::optional<phi::capi::DenseTensor>(phi::capi::DenseTensor(
+                   reinterpret_cast<PD_Tensor *>(tensor)))
+             : paddle::optional<phi::capi::DenseTensor>(paddle::none);
+}
+
+inline std::vector<phi::capi::DenseTensor> PD_MultiInputAt(
+    PD_KernelContext *ctx, size_t index) {
+  std::vector<phi::capi::DenseTensor> ret;
+  auto list = PD_KernelContextMultiInputAt(ctx, index);
+  auto data = reinterpret_cast<PD_Tensor **>(list.data);
+  for (size_t i = 0; i < list.size; ++i) {
+    ret.emplace_back(data[i]);
+  }
+  return ret;
+}
+
+inline phi::capi::DenseTensor PD_OutputAt(PD_KernelContext *ctx, size_t index) {
+  return phi::capi::DenseTensor(PD_KernelContextOutputAt(ctx, index));
+}
+
+inline std::vector<phi::capi::DenseTensor> PD_MultiOutputAt(
+    PD_KernelContext *ctx, size_t index) {
+  std::vector<phi::capi::DenseTensor> ret;
+  auto list = PD_KernelContextMultiOutputAt(ctx, index);
+  auto data = reinterpret_cast<PD_Tensor **>(list.data);
+  for (size_t i = 0; i < list.size; ++i) {
+    ret.emplace_back(data[i]);
+  }
+  return ret;
+}
+
+template <typename T>
+inline std::vector<T *> PD_GetPointerVector(std::vector<T> *vec) {
+  std::vector<T *> ret;
+  // Iterate over the pointee, not the pointer, taking each element's address.
+  for (auto &item : *vec) {
+    ret.push_back(&item);
+  }
+  return ret;
+}
+
+template <typename T>
+inline T PD_AttrAt(PD_KernelContext *ctx, size_t index);
+
+template <>
+inline bool PD_AttrAt<bool>(PD_KernelContext *ctx, size_t index) {
+  return PD_KernelContextBoolAttrAt(ctx, index);
+}
+
+template <>
+inline int32_t PD_AttrAt<int32_t>(PD_KernelContext *ctx, size_t index) {
+  return PD_KernelContextInt32AttrAt(ctx, index);
+}
+
+template <>
+inline int64_t PD_AttrAt<int64_t>(PD_KernelContext *ctx, size_t index) {
+  return PD_KernelContextInt64AttrAt(ctx, index);
+}
+
+template <>
+inline float PD_AttrAt<float>(PD_KernelContext *ctx, size_t index) {
+  return PD_KernelContextFloatAttrAt(ctx, index);
+}
+
+template <>
+inline double PD_AttrAt<double>(PD_KernelContext *ctx, size_t index) {
+  return PD_KernelContextDoubleAttrAt(ctx, index);
+}
+
+template <>
+inline std::string PD_AttrAt<std::string>(PD_KernelContext *ctx,
+                                          size_t index) {
+  return PD_KernelContextStringAttrAt(ctx, index);
+}
+
+template <>
+inline PD_DataType PD_AttrAt<PD_DataType>(PD_KernelContext *ctx,
+                                          size_t index) {
+  return PD_KernelContextDataTypeAttrAt(ctx, index);
+}
+
+template <>
+inline PD_DataLayout PD_AttrAt<PD_DataLayout>(PD_KernelContext *ctx,
+                                              size_t index) {
+  return PD_KernelContextDataLayoutAttrAt(ctx, index);
+}
+
+template <>
+inline std::vector<int32_t> PD_AttrAt<std::vector<int32_t>>(
+    PD_KernelContext *ctx, size_t index) {
+  auto list = PD_KernelContextListInt32AttrAt(ctx, index);
+  auto data = reinterpret_cast<int32_t *>(list.data);
+  std::vector<int32_t> cc_list(data, data + list.size);
+  return cc_list;
+}
+
+template <>
+inline std::vector<int64_t> PD_AttrAt<std::vector<int64_t>>(
+    PD_KernelContext *ctx, size_t index) {
+  auto list = PD_KernelContextListInt64AttrAt(ctx, index);
+  auto data = reinterpret_cast<int64_t *>(list.data);
+  std::vector<int64_t> cc_list(data, data + list.size);
+  return cc_list;
+}
+
+template <>
+inline std::vector<float> PD_AttrAt<std::vector<float>>(PD_KernelContext *ctx,
+                                                        size_t index) {
+  auto list = PD_KernelContextListFloatAttrAt(ctx, index);
+  auto data = reinterpret_cast<float *>(list.data);
+  std::vector<float> cc_list(data, data + list.size);
+  return cc_list;
+}
+
+template <>
+inline std::vector<double> PD_AttrAt<std::vector<double>>(
+    PD_KernelContext *ctx, size_t index) {
+  auto list = PD_KernelContextListDoubleAttrAt(ctx, index);
+  auto data = reinterpret_cast<double *>(list.data);
+  std::vector<double> cc_list(data, data + list.size);
+  return cc_list;
+}
+
+template <>
+inline phi::capi::Scalar PD_AttrAt<phi::capi::Scalar>(PD_KernelContext *ctx,
+                                                      size_t index) {
+  auto scalar = PD_KernelContextScalarAttrAt(ctx, index);
+  return phi::capi::Scalar(scalar);
+}
+
+template <>
+inline phi::capi::IntArray PD_AttrAt<phi::capi::IntArray>(
+    PD_KernelContext *ctx, size_t index) {
+  auto int_array = PD_KernelContextIntArrayAttrAt(ctx, index);
+  return phi::capi::IntArray(int_array);
+}
+
+template <>
+inline phi::capi::Place PD_AttrAt<phi::capi::Place>(PD_KernelContext *ctx,
+                                                    size_t index) {
+  auto place = PD_KernelContextPlaceAttrAt(ctx, index);
+  return phi::capi::Place(place);
+}
+
+template <>
+inline std::vector<phi::capi::Scalar> PD_AttrAt<std::vector<phi::capi::Scalar>>(
+    PD_KernelContext *ctx, size_t index) {
+  auto c_list = PD_KernelContextListScalarAttrAt(ctx, index);
+  auto data = reinterpret_cast<PD_Scalar **>(c_list.data);
+  std::vector<phi::capi::Scalar> list;
+  for (size_t i = 0; i < c_list.size; ++i) {
+    list.emplace_back(data[i]);
+  }
+  PD_DeletePointerList(c_list);
+  return list;
+}
+
+template <>
+inline std::vector<bool> PD_AttrAt<std::vector<bool>>(PD_KernelContext *ctx,
+                                                      size_t index) {
+  auto c_list = PD_KernelContextListBoolAttrAt(ctx, index);
+  std::vector<bool> list;
+  list.reserve(c_list.size);
+  auto data = reinterpret_cast<uint8_t *>(c_list.data);
+  for (size_t i = 0; i < c_list.size; ++i) {
+    list.emplace_back(static_cast<bool>(data[i]));
+  }
+  PD_DeleteUInt8List(c_list);
+  return list;
+}
+
+#define CPP_TYPE_TO_PD_ARG_TYPE_REGISTER(_)                                   \
+  _(phi::capi::DenseTensor, ::PD_KernelArgumentType::PD_ARG_TYPE_TENSOR)      \
+  _(phi::capi::DeviceContext, ::PD_KernelArgumentType::PD_ARG_TYPE_CONTEXT)   \
+  _(bool, ::PD_KernelArgumentType::PD_ARG_TYPE_BOOL)                          \
+  _(float, ::PD_KernelArgumentType::PD_ARG_TYPE_FLOAT32)                      \
+  _(double, ::PD_KernelArgumentType::PD_ARG_TYPE_FLOAT64)                     \
+  _(int32_t, ::PD_KernelArgumentType::PD_ARG_TYPE_INT32)                      \
+  _(int64_t, ::PD_KernelArgumentType::PD_ARG_TYPE_INT64)                      \
+  _(PD_DataType, ::PD_KernelArgumentType::PD_ARG_TYPE_DATA_TYPE)              \
+  _(PD_DataLayout, ::PD_KernelArgumentType::PD_ARG_TYPE_DATA_LAYOUT)          \
+  _(std::vector<int32_t>, ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT32)    \
+  _(std::vector<int64_t>, ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT64)    \
+  _(std::vector<float>, ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT32)    \
+  _(std::vector<double>, ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT64)   \
+  _(std::vector<bool>, ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_BOOL)        \
+  _(std::string, ::PD_KernelArgumentType::PD_ARG_TYPE_STRING)                 \
+  _(phi::capi::Scalar, ::PD_KernelArgumentType::PD_ARG_TYPE_SCALAR)           \
+  _(phi::capi::IntArray, ::PD_KernelArgumentType::PD_ARG_TYPE_INT_ARRAY)      \
+  _(phi::capi::Place, ::PD_KernelArgumentType::PD_ARG_TYPE_PLACE)             \
+  _(std::vector<std::string>,                                                 \
+    ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_STRING)                         \
+  _(std::vector<phi::capi::Scalar>,                                           \
+    ::PD_KernelArgumentType::PD_ARG_TYPE_LIST_SCALAR)
+
+template <typename T>
+struct CppTypeToPDArgumentType;
+
+#define CPP_TYPE_TO_PD_ARG_TYPE(x, y)                               \
+  template <>                                                       \
+  struct CppTypeToPDArgumentType<x> {                               \
+    constexpr static ::PD_KernelArgumentType Type() { return y; }   \
+  };
+
+template <::PD_KernelArgumentType T>
+struct PDArgumentTypeToCppType;
+
+#define PD_ARG_TYPE_TO_CPP_TYPE(x, y) \
+  template <>                         \
+  struct PDArgumentTypeToCppType<y> { \
+    using type = x;                   \
+  };
+
+CPP_TYPE_TO_PD_ARG_TYPE_REGISTER(CPP_TYPE_TO_PD_ARG_TYPE)
+CPP_TYPE_TO_PD_ARG_TYPE_REGISTER(PD_ARG_TYPE_TO_CPP_TYPE)
+
+}  // namespace capi
+
+using LoD = capi::LoD;
+using Context = capi::DeviceContext;
+using DenseTensor = capi::DenseTensor;
+using Scalar = capi::Scalar;
+using IntArray = capi::IntArray;
+using Place = capi::Place;
+using DataType = ::PD_DataType;
+using DataLayout = ::PD_DataLayout;
+
+}  // namespace phi
+
+#include "paddle/phi/capi/include/kernel_utils.h"
+
+// clang-format
off + +#define PD_BUILD_PHI_KERNEL(kernel_name, \ + backend, \ + layout, \ + meta_kernel_fn, \ + ...) \ + static void \ + __CUSTOM_adefs_CFN_##kernel_name##_##backend##_##layout( \ + const PD_KernelKey* kernel_key, PD_Kernel* kernel); \ + template \ + struct __##kernel_name##_##backend##_##layout##__ { \ + __##kernel_name##_##backend##_##layout##__() { \ + ::phi::capi::CustomKernelArgsParseFunctor)> \ + parser; \ + PD_RegisterPhiKernel( \ + #kernel_name, \ + #backend, \ + ::phi::capi::CppTypeToPDType::Type(), \ + PD_DATALAYOUT(layout), \ + parser.in_args_type.size(), \ + parser.in_args_type.data(), \ + parser.attr_args_type.size(), \ + parser.attr_args_type.data(), \ + parser.out_args_type.size(), \ + parser.out_args_type.data(), \ + __CUSTOM_adefs_CFN_##kernel_name##_##backend##_##layout, \ + CUSTOM_PHI_KERNEL(meta_kernel_fn), \ + CUSTOM_PHI_VARIADIC_KERNEL( \ + meta_kernel_fn)); \ + } \ + static void Touch() {} \ + }; \ + PD_CUSTOM_PHI_KERNEL_STATIC_ASSERT_GLOBAL_NAMESPACE( \ + CUSTOM_tp_ns_check_##kernel_name##_##backend##_##layout, \ + "PD_BUILD_KERNEL must be called in global namespace."); \ + static void \ + __CUSTOM_adefs_FN_##kernel_name##_##backend##_##layout( \ + const ::phi::capi::KernelKey &kernel_key, \ + ::phi::capi::Kernel* kernel); \ + _PD_BUILD_PHI_KERNEL(__##kernel_name##_##backend##_##layout##__, \ + kernel_name, \ + backend, \ + layout, \ + meta_kernel_fn, \ + __VA_ARGS__) \ + void \ + __CUSTOM_adefs_CFN_##kernel_name##_##backend##_##layout( \ + const PD_KernelKey* kernel_key, PD_Kernel* kernel) { \ + auto cc_kernel = ::phi::capi::Kernel(kernel); \ + __CUSTOM_adefs_FN_##kernel_name##_##backend##_##layout( \ + ::phi::capi::KernelKey( \ + const_cast(kernel_key)), \ + &cc_kernel); \ + } \ + void \ + __CUSTOM_adefs_FN_##kernel_name##_##backend##_##layout( \ + const ::phi::capi::KernelKey &kernel_key, \ + ::phi::capi::Kernel* kernel) + +// clang-format on + +#endif diff --git a/paddle/phi/capi/include/kernel_utils.h b/paddle/phi/capi/include/kernel_utils.h new file mode 100644 index 0000000000000..7302e6f4677b3 --- /dev/null +++ b/paddle/phi/capi/include/kernel_utils.h @@ -0,0 +1,812 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/capi/include/common.h" + +#if !defined(_WIN32) && !defined(__APPLE__) + +namespace phi { +namespace capi { + +#define CUSTOM_PHI_KERNEL(...) \ + ::phi::capi::CustomKernelImpl::Compute + +#define CUSTOM_PHI_VARIADIC_KERNEL(...) \ + reinterpret_cast( \ + &::phi::capi::CustomKernelImpl::VariadicCompute) + +#define PD_CUSTOM_NARGS(...) \ + _PD_CUSTOM_NARGS((__VA_ARGS__, _PD_CUSTOM_RESQ_N())) +#define _PD_CUSTOM_NARGS(...) _PD_CUSTOM_ARG_N(__VA_ARGS__) +#define _PD_CUSTOM_ARG_N_EXPAND( \ + _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, N, ...) 
\ + N +#define _PD_CUSTOM_ARG_N(args) _PD_CUSTOM_ARG_N_EXPAND args +#define _PD_CUSTOM_RESQ_N() 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define PD_DATALAYOUT(arg__) PD_DataLayout::arg__ + +#ifdef __COUNTER__ +#define PD_CUSTOM_PHI_KERNEL_ID __COUNTER__ +#else +#define PD_CUSTOM_PHI_KERNEL_ID __LINE__ +#endif + +#define PD_CUSTOM_PHI_KERNEL_CONCATENATE(arg1, arg2) \ + PD_CUSTOM_PHI_KERNEL_CONCATENATE1(arg1, arg2) +#define PD_CUSTOM_PHI_KERNEL_CONCATENATE1(arg1, arg2) \ + PD_CUSTOM_PHI_KERNEL_CONCATENATE2(arg1, arg2) +#define PD_CUSTOM_PHI_KERNEL_CONCATENATE2(arg1, arg2) arg1##arg2 +#define PD_CUSTOM_PHI_KERNEL_EXPAND(x) x + +#define _PD_BUILD_KERNEL_INSTANTIATION(N, meta_kernel_fn, backend, ...) \ + PD_CUSTOM_PHI_KERNEL_CONCATENATE(_PD_BUILD_KERNEL_INSTANTIATION_, N) \ + (meta_kernel_fn, backend, __VA_ARGS__) + +#define _PD_BUILD_KERNEL_INSTANTIATION_1(meta_kernel_fn, backend, cpp_dtype) \ + template decltype(meta_kernel_fn) meta_kernel_fn +#define _PD_BUILD_KERNEL_INSTANTIATION_2( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_1(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_3( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_2(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_4( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_3(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_5( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_4(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_6( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_5(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_7( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_6(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_8( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_7(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_9( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_8(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_10( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_9(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_11( \ + meta_kernel_fn, backend, cpp_dtype, ...) 
\ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_10(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_12( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_11(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_13( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_12(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_14( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_13(meta_kernel_fn, backend, __VA_ARGS__)) +#define _PD_BUILD_KERNEL_INSTANTIATION_15( \ + meta_kernel_fn, backend, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) meta_kernel_fn; \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_INSTANTIATION_14(meta_kernel_fn, backend, __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_1(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + int TouchCustomKernelSymbolFor_##kernel_name##_##backend##_##layout() { \ + return 0; \ + } + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_2(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_1(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_3(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_2(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_4(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_3(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_5(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) 
\ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_4(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_6(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_5(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_7(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_6(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_8(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_7(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_9(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_8(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_10(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_9(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_11(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_10(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_12(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) 
\ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_11(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_13(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_12(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_14(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_13(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT_15(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + registrar_id, \ + meta_kernel_fn, \ + cpp_dtype, \ + ...) \ + static const registrar_class PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + __reg_pt_kernel_##kernel_name##_##backend##_##layout##_, registrar_id); \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_14(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define _PD_BUILD_KERNEL_REGISTRAR_INIT( \ + N, registrar_class, kernel_name, backend, layout, meta_kernel_fn, ...) \ + PD_CUSTOM_PHI_KERNEL_EXPAND(PD_CUSTOM_PHI_KERNEL_CONCATENATE( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT_, N)(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + PD_CUSTOM_PHI_KERNEL_ID, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define PD_BUILD_KERNEL_REGISTRAR_INIT( \ + registrar_class, kernel_name, backend, layout, meta_kernel_fn, ...) \ + PD_CUSTOM_PHI_KERNEL_EXPAND( \ + _PD_BUILD_KERNEL_REGISTRAR_INIT(PD_CUSTOM_NARGS(__VA_ARGS__), \ + registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define PD_BUILD_KERNEL_INSTANTIATION(meta_kernel_fn, backend, ...) \ + _PD_BUILD_KERNEL_INSTANTIATION( \ + PD_CUSTOM_NARGS(__VA_ARGS__), meta_kernel_fn, backend, __VA_ARGS__) + +#define _PD_BUILD_2TA_KERNEL( \ + registrar_class, kernel_name, backend, layout, meta_kernel_fn, ...) \ + PD_BUILD_KERNEL_INSTANTIATION(meta_kernel_fn, backend, __VA_ARGS__); \ + PD_BUILD_KERNEL_REGISTRAR_INIT(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + meta_kernel_fn, \ + __VA_ARGS__); + +#define _PD_BUILD_PHI_KERNEL( \ + registrar_class, kernel_name, backend, layout, meta_kernel_fn, ...) 
\ + PD_CUSTOM_PHI_KERNEL_EXPAND(_PD_BUILD_2TA_KERNEL(registrar_class, \ + kernel_name, \ + backend, \ + layout, \ + meta_kernel_fn, \ + __VA_ARGS__)) + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_DEVICE_CONTEXT(dev_ctx) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(in_idx == 0, \ + "Kernel's DeviceContext should appear before Inputs."); \ + static_assert( \ + attr_idx == 0, \ + "Kernel's DeviceContext should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's DeviceContext should appear before Outputs."); \ + dev_ctx arg = PD_GetDeviceContext(ctx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_INPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(attr_idx == 0, \ + "Kernel's Input should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's Input should appear before Outputs."); \ + const tensor_type arg = PD_InputAt(ctx, in_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_OPTIONAL_INPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper &, \ + Tail...> { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(attr_idx == 0, \ + "Kernel's Input should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's Input should appear before Outputs."); \ + auto arg = PD_OptionalInputAt(ctx, in_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_INPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper &, \ + Tail...> { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(attr_idx == 0, \ + "Kernel's Input should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's Input should appear before Outputs."); \ + auto arg = PD_MultiInputAt(ctx, in_idx); \ + auto arg_wrapper = PD_GetPointerVector(&arg); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg_wrapper); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(attr_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(out_idx == 0, \ + "Kernel's Attributes should appear before Outputs."); \ + attr_type arg = PD_AttrAt(ctx, attr_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( \ + attr_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(out_idx == 0, \ + "Kernel's Attributes should appear before Outputs."); \ + attr_type arg = PD_AttrAt(ctx, attr_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_OUTPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + auto arg = PD_OutputAt(ctx, 
out_idx); \ + tensor_type *ptr = (arg.raw_data() ? &arg : nullptr); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., ptr); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_OUTPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper, Tail...> { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + auto arg = PD_MultiOutputAt(ctx, out_idx); \ + auto arg_wrapper = PD_GetPointerVector(&arg); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg_wrapper); \ + } \ + } + +template +struct CustomTypeTag {}; + +template +struct CustomKernelImpl; + +template +struct CustomKernelImpl { + static void Compute(PD_KernelContext *ctx) { + CustomKernelCallHelper>:: + template Compute<0, 0, 0, 0>(ctx); + } + + static void VariadicCompute(const phi::capi::DeviceContext &dev_ctx, + Args... args) { + return kernel_fn(static_cast(dev_ctx), std::forward(args)...); + } + + private: + template + struct CustomKernelCallHelper; + + /* DeviceContext Helpers */ + + PD_SPECIALIZE_CustomKernelCallHelper_FOR_DEVICE_CONTEXT( + phi::capi::DeviceContext); + + /* Input Helpers */ + + PD_SPECIALIZE_CustomKernelCallHelper_FOR_INPUT(phi::capi::DenseTensor); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_OPTIONAL_INPUT( + phi::capi::DenseTensor); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_INPUT(phi::capi::DenseTensor); + + /* Attribute Helpers */ + + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(bool); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(int32_t); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(int64_t); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(float); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(double); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(PD_DataType); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(PD_DataLayout); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(phi::capi::Place); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF(std::string); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + phi::capi::Scalar); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + phi::capi::IntArray); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( + std::vector); + + /* Output Helpers */ + + PD_SPECIALIZE_CustomKernelCallHelper_FOR_OUTPUT(phi::capi::DenseTensor); + PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_OUTPUT(phi::capi::DenseTensor); + + /* End case */ + template + struct CustomKernelCallHelper> { + template + static void Compute(PD_KernelContext *ctx, DevCtx dev_ctx, Args &...args) { + static_assert(dev_ctx_idx > 0, + "Kernel should pass DeviceContext as argument."); + static_assert(out_idx > 0, "Kernel should have output argument."); + return kernel_fn(dev_ctx, args...); + } + }; +}; + +template +struct CustomKernelArgsParseFunctor; + +template +struct CustomKernelArgsParseFunctor { + using Args = std::tuple; + enum : std::size_t { Arity = sizeof...(Args_) }; + using Indices = std::make_index_sequence; + template + using Arg 
= typename std::tuple_element::type; + + CustomKernelArgsParseFunctor() { + auto args_type = ParseArgType(Indices{}); + + for (auto arg_type : args_type) { + if (arg_type == + std::type_index(typeid(const phi::capi::DeviceContext *))) { + } else if (arg_type == + std::type_index(typeid(const phi::capi::DenseTensor &))) { + in_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_TENSOR); + } else if (arg_type == + std::type_index(typeid( + const paddle::optional &))) { + in_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_OPTIONAL_TENSOR); + } else if (arg_type == + std::type_index(typeid( + const std::vector &))) { + in_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_LIST_TENSOR); + } else if (arg_type == + std::type_index( + typeid(const paddle::optional< + std::vector> &))) { + in_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_OPTIONAL_MULTI_TENSOR); + } else if (arg_type == std::type_index(typeid(bool))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_BOOL); + } else if (arg_type == std::type_index(typeid(float))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_FLOAT32); + } else if (arg_type == std::type_index(typeid(double))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_FLOAT64); + } else if (arg_type == std::type_index(typeid(int32_t))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_INT32); + } else if (arg_type == std::type_index(typeid(int64_t))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_INT64); + } else if (arg_type == + std::type_index(typeid(const phi::capi::Place &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_PLACE); + } else if (arg_type == std::type_index(typeid(const std::string &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_STRING); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_LIST_BOOL); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT32); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT64); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT32); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT64); + } else if (arg_type == + std::type_index(typeid(const std::vector &))) { + attr_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_LIST_STRING); + } else if (arg_type == std::type_index(typeid( + const std::vector &))) { + attr_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_LIST_SCALAR); + } else if (arg_type == + std::type_index(typeid(const phi::capi::Scalar &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_SCALAR); + } else if (arg_type == + std::type_index(typeid(const phi::capi::IntArray &))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_INT_ARRAY); + } else if (arg_type == std::type_index(typeid(PD_DataType))) { + attr_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_DATA_TYPE); + } else if (arg_type == std::type_index(typeid(PD_DataLayout))) { + attr_args_type.push_back( + PD_KernelArgumentType::PD_ARG_TYPE_DATA_LAYOUT); + } else if (arg_type == 
+                 std::type_index(typeid(phi::capi::DenseTensor *))) {
+        out_args_type.push_back(PD_KernelArgumentType::PD_ARG_TYPE_TENSOR);
+      } else if (arg_type == std::type_index(typeid(
+                                 std::vector<phi::capi::DenseTensor *>))) {
+        out_args_type.push_back(
+            PD_KernelArgumentType::PD_ARG_TYPE_LIST_TENSOR);
+      }
+    }
+  }
+
+  std::vector<PD_KernelArgumentType> in_args_type;
+  std::vector<PD_KernelArgumentType> attr_args_type;
+  std::vector<PD_KernelArgumentType> out_args_type;
+
+ private:
+  template <std::size_t... INDEX>
+  static std::vector<std::type_index> ParseArgType(
+      std::index_sequence<INDEX...>) {
+    return {std::type_index(typeid(Arg<INDEX>))...};
+  }
+};
+
+}  // namespace capi
+}  // namespace phi
+
+#endif
diff --git a/paddle/phi/capi/include/type_utils.h b/paddle/phi/capi/include/type_utils.h
new file mode 100644
index 0000000000000..ed892c881d715
--- /dev/null
+++ b/paddle/phi/capi/include/type_utils.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#if !defined(_WIN32) && !defined(__APPLE__)
+
+#include "paddle/phi/capi/include/c_data_type.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/enforce.h"
+
+namespace phi {
+namespace capi {
+
+inline PD_DataType ToPDDataType(::paddle::experimental::DataType dtype) {
+#define return_result(in, ret)               \
+  case ::paddle::experimental::DataType::in: \
+    return PD_DataType::ret
+  switch (dtype) {
+    return_result(UNDEFINED, UNDEFINED);
+    return_result(FLOAT64, FLOAT64);
+    return_result(FLOAT32, FLOAT32);
+    return_result(FLOAT16, FLOAT16);
+    return_result(BFLOAT16, BFLOAT16);
+    return_result(INT64, INT64);
+    return_result(INT32, INT32);
+    return_result(INT16, INT16);
+    return_result(INT8, INT8);
+    return_result(UINT64, UINT64);
+    return_result(UINT32, UINT32);
+    return_result(UINT16, UINT16);
+    return_result(UINT8, UINT8);
+    return_result(BOOL, BOOL);
+    default: {
+      PADDLE_THROW(
+          ::phi::errors::Unavailable("DataType %d is not supported.", dtype));
+    }
+  }
+#undef return_result
+}
+
+inline ::paddle::experimental::DataType ToPhiDataType(PD_DataType dtype) {
+#define return_result(in, ret) \
+  case PD_DataType::in:        \
+    return ::paddle::experimental::DataType::ret
+  switch (dtype) {
+    return_result(UNDEFINED, UNDEFINED);
+    return_result(FLOAT64, FLOAT64);
+    return_result(FLOAT32, FLOAT32);
+    return_result(FLOAT16, FLOAT16);
+    return_result(BFLOAT16, BFLOAT16);
+    return_result(INT64, INT64);
+    return_result(INT32, INT32);
+    return_result(INT16, INT16);
+    return_result(INT8, INT8);
+    return_result(UINT64, UINT64);
+    return_result(UINT32, UINT32);
+    return_result(UINT16, UINT16);
+    return_result(UINT8, UINT8);
+    return_result(BOOL, BOOL);
+    default: {
+      PADDLE_THROW(
+          ::phi::errors::Unavailable("DataType %d is not supported.", dtype));
+      return ::paddle::experimental::DataType::UNDEFINED;
+    }
+  }
+#undef return_result
+}
+
+inline PD_DataLayout ToPDDataLayout(::paddle::experimental::DataLayout layout) {
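+  // `return_result` is a local X-macro: each use expands to a `case` label
+  // mapping one enumerator to its counterpart in the other enum; it is
+  // #undef'd at the end of the function so the name does not leak.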
+#define return_result(in, ret) \ + case ::paddle::experimental::DataLayout::in: \ + return PD_DataLayout::ret + switch (layout) { + return_result(ANY, ANY); + return_result(NHWC, NHWC); + return_result(NCHW, NCHW); + return_result(NCDHW, NCDHW); + return_result(NDHWC, NDHWC); + default: { + PADDLE_THROW(::phi::errors::Unavailable("DataLayout %d is not supported.", + layout)); + return PD_DataLayout::ANY; + } + } +#undef return_result +} + +inline ::paddle::experimental::DataLayout ToPhiDataLayout( + PD_DataLayout layout) { +#define return_result(in, ret) \ + case PD_DataLayout::in: \ + return ::paddle::experimental::DataLayout::ret + switch (layout) { + return_result(ANY, ANY); + return_result(NHWC, NHWC); + return_result(NCHW, NCHW); + return_result(NCDHW, NCDHW); + return_result(NDHWC, NDHWC); + default: { + PADDLE_THROW(::phi::errors::Unavailable("DataLayout %d is not supported.", + layout)); + return ::paddle::experimental::DataLayout::ANY; + } + } +#undef return_result +} + +} // namespace capi +} // namespace phi + +#endif diff --git a/paddle/phi/capi/include/wrapper_base.h b/paddle/phi/capi/include/wrapper_base.h new file mode 100644 index 0000000000000..2b5421bc266cf --- /dev/null +++ b/paddle/phi/capi/include/wrapper_base.h @@ -0,0 +1,497 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
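The four converters above are what the capi/lib implementation files use to bridge enum values across the C boundary. A minimal sketch of the intended round-trip:

// Round-trip through the C enum (values follow the mappings above):
PD_DataType c_dtype =
    phi::capi::ToPDDataType(::paddle::experimental::DataType::FLOAT32);
auto phi_dtype = phi::capi::ToPhiDataType(c_dtype);  // back to FLOAT32
// Enumerators outside the mapped set hit the default branch and raise
// phi::errors::Unavailable through PADDLE_THROW.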
+ +#pragma once + +#if !defined(_WIN32) && !defined(__APPLE__) + +#include +#include +#include +#include +#include +#include + +#include "paddle/phi/api/ext/exception.h" +#include "paddle/phi/capi/include/c_device_context.h" +#include "paddle/phi/capi/include/c_int_array.h" +#include "paddle/phi/capi/include/c_kernel_context.h" +#include "paddle/phi/capi/include/c_kernel_factory.h" +#include "paddle/phi/capi/include/c_kernel_registry.h" +#include "paddle/phi/capi/include/c_place.h" +#include "paddle/phi/capi/include/c_scalar.h" +#include "paddle/phi/capi/include/c_tensor.h" +#include "paddle/phi/capi/include/data_type.h" +#include "paddle/utils/optional.h" + +#define PD_CHECK_STATUS(status) PD_CHECK(status == C_SUCCESS) + +namespace phi { + +namespace capi { + +using LoD = std::vector>; + +template +static inline PD_List PDListFromVector(std::vector* vec) { + PD_List list; + list.data = reinterpret_cast(vec->data()); + list.size = vec->size(); + return list; +} + +template +static inline std::vector PDListToVector(PD_List list) { + return std::vector(static_cast(list.data), + static_cast(list.data) + list.size); +} + +inline std::vector PD_TensorGetDims(PD_Tensor* tensor, + PD_Status* status) { + int64_t ndims = PD_TensorGetNumDims(tensor, status); + if (ndims > 0) { + std::vector shape(ndims); + for (int64_t i = 0; i < ndims; ++i) { + shape[i] = PD_TensorGetDim(tensor, i, status); + } + return shape; + } + return std::vector(); +} + +template +class WrapperBase { + public: + explicit WrapperBase(T* ptr, bool own = false) : data_(ptr), own_(own) {} + + inline T* raw_data() const { return data_; } + + inline bool own_data() const { return own_; } + + inline void reset(const T* ptr) { data_ = ptr; } + + private: + T* data_; + bool own_; +}; + +class DenseTensor : public WrapperBase { + public: + DenseTensor() : WrapperBase(PD_NewTensor(), true) {} + + explicit DenseTensor(PD_Tensor* tensor) : WrapperBase(tensor) {} + + ~DenseTensor() { + if (own_data()) { + PD_DeleteTensor(raw_data()); + } + } + + bool valid() const { + C_Status status; + auto ret = PD_TensorIsValid(raw_data(), &status); + PD_CHECK_STATUS(status); + return ret; + } + + bool initialized() const { + C_Status status; + auto ret = PD_TensorIsInitialized(raw_data(), &status); + PD_CHECK_STATUS(status); + return ret; + } + + void* Holder() const { + C_Status status; + auto holder = PD_TensorGetHolder(raw_data(), &status); + PD_CHECK_STATUS(status); + return holder; + } + + std::vector dims() const { + C_Status status; + auto dimension = PD_TensorGetDims(raw_data(), &status); + PD_CHECK_STATUS(status); + return dimension; + } + + PD_DataType dtype() const { + C_Status status; + auto data_type = PD_TensorGetDataType(raw_data(), &status); + PD_CHECK_STATUS(status); + return data_type; + } + + PD_DataLayout layout() const { + C_Status status; + auto data_layout = PD_TensorGetDataLayout(raw_data(), &status); + PD_CHECK_STATUS(status); + return data_layout; + } + + int64_t numel() const { + C_Status status; + auto element_count = PD_TensorGetElementCount(raw_data(), &status); + PD_CHECK_STATUS(status); + return element_count; + } + + int64_t memory_size() const { + C_Status status; + auto byte_size = PD_TensorGetByteSize(raw_data(), &status); + PD_CHECK_STATUS(status); + return byte_size; + } + + LoD lod() const { + PD_List data, offset; + C_Status status; + PD_TensorGetLoD(raw_data(), &data, &offset, &status); + PD_CHECK_STATUS(status); + LoD lod_; + auto ptr = static_cast(data.data); + auto offset_ptr = static_cast(offset.data); + for 
(size_t i = 0; i < offset.size - 1; ++i) { + lod_.emplace_back(ptr + offset_ptr[i], ptr + offset_ptr[i + 1]); + } + delete[] ptr; + delete[] offset_ptr; + return lod_; + } + + void ResetLoD(const LoD& lod) { + std::vector data, offset; + offset.push_back(0); + for (const auto& item : lod) { + data.insert(data.cend(), item.cbegin(), item.cend()); + offset.push_back(item.size()); + } + PD_List data_list, offset_list; + data_list = PDListFromVector(&data); + offset_list = PDListFromVector(&offset); + + C_Status status; + PD_TensorResetLoD(raw_data(), data_list, offset_list, &status); + PD_CHECK_STATUS(status); + } + + void Resize(const std::vector& dims) { + C_Status status; + PD_TensorSetDims(raw_data(), dims.size(), dims.data(), &status); + PD_CHECK_STATUS(status); + } + + void set_dtype(PD_DataType data_type) { + C_Status status; + PD_TensorSetDataType(raw_data(), data_type, &status); + PD_CHECK_STATUS(status); + } + + void set_layout(PD_DataLayout data_layout) { + C_Status status; + PD_TensorSetDataLayout(raw_data(), data_layout, &status); + PD_CHECK_STATUS(status); + } + + template + T* data() const { + C_Status status; + auto ptr = PD_TensorGetDataPointer(raw_data(), &status); + PD_CHECK_STATUS(status); + return static_cast(ptr); + } + + // template + // T* mutable_data(int64_t size = 0, const PD_DeviceContext* ctx = nullptr) { + // C_Status status; + // auto ptr = PD_DeviceContextAllocateTensor( + // ctx, raw_data(), size, phi::capi::CppTypeToPDType::Type(), + // &status); + // PD_CHECK_STATUS(status); + // return static_cast(ptr); + // } + + // void* mutable_data(PD_DataType data_type, + // int64_t size = 0, + // const PD_DeviceContext* ctx = nullptr) { + // C_Status status; + // auto ptr = PD_DeviceContextAllocateTensor( + // ctx, raw_data(), size, data_type, &status); + // PD_CHECK_STATUS(status); + // return static_cast(ptr); + // } + + DenseTensor& ShareDataWith(const DenseTensor& src) { + C_Status status; + PD_TensorShareDataWith(raw_data(), src.raw_data(), &status); + PD_CHECK_STATUS(status); + return *this; + } + + void share_lod(const DenseTensor& src) { + C_Status status; + PD_TensorShareLoDWith(raw_data(), src.raw_data(), &status); + PD_CHECK_STATUS(status); + } +}; + +class DeviceContext : public WrapperBase { + public: + explicit DeviceContext(PD_DeviceContext* context) + : WrapperBase(context) {} + + void* stream() const { + C_Status status; + auto stream_ = PD_DeviceContextGetStream(raw_data(), &status); + PD_CHECK_STATUS(status); + return stream_; + } + + void* Alloc(DenseTensor* tensor, + PD_DataType dtype, + int64_t requested_size = 0) const { + C_Status status; + auto ptr = PD_DeviceContextAllocateTensor( + raw_data(), tensor->raw_data(), requested_size, dtype, &status); + PD_CHECK_STATUS(status); + return static_cast(ptr); + } + + template + T* Alloc(DenseTensor* tensor, int64_t requested_size = 0) const { + C_Status status; + auto ptr = + PD_DeviceContextAllocateTensor(raw_data(), + tensor->raw_data(), + requested_size, + phi::capi::CppTypeToPDType::Type(), + &status); + PD_CHECK_STATUS(status); + return static_cast(ptr); + } + + void* HostAlloc(DenseTensor* tensor, + PD_DataType dtype, + int64_t requested_size = 0) const { + C_Status status; + auto ptr = PD_DeviceContextAllocateTensor( + nullptr, tensor->raw_data(), requested_size, dtype, &status); + PD_CHECK_STATUS(status); + return static_cast(ptr); + } + + template + T* HostAlloc(DenseTensor* tensor, int64_t requested_size = 0) const { + C_Status status; + auto ptr = + PD_DeviceContextAllocateTensor(nullptr, + 
tensor->raw_data(), + requested_size, + phi::capi::CppTypeToPDType::Type(), + &status); + PD_CHECK_STATUS(status); + return static_cast(ptr); + } +}; + +class Scalar : public WrapperBase { + public: + explicit Scalar(PD_Scalar* scalar) : WrapperBase(scalar) {} + + PD_DataType dtype() const { return PD_ScalarGetDataType(raw_data()); } + + template + inline T to() const; +}; + +template <> +inline bool Scalar::to() const { + return PD_ScalarGetBoolData(raw_data()); +} + +template <> +inline float Scalar::to() const { + return PD_ScalarGetFloat32Data(raw_data()); +} + +template <> +inline double Scalar::to() const { + return PD_ScalarGetFloat64Data(raw_data()); +} + +template <> +inline uint8_t Scalar::to() const { + return PD_ScalarGetUInt8Data(raw_data()); +} + +template <> +inline uint16_t Scalar::to() const { + return PD_ScalarGetUInt16Data(raw_data()); +} + +template <> +inline uint32_t Scalar::to() const { + return PD_ScalarGetUInt32Data(raw_data()); +} + +template <> +inline uint64_t Scalar::to() const { + return PD_ScalarGetUInt64Data(raw_data()); +} + +template <> +inline int8_t Scalar::to() const { + return PD_ScalarGetInt8Data(raw_data()); +} + +template <> +inline int16_t Scalar::to() const { + return PD_ScalarGetInt16Data(raw_data()); +} + +template <> +inline int32_t Scalar::to() const { + return PD_ScalarGetInt32Data(raw_data()); +} + +template <> +inline int64_t Scalar::to() const { + return PD_ScalarGetInt64Data(raw_data()); +} + +class IntArray : WrapperBase { + public: + explicit IntArray(PD_IntArray* int_array) + : WrapperBase(int_array) {} + + size_t size() const { return PD_IntArrayGetElementCount(raw_data()); } + + std::vector GetData() const { + auto list = PD_IntArrayGetDataPointer(raw_data()); + auto data = reinterpret_cast(list.data); + std::vector ret(data, data + list.size); + return ret; + } +}; + +class Place : WrapperBase { + public: + explicit Place(PD_Place* place) : WrapperBase(place) {} + + bool is_host() { return PD_PlaceIsHost(raw_data()); } + + int8_t GetDeviceID() { return PD_PlaceGetDeviceId(raw_data()); } +}; + +class TensorArgDef : WrapperBase { + public: + explicit TensorArgDef(PD_TensorArgDef* tensor_arg_def) + : WrapperBase(tensor_arg_def) {} + + // TensorArgDef& SetBackend() { + // return *this; + // } + + TensorArgDef& SetDataLayout(PD_DataLayout in_layout) { + C_Status status; + PD_TensorArgDefSetDataLayout(raw_data(), in_layout, &status); + PD_CHECK_STATUS(status); + return *this; + } + + TensorArgDef& SetDataType(PD_DataType in_dtype) { + C_Status status; + PD_TensorArgDefSetDataType(raw_data(), in_dtype, &status); + PD_CHECK_STATUS(status); + return *this; + } +}; + +class KernelArgsDef : WrapperBase { + public: + explicit KernelArgsDef(PD_KernelArgsDef* kernel_args_def) + : WrapperBase(kernel_args_def) {} + + std::vector input_defs() { + C_Status status; + auto list = PD_KernelArgsDefGetInputArgDefs(raw_data(), &status); + PD_CHECK_STATUS(status); + auto ptr = reinterpret_cast(list.data); + std::vector ret; + for (size_t i = 0; i < list.size; ++i) { + ret.emplace_back(ptr[i]); + } + PD_DeletePointerList(list); + return ret; + } + + std::vector output_defs() { + C_Status status; + auto list = PD_KernelArgsDefGetOutputArgDefs(raw_data(), &status); + PD_CHECK_STATUS(status); + auto ptr = reinterpret_cast(list.data); + std::vector ret; + for (size_t i = 0; i < list.size; ++i) { + ret.emplace_back(ptr[i]); + } + PD_DeletePointerList(list); + return ret; + } + + // std::vector + // attribute_defs() { + // } +}; + +class KernelKey : WrapperBase { 
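+  // Read-only view over the dispatch key of a registered kernel: exposes the
+  // data type and data layout it was registered for (the backend is carried
+  // by the registration itself and is not surfaced here, cf. the commented
+  // backend() accessor below).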
+ public:
+  explicit KernelKey(PD_KernelKey* kernel_key)
+      : WrapperBase<PD_KernelKey>(kernel_key) {}
+
+  // Backend backend() const { return backend_; }
+  PD_DataLayout layout() const {
+    PD_Status status;
+    auto layout_ = PD_KernelKeyGetLayout(raw_data(), &status);
+    PD_CHECK_STATUS(status);
+    return layout_;
+  }
+
+  PD_DataType dtype() const {
+    PD_Status status;
+    auto dtype_ = PD_KernelKeyGetDataType(raw_data(), &status);
+    PD_CHECK_STATUS(status);
+    return dtype_;
+  }
+};
+
+class Kernel : WrapperBase<PD_Kernel> {
+ public:
+  explicit Kernel(PD_Kernel* kernel) : WrapperBase<PD_Kernel>(kernel) {}
+
+  KernelArgsDef args_def() const {
+    C_Status status;
+    auto ptr = PD_KernelGetArgsDef(raw_data(), &status);
+    PD_CHECK_STATUS(status);
+    return KernelArgsDef(ptr);
+  }
+
+  TensorArgDef InputAt(size_t idx) { return args_def().input_defs()[idx]; }
+
+  TensorArgDef OutputAt(size_t idx) { return args_def().output_defs()[idx]; }
+};
+
+}  // namespace capi
+}  // namespace phi
+
+#endif
diff --git a/paddle/phi/capi/lib/CMakeLists.txt b/paddle/phi/capi/lib/CMakeLists.txt
new file mode 100644
index 0000000000000..de335bb668bdf
--- /dev/null
+++ b/paddle/phi/capi/lib/CMakeLists.txt
@@ -0,0 +1,44 @@
+cc_library(
+  phi_c_data_type
+  SRCS c_data_type.cc
+  DEPS dense_tensor)
+
+cc_library(
+  phi_c_device_context
+  SRCS c_device_context.cc
+  DEPS phi_context)
+
+cc_library(
+  phi_c_int_array
+  SRCS c_int_array.cc
+  DEPS int_array)
+
+cc_library(
+  phi_c_kernel_context
+  SRCS c_kernel_context.cc
+  DEPS kernel_context)
+
+cc_library(
+  phi_c_kernel_factory
+  SRCS c_kernel_factory.cc
+  DEPS kernel_factory)
+
+cc_library(
+  phi_c_kernel_registry
+  SRCS c_kernel_registry.cc
+  DEPS dense_tensor)
+
+cc_library(
+  phi_c_place
+  SRCS c_place.cc
+  DEPS phi_place)
+
+cc_library(
+  phi_c_scalar
+  SRCS c_scalar.cc
+  DEPS scalar)
+
+cc_library(
+  phi_c_tensor
+  SRCS c_tensor.cc
+  DEPS dense_tensor)
diff --git a/paddle/phi/capi/lib/c_data_type.cc b/paddle/phi/capi/lib/c_data_type.cc
new file mode 100644
index 0000000000000..547df06338f0f
--- /dev/null
+++ b/paddle/phi/capi/lib/c_data_type.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
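For orientation, a hedged sketch of how a device plugin would consume the headers above. The kernel body is illustrative, and the `CustomCPU` backend and `ANY` layout tokens are placeholders for whatever the plugin's build registers, not part of this diff:

#include "paddle/phi/capi/all.h"  // umbrella header assumed; adjust to the real include

template <typename T>
void AddKernel(const phi::Context& dev_ctx,
               const phi::DenseTensor& x,
               const phi::DenseTensor& y,
               phi::DenseTensor* out) {
  // phi::Context / phi::DenseTensor are the capi aliases from kernel_registry.h.
  T* out_data = dev_ctx.Alloc<T>(out);
  const T* x_data = x.data<T>();
  const T* y_data = y.data<T>();
  for (int64_t i = 0; i < x.numel(); ++i) {
    out_data[i] = x_data[i] + y_data[i];
  }
}

// Registers AddKernel for float and double. The macro ends with a function
// signature, so the trailing braces are the body of the per-kernel hook that
// receives (kernel_key, kernel) and may tweak arg defs, e.g.
// kernel->OutputAt(0).SetDataType(...).
PD_BUILD_PHI_KERNEL(add, CustomCPU, ANY, AddKernel, float, double) {}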
+ +#include "paddle/phi/capi/include/c_data_type.h" + +#include "paddle/phi/capi/include/common.h" + +void PD_DeletePointerList(PD_List list) { + auto data = reinterpret_cast(list.data); + if (data) delete[] data; +} + +void PD_DeleteUInt8List(PD_List list) { + auto data = reinterpret_cast(list.data); + if (data) delete[] data; +} + +void PD_DeleteInt64List(PD_List list) { + auto data = reinterpret_cast(list.data); + if (data) delete[] data; +} + +void PD_DeleteInt32List(PD_List list) { + auto data = reinterpret_cast(list.data); + delete[] data; +} + +void PD_DeleteFloat64List(PD_List list) { + auto data = reinterpret_cast(list.data); + if (data) delete[] data; +} + +void PD_DeleteFloat32List(PD_List list) { + auto data = reinterpret_cast(list.data); + if (data) delete[] data; +} + +PD_REGISTER_CAPI(data_type); diff --git a/paddle/phi/capi/lib/c_device_context.cc b/paddle/phi/capi/lib/c_device_context.cc new file mode 100644 index 0000000000000..96b46fbc0d4ff --- /dev/null +++ b/paddle/phi/capi/lib/c_device_context.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/capi/include/c_device_context.h" + +#include "paddle/phi/backends/all_context.h" +#include "paddle/phi/capi/include/common.h" +#include "paddle/phi/capi/include/type_utils.h" +#include "paddle/phi/core/dense_tensor.h" + +PD_Stream PD_DeviceContextGetStream(const PD_DeviceContext* ctx, + PD_Status* status) { + if (status) { + if (!ctx) { + *status = C_FAILED; + return nullptr; + } + *status = C_SUCCESS; + } + auto dev_ctx_type = + reinterpret_cast(ctx)->GetPlace().GetType(); + if (dev_ctx_type == phi::AllocationType::CUSTOM) { + return reinterpret_cast( + reinterpret_cast(ctx)->stream()); + } else if (dev_ctx_type == phi::AllocationType::CPU) { + return nullptr; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (dev_ctx_type == phi::AllocationType::GPU) { + return reinterpret_cast( + reinterpret_cast(ctx)->stream()); +#endif +#ifdef PADDLE_WITH_XPU + } else if (dev_ctx_type == phi::AllocationType::XPU) { + return nullptr; +#endif + } else { + PADDLE_THROW(phi::errors::Unavailable( + "Only support Custom/CPU/GPU/XPU DeviceContext")); + } +} + +void* PD_DeviceContextAllocateTensor(const PD_DeviceContext* ctx, + PD_Tensor* tensor, + size_t size, + PD_DataType dtype, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return nullptr; + } + *status = C_SUCCESS; + } + + auto dev_ctx = reinterpret_cast(ctx); + auto cc_tensor = reinterpret_cast(tensor); + auto phi_dtype = phi::capi::ToPhiDataType(dtype); + if (ctx) { + return dev_ctx->Alloc(cc_tensor, phi_dtype, size); + } else { + auto place = phi::CPUPlace(); + return cc_tensor->mutable_data(place, phi_dtype, size); + } +} + +PD_REGISTER_CAPI(device_context); diff --git a/paddle/phi/capi/lib/c_int_array.cc b/paddle/phi/capi/lib/c_int_array.cc new file mode 100644 index 0000000000000..7562700372c3b --- /dev/null +++ 
b/paddle/phi/capi/lib/c_int_array.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/capi/include/c_int_array.h" + +#include "paddle/phi/capi/include/common.h" +#include "paddle/phi/common/int_array.h" + +PD_List PD_IntArrayGetDataPointer(PD_IntArray* int_array) { + auto cc_int_array = reinterpret_cast(int_array); + const auto& data = cc_int_array->GetData(); + PD_List list; + list.size = data.size(); + list.data = const_cast(data.data()); + return list; +} + +size_t PD_IntArrayGetSize(PD_IntArray* int_array) { + auto cc_int_array = reinterpret_cast(int_array); + return cc_int_array->size(); +} + +PD_REGISTER_CAPI(int_array); diff --git a/paddle/phi/capi/lib/c_kernel_context.cc b/paddle/phi/capi/lib/c_kernel_context.cc new file mode 100644 index 0000000000000..2e14b019c19ff --- /dev/null +++ b/paddle/phi/capi/lib/c_kernel_context.cc @@ -0,0 +1,223 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
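A note on the PD_List ownership contract that these implementation files establish (the `ctx` handle below is hypothetical):

// Owned: pointer lists returned by the list-attribute accessors are heap
// copies, so the header-side wrappers free them after use (see PD_AttrAt in
// kernel_registry.h):
PD_List scalars = PD_KernelContextListScalarAttrAt(ctx, /*index=*/0);
// ... read scalars.data as PD_Scalar ** ...
PD_DeletePointerList(scalars);

// Borrowed: PD_IntArrayGetDataPointer const_casts the IntArray's own storage,
// so its .data must not be freed by the caller.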
+ +#include "paddle/phi/capi/include/c_kernel_context.h" + +#include "paddle/phi/backends/all_context.h" +#include "paddle/phi/capi/include/common.h" +#include "paddle/phi/capi/include/type_utils.h" +#include "paddle/phi/core/kernel_context.h" + +PD_DeviceContext* PD_KernelContextGetDeviceContext(PD_KernelContext* ctx) { + auto kernel_context = reinterpret_cast(ctx); + auto dev_ctx_type = kernel_context->GetDeviceContext() + .GetPlace() + .GetType(); + if (dev_ctx_type == phi::AllocationType::CUSTOM) { + return reinterpret_cast(const_cast( + &kernel_context->GetDeviceContext())); + } else if (dev_ctx_type == phi::AllocationType::CPU) { + return reinterpret_cast(const_cast( + &kernel_context->GetDeviceContext())); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (dev_ctx_type == phi::AllocationType::GPU) { + return reinterpret_cast(const_cast( + &kernel_context->GetDeviceContext())); +#endif +#ifdef PADDLE_WITH_XPU + } else if (dev_ctx_type == phi::AllocationType::XPU) { + return reinterpret_cast(const_cast( + &kernel_context->GetDeviceContext())); +#endif + } else { + PADDLE_THROW(phi::errors::Unavailable( + "Only support Custom/CPU/GPU/XPU DeviceContext")); + } +} + +PD_Tensor* PD_KernelContextInputAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + const std::pair& range = kernel_context->InputRangeAt(index); + return reinterpret_cast(const_cast( + &kernel_context->InputAt(range.first))); +} + +PD_List PD_KernelContextMultiInputAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + const std::pair& range = kernel_context->InputRangeAt(index); + auto tensor_vec = kernel_context->InputsBetween( + range.first, range.second); + PD_List list; + list.size = tensor_vec.size(); + list.data = tensor_vec.data(); + return list; +} + +PD_Tensor* PD_KernelContextOutputAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + const std::pair& range = kernel_context->OutputRangeAt(index); + return reinterpret_cast( + kernel_context->MutableOutputAt(range.first)); +} + +PD_List PD_KernelContextMultiOutputAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + const std::pair& range = kernel_context->OutputRangeAt(index); + auto tensor_vec = kernel_context->MutableOutputBetween( + range.first, range.second); + PD_List list; + list.size = tensor_vec.size(); + list.data = tensor_vec.data(); + return list; +} + +bool PD_KernelContextBoolAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return kernel_context->AttrAt(index); +} + +int32_t PD_KernelContextInt32AttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return kernel_context->AttrAt(index); +} + +int64_t PD_KernelContextInt64AttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return kernel_context->AttrAt(index); +} + +float PD_KernelContextFloatAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return kernel_context->AttrAt(index); +} + +double PD_KernelContextDoubleAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return kernel_context->AttrAt(index); +} + +PD_Scalar* PD_KernelContextScalarAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return reinterpret_cast( + const_cast(&kernel_context->AttrAt(index))); +} + +PD_IntArray* 
PD_KernelContextIntArrayAttrAt(PD_KernelContext* ctx, + size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return reinterpret_cast(const_cast( + &kernel_context->AttrAt(index))); +} + +PD_List PD_KernelContextListBoolAttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + auto data = reinterpret_cast(new uint8_t[cc_list.size()]); + for (size_t i = 0; i < cc_list.size(); ++i) { + data[i] = static_cast(cc_list[i]); + } + list.data = data; + return list; +} + +PD_List PD_KernelContextListInt32AttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_KernelContextListInt64AttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_KernelContextListFloatAttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_KernelContextListDoubleAttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +char* PD_KernelContextStringAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return const_cast(kernel_context->AttrAt(index).data()); +} + +PD_List PD_KernelContextListStringAttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + auto data = new char*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + data[i] = const_cast(cc_list[i].data()); + } + list.data = reinterpret_cast(data); + return list; +} + +PD_List PD_KernelContextListScalarAttrAt(PD_KernelContext* ctx, size_t index) { + PD_List list; + auto kernel_context = reinterpret_cast(ctx); + const auto& cc_list = kernel_context->AttrAt>(index); + list.size = cc_list.size(); + auto data = new PD_Scalar*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + data[i] = + const_cast(reinterpret_cast(&cc_list[i])); + } + list.data = data; + return list; +} + +PD_Place* PD_KernelContextPlaceAttrAt(PD_KernelContext* ctx, size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return reinterpret_cast( + const_cast(&kernel_context->AttrAt(index))); +} + +PD_DataType PD_KernelContextDataTypeAttrAt(PD_KernelContext* ctx, + size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return phi::capi::ToPDDataType(kernel_context->AttrAt(index)); +} + +PD_DataLayout PD_KernelContextDataLayoutAttrAt(PD_KernelContext* ctx, + size_t index) { + auto kernel_context = reinterpret_cast(ctx); + return phi::capi::ToPDDataLayout( + kernel_context->AttrAt(index)); +} + +PD_REGISTER_CAPI(kernel_context); diff --git a/paddle/phi/capi/lib/c_kernel_factory.cc b/paddle/phi/capi/lib/c_kernel_factory.cc new file 
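Note: attribute accessors take the attribute's position in the registration order, and only the bool-list accessor copies into a fresh heap buffer that the caller must release. A sketch under those assumptions (indices and names illustrative):

```cpp
// Hypothetical sketch: read a few attributes inside a plugin kernel.
#include "paddle/phi/capi/include/c_data_type.h"
#include "paddle/phi/capi/include/c_kernel_context.h"

void ReadAttrs(PD_KernelContext* ctx) {
  bool flag = PD_KernelContextBoolAttrAt(ctx, 0);   // attr #0: bool
  float eps = PD_KernelContextFloatAttrAt(ctx, 1);  // attr #1: float
  // attr #2: list<bool>, copied into a new uint8_t[] by the C API above,
  // so it must be released with the matching deleter.
  PD_List flags = PD_KernelContextListBoolAttrAt(ctx, 2);
  PD_DeleteUInt8List(flags);
  (void)flag;
  (void)eps;
}
```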
diff --git a/paddle/phi/capi/lib/c_kernel_factory.cc b/paddle/phi/capi/lib/c_kernel_factory.cc
new file mode 100644
index 0000000000000..8bf94467b472a
--- /dev/null
+++ b/paddle/phi/capi/lib/c_kernel_factory.cc
@@ -0,0 +1,150 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/capi/include/c_kernel_factory.h"
+
+#include "paddle/phi/capi/include/common.h"
+#include "paddle/phi/capi/include/type_utils.h"
+#include "paddle/phi/core/kernel_factory.h"
+
+/**
+ * TensorArgDef
+ */
+
+void PD_TensorArgDefSetDataLayout(PD_TensorArgDef* def,
+                                  PD_DataLayout layout,
+                                  PD_Status* status) {
+  if (status) {
+    if (!def) {
+      *status = C_FAILED;
+      return;
+    }
+    *status = C_SUCCESS;
+  }
+
+  auto cc_def = reinterpret_cast<phi::TensorArgDef*>(def);
+  cc_def->SetDataLayout(phi::capi::ToPhiDataLayout(layout));
+}
+
+void PD_TensorArgDefSetDataType(PD_TensorArgDef* def,
+                                PD_DataType dtype,
+                                PD_Status* status) {
+  if (status) {
+    if (!def) {
+      *status = C_FAILED;
+      return;
+    }
+    *status = C_SUCCESS;
+  }
+
+  auto cc_def = reinterpret_cast<phi::TensorArgDef*>(def);
+  cc_def->SetDataType(phi::capi::ToPhiDataType(dtype));
+}
+
+/**
+ * KernelArgsDef
+ */
+
+PD_List PD_KernelArgsDefGetInputArgDefs(PD_KernelArgsDef* def,
+                                        PD_Status* status) {
+  PD_List list;
+  if (status) {
+    if (!def) {
+      *status = C_FAILED;
+      list.size = 0;
+      list.data = nullptr;
+      return list;
+    }
+    *status = C_SUCCESS;
+  }
+  auto cc_def = reinterpret_cast<phi::KernelArgsDef*>(def);
+  auto& arg_defs = cc_def->input_defs();
+  list.size = arg_defs.size();
+  auto ptr = new PD_TensorArgDef*[list.size];
+  list.data = ptr;
+  for (size_t i = 0; i < list.size; ++i) {
+    ptr[i] = reinterpret_cast<PD_TensorArgDef*>(&arg_defs[i]);
+  }
+  return list;
+}
+
+PD_List PD_KernelArgsDefGetOutputArgDefs(PD_KernelArgsDef* def,
+                                         PD_Status* status) {
+  PD_List list;
+  if (status) {
+    if (!def) {
+      *status = C_FAILED;
+      list.size = 0;
+      list.data = nullptr;
+      return list;
+    }
+    *status = C_SUCCESS;
+  }
+  auto cc_def = reinterpret_cast<phi::KernelArgsDef*>(def);
+  auto& arg_defs = cc_def->output_defs();
+  list.size = arg_defs.size();
+  auto ptr = new PD_TensorArgDef*[list.size];
+  list.data = ptr;
+  for (size_t i = 0; i < list.size; ++i) {
+    ptr[i] = reinterpret_cast<PD_TensorArgDef*>(&arg_defs[i]);
+  }
+  return list;
+}
+
+/**
+ * KernelKey
+ */
+
+PD_DataLayout PD_KernelKeyGetLayout(PD_KernelKey* key, PD_Status* status) {
+  if (status) {
+    if (!key) {
+      *status = C_FAILED;
+      return PD_DataLayout::ALL_LAYOUT;
+    }
+    *status = C_SUCCESS;
+  }
+  auto cc_key = reinterpret_cast<phi::KernelKey*>(key);
+  return phi::capi::ToPDDataLayout(cc_key->layout());
+}
+
+PD_DataType PD_KernelKeyGetDataType(PD_KernelKey* key, PD_Status* status) {
+  if (status) {
+    if (!key) {
+      *status = C_FAILED;
+      return PD_DataType::UNDEFINED;
+    }
+    *status = C_SUCCESS;
+  }
+  auto cc_key = reinterpret_cast<phi::KernelKey*>(key);
+  return phi::capi::ToPDDataType(cc_key->dtype());
+}
+
+/**
+ * Kernel
+ */
+
+PD_KernelArgsDef* PD_KernelGetArgsDef(PD_Kernel* kernel, PD_Status* status) {
+  if (status) {
+    if (!kernel) {
+      *status = C_FAILED;
+      return nullptr;
+    }
+    *status = C_SUCCESS;
+  }
+  auto cc_kernel = reinterpret_cast<phi::Kernel*>(kernel);
+  return reinterpret_cast<PD_KernelArgsDef*>(
+      const_cast<phi::KernelArgsDef*>(&cc_kernel->args_def()));
+}
+
+PD_REGISTER_CAPI(kernel_factory);
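Note: the arg-def lists returned here are freshly allocated arrays of pointers, so the list itself (not the defs it points at) must be freed with `PD_DeletePointerList`. A sketch of an args-def callback with the signature expected by `PD_RegisterPhiKernel` in the next file; the forced dtype and the `PD_DataType::FLOAT32` enumerator are assumptions:

```cpp
// Hypothetical sketch: an args-def callback that pins input 0 to float32.
#include "paddle/phi/capi/include/c_data_type.h"
#include "paddle/phi/capi/include/c_kernel_factory.h"

void MyArgsDef(const PD_KernelKey* key, PD_Kernel* kernel) {
  PD_Status status = C_SUCCESS;
  PD_KernelArgsDef* def = PD_KernelGetArgsDef(kernel, &status);
  if (status != C_SUCCESS) return;
  PD_List inputs = PD_KernelArgsDefGetInputArgDefs(def, &status);
  auto defs = static_cast<PD_TensorArgDef**>(inputs.data);
  if (status == C_SUCCESS && inputs.size > 0) {
    PD_TensorArgDefSetDataType(defs[0], PD_DataType::FLOAT32, &status);
  }
  PD_DeletePointerList(inputs);  // only the pointer array is owned here
}
```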
diff --git a/paddle/phi/capi/lib/c_kernel_registry.cc b/paddle/phi/capi/lib/c_kernel_registry.cc
new file mode 100644
index 0000000000000..6cf6208856bfa
--- /dev/null
+++ b/paddle/phi/capi/lib/c_kernel_registry.cc
@@ -0,0 +1,174 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/capi/include/c_kernel_registry.h"
+
+#include "paddle/phi/capi/include/common.h"
+#include "paddle/phi/capi/include/type_utils.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+void PD_KernelArgsParseFn(const phi::KernelKey& default_key,
+                          phi::KernelArgsDef* args_def,
+                          size_t in_nargs,
+                          PD_KernelArgumentType* in_args_type,
+                          size_t attr_nargs,
+                          PD_KernelArgumentType* attr_args_type,
+                          size_t out_nargs,
+                          PD_KernelArgumentType* out_args_type) {
+  auto default_tensor_layout = phi::DataLayout::NCHW;
+  if (default_key.layout() != phi::DataLayout::ANY) {
+    default_tensor_layout = default_key.layout();
+  }
+  // inputs
+  for (size_t i = 0; i < in_nargs; ++i) {
+    auto arg_type = in_args_type[i];
+    if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_CONTEXT) {
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_TENSOR) {
+      args_def->AppendInput(default_key.backend(),
+                            default_tensor_layout,
+                            default_key.dtype(),
+                            std::type_index(typeid(const phi::DenseTensor&)));
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_OPTIONAL_TENSOR) {
+      args_def->AppendInput(
+          default_key.backend(),
+          default_tensor_layout,
+          default_key.dtype(),
+          std::type_index(typeid(const paddle::optional<phi::DenseTensor>&)));
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_TENSOR) {
+      args_def->AppendInput(
+          default_key.backend(),
+          default_tensor_layout,
+          default_key.dtype(),
+          std::type_index(typeid(const std::vector<const phi::DenseTensor*>&)));
+    } else if (arg_type ==
+               PD_KernelArgumentType::PD_ARG_TYPE_OPTIONAL_MULTI_TENSOR) {
+      args_def->AppendInput(
+          default_key.backend(),
+          default_tensor_layout,
+          default_key.dtype(),
+          std::type_index(typeid(
+              const paddle::optional<std::vector<const phi::DenseTensor*>>&)));
+    } else {
+      PADDLE_THROW(phi::errors::Unavailable(
+          "PD_KernelArgumentType %d is not supported.", arg_type));
+    }
+  }
+  // attributes
+  for (size_t i = 0; i < attr_nargs; ++i) {
+    auto arg_type = attr_args_type[i];
+    if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_BOOL) {
+      args_def->AppendAttribute(phi::AttributeType::BOOL);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_FLOAT32) {
+      args_def->AppendAttribute(phi::AttributeType::FLOAT32);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_FLOAT64) {
+      args_def->AppendAttribute(phi::AttributeType::FLOAT64);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_INT32) {
+      args_def->AppendAttribute(phi::AttributeType::INT32);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_INT64) {
+      args_def->AppendAttribute(phi::AttributeType::INT64);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_STRING) {
+      args_def->AppendAttribute(phi::AttributeType::STRING);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_SCALAR) {
+      args_def->AppendAttribute(phi::AttributeType::SCALAR);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_INT_ARRAY) {
+      args_def->AppendAttribute(phi::AttributeType::INT_ARRAY);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_DATA_TYPE) {
+      args_def->AppendAttribute(phi::AttributeType::DATA_TYPE);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_DATA_LAYOUT) {
+      args_def->AppendAttribute(phi::AttributeType::DATA_LAYOUT);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_PLACE) {
+      args_def->AppendAttribute(phi::AttributeType::PLACE);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_BOOL) {
+      args_def->AppendAttribute(phi::AttributeType::BOOLS);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT32) {
+      args_def->AppendAttribute(phi::AttributeType::INT32S);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_INT64) {
+      args_def->AppendAttribute(phi::AttributeType::INT64S);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT32) {
+      args_def->AppendAttribute(phi::AttributeType::FLOAT32S);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_FLOAT64) {
+      args_def->AppendAttribute(phi::AttributeType::FLOAT64S);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_STRING) {
+      args_def->AppendAttribute(phi::AttributeType::STRINGS);
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_SCALAR) {
+      args_def->AppendAttribute(phi::AttributeType::SCALARS);
+    } else {
+      PADDLE_THROW(phi::errors::Unavailable(
+          "PD_KernelArgumentType %d is not supported.", arg_type));
+    }
+  }
+  // outputs
+  for (size_t i = 0; i < out_nargs; ++i) {
+    auto arg_type = out_args_type[i];
+    if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_TENSOR) {
+      args_def->AppendOutput(default_key.backend(),
+                             default_tensor_layout,
+                             default_key.dtype(),
+                             std::type_index(typeid(phi::DenseTensor*)));
+    } else if (arg_type == PD_KernelArgumentType::PD_ARG_TYPE_LIST_TENSOR) {
+      args_def->AppendOutput(
+          default_key.backend(),
+          default_tensor_layout,
+          default_key.dtype(),
+          std::type_index(typeid(std::vector<phi::DenseTensor*>)));
+    } else {
+      PADDLE_THROW(phi::errors::Unavailable(
+          "PD_KernelArgumentType %d is not supported.", arg_type));
+    }
+  }
+}
+
+void PD_RegisterPhiKernel(const char* kernel_name_cstr,
+                          const char* backend_cstr,
+                          PD_DataType pd_dtype,
+                          PD_DataLayout pd_layout,
+                          size_t in_nargs,
+                          PD_KernelArgumentType* in_args_type,
+                          size_t attr_nargs,
+                          PD_KernelArgumentType* attr_args_type,
+                          size_t out_nargs,
+                          PD_KernelArgumentType* out_args_type,
+                          void (*args_def_fn)(const PD_KernelKey*, PD_Kernel*),
+                          void (*fn)(PD_KernelContext*),
+                          void* variadic_kernel_fn) {
+  auto args_def_fn_wrapper = [args_def_fn](const phi::KernelKey& kernel_key,
+                                           phi::Kernel* kernel) {
+    args_def_fn(reinterpret_cast<const PD_KernelKey*>(&kernel_key),
+                reinterpret_cast<PD_Kernel*>(kernel));
+  };
+  phi::KernelFn kernel_fn = [fn](phi::KernelContext* ctx) {
+    fn(reinterpret_cast<PD_KernelContext*>(ctx));
+  };
+  std::string kernel_name(kernel_name_cstr);
+
+  auto dtype = phi::capi::ToPhiDataType(pd_dtype);
+  auto layout = phi::capi::ToPhiDataLayout(pd_layout);
+  phi::KernelKey kernel_key(
+      paddle::experimental::StringToBackend(backend_cstr), layout, dtype);
+
+  phi::Kernel kernel(kernel_fn, variadic_kernel_fn);
+  PD_KernelArgsParseFn(kernel_key,
+                       kernel.mutable_args_def(),
+                       in_nargs,
+                       in_args_type,
+                       attr_nargs,
+                       attr_args_type,
+                       out_nargs,
+                       out_args_type);
+
+  args_def_fn_wrapper(kernel_key, &kernel);
+  phi::KernelFactory::Instance().kernels()[kernel_name][kernel_key] = kernel;
+}
+
+PD_REGISTER_CAPI(kernel_registry);
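Note: plugins normally reach `PD_RegisterPhiKernel` through a registration macro rather than calling it directly; the sketch below shows roughly what such a call reduces to for a one-in/one-out kernel. The backend string, enumerator names (`PD_DataType::FLOAT32`, `PD_DataLayout::ALL_LAYOUT`), and function names are assumptions for illustration:

```cpp
// Hypothetical sketch: manual registration of a single-tensor kernel.
#include "paddle/phi/capi/include/c_kernel_registry.h"

void MyKernel(PD_KernelContext* ctx);                    // kernel body
void MyArgsDef(const PD_KernelKey* key, PD_Kernel* k);   // see sketch above

void RegisterMyKernel() {
  PD_KernelArgumentType ins[] = {PD_KernelArgumentType::PD_ARG_TYPE_TENSOR};
  PD_KernelArgumentType outs[] = {PD_KernelArgumentType::PD_ARG_TYPE_TENSOR};
  PD_RegisterPhiKernel("my_op", "CPU",
                       PD_DataType::FLOAT32, PD_DataLayout::ALL_LAYOUT,
                       /*in_nargs=*/1, ins,
                       /*attr_nargs=*/0, nullptr,
                       /*out_nargs=*/1, outs,
                       MyArgsDef, MyKernel,
                       /*variadic_kernel_fn=*/nullptr);
}
```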
diff --git a/paddle/phi/capi/lib/c_place.cc b/paddle/phi/capi/lib/c_place.cc
new file mode 100644
index 0000000000000..cccccbbb259f3
--- /dev/null
+++ b/paddle/phi/capi/lib/c_place.cc
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/capi/include/c_place.h"
+
+#include "paddle/phi/capi/include/common.h"
+#include "paddle/phi/common/place.h"
+
+bool PD_PlaceIsHost(PD_Place* place) {
+  auto cc_place = reinterpret_cast<phi::Place*>(place);
+  return cc_place->GetType() == phi::AllocationType::CPU;
+}
+
+int8_t PD_PlaceGetDeviceId(PD_Place* place) {
+  auto cc_place = reinterpret_cast<phi::Place*>(place);
+  return cc_place->GetDeviceId();
+}
+
+PD_REGISTER_CAPI(place);
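Note: a tiny sketch of how these two helpers compose with the kernel-context Place accessor above (function name illustrative):

```cpp
// Hypothetical sketch: branch on a Place attribute inside a plugin kernel.
#include "paddle/phi/capi/include/c_kernel_context.h"
#include "paddle/phi/capi/include/c_place.h"

bool AttrPlaceIsHost(PD_KernelContext* ctx, size_t attr_index) {
  PD_Place* place = PD_KernelContextPlaceAttrAt(ctx, attr_index);
  return PD_PlaceIsHost(place);  // CPU allocation type counts as host
}
```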
+ +#include "paddle/phi/capi/include/c_scalar.h" + +#include "paddle/phi/capi/include/common.h" +#include "paddle/phi/capi/include/type_utils.h" +#include "paddle/phi/common/scalar.h" + +PD_DataType PD_ScalarGetType(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return phi::capi::ToPDDataType(cc_scalar->dtype()); +} + +bool PD_ScalarGetBoolData(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +int8_t PD_ScalarGetInt8Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +int16_t PD_ScalarGetInt16Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +int32_t PD_ScalarGetInt32Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +int64_t PD_ScalarGetInt64Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +uint8_t PD_ScalarGetUInt8Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +uint16_t PD_ScalarGetUInt16Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +uint32_t PD_ScalarGetUInt32Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +uint64_t PD_ScalarGetUInt64Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +float PD_ScalarGetFloat32Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +double PD_ScalarGetFloat64Data(PD_Scalar* scalar) { + auto cc_scalar = reinterpret_cast(scalar); + return cc_scalar->to(); +} + +PD_REGISTER_CAPI(scalar); diff --git a/paddle/phi/capi/lib/c_tensor.cc b/paddle/phi/capi/lib/c_tensor.cc new file mode 100644 index 0000000000000..cd0bbd62d88a0 --- /dev/null +++ b/paddle/phi/capi/lib/c_tensor.cc @@ -0,0 +1,302 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/capi/include/c_tensor.h" + +#include "paddle/phi/capi/include/common.h" +#include "paddle/phi/capi/include/type_utils.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/meta_tensor.h" + +PD_DataType PD_TensorGetDataType(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return PD_DataType::UNDEFINED; + } + *status = C_SUCCESS; + } + auto cc_tensor = reinterpret_cast(tensor); + return phi::capi::ToPDDataType(cc_tensor->dtype()); +} + +PD_DataLayout PD_TensorGetDataLayout(const PD_Tensor* tensor, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return PD_DataLayout::ALL_LAYOUT; + } + *status = C_SUCCESS; + } + auto cc_tensor = reinterpret_cast(tensor); + return phi::capi::ToPDDataLayout(cc_tensor->layout()); +} + +int64_t PD_TensorGetByteSize(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return 0; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->memory_size(); +} + +void* PD_TensorGetDataPointer(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return nullptr; + } + *status = C_SUCCESS; + } + auto cc_tensor = reinterpret_cast(tensor); + return const_cast(cc_tensor->data()); +} + +int64_t PD_TensorGetElementCount(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return 0; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->numel(); +} + +int64_t PD_TensorGetNumDims(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return 0; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->dims().size(); +} + +int64_t PD_TensorGetDim(const PD_Tensor* tensor, + size_t index, + PD_Status* status) { + auto cc_tensor = reinterpret_cast(tensor); + + if (status) { + if (!tensor || index >= static_cast(cc_tensor->dims().size())) { + *status = C_FAILED; + return 0; + } + *status = C_SUCCESS; + } + + return cc_tensor->dims()[index]; +} + +void PD_TensorGetLoD(const PD_Tensor* tensor, + PD_List* data, + PD_List* offset, + PD_Status* status) { + auto cc_tensor = reinterpret_cast(tensor); + + if (status) { + if (!tensor || !data || !offset) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + auto lod = cc_tensor->lod(); + offset->size = lod.size() + 1; + auto offset_data = new size_t[offset->size]; + offset->data = offset_data; + offset_data[0] = 0; + + size_t sz = 0; + for (size_t i = 0; i < lod.size(); ++i) { + offset_data[i + 1] = lod[i].size() + offset_data[i]; + sz += lod[i].size(); + } + + auto data_ptr = new size_t[sz]; + data->data = data_ptr; + data->size = sz; + for (size_t i = 0; i < lod.size(); ++i) { + memcpy(data_ptr, lod[i].data(), lod[i].size() * sizeof(size_t)); + data_ptr += lod[i].size(); + } +} + +bool PD_TensorIsInitialized(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return false; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->initialized(); +} + +bool PD_TensorIsValid(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return false; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->valid(); +} + 
+void* PD_TensorGetHolder(const PD_Tensor* tensor, PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return nullptr; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + return cc_tensor->Holder().get(); +} + +void PD_TensorSetDims(PD_Tensor* tensor, + int64_t ndims, + const int64_t* dims, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + auto cc_tensor = reinterpret_cast(tensor); + std::vector shape(dims, dims + ndims); + cc_tensor->Resize(phi::make_ddim(shape)); +} + +void PD_TensorSetDataType(PD_Tensor* tensor, + PD_DataType dtype, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + cc_tensor->set_type(phi::capi::ToPhiDataType(dtype)); +} + +void PD_TensorSetDataLayout(PD_Tensor* tensor, + PD_DataLayout layout, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + auto cc_tensor = reinterpret_cast(tensor); + cc_tensor->set_layout(phi::capi::ToPhiDataLayout(layout)); +} + +void PD_TensorResetLoD(PD_Tensor* tensor, + PD_List data, + PD_List offset, + PD_Status* status) { + if (status) { + if (!tensor) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + phi::LoD lod; + auto offset_ptr = static_cast(offset.data); + auto data_ptr = static_cast(data.data); + + for (size_t i = 0; i < offset.size - 1; ++i) { + lod.emplace_back(data_ptr + offset_ptr[i], data_ptr + offset_ptr[i + 1]); + } + auto cc_tensor = reinterpret_cast(tensor); + cc_tensor->ResetLoD(lod); +} + +PD_Tensor* PD_NewTensor() { + return reinterpret_cast(new phi::DenseTensor()); +} + +void PD_DeleteTensor(PD_Tensor* tensor) { + auto cc_tensor = reinterpret_cast(tensor); + delete cc_tensor; +} + +void PD_TensorShareDataWith(PD_Tensor* dst, + const PD_Tensor* src, + PD_Status* status) { + if (status) { + if (!dst || !src) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + auto cc_dst_tensor = reinterpret_cast(dst); + auto cc_src_tensor = reinterpret_cast(src); + cc_dst_tensor->ShareDataWith(*cc_src_tensor); +} + +void PD_TensorShareLoDWith(PD_Tensor* dst, + const PD_Tensor* src, + PD_Status* status) { + if (status) { + if (!dst || !src) { + *status = C_FAILED; + return; + } + *status = C_SUCCESS; + } + + auto cc_dst_tensor = reinterpret_cast(dst); + auto cc_src_tensor = const_cast( + reinterpret_cast(src)); + + phi::MetaTensor meta_dst(cc_dst_tensor); + const phi::MetaTensor meta_src(cc_src_tensor); + meta_dst.share_lod(meta_src); +} + +PD_REGISTER_CAPI(tensor); diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h index daaf88a23950f..3e68462c88a5c 100644 --- a/paddle/phi/kernels/funcs/elementwise_base.h +++ b/paddle/phi/kernels/funcs/elementwise_base.h @@ -768,7 +768,7 @@ __global__ void VectorizedElementwiseKernel( ins, outs, data_offset, read_lens * BLOCK_NUM_X, read_lens, func); } - int remain = numel - data_offset; + kps::IndexType remain = numel - data_offset; if (remain > 0) { VectorizedElementwiseKernelImpl( - ins, outs, data_offset, remain, read_lens, func); + ins, outs, data_offset, static_cast(remain), read_lens, func); } } diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc index 25f222546656f..033c50e537da6 100644 --- a/paddle/phi/kernels/funcs/math_function.cc +++ 
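Note: the `elementwise_base.h` change above widens `remain` because `numel - data_offset` can exceed `INT_MAX` on large tensors, and a 32-bit intermediate silently wraps before the `remain > 0` check. A standalone illustration of the failure mode (not Paddle code):

```cpp
// Why the widened index type matters: with > 2^31 elements, narrowing the
// difference to 32 bits wraps to a wrong (possibly negative) value.
#include <cstdint>
#include <iostream>

int main() {
  int64_t numel = 3'000'000'000;  // more elements than INT32_MAX
  int64_t data_offset = 0;
  int32_t remain32 = static_cast<int32_t>(numel - data_offset);  // wraps
  int64_t remain64 = numel - data_offset;                        // correct
  std::cout << remain32 << " vs " << remain64 << "\n";
  return 0;
}
```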
diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc
index 25f222546656f..033c50e537da6 100644
--- a/paddle/phi/kernels/funcs/math_function.cc
+++ b/paddle/phi/kernels/funcs/math_function.cc
@@ -277,6 +277,12 @@ void set_constant(const paddle::platform::DeviceContext& context,
                   paddle::framework::Tensor* tensor,
                   float value) {
   TensorSetConstantWithPlace func(context, tensor, value);
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  if (paddle::platform::is_custom_place(context.GetPlace())) {
+    func(phi::CPUPlace());
+    return;
+  }
+#endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // tensor->place().apply_visitor(func);
   paddle::platform::VisitPlace(tensor->place(), func);
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 9e4aac55f5d2d..181e9d92f0166 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -3150,11 +3150,11 @@ function collect_ccache_hits() {
 
 function test_op_benchmark() {
     # The PR will pass quickly when get approval from specific person.
-    # Xreki 12538138, luotao1 6836917, ZzSean 32410583
+    # Xreki 12538138, luotao1 6836917, ZzSean 32410583, JamesLim-sy 61349199
    set +x
    approval_line=$(curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000)
    if [ "${approval_line}" != "" ]; then
-        APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917)
+        APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917 61349199)
        echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
        if [ "${APPROVALS}" == "TRUE" ]; then
            echo "==================================="
diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
index 028fd57229e56..60dfde6b45c37 100644
--- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
+++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
@@ -24,6 +24,7 @@
 import os
 import copy
 import numpy as np
+import tempfile
 from paddle.static.amp import decorate
 
 paddle.enable_static()
@@ -272,18 +273,25 @@ def infer(use_cuda, save_dirname=None):
                           clip_extra=True)
 
 
-def main(net_type, use_cuda, is_local=True):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
+class TestImageClassification(unittest.TestCase):
 
-    # Directory for saving the trained model
-    save_dirname = "image_classification_" + net_type + ".inference.model"
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
 
-    train(net_type, use_cuda, save_dirname, is_local)
-    #infer(use_cuda, save_dirname)
+    def tearDown(self):
+        self.temp_dir.cleanup()
 
+    def main(self, net_type, use_cuda, is_local=True):
+        if use_cuda and not fluid.core.is_compiled_with_cuda():
+            return
 
-class TestImageClassification(unittest.TestCase):
+        # Directory for saving the trained model
+        save_dirname = os.path.join(
+            self.temp_dir.name,
+            "image_classification_" + net_type + ".inference.model")
+
+        train(net_type, use_cuda, save_dirname, is_local)
+        #infer(use_cuda, save_dirname)
 
     def test_amp_lists(self):
         white_list = copy.copy(
@@ -413,11 +421,11 @@ def test_amp_lists_7(self):
 
     def test_vgg_cuda(self):
         with self.scope_prog_guard():
-            main('vgg', use_cuda=True)
+            self.main('vgg', use_cuda=True)
 
     def test_resnet_cuda(self):
         with self.scope_prog_guard():
-            main('resnet', use_cuda=True)
+            self.main('resnet', use_cuda=True)
 
     @contextlib.contextmanager
     def scope_prog_guard(self):
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index 668373838c0b0..71ba7f0c79ec9 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -25,6 +25,7 @@
 import sys
 import os
 import struct
+import tempfile
 
 paddle.enable_static()
 
@@ -192,11 +193,13 @@ def main(use_cuda, is_local=True, use_bf16=False, pure_bf16=False):
     if use_bf16 and not fluid.core.is_compiled_with_mkldnn():
         return
 
+    temp_dir = tempfile.TemporaryDirectory()
     # Directory for saving the trained model
-    save_dirname = "fit_a_line.inference.model"
+    save_dirname = os.path.join(temp_dir.name, "fit_a_line.inference.model")
 
     train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16)
     infer(use_cuda, save_dirname, use_bf16)
+    temp_dir.cleanup()
 
 
 class TestFitALineBase(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index 7096a16d89faf..e2f78a0f36f7b 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -22,6 +22,7 @@
 import numpy
 import unittest
 import os
+import tempfile
 import numpy as np
 
 paddle.enable_static()
@@ -244,10 +245,13 @@ def main(net_type, use_cuda, is_local=True):
         return
 
     # Directory for saving the trained model
-    save_dirname = "image_classification_" + net_type + ".inference.model"
+    temp_dir = tempfile.TemporaryDirectory()
+    save_dirname = os.path.join(
+        temp_dir.name, "image_classification_" + net_type + ".inference.model")
 
     train(net_type, use_cuda, save_dirname, is_local)
     infer(use_cuda, save_dirname)
+    temp_dir.cleanup()
 
 
 class TestImageClassification(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index eee1d7959eef7..cb962493e7ac8 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -20,6 +20,7 @@
 import os
 import time
 import unittest
+import tempfile
 
 import paddle
 import paddle.dataset.conll05 as conll05
@@ -354,12 +355,16 @@ def main(use_cuda, is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
 
+    temp_dir = tempfile.TemporaryDirectory()
     # Directory for saving the trained model
-    save_dirname = "label_semantic_roles.inference.model"
+    save_dirname = os.path.join(temp_dir.name,
+                                "label_semantic_roles.inference.model")
 
     train(use_cuda, save_dirname, is_local)
     infer(use_cuda, save_dirname)
 
+    temp_dir.cleanup()
+
 
 class TestLabelSemanticRoles(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py
index 8a4b4c2683747..0a26a03eb878b 100644
--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -23,6 +23,7 @@
 import paddle.fluid.framework as framework
 import paddle.fluid.layers as layers
 import paddle.fluid.nets as nets
+import tempfile
 from paddle.fluid.executor import Executor
 from paddle.fluid.optimizer import SGDOptimizer
 
@@ -318,10 +319,13 @@ def main(use_cuda):
         return
 
     # Directory for saving the inference model
-    save_dirname = "recommender_system.inference.model"
+    temp_dir = tempfile.TemporaryDirectory()
+    save_dirname = os.path.join(temp_dir.name,
+                                "recommender_system.inference.model")
 
     train(use_cuda, save_dirname)
     infer(use_cuda, save_dirname)
+    temp_dir.cleanup()
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py
index 7a31035d2fb22..9499583c07bae 100644
--- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py
+++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py
@@ -23,7 +23,9 @@
 import contextlib
 import math
 import sys
+import os
 import unittest
+import tempfile
 from paddle.fluid.executor import Executor
 import paddle
@@ -266,10 +268,13 @@ def main(use_cuda):
         return
 
     # Directory for saving the trained model
-    save_dirname = "rnn_encoder_decoder.inference.model"
+    temp_dir = tempfile.TemporaryDirectory()
+    save_dirname = os.path.join(temp_dir.name,
+                                "rnn_encoder_decoder.inference.model")
 
     train(use_cuda, save_dirname)
     infer(use_cuda, save_dirname)
+    temp_dir.cleanup()
 
 
 class TestRnnEncoderDecoder(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/book/test_word2vec_book.py b/python/paddle/fluid/tests/book/test_word2vec_book.py
index 37d5106e8502d..9e79fd3f523f8 100644
--- a/python/paddle/fluid/tests/book/test_word2vec_book.py
+++ b/python/paddle/fluid/tests/book/test_word2vec_book.py
@@ -22,6 +22,7 @@
 import numpy as np
 import math
 import sys
+import tempfile
 
 paddle.enable_static()
 
@@ -247,7 +248,7 @@ def to_infer_tensor(lod_tensor):
     infer_inputs = [to_infer_tensor(t) for t in infer_inputs]
 
     infer_config = fluid.core.NativeConfig()
-    infer_config.model_dir = 'word2vec.inference.model'
+    infer_config.model_dir = save_dirname
     if target == "cuda":
         infer_config.use_gpu = True
         infer_config.device = 0
@@ -273,8 +274,9 @@ def main(target, is_sparse, is_parallel, use_bf16, pure_bf16):
     if use_bf16 and not fluid.core.is_compiled_with_mkldnn():
         return
 
+    temp_dir = tempfile.TemporaryDirectory()
     if not is_parallel:
-        save_dirname = "word2vec.inference.model"
+        save_dirname = os.path.join(temp_dir.name, "word2vec.inference.model")
     else:
         save_dirname = None
 
@@ -290,6 +292,7 @@ def main(target, is_sparse, is_parallel, use_bf16, pure_bf16):
         use_bf16=use_bf16,
         pure_bf16=pure_bf16)
     infer(target, save_dirname)
+    temp_dir.cleanup()
 
 
 FULL_TEST = os.getenv('FULL_TEST',
+ +#include "paddle/phi/core/custom_phi_kernel.h" + +namespace paddle { + +namespace custom_kernel { + +// Here we use dot for test +// This test will fail when this kernel is supported in framework +template +void DotKernel(const phi::Context& dev_ctx, + const phi::DenseTensor& x, + const phi::DenseTensor& y, + phi::DenseTensor* out) { + auto const *x_ptr = x.data(), *x_ptr_ = &x_ptr[0]; + auto const *y_ptr = y.data(), *y_ptr_ = &y_ptr[0]; + T* z = dev_ctx.template Alloc(out); + + // Loop over the total N elements of both operands while sum-reducing every + // B pairs along the way where B is the dimension of the least ordered axis + auto&& d = x.dims(); + auto const N = x.numel(); + auto const B = d[d.size() - 1]; + + for (int j = 0; j < N / B; j++) { + T ss = 0; + for (int i = 0; i < B; i++) ss += (*x_ptr_++) * (*y_ptr_++); + z[j] = ss; + } +} + +} // namespace custom_kernel +} // namespace paddle + +PD_BUILD_PHI_KERNEL( + dot, CPU, ALL_LAYOUT, paddle::custom_kernel::DotKernel, int8_t) {} diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py new file mode 100644 index 0000000000000..a94307161d431 --- /dev/null +++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py @@ -0,0 +1,81 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
new file mode 100644
index 0000000000000..a94307161d431
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from paddle.fluid import core
+from distutils.sysconfig import get_python_lib
+from distutils.core import setup, Extension
+from setuptools.command.build_ext import build_ext
+
+
+# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
+# Avoid a gcc warning below:
+# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
+# for C/ObjC but not for C++
+class BuildExt(build_ext):
+
+    def build_extensions(self):
+        if '-Wstrict-prototypes' in self.compiler.compiler_so:
+            self.compiler.compiler_so.remove('-Wstrict-prototypes')
+        super(BuildExt, self).build_extensions()
+
+
+# cc flags
+paddle_extra_compile_args = [
+    '-std=c++14',
+    '-shared',
+    '-fPIC',
+    '-Wno-parentheses',
+    '-DPADDLE_WITH_CUSTOM_KERNEL',
+]
+if core.is_compiled_with_npu():
+    paddle_extra_compile_args += ['-D_GLIBCXX_USE_CXX11_ABI=0']
+
+# include path
+site_packages_path = get_python_lib()
+paddle_custom_kernel_include = [
+    os.path.join(site_packages_path, 'paddle', 'include'),
+]
+# include path third_party
+compile_third_party_path = os.path.join(os.environ['PADDLE_ROOT'],
+                                        'build/third_party')
+paddle_custom_kernel_include += [
+    os.path.join(compile_third_party_path, 'boost/src/extern_boost'),  # boost
+    os.path.join(compile_third_party_path, 'install/gflags/include'),  # gflags
+    os.path.join(compile_third_party_path, 'install/glog/include'),  # glog
+]
+
+# libs path
+paddle_custom_kernel_library_dir = [
+    os.path.join(site_packages_path, 'paddle', 'fluid'),
+]
+
+# libs
+libs = [':core_avx.so']
+if not core.has_avx_core and core.has_noavx_core:
+    libs = [':core_noavx.so']
+
+custom_kernel_dot_module = Extension(
+    'custom_kernel_dot',
+    sources=['custom_kernel_dot_c.cc'],
+    include_dirs=paddle_custom_kernel_include,
+    library_dirs=paddle_custom_kernel_library_dir,
+    libraries=libs,
+    extra_compile_args=paddle_extra_compile_args)
+
+setup(name='custom_kernel_dot_c',
+      version='1.0',
+      description='custom kernel for compiling',
+      cmdclass={'build_ext': BuildExt},
+      ext_modules=[custom_kernel_dot_module])
diff --git a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py
index d1929fef5cc54..e28bfe00e7c4f 100644
--- a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py
+++ b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py
@@ -56,6 +56,42 @@ def tearDown(self):
         del os.environ['CUSTOM_DEVICE_ROOT']
 
 
+class TestCustomKernelDotC(unittest.TestCase):
+
+    def setUp(self):
+        # compile so and set to current path
+        cur_dir = os.path.dirname(os.path.abspath(__file__))
+
+        # --inplace to place output so file to current dir
+        cmd = 'cd {} && {} custom_kernel_dot_c_setup.py build_ext --inplace'.format(
+            cur_dir, sys.executable)
+        os.system(cmd)
+
+        # set environment for loading and registering compiled custom kernels
+        # only valid in current process
+        os.environ['CUSTOM_DEVICE_ROOT'] = cur_dir
+
+    def test_custom_kernel_dot_run(self):
+        # test dot run
+        x_data = np.random.uniform(1, 5, [2, 10]).astype(np.int8)
+        y_data = np.random.uniform(1, 5, [2, 10]).astype(np.int8)
+        result = np.sum(x_data * y_data, axis=1).reshape([2, 1])
+
+        import paddle
+        paddle.set_device('cpu')
+        x = paddle.to_tensor(x_data)
+        y = paddle.to_tensor(y_data)
+        out = paddle.dot(x, y)
+
+        self.assertTrue(
+            np.array_equal(out.numpy(), result),
+            "custom kernel dot out: {},\n numpy dot out: {}".format(
+                out.numpy(), result))
+
+    def tearDown(self):
+        del os.environ['CUSTOM_DEVICE_ROOT']
+
+
 if __name__ == '__main__':
     if os.name == 'nt' or sys.platform.startswith('darwin'):
         # only support Linux now
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
index 78078963a7dea..ff0b11128a4f0 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
@@ -15,6 +15,7 @@
 import os
 import unittest
 import numpy as np
+import tempfile
 
 import paddle
 from paddle import nn
@@ -73,6 +74,9 @@ def forward(self, x):
 
 class TestDygraphModel(unittest.TestCase):
 
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def setUp(self):
         self.seed = 2021
 
@@ -93,8 +97,12 @@ def setUp(self):
         self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
 
         # for saving model
-        self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams"
-        self.model_dy2stat_path = "infer_model/custom_relu_model_dy2sta"
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.model_save_dir = os.path.join(self.temp_dir.name, 'infer_model')
+        self.model_path_template = os.path.join(
+            self.model_save_dir, 'custom_relu_dygaph_model_{}.pdparams')
+        self.model_dy2stat_path = os.path.join(
+            self.model_save_dir, 'infer_model/custom_relu_model_dy2sta')
 
         # for dy2stat
         self.x_spec = paddle.static.InputSpec(shape=[None, self.in_dim],
@@ -210,12 +218,16 @@ def setUp(self):
         self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
 
         # for saving model
-        self.model_path_template = "infer_model/custom_relu_static_model_{}_{}"
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.model_save_dir = os.path.join(self.temp_dir.name, 'infer_model')
+        self.model_path_template = os.path.join(
+            self.model_save_dir, 'custom_relu_static_model_{}_{}')
 
         paddle.enable_static()
 
     def tearDown(self):
         paddle.disable_static()
+        self.temp_dir.cleanup()
 
     def test_train_eval(self):
         for device in self.devices:
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
index 29433b17153f5..1a53bf3354f36 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
@@ -18,6 +18,7 @@
 import unittest
 import paddle
 import paddle.static as static
+import tempfile
 import subprocess
 import numpy as np
 from paddle.vision.transforms import Compose, Normalize
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
index a7532ff3e7376..82b73609b2e11 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
@@ -30,43 +30,61 @@ def sample_program_configs(self):
 
         def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
             if dims == 1:
-                return np.ones([32]).astype(np.float32)
+                return np.random.random([32]).astype(np.float32)
             elif dims == 2:
-                return np.ones([3, 32]).astype(np.float32)
+                return np.random.random([3, 32]).astype(np.float32)
             elif dims == 3:
-                return np.ones([3, 32, 32]).astype(np.float32)
+                return np.random.random([3, 32, 32]).astype(np.float32)
             else:
-                return np.ones([batch, 3, 32, 32]).astype(np.float32)
+                return np.random.random([batch, 3, 32, 32]).astype(np.float32)
 
         for dims in [1, 2, 3, 4]:
             for batch in [1, 4]:
-                for op_type in ["relu", "sigmoid", "tanh", "relu6"]:
-                    self.dims = dims
-                    dics = [{}]
-
-                    ops_config = [{
-                        "op_type": op_type,
-                        "op_inputs": {
-                            "X": ["input_data"]
-                        },
-                        "op_outputs": {
-                            "Out": ["output_data"]
-                        },
-                        "op_attrs": dics[0]
-                    }]
-                    ops = self.generate_op_config(ops_config)
-
-                    program_config = ProgramConfig(
-                        ops=ops,
-                        weights={},
-                        inputs={
-                            "input_data":
-                            TensorConfig(data_gen=partial(
-                                generate_input1, dims, batch, dics))
-                        },
-                        outputs=["output_data"])
-
-                    yield program_config
+                for op_type in [
+                        "relu", "sigmoid", "tanh", "relu6", "elu", "selu",
+                        "softsign", "stanh", "thresholded_relu", "softplus"
+                ]:
+                    # few samples to reduce time
+                    #for beta in [-0.2, 0.5, 0.67, 3]:
+                    #    for alpha in [-0.2, 0.5, 0.67, 3]:
+                    for beta in [0.67]:
+                        for alpha in [0.67]:
+                            self.dims = dims
+                            dics = [{}]
+                            if op_type == "elu":
+                                dics = [{"alpha": alpha}]
+                            if op_type == "selu":
+                                dics = [{"alpha": beta, "scale": alpha}]
+                            if op_type == "stanh":
+                                dics = [{"scale_a": beta, "scale_b": alpha}]
+                            if op_type == "thresholded_relu":
+                                dics = [{"threshold": alpha}]
+                            if op_type == "softplus":
+                                dics = [{"beta": beta}]
+
+                            ops_config = [{
+                                "op_type": op_type,
+                                "op_inputs": {
+                                    "X": ["input_data"]
+                                },
+                                "op_outputs": {
+                                    "Out": ["output_data"]
+                                },
+                                "op_attrs": dics[0]
+                            }]
+                            ops = self.generate_op_config(ops_config)
+
+                            program_config = ProgramConfig(
+                                ops=ops,
+                                weights={},
+                                inputs={
+                                    "input_data":
+                                    TensorConfig(data_gen=partial(
+                                        generate_input1, dims, batch, dics))
+                                },
+                                outputs=["output_data"])
+
+                            yield program_config
 
     def sample_predictor_configs(
             self, program_config) -> (paddle_infer.Config, List[int], float):
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bilinear_interp_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bilinear_interp_v2.py
new file mode 100644
index 0000000000000..3fe041db9333e
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bilinear_interp_v2.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+
+class TrtConvertBilinearInterpV2Test(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        weights = program_config.weights
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            return np.ones([1, 3, 64, 64]).astype(np.float32)
+
+        def generate_input2(attrs: List[Dict[str, Any]]):
+            return np.random.uniform(low=0.5, high=6.0,
+                                     size=(2)).astype("float32")
+
+        for data_layout in ["NCHW", "NHWC"]:
+            for scale_y in [2.0, -1.0, 0.0]:
+                for scale_x in [2.0, -1.0, 0.0]:
+                    scale = [scale_y, scale_x]
+                    for out_h in [32, 64, 128, 192]:
+                        for out_w in [32, 64]:
+                            dics = [{
+                                "data_layout": data_layout,
+                                "interp_method": "bilinear",
+                                "align_corners": False,
+                                "align_mode": 0,
+                                "scale": scale,
+                                "out_h": out_h,
+                                "out_w": out_w
+                            }]
+
+                            ops_config = [{
+                                "op_type": "bilinear_interp_v2",
+                                "op_inputs": {
+                                    "X": ["input_data"],
+                                    "Scale": ["input_scale"]
+                                },
+                                "op_outputs": {
+                                    "Out": ["bilinear_interp_v2_output_data"]
+                                },
+                                "op_attrs": dics[0]
+                            }]
+                            ops = self.generate_op_config(ops_config)
+
+                            program_config = ProgramConfig(
+                                ops=ops,
+                                weights={
+                                    "input_scale":
+                                    TensorConfig(
+                                        data_gen=partial(generate_input2, dics))
+                                },
+                                inputs={
+                                    "input_data":
+                                    TensorConfig(
+                                        data_gen=partial(generate_input1, dics))
+                                },
+                                outputs=["bilinear_interp_v2_output_data"])
+
+                            yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py
index 9948b29321dc0..a53b61a00727b 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py
@@ -77,7 +77,9 @@ def clear_dynamic_shape():
             self.dynamic_shape.opt_input_shape = {}
 
         def generate_trt_nodes_num(attrs, dynamic_shape):
-            if dynamic_shape == True:
+            ver = paddle_infer.get_trt_compile_version()
+            if ver[0] * 1000 + ver[1] * 100 + ver[
+                    2] * 10 < 8000 and dynamic_shape == True:
                 return 0, 3
             else:
                 return 1, 2
diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py
index 83a50c2a4472d..bf33d5532014f 100755
--- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py
+++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py
@@ -17,8 +17,10 @@
 paddle.set_default_dtype("float64")
 from paddle.fluid.layers import sequence_mask
 
+import os
 import numpy as np
 import unittest
+import tempfile
 
 from convert import convert_params_for_net
 from rnn_numpy import SimpleRNN, LSTM, GRU
@@ -336,16 +338,18 @@ def forward(self, input):
     rnn = paddle.jit.to_static(
         rnn, [paddle.static.InputSpec(shape=[None, None, 16], dtype=x.dtype)])
-    paddle.jit.save(rnn, "./inference/%s_infer" % mode)
+    temp_dir = tempfile.TemporaryDirectory()
+    save_dirname = os.path.join(temp_dir.name, "./inference/%s_infer" % mode)
+
+    paddle.jit.save(rnn, save_dirname)
 
     paddle.enable_static()
 
     new_scope = paddle.static.Scope()
     with paddle.static.scope_guard(new_scope):
         exe = paddle.static.Executor(place)
-        [inference_program, feed_target_names, fetch_targets
-         ] = paddle.static.load_inference_model("./inference/%s_infer" % mode,
-                                                exe)
+        [inference_program, feed_target_names,
+         fetch_targets] = paddle.static.load_inference_model(save_dirname, exe)
         results = exe.run(inference_program,
                           feed={feed_target_names[0]: x.numpy()},
                           fetch_list=fetch_targets)
@@ -353,6 +357,8 @@ def forward(self, input):
             y.numpy(), results[0])  # eval results equal predict results
     paddle.disable_static()
 
+    temp_dir.cleanup()
+
 
 def load_tests(loader, tests, pattern):
     suite = unittest.TestSuite()
diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py
index e31baf9fe2e70..86e23c79d07a4 100644
--- a/python/paddle/fluid/tests/unittests/test_dataset.py
+++ b/python/paddle/fluid/tests/unittests/test_dataset.py
@@ -172,8 +172,12 @@ def test_set_download_cmd(self):
         """
         Testcase for InMemoryDataset from create to run.
         """
-        filename1 = "afs:test_in_memory_dataset_run_a.txt"
-        filename2 = "afs:test_in_memory_dataset_run_b.txt"
+        temp_dir = tempfile.TemporaryDirectory()
+        filename1 = os.path.join(temp_dir.name,
+                                 "afs:test_in_memory_dataset_run_a.txt")
+        filename2 = os.path.join(temp_dir.name,
+                                 "afs:test_in_memory_dataset_run_b.txt")
+
         with open(filename1, "w") as f:
             data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
             data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
             data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
             f.write(data)
@@ -223,19 +227,24 @@ def test_set_download_cmd(self):
         except Exception as e:
             self.assertTrue(False)
 
-        os.remove(filename1)
-        os.remove(filename2)
+        temp_dir.cleanup()
 
     def test_in_memory_dataset_run(self):
         """
         Testcase for InMemoryDataset from create to run.
""" - with open("test_in_memory_dataset_run_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset_run_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset_run_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_in_memory_dataset_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -257,10 +266,7 @@ def test_in_memory_dataset_run(self): pipe_command="cat", use_var=slots_vars) dataset._init_distributed_settings(fea_eval=True, candidate_size=1) - dataset.set_filelist([ - "test_in_memory_dataset_run_a.txt", - "test_in_memory_dataset_run_b.txt" - ]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() dataset.slots_shuffle(["slot1"]) dataset.local_shuffle() @@ -282,14 +288,19 @@ def test_in_memory_dataset_run(self): except Exception as e: self.assertTrue(False) - os.remove("./test_in_memory_dataset_run_a.txt") - os.remove("./test_in_memory_dataset_run_b.txt") + temp_dir.cleanup() def test_in_memory_dataset_masterpatch(self): """ Testcase for InMemoryDataset from create to run. """ - with open("test_in_memory_dataset_masterpatch_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset_masterpatch_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset_masterpatch_b.txt") + + with open(filename1, "w") as f: data = "1 id1 1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 id1 1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 id2 1 1 1 1 1 0 1 0\n" @@ -300,7 +311,7 @@ def test_in_memory_dataset_masterpatch(self): data += "1 id5 1 1 1 1 1 0 1 0\n" data += "1 id5 1 1 1 1 1 0 1 0\n" f.write(data) - with open("test_in_memory_dataset_masterpatch_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 id6 1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 id6 1 1 2 3 4 4 6 6 6 6 1 5\n" data += "1 id6 1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -353,14 +364,19 @@ def test_in_memory_dataset_masterpatch(self): dataset.update_settings(merge_size=2) dataset.dataset.merge_by_lineid() - os.remove("./test_in_memory_dataset_masterpatch_a.txt") - os.remove("./test_in_memory_dataset_masterpatch_b.txt") + temp_dir.cleanup() def test_in_memory_dataset_masterpatch1(self): """ Testcase for InMemoryDataset from create to run. 
""" - with open("test_in_memory_dataset_masterpatch1_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset_masterpatch1_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset_masterpatch1_b.txt") + + with open(filename1, "w") as f: data = "1 id1 1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 id1 1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 id2 1 1 1 1 1 0 1 0\n" @@ -371,7 +387,7 @@ def test_in_memory_dataset_masterpatch1(self): data += "1 id5 1 1 1 1 1 0 1 0\n" data += "1 id5 1 1 1 1 1 0 1 0\n" f.write(data) - with open("test_in_memory_dataset_masterpatch1_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 id6 1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 id6 1 1 2 3 4 4 6 6 6 6 1 5\n" data += "1 id6 1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -427,8 +443,7 @@ def test_in_memory_dataset_masterpatch1(self): dataset._set_merge_by_lineid(2) dataset.dataset.merge_by_lineid() - os.remove("./test_in_memory_dataset_masterpatch1_a.txt") - os.remove("./test_in_memory_dataset_masterpatch1_b.txt") + temp_dir.cleanup() def test_in_memory_dataset_run_2(self): """ @@ -436,12 +451,18 @@ def test_in_memory_dataset_run_2(self): Use CUDAPlace Use float type id """ - with open("test_in_memory_dataset_run_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset_run_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset_run_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_in_memory_dataset_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -462,10 +483,7 @@ def test_in_memory_dataset_run_2(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist([ - "test_in_memory_dataset_run_a.txt", - "test_in_memory_dataset_run_b.txt" - ]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() dataset.local_shuffle() @@ -540,19 +558,22 @@ def test_in_memory_dataset_run_2(self): fleet_ptr.set_client2client_config(1, 1, 1) fleet_ptr.get_cache_threshold(0) - os.remove("./test_in_memory_dataset_run_a.txt") - os.remove("./test_in_memory_dataset_run_b.txt") + temp_dir.cleanup() def test_queue_dataset_run(self): """ Testcase for QueueDataset from create to run. 
""" - with open("test_queue_dataset_run_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, "test_queue_dataset_run_a.txt") + filename2 = os.path.join(temp_dir.name, "test_queue_dataset_run_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_queue_dataset_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -573,8 +594,7 @@ def test_queue_dataset_run(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist( - ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) + dataset.set_filelist([filename1, filename2]) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) @@ -605,10 +625,7 @@ def test_queue_dataset_run(self): except Exception as e: self.assertTrue(False) - if os.path.exists("./test_queue_dataset_run_a.txt"): - os.remove("./test_queue_dataset_run_a.txt") - if os.path.exists("./test_queue_dataset_run_b.txt"): - os.remove("./test_queue_dataset_run_b.txt") + temp_dir.cleanup() def test_queue_dataset_run_2(self): """ @@ -616,12 +633,16 @@ def test_queue_dataset_run_2(self): Use CUDAPlace Use float type id """ - with open("test_queue_dataset_run_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, "test_queue_dataset_run_a.txt") + filename2 = os.path.join(temp_dir.name, "test_queue_dataset_run_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_queue_dataset_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -642,8 +663,7 @@ def test_queue_dataset_run_2(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist( - ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) + dataset.set_filelist([filename1, filename2]) exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) @@ -662,10 +682,7 @@ def test_queue_dataset_run_2(self): except Exception as e: self.assertTrue(False) - if os.path.exists("./test_queue_dataset_run_a.txt"): - os.remove("./test_queue_dataset_run_a.txt") - if os.path.exists("./test_queue_dataset_run_b.txt"): - os.remove("./test_queue_dataset_run_b.txt") + temp_dir.cleanup() def test_queue_dataset_run_3(self): """ @@ -673,13 +690,17 @@ def test_queue_dataset_run_3(self): Use CUDAPlace Use float type id """ - with open("test_queue_dataset_run_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, "test_queue_dataset_run_a.txt") + filename2 = os.path.join(temp_dir.name, "test_queue_dataset_run_b.txt") + + with open(filename1, "w") as f: data = "2 1 2 2 5 4 2 2 7 2 1 3\n" data += "2 6 2 2 1 4 2 2 4 2 2 3\n" data += "2 5 2 2 9 9 2 2 7 2 1 3\n" data += "2 7 2 2 1 9 2 3 7 2 5 3\n" f.write(data) - with open("test_queue_dataset_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "2 1 2 2 5 4 2 2 7 2 1 3\n" data += "2 6 2 2 1 4 2 2 4 2 2 3\n" data += "2 5 2 2 9 9 2 2 7 2 1 3\n" @@ -701,8 +722,7 @@ def test_queue_dataset_run_3(self): input_type=1, pipe_command="cat", use_var=slots_vars) - 
dataset.set_filelist( - ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() exe = fluid.Executor(fluid.CPUPlace( @@ -722,10 +742,7 @@ def test_queue_dataset_run_3(self): except Exception as e: self.assertTrue(False) - if os.path.exists("./test_queue_dataset_run_a.txt"): - os.remove("./test_queue_dataset_run_a.txt") - if os.path.exists("./test_queue_dataset_run_b.txt"): - os.remove("./test_queue_dataset_run_b.txt") + temp_dir.cleanup() class TestDatasetWithDataLoader(TestDataset): @@ -789,12 +806,18 @@ def setUp(self): """ Test Dataset With Fetch Handler. TestCases. """ - with open("test_queue_dataset_run_a.txt", "w") as f: + self.temp_dir = tempfile.TemporaryDirectory() + self.filename1 = os.path.join(self.temp_dir.name, + "test_queue_dataset_run_a.txt") + self.filename2 = os.path.join(self.temp_dir.name, + "test_queue_dataset_run_b.txt") + + with open(self.filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_queue_dataset_run_b.txt", "w") as f: + with open(self.filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -805,15 +828,14 @@ def tearDown(self): """ Test Dataset With Fetch Handler. TestCases. """ - os.remove("./test_queue_dataset_run_a.txt") - os.remove("./test_queue_dataset_run_b.txt") + self.temp_dir.cleanup() def test_dataset_none(self): """ Test Dataset With Fetch Handler. TestCases. """ slots_vars, out = self.net() - files = ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"] + files = [self.filename1, self.filename2] dataset = self.get_dataset(slots_vars, files) exe = fluid.Executor(fluid.CPUPlace()) @@ -835,7 +857,7 @@ def test_infer_from_dataset(self): Test Dataset With Fetch Handler. TestCases. """ slots_vars, out = self.net() - files = ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"] + files = [self.filename1, self.filename2] dataset = self.get_dataset(slots_vars, files) exe = fluid.Executor(fluid.CPUPlace()) @@ -853,7 +875,7 @@ def test_fetch_handler(self): Test Dataset With Fetch Handler. TestCases. """ slots_vars, out = self.net() - files = ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"] + files = [self.filename1, self.filename2] dataset = self.get_dataset(slots_vars, files) exe = fluid.Executor(fluid.CPUPlace()) @@ -888,15 +910,20 @@ def test_dataset_fleet(self): """ Testcase for InMemoryDataset from create to run. 
""" + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run_b.txt") self.skipTest("parameter server will add pslib UT later") - with open("test_in_memory_dataset2_run_a.txt", "w") as f: + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_in_memory_dataset2_run_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -939,27 +966,29 @@ def test_dataset_fleet(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist([ - "test_in_memory_dataset2_run_a.txt", - "test_in_memory_dataset2_run_b.txt" - ]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() fleet._opt_info = None fleet._fleet_ptr = None - os.remove("./test_in_memory_dataset2_run_a.txt") - os.remove("./test_in_memory_dataset2_run_b.txt") + temp_dir.cleanup() def test_dataset_fleet2(self): """ Testcase for InMemoryDataset from create to run. """ - with open("test_in_memory_dataset2_run2_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run2_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run2_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_in_memory_dataset2_run2_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -1011,10 +1040,7 @@ def test_dataset_fleet2(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist([ - "test_in_memory_dataset2_run2_a.txt", - "test_in_memory_dataset2_run2_b.txt" - ]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() try: dataset.global_shuffle(fleet) @@ -1073,19 +1099,24 @@ def test_dataset_fleet2(self): except: print("warning: catch expected error") - os.remove("./test_in_memory_dataset2_run2_a.txt") - os.remove("./test_in_memory_dataset2_run2_b.txt") + temp_dir.cleanup() def test_bosps_dataset_fleet2(self): """ Testcase for InMemoryDataset from create to run. 
""" - with open("test_in_memory_dataset2_run2_a.txt", "w") as f: + temp_dir = tempfile.TemporaryDirectory() + filename1 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run2_a.txt") + filename2 = os.path.join(temp_dir.name, + "test_in_memory_dataset2_run2_b.txt") + + with open(filename1, "w") as f: data = "1 1 2 3 3 4 5 5 5 5 1 1\n" data += "1 2 2 3 4 4 6 6 6 6 1 2\n" data += "1 3 2 3 5 4 7 7 7 7 1 3\n" f.write(data) - with open("test_in_memory_dataset2_run2_b.txt", "w") as f: + with open(filename2, "w") as f: data = "1 4 2 3 3 4 5 5 5 5 1 4\n" data += "1 5 2 3 4 4 6 6 6 6 1 5\n" data += "1 6 2 3 5 4 7 7 7 7 1 6\n" @@ -1137,10 +1168,7 @@ def test_bosps_dataset_fleet2(self): thread_num=3, pipe_command="cat", use_var=slots_vars) - dataset.set_filelist([ - "test_in_memory_dataset2_run2_a.txt", - "test_in_memory_dataset2_run2_b.txt" - ]) + dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() try: dataset.global_shuffle(fleet) @@ -1190,6 +1218,7 @@ def test_bosps_dataset_fleet2(self): #dataset.get_pv_data_size() dataset.get_memory_data_size() dataset.get_shuffle_data_size() + temp_dir.cleanup() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index 8d949bf51a7da..8c9be45707f29 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -18,6 +18,7 @@ import six import os import unittest +import tempfile from simple_nets import simple_fc_net_with_inputs BATCH_SIZE = 32 @@ -27,8 +28,6 @@ IMAGE_SHAPE = [2, 3] LABEL_SHAPE = [1] -ALL_WRITTEN_FILES = set() - def get_place_string(p): if isinstance(p, (fluid.CPUPlace or fluid.CUDAPlace)): @@ -42,13 +41,7 @@ def get_place_string(p): return 'CUDAPlace()' -def remove_all_written_files(): - for filename in ALL_WRITTEN_FILES: - os.remove(filename) - - def write_reader_data_to_file(filename, reader): - ALL_WRITTEN_FILES.add(filename) with open(filename, 'w') as fid: for instance_list in reader(): for i, instance in enumerate(instance_list): @@ -81,10 +74,10 @@ class DatasetLoaderTestBase(unittest.TestCase): def setUp(self): self.dataset_name = "QueueDataset" self.drop_last = False + self.temp_dir = tempfile.TemporaryDirectory() def tearDown(self): - return - remove_all_written_files() + self.temp_dir.cleanup() def build_network(self): main_prog = fluid.Program() @@ -129,7 +122,8 @@ def check_batch_number(self, place, randomize_batch_num=False): random_delta_batch_size = np.zeros(shape=[file_num]) for i in six.moves.range(file_num): - filename = 'dataset_test_{}.txt'.format(i) + filename = os.path.join(self.temp_dir.name, + 'dataset_test_{}.txt'.format(i)) filelist.append(filename) write_reader_data_to_file( filename, @@ -214,6 +208,7 @@ class QueueDatasetTestWithoutDropLast(DatasetLoaderTestBase): def setUp(self): self.dataset_name = "QueueDataset" self.drop_last = True + self.temp_dir = tempfile.TemporaryDirectory() class InMemoryDatasetTestWithoutDropLast(DatasetLoaderTestBase): @@ -221,6 +216,7 @@ class InMemoryDatasetTestWithoutDropLast(DatasetLoaderTestBase): def setUp(self): self.dataset_name = "InMemoryDataset" self.drop_last = False + self.temp_dir = tempfile.TemporaryDirectory() class InMemoryDatasetTestWithDropLast(DatasetLoaderTestBase): @@ -228,6 +224,7 @@ class InMemoryDatasetTestWithDropLast(DatasetLoaderTestBase): def setUp(self): self.dataset_name = "InMemoryDataset" self.drop_last = True + self.temp_dir = 
tempfile.TemporaryDirectory() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py index d4dc21e7646d6..30a86d02f3142 100644 --- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py +++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py @@ -19,19 +19,27 @@ import subprocess import unittest import paddle +import tempfile import paddle.fluid as fluid from paddle.fluid import core class TestGPUPackagePaddle(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.temp_dir.cleanup() + def test_import_paddle(self): if core.is_compiled_with_cuda(): if core.is_compiled_with_rocm(): os.environ['HIP_VISIBLE_DEVICES'] = '' else: os.environ['CUDA_VISIBLE_DEVICES'] = '' - test_file = 'test_no_gpu_run_rand.py' + test_file = os.path.join(self.temp_dir.name, + 'test_no_gpu_run_rand.py') with open(test_file, 'w') as wb: cmd_test = """ import paddle diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py index e9266a4643292..7310d19a522ff 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py @@ -17,6 +17,9 @@ import paddle.fluid as fluid import numpy as np import six +import cv2 +import os +import tempfile from test_imperative_resnet import ResNet, BottleneckBlock, ConvBNLayer, train_parameters, optimizer_setting import paddle.nn as nn from paddle.static import InputSpec @@ -737,6 +740,12 @@ def func_isinstance(): class TestPureFp16SaveLoad(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.temp_dir.cleanup() + def test_save_dtype_exception(self): def func(): @@ -848,7 +857,7 @@ def train_resnet(self, 'opt': optimizer.state_dict(), 'scaler': scaler.state_dict() } - path = 'model.pdparams' + path = os.path.join(self.temp_dir.name, 'model.pdparams') paddle.save(obj, path) # paddle.load obj_load = paddle.load(path) @@ -888,6 +897,12 @@ def func_isinstance(): class TestPureFp16InferenceSaveLoad(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.temp_dir.cleanup() + def inference_save_load(self): BATCH_SIZE = 16 BATCH_NUM = 4 @@ -951,7 +966,7 @@ def train(layer, loader, loss_fn, opt): train(layer, loader, loss_fn, adam) # save - path = "example_model/linear" + path = os.path.join(self.temp_dir.name, 'example_model/linear') paddle.jit.save(layer, path, input_spec=[InputSpec(shape=[IMAGE_SIZE], name='x')]) diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index 0143bdb53242c..99097aaf0048e 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -17,7 +17,7 @@ import unittest import numpy as np import tempfile - +import os import paddle import paddle.profiler as profiler import paddle.profiler.utils as utils @@ -28,13 +28,19 @@ class TestProfiler(unittest.TestCase): + def tearDown(self): + self.temp_dir.cleanup() + def test_profiler(self): def my_trace_back(prof): - profiler.export_chrome_tracing('./test_profiler_chrometracing/')( prof) -
profiler.export_protobuf('./test_profiler_pb/')(prof) + path = os.path.join(self.temp_dir.name, + './test_profiler_chrometracing') + profiler.export_chrome_tracing(path)(prof) + path = os.path.join(self.temp_dir.name, './test_profiler_pb') + profiler.export_protobuf(path)(prof) + self.temp_dir = tempfile.TemporaryDirectory() x_value = np.random.randn(2, 3, 3) x = paddle.to_tensor(x_value, stop_gradient=False, @@ -135,9 +141,10 @@ def my_sheduler1(num_step): paddle.grad(outputs=y, inputs=[x], grad_outputs=ones_like_y) prof.step() - prof.export(path='./test_profiler_pb.pb', format='pb') + path = os.path.join(self.temp_dir.name, './test_profiler_pb.pb') + prof.export(path=path, format='pb') prof.summary() - result = profiler.utils.load_profiler_result('./test_profiler_pb.pb') + result = profiler.utils.load_profiler_result(path) prof = None dataset = RandomDataset(10 * 4) simple_net = SimpleNet() diff --git a/python/paddle/fluid/tests/unittests/test_ops_nms.py b/python/paddle/fluid/tests/unittests/test_ops_nms.py index 54ea804cdbd9b..c775a47bd2472 100644 --- a/python/paddle/fluid/tests/unittests/test_ops_nms.py +++ b/python/paddle/fluid/tests/unittests/test_ops_nms.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest import numpy as np import paddle from test_nms_op import nms +import tempfile def _find(condition): @@ -79,6 +81,11 @@ def setUp(self): self.devices = ['cpu'] if paddle.is_compiled_with_cuda(): self.devices.append('gpu') + self.temp_dir = tempfile.TemporaryDirectory() + self.path = os.path.join(self.temp_dir.name, './net') + + def tearDown(self): + self.temp_dir.cleanup() def test_nms(self): for device in self.devices: @@ -169,7 +176,6 @@ def fun(x): categories, 10) return out - path = "./net" boxes = np.random.rand(64, 4).astype('float32') boxes[:, 2] = boxes[:, 0] + boxes[:, 2] boxes[:, 3] = boxes[:, 1] + boxes[:, 3] @@ -177,14 +183,14 @@ def fun(x): origin = fun(paddle.to_tensor(boxes)) paddle.jit.save( fun, - path, + self.path, input_spec=[ paddle.static.InputSpec(shape=[None, 4], dtype='float32', name='x') ], ) - load_func = paddle.jit.load(path) + load_func = paddle.jit.load(self.path) res = load_func(paddle.to_tensor(boxes)) self.assertTrue( np.array_equal(origin, res), diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 0eec7633a2ec1..4f5cfba0c1ab3 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -18,6 +18,7 @@ import os import tempfile import numpy as np +import paddle import paddle.utils as utils import paddle.fluid as fluid import paddle.fluid.profiler as profiler @@ -205,4 +206,5 @@ def test_errors(self): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_translated_layer.py b/python/paddle/fluid/tests/unittests/test_translated_layer.py index 4b0be989efe48..ba44c78f2c74d 100644 --- a/python/paddle/fluid/tests/unittests/test_translated_layer.py +++ b/python/paddle/fluid/tests/unittests/test_translated_layer.py @@ -16,6 +16,8 @@ import unittest import numpy as np +import tempfile +import os import paddle import paddle.nn as nn import paddle.optimizer as opt @@ -76,6 +78,9 @@ def train(layer, loader, loss_fn, opt): class TestTranslatedLayer(unittest.TestCase): + def tearDown(self): + self.temp_dir.cleanup() + def setUp(self): # enable dygraph mode place 
= paddle.CPUPlace() @@ -100,11 +105,14 @@ def setUp(self): drop_last=True, num_workers=0) + self.temp_dir = tempfile.TemporaryDirectory() + # train train(self.layer, self.loader, self.loss_fn, self.sgd) # save - self.model_path = "linear.example.model" + self.model_path = os.path.join(self.temp_dir.name, + './linear.example.model') paddle.jit.save(self.layer, self.model_path) def test_inference_and_fine_tuning(self): diff --git a/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py b/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py new file mode 100644 index 0000000000000..745cb6a178032 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py @@ -0,0 +1,395 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import numpy as np +import unittest + + +def call_TripletMarginLoss_layer( + input, + positive, + negative, + p=2, + margin=0.3, + swap=False, + eps=1e-6, + reduction='mean', +): + triplet_margin_loss = paddle.nn.TripletMarginLoss(p=p, + epsilon=eps, + margin=margin, + swap=swap, + reduction=reduction) + res = triplet_margin_loss( + input=input, + positive=positive, + negative=negative, + ) + return res + + +def call_TripletMarginLoss_functional( + input, + positive, + negative, + p=2, + margin=0.3, + swap=False, + eps=1e-6, + reduction='mean', +): + res = paddle.nn.functional.triplet_margin_loss(input=input, + positive=positive, + negative=negative, + p=p, + epsilon=eps, + margin=margin, + swap=swap, + reduction=reduction) + return res + + +def test_static(place, + input_np, + positive_np, + negative_np, + p=2, + margin=0.3, + swap=False, + eps=1e-6, + reduction='mean', + functional=False): + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.static.data(name='input', + shape=input_np.shape, + dtype='float64') + positive = paddle.static.data(name='positive', + shape=positive_np.shape, + dtype='float64') + negative = paddle.static.data(name='negative', + shape=negative_np.shape, + dtype='float64') + feed_dict = { + "input": input_np, + "positive": positive_np, + "negative": negative_np + } + + if functional: + res = call_TripletMarginLoss_functional(input=input, + positive=positive, + negative=negative, + p=p, + eps=eps, + margin=margin, + swap=swap, + reduction=reduction) + else: + res = call_TripletMarginLoss_layer(input=input, + positive=positive, + negative=negative, + p=p, + eps=eps, + margin=margin, + swap=swap, + reduction=reduction) + + exe = paddle.static.Executor(place) + static_result = exe.run(prog, feed=feed_dict, fetch_list=[res]) + return static_result + + +def test_dygraph(place, + input, + positive, + negative, + p=2, + margin=0.3, + swap=False, + eps=1e-6, + reduction='mean', + functional=False): + paddle.disable_static() + input = paddle.to_tensor(input) + positive = paddle.to_tensor(positive) + negative = paddle.to_tensor(negative) + + if 
functional: + dy_res = call_TripletMarginLoss_functional(input=input, + positive=positive, + negative=negative, + p=p, + eps=eps, + margin=margin, + swap=swap, + reduction=reduction) + else: + dy_res = call_TripletMarginLoss_layer(input=input, + positive=positive, + negative=negative, + p=p, + eps=eps, + margin=margin, + swap=swap, + reduction=reduction) + dy_result = dy_res.numpy() + paddle.enable_static() + return dy_result + + +def calc_triplet_margin_loss( + input, + positive, + negative, + p=2, + margin=0.3, + swap=False, + reduction='mean', +): + positive_dist = np.linalg.norm((input - positive), p, axis=1) + negative_dist = np.linalg.norm((input - negative), p, axis=1) + + if swap: + swap_dist = np.linalg.norm((positive - negative), p, axis=1) + negative_dist = np.minimum(negative_dist, swap_dist) + expected = np.maximum(positive_dist - negative_dist + margin, 0) + + if reduction == 'mean': + expected = np.mean(expected) + elif reduction == 'sum': + expected = np.sum(expected) + else: + expected = expected + + return expected + + +class TestTripletMarginLoss(unittest.TestCase): + + def test_TripletMarginLoss(self): + shape = (2, 2) + input = np.random.uniform(0.1, 0.8, size=shape).astype(np.float64) + positive = np.random.uniform(0, 2, size=shape).astype(np.float64) + negative = np.random.uniform(0, 2, size=shape).astype(np.float64) + + places = [paddle.CPUPlace()] + if paddle.device.is_compiled_with_cuda(): + places.append(paddle.CUDAPlace(0)) + reductions = ['sum', 'mean', 'none'] + for place in places: + for reduction in reductions: + expected = calc_triplet_margin_loss(input=input, + positive=positive, + negative=negative, + reduction=reduction) + + dy_result = test_dygraph( + place=place, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + ) + + static_result = test_static( + place=place, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + ) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static(place=place, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + functional=True) + dy_functional = test_dygraph(place=place, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_TripletMarginLoss_error(self): + paddle.disable_static() + self.assertRaises(ValueError, + paddle.nn.loss.TripletMarginLoss, + reduction="unsupport reduction") + input = paddle.to_tensor([[0.1, 0.3]], dtype='float32') + positive = paddle.to_tensor([[0.0, 1.0]], dtype='float32') + negative = paddle.to_tensor([[0.2, 0.1]], dtype='float32') + self.assertRaises(ValueError, + paddle.nn.functional.triplet_margin_loss, + input=input, + positive=positive, + negative=negative, + reduction="unsupport reduction") + paddle.enable_static() + + def test_TripletMarginLoss_dimension(self): + paddle.disable_static() + + input = paddle.to_tensor([[0.1, 0.3], [1, 2]], dtype='float32') + positive = paddle.to_tensor([[0.0, 1.0]], dtype='float32') + negative = paddle.to_tensor([[0.2, 0.1]], dtype='float32') + self.assertRaises( + ValueError, + paddle.nn.functional.triplet_margin_loss, + input=input, + positive=positive, + negative=negative, + ) + 
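# Reference for the expected values checked in this file (mirrors
# calc_triplet_margin_loss above): the per-sample loss is
#     L = max(||a - p||_p - ||a - n||_p + margin, 0)
# and with swap=True the negative distance is replaced by
#     min(||a - n||_p, ||p - n||_p).
# Worked example with p=2 and margin=0.3: a=(0, 0), positive=(0, 1),
# negative=(0, 2) gives d_ap=1.0, d_an=2.0,
# so L = max(1.0 - 2.0 + 0.3, 0) = 0.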
TMLoss = paddle.nn.loss.TripletMarginLoss() + self.assertRaises( + ValueError, + TMLoss, + input=input, + positive=positive, + negative=negative, + ) + paddle.enable_static() + + def test_TripletMarginLoss_swap(self): + reduction = 'mean' + place = paddle.CPUPlace() + shape = (2, 2) + input = np.random.uniform(0.1, 0.8, size=shape).astype(np.float64) + positive = np.random.uniform(0, 2, size=shape).astype(np.float64) + negative = np.random.uniform(0, 2, size=shape).astype(np.float64) + expected = calc_triplet_margin_loss(input=input, + swap=True, + positive=positive, + negative=negative, + reduction=reduction) + + dy_result = test_dygraph( + place=place, + swap=True, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + ) + + static_result = test_static( + place=place, + swap=True, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + ) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static(place=place, + swap=True, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + functional=True) + dy_functional = test_dygraph(place=place, + swap=True, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_TripletMarginLoss_margin(self): + paddle.disable_static() + + input = paddle.to_tensor([[0.1, 0.3]], dtype='float32') + positive = paddle.to_tensor([[0.0, 1.0]], dtype='float32') + negative = paddle.to_tensor([[0.2, 0.1]], dtype='float32') + margin = -0.5 + self.assertRaises( + ValueError, + paddle.nn.functional.triplet_margin_loss, + margin=margin, + input=input, + positive=positive, + negative=negative, + ) + paddle.enable_static() + + def test_TripletMarginLoss_p(self): + p = 3 + shape = (2, 2) + reduction = 'mean' + place = paddle.CPUPlace() + input = np.random.uniform(0.1, 0.8, size=shape).astype(np.float64) + positive = np.random.uniform(0, 2, size=shape).astype(np.float64) + negative = np.random.uniform(0, 2, size=shape).astype(np.float64) + expected = calc_triplet_margin_loss(input=input, + p=p, + positive=positive, + negative=negative, + reduction=reduction) + + dy_result = test_dygraph( + place=place, + p=p, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + ) + + static_result = test_static( + place=place, + p=p, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + ) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static(place=place, + p=p, + input_np=input, + positive_np=positive, + negative_np=negative, + reduction=reduction, + functional=True) + dy_functional = test_dygraph(place=place, + p=p, + input=input, + positive=positive, + negative=negative, + reduction=reduction, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 
c78c89964c92e..b5662f9ecf4f9 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -934,89 +934,91 @@ class Model(object): Args: network (paddle.nn.Layer): The network is an instance of paddle.nn.Layer. - inputs (InputSpec|list|tuple|dict|None): `inputs`, entry points of network, + inputs (InputSpec|list|tuple|dict|None, optional): `inputs`, entry points of network, could be a InputSpec instance, or list/tuple of InputSpec instances, or dict ({name: InputSpec}), and it couldn't be None in static - graph. - labels (InputSpec|list|tuple|None): `labels`, entry points of network, + graph. Default: None. + labels (InputSpec|list|tuple|None, optional): `labels`, entry points of network, could be a InputSpec instnace or list/tuple of InputSpec instances, or None. For static graph, if labels is required in loss, - labels must be set. Otherwise, it could be None. + labels must be set. Otherwise, it could be None. Default: None. Examples: 1. A common example .. code-block:: python + :name: code-example1 - import paddle - import paddle.nn as nn - import paddle.vision.transforms as T - from paddle.static import InputSpec - - device = paddle.set_device('cpu') # or 'gpu' - - net = nn.Sequential( - nn.Flatten(1), - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10)) - - # inputs and labels are not required for dynamic graph. - input = InputSpec([None, 784], 'float32', 'x') - label = InputSpec([None, 1], 'int64', 'label') - - model = paddle.Model(net, input, label) - optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameters=model.parameters()) - - model.prepare(optim, + import paddle + import paddle.nn as nn + import paddle.vision.transforms as T + from paddle.static import InputSpec + + device = paddle.set_device('cpu') # or 'gpu' + + net = nn.Sequential( + nn.Flatten(1), + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + + # inputs and labels are not required for dynamic graph. + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + + model = paddle.Model(net, input, label) + optim = paddle.optimizer.SGD(learning_rate=1e-3, + parameters=model.parameters()) + + model.prepare(optim, paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy()) - - transform = T.Compose([ - T.Transpose(), - T.Normalize([127.5], [127.5]) - ]) - data = paddle.vision.datasets.MNIST(mode='train', transform=transform) - model.fit(data, epochs=2, batch_size=32, verbose=1) + + transform = T.Compose([ + T.Transpose(), + T.Normalize([127.5], [127.5]) + ]) + data = paddle.vision.datasets.MNIST(mode='train', transform=transform) + model.fit(data, epochs=2, batch_size=32, verbose=1) 2. An example using mixed precision training. .. 
code-block:: python - - # required: gpu - import paddle - import paddle.nn as nn - import paddle.vision.transforms as T + :name: code-example2 - def run_example_code(): - device = paddle.set_device('gpu') + # required: gpu + import paddle + import paddle.nn as nn + import paddle.vision.transforms as T - net = nn.Sequential(nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), - nn.Linear(200, 10)) + def run_example_code(): + device = paddle.set_device('gpu') - model = paddle.Model(net) - optim = paddle.optimizer.SGD(learning_rate=1e-3, parameters=model.parameters()) + net = nn.Sequential(nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), + nn.Linear(200, 10)) - amp_configs = { - "level": "O1", - "custom_white_list": {'conv2d'}, - "use_dynamic_loss_scaling": True - } - model.prepare(optim, - paddle.nn.CrossEntropyLoss(), - paddle.metric.Accuracy(), - amp_configs=amp_configs) + model = paddle.Model(net) + optim = paddle.optimizer.SGD(learning_rate=1e-3, parameters=model.parameters()) - transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) - data = paddle.vision.datasets.MNIST(mode='train', transform=transform) - model.fit(data, epochs=2, batch_size=32, verbose=1) + amp_configs = { + "level": "O1", + "custom_white_list": {'conv2d'}, + "use_dynamic_loss_scaling": True + } + model.prepare(optim, + paddle.nn.CrossEntropyLoss(), + paddle.metric.Accuracy(), + amp_configs=amp_configs) - # mixed precision training is only supported on GPU now. - if paddle.is_compiled_with_cuda(): - run_example_code() + transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) + data = paddle.vision.datasets.MNIST(mode='train', transform=transform) + model.fit(data, epochs=2, batch_size=32, verbose=1) + + # mixed precision training is only supported on GPU now. + if paddle.is_compiled_with_cuda(): + run_example_code() """ @@ -1059,12 +1061,12 @@ def train_batch(self, inputs, labels=None, update=True): inputs (numpy.ndarray|Tensor|list): Batch of input data. It could be a numpy array or paddle.Tensor, or a list of arrays or tensors (in case the model has multiple inputs). - labels (numpy.ndarray|Tensor|list): Batch of labels. It could be + labels (numpy.ndarray|Tensor|list, optional): Batch of labels. It could be a numpy array or paddle.Tensor, or a list of arrays or tensors (in case the model has multiple labels). If has no labels, - set None. Default is None. - update (bool): Whether update parameters after loss.backward() computing. - Using it to accumulate gradients. Default is True. + set None. Default: None. + update (bool, optional): Whether update parameters after loss.backward() computing. + Set it to False to accumulate gradients. Default: True. Returns: A list of scalar training loss if the model has no metrics, @@ -1074,29 +1076,30 @@ def train_batch(self, inputs, labels=None, update=True): Examples: .. 
code-block:: python + :name: code-example-train-batch - import numpy as np - import paddle - import paddle.nn as nn - from paddle.static import InputSpec - - device = paddle.set_device('cpu') # or 'gpu' - - net = nn.Sequential( - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10)) - - input = InputSpec([None, 784], 'float32', 'x') - label = InputSpec([None, 1], 'int64', 'label') - model = paddle.Model(net, input, label) - optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameters=model.parameters()) - model.prepare(optim, paddle.nn.CrossEntropyLoss()) - data = np.random.random(size=(4,784)).astype(np.float32) - label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64) - loss = model.train_batch([data], [label]) - print(loss) + import paddle + import paddle.nn as nn + from paddle.static import InputSpec + + device = paddle.set_device('cpu') # or 'gpu' + + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(net, input, label) + optim = paddle.optimizer.SGD(learning_rate=1e-3, + parameters=model.parameters()) + model.prepare(optim, paddle.nn.CrossEntropyLoss()) + data = paddle.rand((4, 784), dtype="float32") + label = paddle.randint(0, 10, (4, 1), dtype="int64") + loss = model.train_batch([data], [label]) + print(loss) + # [array([2.192784], dtype=float32)] """ loss = self._adapter.train_batch(inputs, labels, update) if fluid._non_static_mode() and self._input_info is None: @@ -1112,10 +1115,10 @@ def eval_batch(self, inputs, labels=None): inputs (numpy.ndarray|Tensor|list): Batch of input data. It could be a numpy array or paddle.Tensor, or a list of arrays or tensors (in case the model has multiple inputs). - labels (numpy.ndarray|Tensor|list): Batch of labels. It could be + labels (numpy.ndarray|Tensor|list, optional): Batch of labels. It could be a numpy array or paddle.Tensor, or a list of arrays or tensors (in case the model has multiple labels). If has no labels, - set None. Default is None. + set None. Default: None. Returns: A list of scalar testing loss if the model has no metrics, @@ -1125,30 +1128,31 @@ def eval_batch(self, inputs, labels=None): Examples: .. 
code-block:: python - - import numpy as np - import paddle - import paddle.nn as nn - from paddle.static import InputSpec - - device = paddle.set_device('cpu') # or 'gpu' - - net = nn.Sequential( - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10)) - - input = InputSpec([None, 784], 'float32', 'x') - label = InputSpec([None, 1], 'int64', 'label') - model = paddle.Model(net, input, label) - optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameters=model.parameters()) - model.prepare(optim, - paddle.nn.CrossEntropyLoss()) - data = np.random.random(size=(4,784)).astype(np.float32) - label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64) - loss = model.eval_batch([data], [label]) - print(loss) + :name: code-example-eval-batch + + import paddle + import paddle.nn as nn + from paddle.static import InputSpec + + device = paddle.set_device('cpu') # or 'gpu' + + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(net, input, label) + optim = paddle.optimizer.SGD(learning_rate=1e-3, + parameters=model.parameters()) + model.prepare(optim, + paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) + data = paddle.rand((4, 784), dtype="float32") + label = paddle.randint(0, 10, (4, 1), dtype="int64") + loss, acc = model.eval_batch([data], [label]) + print(loss, acc) + # [array([2.8825705], dtype=float32)] [0.0] """ loss = self._adapter.eval_batch(inputs, labels) if fluid._non_static_mode() and self._input_info is None: @@ -1172,28 +1176,31 @@ def predict_batch(self, inputs): Examples: .. code-block:: python - - import numpy as np - import paddle - import paddle.nn as nn - from paddle.static import InputSpec - - device = paddle.set_device('cpu') # or 'gpu' - - input = InputSpec([None, 784], 'float32', 'x') - label = InputSpec([None, 1], 'int64', 'label') - - net = nn.Sequential( - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10), - nn.Softmax()) - - model = paddle.Model(net, input, label) - model.prepare() - data = np.random.random(size=(4,784)).astype(np.float32) - out = model.predict_batch([data]) - print(out) + :name: code-example-predict-batch + + import paddle + import paddle.nn as nn + from paddle.static import InputSpec + + device = paddle.set_device('cpu') # or 'gpu' + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10), + nn.Softmax()) + + model = paddle.Model(net, input, label) + model.prepare() + data = paddle.rand((1, 784), dtype="float32") + out = model.predict_batch([data]) + print(out) + # [array([[0.08189095, 0.16740078, 0.06889386, 0.05085445, 0.10729759, + # 0.02217775, 0.14518553, 0.1591538 , 0.01808308, 0.17906217]], + # dtype=float32)] """ loss = self._adapter.predict_batch(inputs) if fluid._non_static_mode() and self._input_info is None: @@ -1229,6 +1236,7 @@ def save(self, path, training=True): Examples: .. code-block:: python + :name: code-example-save import paddle import paddle.nn as nn @@ -1259,7 +1267,7 @@ def forward(self, x): optim = paddle.optimizer.SGD(learning_rate=1e-3, parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) - + transform = T.Compose([ T.Transpose(), T.Normalize([127.5], [127.5]) @@ -1294,14 +1302,14 @@ def load(self, path, skip_mismatch=False, reset_optimizer=False): optimizer states. 
The files would be `path.pdparams` and `path.pdopt` separately, and the latter is not necessary when no need to restore. - skip_mismatch (bool): Whether to skip the loading of mismatch + skip_mismatch (bool, optional): Whether to skip the loading of mismatch parameter or raise an error when mismatch happens (not found the parameter in file storing model states of or receives a - mismatch shape). - reset_optimizer (bool): If True, ignore the providing file storing + mismatch shape). Default: False. + reset_optimizer (bool, optional): If True, ignore the providing file storing optimizer states and initialize optimizer states from scratch. Otherwise, restore optimizer states from `path.pdopt` if - a optimizer has been set to the model. Default False. + a optimizer has been set to the model. Default: False. Returns: None @@ -1309,23 +1317,24 @@ def load(self, path, skip_mismatch=False, reset_optimizer=False): Examples: .. code-block:: python - - import paddle - import paddle.nn as nn - from paddle.static import InputSpec + :name: code-example-load + + import paddle + import paddle.nn as nn + from paddle.static import InputSpec - device = paddle.set_device('cpu') + device = paddle.set_device('cpu') - input = InputSpec([None, 784], 'float32', 'x') + input = InputSpec([None, 784], 'float32', 'x') - model = paddle.Model(nn.Sequential( - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10), - nn.Softmax()), input) + model = paddle.Model(nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10), + nn.Softmax()), input) - model.save('checkpoint/test') - model.load('checkpoint/test') + model.save('checkpoint/test') + model.load('checkpoint/test') """ def _load_state_from_path(path): @@ -1395,19 +1404,20 @@ def parameters(self, *args, **kwargs): Examples: .. code-block:: python + :name: code-example-parameters + + import paddle + import paddle.nn as nn + from paddle.static import InputSpec - import paddle - import paddle.nn as nn - from paddle.static import InputSpec - - input = InputSpec([None, 784], 'float32', 'x') - - model = paddle.Model(nn.Sequential( - nn.Linear(784, 200), - nn.Tanh(), - nn.Linear(200, 10)), input) + input = InputSpec([None, 784], 'float32', 'x') + + model = paddle.Model(nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)), input) - params = model.parameters() + params = model.parameters() """ return self._adapter.parameters() @@ -1501,16 +1511,16 @@ def prepare(self, Configures the model before runing. Args: - optimizer (Optimizer|None): Optimizer must be set in training + optimizer (Optimizer|None, optional): Optimizer must be set in training and should be a Optimizer instance. It can be None in eval - and test mode. - loss (Loss|callable function|None): Loss function can + and test mode. Default: None. + loss (Loss|Callable|None, optional): Loss function can be a `paddle.nn.Layer` instance or any callable function taken the predicted values and ground truth values as input. - It can be None when there is no loss. - metrics (Metric|list of Metric|None): If metrics is set, all - metrics will be calculated and output in train/eval mode. - amp_configs (str|dict|None): AMP configurations. If AMP or pure + It can be None when there is no loss. Default: None. + metrics (Metric|list[Metric]|None, optional): If metrics is set, all + metrics will be calculated and output in train/eval mode. Default: None. + amp_configs (str|dict|None, optional): AMP configurations. 
If AMP or pure float16 training is used, the key 'level' of 'amp_configs' should be set to 'O1' or 'O2' respectively. Otherwise, the value of 'level' defaults to 'O0', which means float32 @@ -1526,6 +1536,7 @@ def prepare, for details. For convenience, 'amp_configs' could be set to 'O1' or 'O2' if no more parameters are needed. 'amp_configs' could be None in float32 training. Default: None. + Returns: None """ @@ -1587,133 +1598,133 @@ def fit(self, evaluation will be done at the end of each epoch. Args: - train_data (Dataset|DataLoader): An iterable data loader is used for + train_data (Dataset|DataLoader, optional): An iterable data loader is used for train. An instance of paddle paddle.io.Dataset or paddle.io.Dataloader is recomended. Default: None. - eval_data (Dataset|DataLoader): An iterable data loader is used for + eval_data (Dataset|DataLoader, optional): An iterable data loader is used for evaluation at the end of epoch. If None, will not do evaluation. An instance of paddle.io.Dataset or paddle.io.Dataloader is recomended. Default: None. - batch_size (int): Integer number. The batch size of train_data - and eval_data. When train_data and eval_data are both the - instance of Dataloader, this parameter will be ignored. - Default: 1. - epochs (int): Integer number. The number of epochs to train - the model. Default: 1. - eval_freq (int): The frequency, in number of epochs, an evalutation + batch_size (int, optional): The batch size of train_data and eval_data. When + train_data and eval_data are both the instance of Dataloader, this + parameter will be ignored. Default: 1. + epochs (int, optional): The number of epochs to train the model. Default: 1. + eval_freq (int, optional): The frequency, in number of epochs, an evaluation is performed. Default: 1. - log_freq (int): The frequency, in number of steps, the training logs + log_freq (int, optional): The frequency, in number of steps, the training logs are printed. Default: 10. - save_dir(str|None): The directory to save checkpoint during training. + save_dir(str|None, optional): The directory to save checkpoint during training. If None, will not save checkpoint. Default: None. - save_freq (int): The frequency, in number of epochs, to save + save_freq (int, optional): The frequency, in number of epochs, to save checkpoint. Default: 1. - verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent, + verbose (int, optional): The verbosity mode, should be 0, 1, or 2. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2. - drop_last (bool): Whether drop the last incomplete batch of + drop_last (bool, optional): Whether to drop the last incomplete batch of train_data when dataset size is not divisible by the batch size. When train_data is an instance of Dataloader, this parameter will be ignored. Default: False. - shuffle (bool): Whther to shuffle train_data. When train_data is + shuffle (bool, optional): Whether to shuffle train_data. When train_data is an instance of Dataloader, this parameter will be ignored. Default: True. - num_workers (int): The number of subprocess to load data, 0 for no + num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess used and loading data in main process. When train_data and eval_data are both the instance of Dataloader, this parameter will be ignored. Default: 0. - callbacks (Callback|None): A list of `Callback` instances to apply - during training. If None, `ProgBarLogger` and `ModelCheckpoint` - are automatically inserted. Default: None.
- accumulate_grad_batches (int): The number of batches to accumulate gradident + callbacks (Callback|None, optional): A list of `Callback` instances to apply + during training. If None, :ref:`api_paddle_callbacks_ProgBarLogger` and + :ref:`api_paddle_callbacks_ModelCheckpoint` are automatically inserted. Default: None. + accumulate_grad_batches (int, optional): The number of batches to accumulate gradient during training process before optimizer updates. It can mimic large batch size. Default: 1. - num_iters (int|None): Integer number. The number of iterations to train - the model. If None, follow `epochs` to train the model, otherwise, train - the model `num_iters` times. Default: None. - + num_iters (int|None, optional): The number of iterations to train the model. + If None, follow `epochs` to train the model; otherwise, train the model + `num_iters` times. Default: None. + Returns: None Examples: - 1. An example use Dataset and set btch size, shuffle in fit. + 1. An example using Dataset, with batch size and shuffle set in fit. How to make a batch is done internally. .. code-block:: python :name: code-example-fit-1 - import paddle - import paddle.vision.transforms as T - from paddle.vision.datasets import MNIST - from paddle.static import InputSpec - - dynamic = True - if not dynamic: - paddle.enable_static() - - transform = T.Compose([ - T.Transpose(), - T.Normalize([127.5], [127.5]) - ]) - train_dataset = MNIST(mode='train', transform=transform) - val_dataset = MNIST(mode='test', transform=transform) - - input = InputSpec([None, 1, 28, 28], 'float32', 'image') - label = InputSpec([None, 1], 'int64', 'label') - - model = paddle.Model( - paddle.vision.models.LeNet(), - input, label) - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) - model.prepare( - optim, - paddle.nn.CrossEntropyLoss(), - paddle.metric.Accuracy(topk=(1, 2))) - model.fit(train_dataset, - val_dataset, - epochs=2, - batch_size=64, - save_dir='mnist_checkpoint') + import paddle + import paddle.vision.transforms as T + from paddle.vision.datasets import MNIST + from paddle.static import InputSpec + + dynamic = True + if not dynamic: + paddle.enable_static() + + transform = T.Compose([ + T.Transpose(), + T.Normalize([127.5], [127.5]) + ]) + train_dataset = MNIST(mode='train', transform=transform) + val_dataset = MNIST(mode='test', transform=transform) + + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') + + model = paddle.Model( + paddle.vision.models.LeNet(), + input, label) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + paddle.nn.CrossEntropyLoss(), + paddle.metric.Accuracy(topk=(1, 2))) + model.fit(train_dataset, + val_dataset, + epochs=2, + batch_size=64, + save_dir='mnist_checkpoint') 2. An example use DataLoader, batch size and shuffle is set in DataLoader. ..
code-block:: python + :name: code-example-fit-2 + + import paddle + import paddle.vision.transforms as T + from paddle.vision.datasets import MNIST + from paddle.static import InputSpec - import paddle - import paddle.vision.transforms as T - from paddle.vision.datasets import MNIST - from paddle.static import InputSpec + dynamic = True + if not dynamic: + paddle.enable_static() + + transform = T.Compose([ + T.Transpose(), + T.Normalize([127.5], [127.5]) + ]) + train_dataset = MNIST(mode='train', transform=transform) + train_loader = paddle.io.DataLoader(train_dataset, + batch_size=64) + val_dataset = MNIST(mode='test', transform=transform) + val_loader = paddle.io.DataLoader(val_dataset, + batch_size=64) + + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') - dynamic = True - if not dynamic: - paddle.enable_static() - - transform = T.Compose([ - T.Transpose(), - T.Normalize([127.5], [127.5]) - ]) - train_dataset = MNIST(mode='train', transform=transform) - train_loader = paddle.io.DataLoader(train_dataset, - batch_size=64) - val_dataset = MNIST(mode='test', transform=transform) - val_loader = paddle.io.DataLoader(val_dataset, - batch_size=64) - - input = InputSpec([None, 1, 28, 28], 'float32', 'image') - label = InputSpec([None, 1], 'int64', 'label') - - model = paddle.Model( - paddle.vision.models.LeNet(), input, label) - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) - model.prepare( - optim, - paddle.nn.CrossEntropyLoss(), - paddle.metric.Accuracy(topk=(1, 2))) - model.fit(train_loader, - val_loader, - epochs=2, - save_dir='mnist_checkpoint') + model = paddle.Model( + paddle.vision.models.LeNet(), input, label) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + paddle.nn.CrossEntropyLoss(), + paddle.metric.Accuracy(topk=(1, 2))) + model.fit(train_loader, + val_loader, + epochs=2, + save_dir='mnist_checkpoint') """ assert train_data is not None, \ "train_data must be given!" @@ -1809,23 +1820,23 @@ def evaluate(self, eval_data (Dataset|DataLoader): An iterable data loader is used for evaluation. An instance of paddle.io.Dataset or paddle.io.Dataloader is recomended. - batch_size (int): Integer number. The batch size of train_data - and eval_data. When eval_data is the instance of Dataloader, - this argument will be ignored. Default: 1. - log_freq (int): The frequency, in number of steps, the eval logs + batch_size (int, optional): The batch size of train_data and eval_data. + When eval_data is the instance of Dataloader, this argument will be + ignored. Default: 1. + log_freq (int, optional): The frequency, in number of steps, the eval logs are printed. Default: 10. - verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent, + verbose (int, optional): The verbosity mode, should be 0, 1, or 2. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2. - num_workers (int): The number of subprocess to load data, + num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess used and loading data in main process. When train_data and eval_data are both the instance of Dataloader, this parameter will be ignored. Default: 0. - callbacks (Callback|None): A list of `Callback` instances to apply + callbacks (Callback|None, optional): A list of `Callback` instances to apply during training. If None, `ProgBarLogger` and `ModelCheckpoint` are automatically inserted. Default: None. 
- num_iters (int|None): Integer number. The number of iterations to - evaluate the model. If None, evaluate on whole input dataset, - otherwise, evaluate `num_iters` times. Default: None. + num_iters (int|None, optional): The number of iterations to evaluate the model. + If None, evaluate on whole input dataset, otherwise, evaluate `num_iters` times. + Default: None. Returns: dict: Result of metric. The key is the names of Metric, value is a scalar or numpy.array. @@ -1833,24 +1844,26 @@ def evaluate(self, Examples: .. code-block:: python + :name: code-example-evaluate - import paddle - import paddle.vision.transforms as T - from paddle.static import InputSpec + import paddle + import paddle.vision.transforms as T + from paddle.static import InputSpec - # declarative mode - transform = T.Compose([ - T.Transpose(), - T.Normalize([127.5], [127.5]) - ]) - val_dataset = paddle.vision.datasets.MNIST(mode='test', transform=transform) + # declarative mode + transform = T.Compose([ + T.Transpose(), + T.Normalize([127.5], [127.5]) + ]) + val_dataset = paddle.vision.datasets.MNIST(mode='test', transform=transform) - input = InputSpec([-1, 1, 28, 28], 'float32', 'image') - label = InputSpec([None, 1], 'int64', 'label') - model = paddle.Model(paddle.vision.models.LeNet(), input, label) - model.prepare(metrics=paddle.metric.Accuracy()) - result = model.evaluate(val_dataset, batch_size=64) - print(result) + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(paddle.vision.models.LeNet(), input, label) + model.prepare(metrics=paddle.metric.Accuracy()) + result = model.evaluate(val_dataset, batch_size=64) + print(result) + # {'acc': 0.0699} """ if eval_data is not None and isinstance(eval_data, Dataset): @@ -1912,21 +1925,20 @@ def predict(self, test_data (Dataset|DataLoader): An iterable data loader is used for predict. An instance of paddle.io.Dataset or paddle.io.Dataloader is recomended. - batch_size (int): Integer number. The batch size of train_data and eval_data. - When train_data and eval_data are both the instance of Dataloader, this - argument will be ignored. Default: 1. - num_workers (int): The number of subprocess to load data, 0 for no subprocess - used and loading data in main process. When train_data and eval_data are - both the instance of Dataloader, this argument will be ignored. Default: 0. - stack_outputs (bool): Whether stack output field like a batch, as for an output - filed of a sample is in shape [X, Y], test_data contains N samples, predict + batch_size (int, optional): The batch size of test_data. When test_data is the + instance of Dataloader, this argument will be ignored. Default: 1. + num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess + used and loading data in main process. When test_data is the instance of Dataloader, + this argument will be ignored. Default: 0. + stack_outputs (bool, optional): Whether stack output field like a batch, as for an output + field of a sample is in shape [X, Y], test_data contains N samples, predict output field will be in shape [N, X, Y] if stack_output is True, and will - be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs + be a length N list in shape [[X, Y], [X, Y], ..., [X, Y]] if stack_outputs is False. stack_outputs as False is used for LoDTensor output situation, it is recommended set as True if outputs contains no LoDTensor. Default: False. - verbose (int): The verbosity mode, should be 0, 1, or 2. 
0 = silent, + verbose (int, optional): The verbosity mode, should be 0, 1, or 2. 0 = silent, 1 = progress bar, 2 = one line per batch. Default: 1. - callbacks(Callback): A Callback instance, default None. + callbacks(Callback, optional): A Callback instance, Default: None. Returns: list: output of models. @@ -1934,43 +1946,46 @@ def predict(self, Examples: .. code-block:: python + :name: code-example-predict - import numpy as np - import paddle - from paddle.static import InputSpec + import numpy as np + import paddle + from paddle.static import InputSpec - class MnistDataset(paddle.vision.datasets.MNIST): - def __init__(self, mode, return_label=True): - super(MnistDataset, self).__init__(mode=mode) - self.return_label = return_label - - def __getitem__(self, idx): - img = np.reshape(self.images[idx], [1, 28, 28]) - if self.return_label: - return img, np.array(self.labels[idx]).astype('int64') - return img, - - def __len__(self): - return len(self.images) - - test_dataset = MnistDataset(mode='test', return_label=False) - - # imperative mode - input = InputSpec([-1, 1, 28, 28], 'float32', 'image') - model = paddle.Model(paddle.vision.models.LeNet(), input) - model.prepare() - result = model.predict(test_dataset, batch_size=64) - print(len(result[0]), result[0][0].shape) - - # declarative mode - device = paddle.set_device('cpu') - paddle.enable_static() - input = InputSpec([-1, 1, 28, 28], 'float32', 'image') - model = paddle.Model(paddle.vision.models.LeNet(), input) - model.prepare() - - result = model.predict(test_dataset, batch_size=64) - print(len(result[0]), result[0][0].shape) + class MnistDataset(paddle.vision.datasets.MNIST): + def __init__(self, mode, return_label=True): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + + def __getitem__(self, idx): + img = np.reshape(self.images[idx], [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + test_dataset = MnistDataset(mode='test', return_label=False) + + # imperative mode + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + model = paddle.Model(paddle.vision.models.LeNet(), input) + model.prepare() + result = model.predict(test_dataset, batch_size=64) + print(len(result[0]), result[0][0].shape) + # 157 (64, 10) + + # declarative mode + device = paddle.set_device('cpu') + paddle.enable_static() + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + model = paddle.Model(paddle.vision.models.LeNet(), input) + model.prepare() + + result = model.predict(test_dataset, batch_size=64) + print(len(result[0]), result[0][0].shape) + # 157 (64, 10) """ if test_data is not None and isinstance(test_data, Dataset): @@ -2164,23 +2179,25 @@ def summary(self, input_size=None, dtype=None): Examples: .. 
            .. code-block:: python
+              :name: code-example-summary
+
+              import paddle
+              from paddle.static import InputSpec
+
+              input = InputSpec([None, 1, 28, 28], 'float32', 'image')
+              label = InputSpec([None, 1], 'int64', 'label')
-              import paddle
-              from paddle.static import InputSpec
-
-              input = InputSpec([None, 1, 28, 28], 'float32', 'image')
-              label = InputSpec([None, 1], 'int64', 'label')
-
-              model = paddle.Model(paddle.vision.models.LeNet(),
-                  input, label)
-              optim = paddle.optimizer.Adam(
-                  learning_rate=0.001, parameters=model.parameters())
-              model.prepare(
-                  optim,
-                  paddle.nn.CrossEntropyLoss())
-
-              params_info = model.summary()
-              print(params_info)
+              model = paddle.Model(paddle.vision.models.LeNet(),
+                  input, label)
+              optim = paddle.optimizer.Adam(
+                  learning_rate=0.001, parameters=model.parameters())
+              model.prepare(
+                  optim,
+                  paddle.nn.CrossEntropyLoss())
+
+              params_info = model.summary()
+              print(params_info)
+              # {'total_params': 61610, 'trainable_params': 61610}
        """
        assert (input_size is not None or self._inputs
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index a1e02dab4707d..8b29659a1f400 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -109,6 +109,7 @@
 from .layer.loss import HingeEmbeddingLoss  # noqa: F401
 from .layer.loss import CosineEmbeddingLoss  # noqa: F401
 from .layer.loss import TripletMarginWithDistanceLoss
+from .layer.loss import TripletMarginLoss
 from .layer.norm import BatchNorm  # noqa: F401
 from .layer.norm import SyncBatchNorm  # noqa: F401
 from .layer.norm import GroupNorm  # noqa: F401
@@ -316,4 +317,5 @@ def weight_norm(*args):
     'CosineEmbeddingLoss',
     'RReLU',
     'TripletMarginWithDistanceLoss',
+    'TripletMarginLoss',
 ]
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index 43ce403ab0b23..cdb1135eba800 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -92,6 +92,7 @@
 from .loss import hinge_embedding_loss  # noqa: F401
 from .loss import cosine_embedding_loss  # noqa: F401
 from .loss import triplet_margin_with_distance_loss
+from .loss import triplet_margin_loss
 from .norm import batch_norm  # noqa: F401
 from .norm import instance_norm  # noqa: F401
 from .norm import layer_norm  # noqa: F401
@@ -234,4 +235,5 @@
     'cosine_embedding_loss',
     'rrelu',
     'triplet_margin_with_distance_loss',
+    'triplet_margin_loss',
 ]
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index c882ab08296ae..2f37f8a50f4d1 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -28,7 +28,7 @@
 from paddle.utils import deprecated
 from paddle import _C_ops
 from paddle import in_dynamic_mode
-from paddle.framework import core
+from paddle.framework import core, _non_static_mode
 from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _non_static_mode, _current_expected_place

 __all__ = []
@@ -2999,3 +2999,124 @@ def triplet_margin_with_distance_loss(input,
         return paddle.sum(loss, name=name)
     elif reduction == 'none':
         return loss
+
+
+def triplet_margin_loss(input,
+                        positive,
+                        negative,
+                        margin=1.0,
+                        p=2,
+                        epsilon=1e-6,
+                        swap=False,
+                        reduction='mean',
+                        name=None):
+    r"""
+    Measures the triplet loss given input
+    tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`.
+    This is used for measuring a relative similarity between samples. A triplet
+    is composed of `input`, `positive` and `negative` (i.e., `input`, `positive examples` and `negative
+    examples` respectively). The shapes of all input tensors should be
+    :math:`(N, *)`.
+
+    The loss function for each sample in the mini-batch is:
+
+    .. math::
+        L(input, pos, neg) = \max \{d(input_i, pos_i) - d(input_i, neg_i) + {\rm margin}, 0\}
+
+    where
+
+    .. math::
+        d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p
+
+    Parameters:
+        input (Tensor): Input tensor, with data type float32 or float64.
+            Its shape is [N, \*], where N is the batch size and `\*` means any number of additional dimensions.
+
+        positive (Tensor): Positive tensor, with data type float32 or float64.
+            Its shape is the same as the shape of input.
+
+        negative (Tensor): Negative tensor, with data type float32 or float64.
+            Its shape is the same as the shape of input.
+
+        margin (float, optional): The margin value. Default: :math:`1`.
+
+        p (int, optional): The norm degree for pairwise distance. Default: :math:`2`.
+
+        epsilon (float, optional): A small value added to avoid division by zero.
+            Default: 1e-6.
+
+        swap (bool, optional): Whether to use distance swap, which changes the negative distance
+            to the distance between the positive sample and the negative sample. For more details,
+            see `Learning shallow convolutional feature descriptors with triplet losses`.
+            Default: ``False``.
+
+        reduction (str, optional): Indicates how to average the loss by batch_size.
+            The candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            If :attr:`reduction` is ``'sum'``, the summed loss is returned.
+            Default: ``'mean'``
+
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        Tensor. The tensor storing the triplet_margin_loss of input, positive and negative.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn.functional as F
+
+            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            positive = paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            loss = F.triplet_margin_loss(input, positive, negative, margin=1.0, reduction='none')
+            print(loss)
+            # Tensor([0. , 0.57496738, 0. ])
+
+            loss = F.triplet_margin_loss(input, positive, negative, margin=1.0, reduction='mean')
+            print(loss)
+            # Tensor([0.19165580])
+
+    """
+    if reduction not in ['sum', 'mean', 'none']:
+        raise ValueError(
+            "'reduction' in 'triplet_margin_loss' should be 'sum', 'mean' or 'none', "
+            "but received {}.".format(reduction))
+    if margin < 0:
+        raise ValueError(
+            "The margin between positive samples and negative samples should not be negative."
+        )
+    if not _non_static_mode():
+        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
+                                 'triplet_margin_loss')
+        check_variable_and_dtype(positive, 'positive', ['float32', 'float64'],
+                                 'triplet_margin_loss')
+        check_variable_and_dtype(negative, 'negative', ['float32', 'float64'],
+                                 'triplet_margin_loss')
+
+    if not (input.shape == positive.shape == negative.shape):
+        raise ValueError("The shapes of input, positive and negative "
+                         "must be the same.")
+
+    distance_function = paddle.nn.PairwiseDistance(p, epsilon=epsilon)
+    positive_dist = distance_function(input, positive)
+    negative_dist = distance_function(input, negative)
+
+    if swap:
+        swap_dist = distance_function(positive, negative)
+        negative_dist = paddle.minimum(negative_dist, swap_dist)
+
+    loss = paddle.clip(positive_dist - negative_dist + margin, min=0.0)
+
+    if reduction == 'mean':
+        return paddle.mean(loss, name=name)
+    elif reduction == 'sum':
+        return paddle.sum(loss, name=name)
+    elif reduction == 'none':
+        return loss
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index a8e3d8ec1d464..e9ccee1bd3829 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -80,6 +80,7 @@
 from .loss import SmoothL1Loss  # noqa: F401
 from .loss import HingeEmbeddingLoss  # noqa: F401
 from .loss import TripletMarginWithDistanceLoss
+from .loss import TripletMarginLoss
 from .norm import BatchNorm1D  # noqa: F401
 from .norm import BatchNorm2D  # noqa: F401
 from .norm import BatchNorm3D  # noqa: F401
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 9b796d6965c33..1e72548ecc138 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -1507,3 +1507,109 @@ def forward(self, input, positive, negative):
             swap=self.swap,
             reduction=self.reduction,
             name=self.name)
+
+
+class TripletMarginLoss(Layer):
+    r"""
+    Creates a criterion that measures the triplet loss given input
+    tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`.
+    This is used for measuring a relative similarity between samples. A triplet
+    is composed of `input`, `positive` and `negative` (i.e., `input`, `positive examples` and `negative
+    examples` respectively). The shapes of all input tensors should be
+    :math:`(N, *)`.
+
+    The loss function for each sample in the mini-batch is:
+
+    .. math::
+        L(input, pos, neg) = \max \{d(input_i, pos_i) - d(input_i, neg_i) + {\rm margin}, 0\}
+
+    where
+
+    .. math::
+        d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p
+
+    Parameters:
+        margin (float, optional): The margin value. Default: :math:`1`.
+
+        p (int, optional): The norm degree for pairwise distance. Default: :math:`2`.
+
+        epsilon (float, optional): A small value added to avoid division by zero.
+            Default: 1e-6.
+
+        swap (bool, optional): Whether to use distance swap, which changes the negative distance
+            to the distance between the positive sample and the negative sample. For more details,
+            see `Learning shallow convolutional feature descriptors with triplet losses`.
+            Default: ``False``.
+
+        reduction (str, optional): Indicates how to average the loss by batch_size.
+            The candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            If :attr:`reduction` is ``'sum'``, the summed loss is returned.
+            Default: ``'mean'``
+
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
+
+    Call Parameters:
+        input (Tensor): Input tensor, with data type float32 or float64.
+            Its shape is [N, \*], where N is the batch size and `\*` means any number of additional dimensions.
+
+        positive (Tensor): Positive tensor, with data type float32 or float64.
+            Its shape is the same as the shape of input.
+
+        negative (Tensor): Negative tensor, with data type float32 or float64.
+            Its shape is the same as the shape of input.
+
+    Returns:
+        Tensor. The tensor storing the triplet_margin_loss of input, positive and negative.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            positive = paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            triplet_margin_loss = paddle.nn.TripletMarginLoss(reduction='none')
+            loss = triplet_margin_loss(input, positive, negative)
+            print(loss)
+            # Tensor([0. , 0.57496738, 0. ])
+
+            triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean')
+            loss = triplet_margin_loss(input, positive, negative)
+            print(loss)
+            # Tensor([0.19165580])
+
+    """
+
+    def __init__(self,
+                 margin=1.0,
+                 p=2.,
+                 epsilon=1e-6,
+                 swap=False,
+                 reduction='mean',
+                 name=None):
+        super(TripletMarginLoss, self).__init__()
+        if reduction not in ['sum', 'mean', 'none']:
+            raise ValueError(
+                "The value of 'reduction' in TripletMarginLoss should be 'sum', 'mean' or 'none', but "
+                "received %s, which is not allowed." % reduction)
+        self.margin = margin
+        self.p = p
+        self.epsilon = epsilon
+        self.swap = swap
+        self.reduction = reduction
+        self.name = name
+
+    def forward(self, input, positive, negative):
+        return F.triplet_margin_loss(input,
+                                     positive,
+                                     negative,
+                                     margin=self.margin,
+                                     p=self.p,
+                                     epsilon=self.epsilon,
+                                     swap=self.swap,
+                                     reduction=self.reduction,
+                                     name=self.name)
diff --git a/python/paddle/tests/test_hapi_amp.py b/python/paddle/tests/test_hapi_amp.py
index eaf10dbfc4c75..24df22ab5ea54 100644
--- a/python/paddle/tests/test_hapi_amp.py
+++ b/python/paddle/tests/test_hapi_amp.py
@@ -20,7 +20,7 @@
 os.environ['FLAGS_cudnn_deterministic'] = '1'

 import unittest
-
+import tempfile
 import numpy as np

 import paddle
@@ -101,7 +101,9 @@ def test_save_load(self):
                   batch_size=64,
                   num_iters=2,
                   log_freq=1)
-        model.save('./lenet_amp')
+        temp_dir = tempfile.TemporaryDirectory()
+        lenet_amp_path = os.path.join(temp_dir.name, 'lenet_amp')
+        model.save(lenet_amp_path)

         with paddle.fluid.unique_name.guard():
             paddle.seed(2021)
@@ -119,7 +121,8 @@ def test_save_load(self):
                          model._scaler.state_dict()['incr_count']))

         # equal after load
-        new_model.load('./lenet_amp')
+        new_model.load(lenet_amp_path)
+        temp_dir.cleanup()
         self.assertEqual(new_model._scaler.state_dict()['incr_count'],
                          model._scaler.state_dict()['incr_count'])
         self.assertEqual(new_model._scaler.state_dict()['decr_count'],
diff --git a/python/paddle/tests/test_read_file.py b/python/paddle/tests/test_read_file.py
index 0dad971a7308e..dc5c8fafcd8f6 100644
--- a/python/paddle/tests/test_read_file.py
+++ b/python/paddle/tests/test_read_file.py
@@ -16,6 +16,7 @@
 import cv2
 import shutil
 import unittest
+import tempfile
 import numpy as np

 import paddle
@@ -26,23 +27,25 @@ class TestReadFile(unittest.TestCase):
     def setUp(self):
         fake_img = (np.random.random((400, 300, 3)) * 255).astype('uint8')
-        cv2.imwrite('fake.jpg', fake_img)
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.img_path = os.path.join(self.temp_dir.name, 'fake.jpg')
+        cv2.imwrite(self.img_path, fake_img)

     def tearDown(self):
-        os.remove('fake.jpg')
+        self.temp_dir.cleanup()

     def read_file_decode_jpeg(self):
         if not paddle.is_compiled_with_cuda():
             return
-        img_bytes = read_file('fake.jpg')
+        img_bytes = read_file(self.img_path)

         img = decode_jpeg(img_bytes, mode='gray')
         img = decode_jpeg(img_bytes, mode='rgb')

         img = decode_jpeg(img_bytes)

-        img_cv2 = cv2.imread('fake.jpg')
+        img_cv2 = cv2.imread(self.img_path)

         if paddle.in_dynamic_mode():
             np.testing.assert_equal(img.shape,
                                     img_cv2.transpose(2, 0, 1).shape)
         else:
diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py
index 35a0f8edc4843..c46ab2eaf5f57 100644
--- a/python/paddle/tests/test_transforms.py
+++ b/python/paddle/tests/test_transforms.py
@@ -926,7 +926,8 @@ def test_image_load(self):
         fake_img = Image.fromarray((np.random.random(
             (32, 32, 3)) * 255).astype('uint8'))

-        path = 'temp.jpg'
+        temp_dir = tempfile.TemporaryDirectory()
+        path = os.path.join(temp_dir.name, 'temp.jpg')
         fake_img.save(path)

         set_image_backend('pil')
@@ -939,7 +940,7 @@ def test_image_load(self):

         np_img = image_load(path)

-        os.remove(path)
+        temp_dir.cleanup()

     def test_affine(self):
         np_img = (np.random.rand(32, 26, 3) * 255).astype('uint8')
diff --git a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py
index ba58fe7f57d50..b1263f62dca73 100644
--- a/python/paddle/vision/models/resnet.py
+++ b/python/paddle/vision/models/resnet.py
@@ -181,13 +181,16 @@ class ResNet(nn.Layer):
     Args:
         Block (BasicBlock|BottleneckBlock): block module of model.
-        depth (int, optional): layers of resnet, Default: 50.
+        depth (int, optional): layers of ResNet, Default: 50.
         width (int, optional): base width per convolution group for each convolution block, Default: 64.
         num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer
             will not be defined. Default: 1000.
         with_pool (bool, optional): use pool before the last fc layer or not. Default: True.
         groups (int, optional): number of groups for each convolution block, Default: 1.

+    Returns:
+        ResNet model. An instance of :ref:`api_fluid_dygraph_Layer`.
+
     Examples:
         .. code-block:: python
@@ -330,7 +333,11 @@ def resnet18(pretrained=False, **kwargs):
     `"Deep Residual Learning for Image Recognition" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNet 18-layer model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -358,7 +365,11 @@ def resnet34(pretrained=False, **kwargs):
     `"Deep Residual Learning for Image Recognition" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNet 34-layer model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -386,7 +397,11 @@ def resnet50(pretrained=False, **kwargs):
     `"Deep Residual Learning for Image Recognition" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNet 50-layer model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -414,7 +429,11 @@ def resnet101(pretrained=False, **kwargs):
     `"Deep Residual Learning for Image Recognition" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNet 101-layer model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -442,7 +461,11 @@ def resnet152(pretrained=False, **kwargs):
     `"Deep Residual Learning for Image Recognition" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNet 152-layer model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -470,7 +493,11 @@ def resnext50_32x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-50 32x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -500,7 +527,11 @@ def resnext50_64x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-50 64x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -530,7 +561,11 @@ def resnext101_32x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-101 32x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -561,7 +596,11 @@ def resnext101_64x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-101 64x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -592,7 +631,11 @@ def resnext152_32x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-152 32x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -623,7 +666,11 @@ def resnext152_64x4d(pretrained=False, **kwargs):
     `"Aggregated Residual Transformations for Deep Neural Networks" `_

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        ResNeXt-152 64x4d model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -654,7 +701,11 @@ def wide_resnet50_2(pretrained=False, **kwargs):
     `"Wide Residual Networks" `_.

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        Wide ResNet-50-2 model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
@@ -683,7 +734,11 @@ def wide_resnet101_2(pretrained=False, **kwargs):
     `"Wide Residual Networks" `_.

     Args:
-        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
+            on ImageNet. Default: False.
+
+    Returns:
+        Wide ResNet-101-2 model. An instance of :ref:`api_fluid_dygraph_Layer`.

     Examples:
         .. code-block:: python
diff --git a/python/setup.py.in b/python/setup.py.in
index bb6416038f198..8b6a456865176 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -614,6 +614,8 @@ headers = (
     list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/core', recursive=True)) +  # phi core headers
     list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/infermeta', recursive=True)) +  # phi infermeta headers
     list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/kernels', recursive=True)) +  # phi kernels headers
+    # capi headers
+    list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/capi', recursive=True)) +  # phi capi headers
     # utila api headers
     list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/utils', recursive=True)))  # paddle utils headers
diff --git a/tools/ci_op_benchmark.sh b/tools/ci_op_benchmark.sh
index 8e84eccc083f2..17cfb5923cf97 100644
--- a/tools/ci_op_benchmark.sh
+++ b/tools/ci_op_benchmark.sh
@@ -266,7 +266,7 @@ function check_CHANGE_OP_MAP {
   done
   if [ $exit_code -ne 0 ]; then
     LOG "[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details."
-    LOG "[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR."
+    LOG "[INFO] Or you can apply for one RD (ZzSean(Recommend), JamesLim-sy, Xreki, luotao1) approval to pass this PR."
     exit ${exit_code}
   fi
 }
@@ -305,11 +305,11 @@ function gpu_op_benchmark {

 # The PR will pass quickly when get approval from specific person.
-# Xreki 12538138, luotao1 6836917, ZzSean 32410583
+# Xreki 12538138, luotao1 6836917, ZzSean 32410583, JamesLim-sy 61349199
 set +x
 approval_line=$(curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000)
 if [ -n "${approval_line}" ]; then
-  APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917)
+  APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917 61349199)
   LOG "[INFO] current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
   if [ "${APPROVALS}" == "TRUE" ]; then
     LOG "[INFO] ==================================="
diff --git a/tools/test_ci_op_benchmark.sh b/tools/test_ci_op_benchmark.sh
index bf70d8bc3a495..605ed72a21642 100644
--- a/tools/test_ci_op_benchmark.sh
+++ b/tools/test_ci_op_benchmark.sh
@@ -273,7 +273,7 @@ function check_CHANGE_OP_MAP {
   done
   if [ $exit_code -ne 0 ]; then
     LOG "[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details."
-    LOG "[INFO] Or you can apply for one RD (ZzSean(Recommend), Xreki, luotao1) approval to pass this PR."
+    LOG "[INFO] Or you can apply for one RD (ZzSean(Recommend), JamesLim-sy, Xreki, luotao1) approval to pass this PR."
     exit $exit_code
   fi
 }
@@ -317,11 +317,11 @@ function gpu_op_benchmark {
 }

 # The PR will pass quickly when get approval from specific person.
-# Xreki 12538138, luotao1 6836917, ZzSean 32410583
+# Xreki 12538138, luotao1 6836917, ZzSean 32410583, JamesLim-sy 61349199
 set +x
 approval_line=$(curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000)
 if [ -n "${approval_line}" ]; then
-  APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917)
+  APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 32410583 12538138 6836917 61349199)
   LOG "[INFO] current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
   if [ "${APPROVALS}" == "TRUE" ]; then
     LOG "[INFO] ==================================="
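
Reviewer note: as a quick sanity check on the `triplet_margin_loss` functional API added in this diff, the sketch below recomputes the documented formula by hand on the docstring's example tensors and compares it with the library call. This is a minimal sketch, not part of the patch; the use of `paddle.linalg.norm` as the reference distance and the `1e-4` tolerance are assumptions made for illustration.

    .. code-block:: python

        import paddle
        import paddle.nn.functional as F

        # Anchor/positive/negative examples, taken from the docstring above.
        input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
        positive = paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
        negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)

        # Hand-rolled version of the documented formula:
        #     L = max(d(a, p) - d(a, n) + margin, 0),  with  d(x, y) = ||x - y||_p
        margin, p = 1.0, 2
        d_pos = paddle.linalg.norm(input - positive, p=p, axis=1)
        d_neg = paddle.linalg.norm(input - negative, p=p, axis=1)
        expected = paddle.clip(d_pos - d_neg + margin, min=0.0)

        # The functional API introduced by this diff.
        actual = F.triplet_margin_loss(input, positive, negative,
                                       margin=margin, p=p, reduction='none')

        # PairwiseDistance adds a small epsilon inside the norm, so compare with
        # a loose tolerance (the 1e-4 here is an arbitrary choice).
        assert paddle.allclose(actual, expected, atol=1e-4).item()
        print(actual.numpy())  # approximately [0., 0.57496738, 0.]

Passing `swap=True` additionally replaces `d(a, n)` with `min(d(a, n), d(p, n))`, which is what the `paddle.minimum(negative_dist, swap_dist)` branch in the implementation computes.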