Merge branch 'develop' of https://github.com//PaddlePaddle/Paddle into onecyclelr
Asthestarsfalll committed May 10, 2022
2 parents 348932b + 000edfd commit 98f9a9e
Showing 223 changed files with 7,815 additions and 1,526 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -395,6 +395,11 @@ if(WITH_DISTRIBUTE)
MESSAGE(WARNING "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
endif()
if(WITH_ROCM AND HIP_VERSION LESS_EQUAL 40020496)
# TODO(qili93): third-party rocksdb throw Illegal instruction with HIP version 40020496
MESSAGE(WARNING "Disable WITH_PSCORE when HIP_VERSION is less than or equal 40020496. Force WITH_PSCORE=OFF.")
set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when HIP_VERSION is less than or equal 40020496" FORCE)
endif()
endif()

include(third_party) # download, build, install third_party, Contains about 20+ dependencies
29 changes: 29 additions & 0 deletions cmake/hip.cmake
@@ -18,6 +18,33 @@ include_directories(${ROCM_PATH}/include)
message(STATUS "HIP version: ${HIP_VERSION}")
message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")

macro(find_hip_version hip_header_file)
file(READ ${hip_header_file} HIP_VERSION_FILE_CONTENTS)

string(REGEX MATCH "define HIP_VERSION_MAJOR +([0-9]+)" HIP_MAJOR_VERSION
"${HIP_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define HIP_VERSION_MAJOR +([0-9]+)" "\\1"
HIP_MAJOR_VERSION "${HIP_MAJOR_VERSION}")
string(REGEX MATCH "define HIP_VERSION_MINOR +([0-9]+)" HIP_MINOR_VERSION
"${HIP_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define HIP_VERSION_MINOR +([0-9]+)" "\\1"
HIP_MINOR_VERSION "${HIP_MINOR_VERSION}")
string(REGEX MATCH "define HIP_VERSION_PATCH +([0-9]+)" HIP_PATCH_VERSION
"${HIP_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define HIP_VERSION_PATCH +([0-9]+)" "\\1"
HIP_PATCH_VERSION "${HIP_PATCH_VERSION}")

if(NOT HIP_MAJOR_VERSION)
set(HIP_VERSION "???")
message(WARNING "Cannot find HIP version in ${HIP_PATH}/include/hip/hip_version.h")
else()
math(EXPR HIP_VERSION "${HIP_MAJOR_VERSION} * 10000000 + ${HIP_MINOR_VERSION} * 100000 + ${HIP_PATCH_VERSION}")
message(STATUS "Current HIP header is ${HIP_PATH}/include/hip/hip_version.h "
"Current HIP version is v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}.${HIP_PATCH_VERSION}. ")
endif()
endmacro()
find_hip_version(${HIP_PATH}/include/hip/hip_version.h)
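For reference, a HIP header reporting major version 4, minor version 0 and patch 20496 encodes to HIP_VERSION = 4 * 10000000 + 0 * 100000 + 20496 = 40020496, which matches the threshold used by the new WITH_PSCORE guard in CMakeLists.txt above.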

macro(find_package_and_include PACKAGE_NAME)
find_package("${PACKAGE_NAME}" REQUIRED)
include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
@@ -71,8 +98,10 @@ set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx906)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx908)
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx908)


if(HIP_COMPILER STREQUAL clang)
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
@@ -416,7 +416,7 @@ function(version version_file)
endif()
if(WITH_ROCM)
file(APPEND ${version_file}
"HIP version: ${HIP_VERSION}\n"
"HIP version: v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}\n"
"MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n")
endif()
if(WITH_ASCEND_CL)
2 changes: 1 addition & 1 deletion cmake/third_party.cmake
@@ -357,7 +357,7 @@ if (WITH_PSCORE)
include(external/libmct) # download, build, install libmct
list(APPEND third_party_deps extern_libmct)

include(external/rocksdb) # download, build, install libmct
include(external/rocksdb) # download, build, install rocksdb
list(APPEND third_party_deps extern_rocksdb)
endif()

21 changes: 9 additions & 12 deletions paddle/fluid/distributed/ps/service/heter_client.h
@@ -171,19 +171,16 @@ class HeterClient {
// switch client singleton
static std::shared_ptr<HeterClient> GetSwitchInstance(
const std::vector<std::string>& peer_endpoints, int32_t peer_role) {
std::unique_lock<std::mutex> lock(mtx_);
if (peer_endpoints.empty()) {
VLOG(4) << "init switch client failed, null peer_endpoints";
}
VLOG(4) << "peer role is: " << peer_role
<< ", addr is: " << peer_endpoints[0];
if (switch_s_instance_ == nullptr) {
std::unique_lock<std::mutex> lock(mtx_);
if (peer_endpoints.empty()) {
VLOG(4) << "init switch client failed, null peer_endpoints";
}
VLOG(4) << "peer role is: " << peer_role
<< ", addr is: " << peer_endpoints[0];
if (switch_s_instance_ == nullptr) {
switch_s_instance_.reset(new HeterClient());
switch_s_instance_->SetPeerSwitchList(peer_endpoints);
switch_s_instance_->InitClientChannels(false, peer_endpoints,
peer_role);
}
switch_s_instance_.reset(new HeterClient());
switch_s_instance_->SetPeerSwitchList(peer_endpoints);
switch_s_instance_->InitClientChannels(false, peer_endpoints, peer_role);
}
return switch_s_instance_;
}
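The interleaved old and new lines above appear to amount to taking the mutex before the singleton check rather than inside it. A minimal, self-contained sketch of that locking pattern, using placeholder names (Client, instance_) rather than the actual HeterClient members:

    #include <memory>
    #include <mutex>

    struct Client {};

    static std::mutex mtx_;
    static std::shared_ptr<Client> instance_;

    // Sketch: acquire the lock unconditionally so the null check and the
    // construction of the shared instance sit in one critical section.
    std::shared_ptr<Client> GetInstance() {
      std::unique_lock<std::mutex> lock(mtx_);
      if (instance_ == nullptr) {
        instance_.reset(new Client());
      }
      return instance_;
    }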
7 changes: 5 additions & 2 deletions paddle/fluid/distributed/ps/service/heter_server.cc
@@ -125,18 +125,21 @@ int SendAndRecvVariableHandler::SaveInSwitchWithShard(
brpc::Controller* cntl) {
VLOG(4) << "entering SaveInSwitchWithShard";
int32_t group_id = request->group_id();
if (group_id >= FLAGS_heter_world_size) {
LOG(ERROR) << "group id exceed maxmium";
}
auto& local_shard = _local_shards[group_id];
auto& request_io_buffer = cntl->request_attachment();
butil::IOBufBytesIterator io_buffer_itr(request_io_buffer);
for (int idx = 0; idx < request->send_var_names_size(); idx++) {
const auto& var_name = request->send_var_names(idx);
const auto& var_size = request->vars_len(idx);
WaitForVarsConsumed(group_id, var_name);
std::unique_lock<std::mutex> lk(scope_mutex_);
auto& value = local_shard[var_name];
value.resize(var_size);
io_buffer_itr.copy_and_forward(reinterpret_cast<void*>(value.data()),
var_size);
std::unique_lock<std::mutex> lk(scope_mutex_);
vars_ready_flag[group_id][var_name] = 1;
VLOG(4) << "saved var_name: " << var_name << "is saved ready!";
}
@@ -162,11 +165,11 @@ int SendAndRecvVariableHandler::QueryInSwitchWithShard(
VLOG(4) << "req var name: " << req_var_name;
response->add_send_var_names(req_var_name);
WaitForVarsProduced(group_id, req_var_name);
std::unique_lock<std::mutex> lk(scope_mutex_);
auto itr = local_shard.find(req_var_name);
auto& value = itr.value();
response_io_buffer.append(value.data(), value.size());
value.resize(0);  // clear the memory
std::unique_lock<std::mutex> lk(scope_mutex_);
vars_ready_flag[group_id][req_var_name] = 0;
VLOG(4) << "query var_name: " << req_var_name << "is consumed ready!";
}
37 changes: 36 additions & 1 deletion paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -34,7 +34,42 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
*tensor = t;
} else {
// Accumulation
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, tensor);
    PADDLE_ENFORCE_EQ(t.initialized(), true,
                      paddle::platform::errors::Fatal(
                          "We can only accumulate an initialized tensor, but "
                          "tensor %s is empty. Please check your network "
                          "and make sure it creates grads.",
                          t.name()));
    PADDLE_ENFORCE_NOT_NULL(
        tensor, paddle::platform::errors::Fatal(
                    "We can only accumulate an initialized tensor into a "
                    "non-null tensor, but we got nullptr. Please check your "
                    "network and make sure it creates grads."));

if (t.is_dense_tensor()) {
if (tensor->is_dense_tensor()) {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, tensor);

} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with
// add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer(
std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
tensor->set_impl(new_buffer.impl());
}
} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
// once it's supported
if (tensor->is_dense_tensor()) {
paddle::imperative::SelectedRowsAddToTensor(t, tensor);
} else {
*tensor = std::move(*paddle::imperative::SelectedRowsMerge<
paddle::experimental::Tensor>(t, *tensor));
}
}
}
}

@@ -416,10 +416,6 @@ def DetermineForwardPositionMap(self, forward_inputs_list,

self.forward_outputs_position_map[
return_name] = [return_type, return_pos]
print("Generated Forward Input Position Map: ",
self.forward_inputs_position_map)
print("Generated Forward Output Position Map: ",
self.forward_outputs_position_map)


class YamlGeneratorBase:
@@ -551,12 +551,6 @@ def CollectBackwardInfo(self):
self.backward_inputs_list, self.backward_attrs_list, self.backward_returns_list = ParseYamlBackward(
backward_args_str, backward_returns_str)

logging.info(
f"Parsed Backward Inputs List: {self.backward_inputs_list}")
logging.info(f"Prased Backward Attrs List: {self.backward_attrs_list}")
logging.info(
f"Parsed Backward Returns List: {self.backward_returns_list}")

def CollectForwardInfoFromBackwardContents(self):

backward_forward_str = self.backward_forward_str
@@ -628,15 +622,6 @@ def SlotNameMatching(self):
backward_output_type, matched_forward_input_pos,
backward_output_pos
]
logging.info(
f"Generated Backward Fwd Input Map: {self.backward_forward_inputs_map}"
)
logging.info(
f"Generated Backward Grad Input Map: {self.backward_grad_inputs_map}"
)
logging.info(
f"Generated Backward Grad Output Map: {self.backward_grad_outputs_map}"
)

def GenerateNodeCreationCodes(self):
forward_api_name = self.forward_api_name
@@ -865,7 +850,10 @@ def GenerateForwardDefinition(self, is_inplaced):
f"if ({name}.get_ptr() != nullptr) amp_tensors_vector.push_back({{ *({name}.get_ptr()) }});\n"
)
amp_autocast_optional_list.append(
f"auto NEW_{name} = ({name}.get_ptr() != nullptr) ? paddle::make_optional<const paddle::experimental::Tensor&>(egr::EagerAmpAutoCast(\"{name}\", *({name}.get_ptr()), amp_dst_dtype, op_name)) : {name};\n"
f"auto NEW_{name}_temp_tensor = ({name}.get_ptr() != nullptr) ? egr::EagerAmpAutoCast(\"{name}\", *({name}.get_ptr()), amp_dst_dtype, op_name) : paddle::experimental::Tensor();\n"
)
amp_autocast_optional_list.append(
f"auto NEW_{name} = ({name}.get_ptr() != nullptr) ? paddle::make_optional<const paddle::experimental::Tensor&>(NEW_{name}_temp_tensor) : {name};\n"
)
else:
if is_inplaced and inplace_map and name in inplace_map.keys(
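For a hypothetical optional input named x, the two NEW_{name} templates appended above expand to roughly the following generated C++; storing the autocast result in a named temporary keeps it alive, so the optional const reference no longer binds to an expiring temporary (illustrative expansion only, not text from the commit):

    // Hypothetical expansion for an optional tensor input "x":
    auto NEW_x_temp_tensor = (x.get_ptr() != nullptr)
        ? egr::EagerAmpAutoCast("x", *(x.get_ptr()), amp_dst_dtype, op_name)
        : paddle::experimental::Tensor();
    auto NEW_x = (x.get_ptr() != nullptr)
        ? paddle::make_optional<const paddle::experimental::Tensor&>(NEW_x_temp_tensor)
        : x;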
@@ -1041,11 +1029,6 @@ def GenerateForwardDefinition(self, is_inplaced):
returns_str)
self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"

logging.info(
f"Generated Forward Definition: {self.forward_definition_str}")
logging.info(
f"Generated Forward Declaration: {self.forward_declaration_str}")

def GenerateInplacedForwardDygraphFunctions(self):
# Inplaced Version Dygraph Function Generation
forward_api_name = self.forward_api_name
@@ -1231,8 +1214,6 @@ def GenerateNodeDeclaration(self):
set_attribute_methods_str, tensor_wrapper_members_str,
attribute_members_str)

logging.info(f"Generated Node Declaration: {self.node_declaration_str}")

def GenerateNodeDefinition(self, grad_node_creation_str):
namespace = self.namespace
forward_api_name = self.forward_api_name
@@ -1436,8 +1417,6 @@ def GenerateNodeDefinition(self, grad_node_creation_str):
outputs_autograd_meta_str, compute_require_grad_str,
grad_node_creation_str, returns_str)

logging.info(f"Generated Node Definition: {self.node_definition_str}")

def run(self):
super().run()

@@ -399,35 +399,15 @@ def run(self):

# Initialized orig_forward_inputs_list, orig_forward_returns_list, orig_forward_attrs_list
self.CollectOriginalForwardInfo()
logging.info(
f"Parsed Original Forward Inputs List: \n{self.orig_forward_inputs_list}"
)
logging.info(
f"Prased Original Forward Attrs List: \n{self.orig_forward_attrs_list}"
)
logging.info(
f"Parsed Original Forward Returns List: \n{self.orig_forward_returns_list}"
)

if SkipAPIGeneration(self.forward_api_name): return False

# Initialized forward_inputs_position_map, forward_outputs_position_map
self.DetermineForwardPositionMap(self.orig_forward_inputs_list,
self.orig_forward_returns_list)
logging.info(
f"Generated Forward Input Position Map: {self.forward_inputs_position_map}"
)
logging.info(
f"Generated Forward Output Position Map: {self.forward_outputs_position_map}"
)

# Code Generation
self.GeneratePythonCFunction()
logging.info(
f"Generated Python-C Function: {self.python_c_function_str}")
logging.info(
f"Generated Python-C Function Declaration: {self.python_c_function_reg_str}"
)

return True

@@ -536,8 +516,6 @@ def GeneratePythonCFile(filepath, python_c_str):
python_c_str = GeneratePythonCWrappers(generated_python_c_functions,
generated_python_c_registration)

logging.info(f"Generated Python-C Codes: \n{python_c_str}")

output_path = args.output_path
for path in [output_path]:
if os.path.exists(path):
