Skip to content

Commit

Permalink
Merge branch 'develop' into heaviside_3
Browse files Browse the repository at this point in the history
  • Loading branch information
BrilliantYuKaimin committed May 5, 2022
2 parents 12ebfdb + 6570814 commit f271150
Show file tree
Hide file tree
Showing 553 changed files with 24,609 additions and 6,273 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Expand Up @@ -17,9 +17,12 @@ if(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.19.2)
cmake_policy(VERSION 3.19.2)
else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM)
# use to get_property location of static lib
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
cmake_policy(SET CMP0026 OLD)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
1 change: 0 additions & 1 deletion cmake/external/boost.cmake
Expand Up @@ -32,7 +32,6 @@ set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACH
MESSAGE(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}")

set(BOOST_PREFIX_DIR ${THIRD_PARTY_PATH}/boost)

set(BOOST_INCLUDE_DIR "${THIRD_PARTY_PATH}/boost/src/extern_boost" CACHE PATH "boost include directory." FORCE)
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
include_directories(${BOOST_INCLUDE_DIR})
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/cinn.cmake
Expand Up @@ -26,7 +26,7 @@ add_definitions(-w)
######################################
include(ExternalProject)
set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
set(CINN_GIT_TAG eedb801ca39bfc6b9621bc76c24a0bf98cb8404b)
set(CINN_GIT_TAG release/v0.2)
set(CINN_OPTIONAL_ARGS -DPY_VERSION=${PY_VERSION}
-DWITH_CUDA=${WITH_GPU}
-DWITH_CUDNN=${WITH_GPU}
Expand Down
2 changes: 2 additions & 0 deletions cmake/external/paddle2onnx.cmake
Expand Up @@ -53,13 +53,15 @@ set(PADDLE2ONNX_OPTIONAL_ARGS
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_STANDARD=14
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
-DWITH_STATIC=OFF
-DMSVC_STATIC_CRT=${MSVC_STATIC_CRT}
-DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Expand Up @@ -17,7 +17,7 @@ endif()
# ubuntu and centos: use output by XDNN API team
if(NOT DEFINED XPU_XDNN_BASE_URL)
SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220412")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220425")
else()
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
Expand Down
1 change: 0 additions & 1 deletion cmake/flags.cmake
Expand Up @@ -163,7 +163,6 @@ if(NOT APPLE)
set(COMMON_FLAGS
${COMMON_FLAGS}
-Wno-format-truncation # Warning in boost gcc 8.2
-Wno-error=cast-function-type # Warning in boost gcc 8.2
-Wno-error=parentheses # Warning in boost gcc 8.2
-Wno-error=catch-value # Warning in boost gcc 8.2
-Wno-error=nonnull-compare # Warning in boost gcc 8.2
Expand Down
128 changes: 63 additions & 65 deletions cmake/generic.cmake
Expand Up @@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
endif()
endfunction()

function(create_dummy_static_lib TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs LIBS DEPS LIMIT)
cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

list(REMOVE_DUPLICATES merge_LIBS)
set(index 1)
set(offset 1)
# the dummy target would be consisted of limit size libraries
set(limit ${merge_LIMIT})
list(LENGTH merge_LIBS libs_len)
foreach(lib ${merge_LIBS})
list(APPEND merge_list ${lib})
list(LENGTH merge_list listlen)
if ((${listlen} GREATER ${limit}) OR (${offset} EQUAL ${libs_len}))
message("Merge and generate static library: ${TARGET_NAME}_static_${index}")
merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list})
if(merge_DEPS)
target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
endif()
set(merge_list)
list(APPEND ${TARGET_NAME}_list ${TARGET_NAME}_static_${index})
MATH(EXPR index "${index}+1")
endif()
MATH(EXPR offset "${offset}+1")
endforeach()
cc_library(${TARGET_NAME} DEPS ${${TARGET_NAME}_list})
endfunction()

function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
Expand All @@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
# also help to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)

if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})

# OSX: use 'libtool' to merge archives
if(APPLE)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
)
endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)

foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)

add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
endif()

add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# LINUX: use "ar" to extract objects and re-add to a common lib
if(LINUX)
set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")

list(APPEND target_OBJS "${objlistfile}")
foreach(lib ${libs})
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
endforeach()

# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})

# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
file(APPEND ${mri_file} "save\nend\n")

add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif(LINUX)
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND ${CMAKE_AR} -M < ${mri_file}
COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
endif()

# Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
if(WIN32)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# msvc compiler will put libarary in directory of "/Release/xxxlib" by default
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
endif()
endfunction()

function(check_coverage_opt TARGET_NAME SRCS)
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
Expand Down Expand Up @@ -1076,4 +1075,3 @@ function(math_library TARGET)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif()
endfunction()

6 changes: 2 additions & 4 deletions cmake/third_party.cmake
Expand Up @@ -357,10 +357,8 @@ if (WITH_PSCORE)
include(external/libmct) # download, build, install libmct
list(APPEND third_party_deps extern_libmct)

if (WITH_HETERPS)
include(external/rocksdb) # download, build, install libmct
list(APPEND third_party_deps extern_rocksdb)
endif()
include(external/rocksdb) # download, build, install libmct
list(APPEND third_party_deps extern_rocksdb)
endif()

if(WITH_XBYAK)
Expand Down
14 changes: 7 additions & 7 deletions paddle/fluid/distributed/collective/CMakeLists.txt
@@ -1,20 +1,20 @@
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)

if (WITH_DISTRIBUTE)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
endif()

if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()

if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupHeter.cc 100644 → 100755
Expand Up @@ -116,7 +116,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
HeterClient* client_ =
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
auto dense_cpu_tensor = cpu_tensors[0];
std::vector<int> send_size;
std::vector<int64_t> send_size;
send_size.push_back(dense_cpu_tensor.numel());
int ret = client_->Send(
gid_, {dense_cpu_tensor.name()}, send_size, dense_cpu_tensor.data(),
Expand Down Expand Up @@ -212,7 +212,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
auto dense_cpu_tensor = cpu_tensors[0];
if (gloo_rank_ == 0) {
std::vector<int> send_size;
std::vector<int64_t> send_size;
send_size.push_back(dense_cpu_tensor.numel());
int ret = client_->Send(
gid_, {dense_cpu_tensor.name()}, send_size,
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/distributed/collective/reducer.cc
Expand Up @@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector<Tensor> &outputs) {
while (!queue.empty()) {
egr::GradNodeBase *node = queue.front();
queue.pop();
const std::vector<std::vector<egr::Edge>> &edges = node->GetEdges();
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const egr::Edge &edge = edges[i][j];
const paddle::small_vector<std::vector<egr::GradSlotMeta>,
egr::kSlotSmallVectorSize> &metas =
node->OutputMeta();
for (size_t i = 0; i < metas.size(); i++) {
for (size_t j = 0; j < metas[i].size(); j++) {
const egr::Edge &edge = metas[i][j].GetEdge();
auto next_node_shared = edge.GetMutableGradNode();
if (!next_node_shared || !next_node_shared.get()) {
continue;
Expand Down
70 changes: 70 additions & 0 deletions paddle/fluid/distributed/common/topk_calculator.h
@@ -0,0 +1,70 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <queue>
#include <unordered_map>

namespace paddle {
namespace distributed {
class TopkCalculator {
public:
TopkCalculator(int shard_num, size_t k)
: _shard_num(shard_num), _total_max_size(k) {
_shard_max_size = _total_max_size / shard_num;
_shard_max_size = _shard_max_size > 1 ? _shard_max_size : 1;
for (int i = 0; i < shard_num; ++i) {
_mpq.emplace(i, std::priority_queue<double, std::vector<double>,
std::greater<double>>());
}
}
~TopkCalculator() {}
bool push(int shard_id, double value) {
if (_mpq.find(shard_id) == _mpq.end()) {
return false;
}
auto &pq = _mpq[shard_id];
if (pq.size() < _shard_max_size) {
pq.push(value);
} else {
if (pq.top() < value) {
pq.pop();
pq.push(value);
}
}
return true;
}
// TODO 再进行一次堆排序merge各个shard的结果
int top() {
double total = 0;
for (const auto &item : _mpq) {
auto &pq = item.second;
if (!pq.empty()) {
total += pq.top();
}
}
return total / _shard_num;
}

private:
std::unordered_map<int, std::priority_queue<double, std::vector<double>,
std::greater<double>>>
_mpq;
int _shard_num;
size_t _total_max_size;
size_t _shard_max_size;
};

} // namespace distributed
} // namespace paddle
11 changes: 10 additions & 1 deletion paddle/fluid/distributed/ps/service/CMakeLists.txt
@@ -1,7 +1,16 @@
set(BRPC_SRCS ps_client.cc server.cc)
set_source_files_properties(${BRPC_SRCS})

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context)

if(WITH_HETERPS)

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context rocksdb)

else()

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context)

endif()

brpc_library(sendrecv_rpc SRCS
${BRPC_SRCS}
Expand Down

0 comments on commit f271150

Please sign in to comment.