Skip to content

Commit

Permalink
Merge branch 'develop' into pixel_unshuffle
Browse files Browse the repository at this point in the history
  • Loading branch information
BrilliantYuKaimin committed Apr 25, 2022
2 parents 948f32b + e52e6d0 commit e270ab7
Show file tree
Hide file tree
Showing 288 changed files with 11,240 additions and 2,606 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Expand Up @@ -17,9 +17,12 @@ if(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.19.2)
cmake_policy(VERSION 3.19.2)
else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM)
# Needed so that get_property() can still read the LOCATION property of static libraries
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
cmake_policy(SET CMP0026 OLD)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
6 changes: 5 additions & 1 deletion cmake/cuda.cmake
Expand Up @@ -132,7 +132,11 @@ function(select_nvcc_arch_flags out_variable)
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set(cuda_arch_bin "80")
if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0
set(cuda_arch_bin "80")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+
set(cuda_arch_bin "80 86")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(cuda_arch_bin ${paddle_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
Expand Down
1 change: 0 additions & 1 deletion cmake/external/boost.cmake
Expand Up @@ -32,7 +32,6 @@ set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACH
MESSAGE(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}")

set(BOOST_PREFIX_DIR ${THIRD_PARTY_PATH}/boost)

set(BOOST_INCLUDE_DIR "${THIRD_PARTY_PATH}/boost/src/extern_boost" CACHE PATH "boost include directory." FORCE)
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
include_directories(${BOOST_INCLUDE_DIR})
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/cinn.cmake
Expand Up @@ -26,7 +26,7 @@ add_definitions(-w)
######################################
include(ExternalProject)
set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
set(CINN_GIT_TAG 08d7680dd91dfaa65787969050eb8f1143654f10)
set(CINN_GIT_TAG release/v0.2)
set(CINN_OPTIONAL_ARGS -DPY_VERSION=${PY_VERSION}
-DWITH_CUDA=${WITH_GPU}
-DWITH_CUDNN=${WITH_GPU}
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/lite.cmake
Expand Up @@ -50,7 +50,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)

if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG 4ab64daecc11fbf74fffdc6a4733f388472e7d5d)
set(LITE_GIT_TAG 81ef66554099800c143a0feff6e0a491b3b0d12e)
endif()

if(NOT CUDA_ARCH_NAME)
Expand Down
128 changes: 63 additions & 65 deletions cmake/generic.cmake
Expand Up @@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
endif()
endfunction()

# create_dummy_static_lib(<target> LIBS <lib>... [DEPS <dep>...] LIMIT <n>)
#
# Splits LIBS into consecutive chunks, merges every chunk into an intermediate
# static library named `<target>_static_<i>` (i = 1, 2, ...) by calling
# merge_static_libs(), and finally declares `<target>` as a cc_library that
# depends on all of the intermediate libraries.
#
# Arguments:
#   TARGET_NAME  name of the final target and prefix of the intermediate ones.
#   LIBS         libraries to merge; duplicates are removed first.
#   DEPS         optional; linked into every intermediate library.
#   LIMIT        chunk threshold (a single number). A chunk is flushed as soon
#                as it holds MORE than LIMIT libraries, or when the last
#                library has been appended, so a chunk can contain up to
#                LIMIT + 1 libraries.
function(create_dummy_static_lib TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs LIBS DEPS LIMIT)
  cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # LIMIT is expanded unquoted inside if() below; without exactly one value the
  # condition is malformed, so fail here with a readable message instead.
  list(LENGTH merge_LIMIT limit_count)
  if(NOT limit_count EQUAL 1)
    message(FATAL_ERROR
      "create_dummy_static_lib(${TARGET_NAME}): LIMIT expects exactly one value, got '${merge_LIMIT}'")
  endif()

  # Only de-duplicate when LIBS was actually passed.
  if(DEFINED merge_LIBS)
    list(REMOVE_DUPLICATES merge_LIBS)
  endif()

  # A function scope starts with the caller's variables visible, so clear the
  # accumulators explicitly instead of appending to whatever the caller has.
  set(merge_list)
  set(${TARGET_NAME}_list)

  set(index 1)   # 1-based number of the next intermediate library
  set(offset 1)  # 1-based position of the current library inside LIBS
  # the dummy target is built from chunks bounded by this limit
  set(limit ${merge_LIMIT})
  list(LENGTH merge_LIBS libs_len)
  foreach(lib ${merge_LIBS})
    list(APPEND merge_list ${lib})
    list(LENGTH merge_list listlen)
    # Flush when the chunk exceeds the limit or the input is exhausted.
    if((${listlen} GREATER ${limit}) OR (${offset} EQUAL ${libs_len}))
      message("Merge and generate static library: ${TARGET_NAME}_static_${index}")
      merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list})
      if(merge_DEPS)
        target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
      endif()
      # Start a new, empty chunk and remember the library just created.
      set(merge_list)
      list(APPEND ${TARGET_NAME}_list ${TARGET_NAME}_static_${index})
      math(EXPR index "${index}+1")
    endif()
    math(EXPR offset "${offset}+1")
  endforeach()
  cc_library(${TARGET_NAME} DEPS ${${TARGET_NAME}_list})
endfunction()

function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
Expand All @@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
# also help to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)

if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
# Make the generated dummy source file depend on all static input
# libs. If an input lib changes, the source file is touched,
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})

# OSX: use 'libtool' to merge archives
if(APPLE)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
)
endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)

foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)

add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
endif()

add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# LINUX: use "ar" to extract objects and re-add to a common lib
if(LINUX)
set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")

list(APPEND target_OBJS "${objlistfile}")
foreach(lib ${libs})
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
endforeach()

# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})

# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
file(APPEND ${mri_file} "save\nend\n")

add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif(LINUX)
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND ${CMAKE_AR} -M < ${mri_file}
COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
endif()

# Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
if(WIN32)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# The MSVC compiler puts the library in the "/Release/xxxlib" directory by default
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
endif()
endfunction()

function(check_coverage_opt TARGET_NAME SRCS)
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
Expand Down Expand Up @@ -1076,4 +1075,3 @@ function(math_library TARGET)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif()
endfunction()

6 changes: 2 additions & 4 deletions cmake/third_party.cmake
Expand Up @@ -357,10 +357,8 @@ if (WITH_PSCORE)
include(external/libmct) # download, build, install libmct
list(APPEND third_party_deps extern_libmct)

if (WITH_HETERPS)
include(external/rocksdb) # download, build, install libmct
list(APPEND third_party_deps extern_rocksdb)
endif()
include(external/rocksdb) # download, build, install libmct
list(APPEND third_party_deps extern_rocksdb)
endif()

if(WITH_XBYAK)
Expand Down
14 changes: 7 additions & 7 deletions paddle/fluid/distributed/collective/CMakeLists.txt
@@ -1,20 +1,20 @@
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)

if (WITH_DISTRIBUTE)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
endif()

if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()

if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()
70 changes: 70 additions & 0 deletions paddle/fluid/distributed/common/topk_calculator.h
@@ -0,0 +1,70 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstddef>
#include <functional>
#include <queue>
#include <unordered_map>
#include <vector>

namespace paddle {
namespace distributed {
// Keeps, for each shard, a bounded min-heap holding the largest values pushed
// to that shard, and reports an aggregate over the per-shard heap minimums.
class TopkCalculator {
 public:
  // Creates one empty heap for every shard id in [0, shard_num).
  //
  // shard_num: number of shards. When it is not positive no heap is created,
  //            so every push() is rejected and top() returns 0.
  // k:         total number of values to retain; each shard keeps at most
  //            max(k / shard_num, 1) values.
  TopkCalculator(int shard_num, size_t k)
      : _shard_num(shard_num), _total_max_size(k), _shard_max_size(1) {
    if (shard_num <= 0) {
      // Nothing to shard over; skipping the division below avoids an integer
      // division by zero when shard_num == 0.
      return;
    }
    const size_t per_shard = _total_max_size / static_cast<size_t>(shard_num);
    _shard_max_size = per_shard > 1 ? per_shard : 1;
    for (int i = 0; i < shard_num; ++i) {
      _mpq.emplace(i, MinHeap());
    }
  }
  ~TopkCalculator() = default;

  // Offers `value` to the heap of `shard_id`.
  //
  // While the heap is below its capacity the value is always stored. Once it
  // is full, the value replaces the current minimum only if it is strictly
  // greater than that minimum.
  //
  // Returns false when `shard_id` has no heap, true otherwise (including when
  // the value was not stored because it was too small).
  bool push(int shard_id, double value) {
    auto it = _mpq.find(shard_id);
    if (it == _mpq.end()) {
      return false;
    }
    MinHeap &pq = it->second;  // reuse the iterator: a single hash lookup
    if (pq.size() < _shard_max_size) {
      pq.push(value);
    } else if (pq.top() < value) {
      pq.pop();
      pq.push(value);
    }
    return true;
  }

  // TODO: run one more heap sort to merge the results of all shards.
  //
  // Sums the smallest retained value of every non-empty shard heap, divides
  // the sum by the shard count and returns the quotient converted to int
  // (the fractional part is discarded). Returns 0 when the shard count is not
  // positive.
  int top() {
    if (_shard_num <= 0) {
      // Avoid dividing by zero and converting a non-finite double to int.
      return 0;
    }
    double total = 0;
    for (const auto &item : _mpq) {
      const MinHeap &pq = item.second;
      if (!pq.empty()) {
        total += pq.top();
      }
    }
    return static_cast<int>(total / _shard_num);
  }

 private:
  // Min-heap: top() is the smallest element currently stored.
  using MinHeap =
      std::priority_queue<double, std::vector<double>, std::greater<double>>;

  std::unordered_map<int, MinHeap> _mpq;  // shard id -> retained values
  int _shard_num;                         // shard count given at construction
  size_t _total_max_size;                 // k given at construction
  size_t _shard_max_size;                 // capacity of each shard heap
};

} // namespace distributed
} // namespace paddle
6 changes: 5 additions & 1 deletion paddle/fluid/distributed/ps/service/CMakeLists.txt
@@ -1,7 +1,11 @@
set(BRPC_SRCS ps_client.cc server.cc)
set_source_files_properties(${BRPC_SRCS})

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context)
if(WITH_HETERPS)
set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context rocksdb)
else()
set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context)
endif()

brpc_library(sendrecv_rpc SRCS
${BRPC_SRCS}
Expand Down

1 comment on commit e270ab7

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉

Please sign in to comment.