Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Browse files Browse the repository at this point in the history
… add_send_uv
  • Loading branch information
DesmonDay committed Aug 8, 2022
2 parents a91b584 + ad71655 commit 7ab77f9
Show file tree
Hide file tree
Showing 1,132 changed files with 42,970 additions and 23,613 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -243,6 +243,7 @@ include(simd)
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_MULTINODE_TESTING "Test multinode apis and ops" OFF)
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
Expand Down
3 changes: 3 additions & 0 deletions cmake/configure.cmake
Expand Up @@ -241,3 +241,6 @@ endif()
if(WITH_CUSTOM_DEVICE AND NOT WIN32)
add_definitions(-DPADDLE_WITH_CUSTOM_DEVICE)
endif()
if(WITH_GPU_GRAPH)
add_definitions(-DPADDLE_WITH_GPU_GRAPH)
endif()
7 changes: 6 additions & 1 deletion cmake/external/cinn.cmake
Expand Up @@ -16,6 +16,12 @@ if(NOT WITH_CINN)
return()
endif()

if(NOT CINN_GIT_TAG)
set(CINN_GIT_TAG release/v0.2)
endif()

message(STATUS "CINN version: " ${CINN_GIT_TAG})

# TODO(zhhsplendid): CINN has lots of warnings during early development.
# They will be treated as errors under paddle. We set no-error now and we will
# clean the code in the future.
Expand All @@ -26,7 +32,6 @@ add_definitions(-w)
######################################
include(ExternalProject)
set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
set(CINN_GIT_TAG release/v0.2)
set(CINN_OPTIONAL_ARGS
-DPY_VERSION=${PY_VERSION}
-DWITH_CUDA=${WITH_GPU}
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/dgc.cmake
Expand Up @@ -33,7 +33,7 @@ ExternalProject_Add(
URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251"
PREFIX "${DGC_PREFIX_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND make -j $(nproc)
BUILD_COMMAND make -j${NPROC}
INSTALL_COMMAND
mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc && cp
${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES} && cp
Expand Down
4 changes: 2 additions & 2 deletions cmake/external/gflags.cmake
Expand Up @@ -29,8 +29,8 @@ else()
set(GFLAGS_LIBRARIES
"${GFLAGS_INSTALL_DIR}/lib/libgflags.a"
CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
set(BUILD_COMMAND $(MAKE) --silent)
set(INSTALL_COMMAND $(MAKE) install)
set(BUILD_COMMAND ${CMAKE_COMMAND} --build .)
set(INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install)
endif()

include_directories(${GFLAGS_INCLUDE_DIR})
Expand Down
13 changes: 12 additions & 1 deletion cmake/external/gtest.cmake
Expand Up @@ -38,6 +38,9 @@ if(WIN32)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib"
CACHE FILEPATH "gtest main libraries." FORCE)
# Location of the gmock static library produced by the extern_gtest build on
# Windows. MSVC static libraries carry no "lib" prefix (compare gtest_main.lib
# in this same branch), so the archive is gmock.lib, not libgmock.lib.
set(GMOCK_LIBRARIES
    "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gmock.lib"
    CACHE FILEPATH "gmock libraries." FORCE)
string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}")
Expand All @@ -49,6 +52,9 @@ else()
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a"
CACHE FILEPATH "gtest main libraries." FORCE)
set(GMOCK_LIBRARIES
"${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgmock.a"
CACHE FILEPATH "gmock libraries." FORCE)
set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
Expand Down Expand Up @@ -86,7 +92,8 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GTEST_LIBRARIES}
BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES})
BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}
BUILD_BYPRODUCTS ${GMOCK_LIBRARIES})

add_library(gtest STATIC IMPORTED GLOBAL)
set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES})
Expand All @@ -96,3 +103,7 @@ add_library(gtest_main STATIC IMPORTED GLOBAL)
set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION
${GTEST_MAIN_LIBRARIES})
add_dependencies(gtest_main extern_gtest)

add_library(gmock STATIC IMPORTED GLOBAL)
set_property(TARGET gmock PROPERTY IMPORTED_LOCATION ${GMOCK_LIBRARIES})
add_dependencies(gmock extern_gtest)
9 changes: 8 additions & 1 deletion cmake/external/mkldnn.cmake
Expand Up @@ -61,6 +61,12 @@ else()
CACHE FILEPATH "mkldnn library." FORCE)
endif()

if(LINUX)
set(BUILD_BYPRODUCTS_ARGS ${MKLDNN_LIB})
else()
set(BUILD_BYPRODUCTS_ARGS "")
endif()

ExternalProject_Add(
${MKLDNN_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
Expand All @@ -83,7 +89,8 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DDNNL_BUILD_TESTS=OFF
-DDNNL_BUILD_EXAMPLES=OFF
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR})
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
BUILD_BYPRODUCTS ${BUILD_BYPRODUCTS_ARGS})

message(STATUS "MKLDNN library: ${MKLDNN_LIB}")
add_definitions(-DPADDLE_WITH_MKLDNN)
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/openblas.cmake
Expand Up @@ -53,7 +53,7 @@ if(NOT WIN32)
PREFIX ${CBLAS_PREFIX_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND make -j$(nproc) ${COMMON_ARGS} ${OPTIONAL_ARGS}
BUILD_COMMAND make -j${NPROC} ${COMMON_ARGS} ${OPTIONAL_ARGS}
INSTALL_COMMAND make install NO_SHARED=1 NO_LAPACK=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/paddle2onnx.cmake
Expand Up @@ -24,7 +24,7 @@ endif()
include(ExternalProject)

set(PADDLE2ONNX_PROJECT "extern_paddle2onnx")
set(PADDLE2ONNX_VERSION "1.0.0rc")
set(PADDLE2ONNX_VERSION "1.0.0rc2")
set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx)
set(PADDLE2ONNX_SOURCE_DIR
${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT})
Expand Down
3 changes: 2 additions & 1 deletion cmake/external/rocksdb.cmake
Expand Up @@ -44,7 +44,8 @@ ExternalProject_Add(
${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include
${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1)
BUILD_IN_SOURCE 1
# ExternalProject_Add declares files created by the build step with the
# BUILD_BYPRODUCTS keyword; a bare BYPRODUCTS is not one of its options.
BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})

add_dependencies(extern_rocksdb snappy)

Expand Down
4 changes: 2 additions & 2 deletions cmake/external/xpu.cmake
Expand Up @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220728")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220802")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
Expand All @@ -19,7 +19,7 @@ endif()
if(NOT DEFINED XPU_XDNN_BASE_URL)
set(XPU_XDNN_BASE_URL_WITHOUT_DATE
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220728")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220802")
else()
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/operators.cmake
Expand Up @@ -510,7 +510,7 @@ function(op_library TARGET)
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator
if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(gelu, MKLDNN);\n")
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
Expand Down
12 changes: 11 additions & 1 deletion cmake/phi.cmake
Expand Up @@ -15,7 +15,7 @@
function(generate_unify_header DIR_NAME)
set(options "")
set(oneValueArgs HEADER_NAME SKIP_SUFFIX)
set(multiValueArgs "")
set(multiValueArgs EXCLUDES)
cmake_parse_arguments(generate_unify_header "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})

Expand All @@ -33,6 +33,9 @@ function(generate_unify_header DIR_NAME)
set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}")
endif()

# exclude files
list(LENGTH generate_unify_header_EXCLUDES generate_unify_header_EXCLUDES_len)

# generate target header file
set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
file(
Expand All @@ -43,6 +46,13 @@ function(generate_unify_header DIR_NAME)
# get all top-level headers and write into header file
file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
foreach(header ${HEADERS})
if(${generate_unify_header_EXCLUDES_len} GREATER 0)
get_filename_component(header_file_name ${header} NAME)
list(FIND generate_unify_header_EXCLUDES ${header_file_name} _index)
if(NOT ${_index} EQUAL -1)
continue()
endif()
endif()
if("${skip_suffix}" STREQUAL "")
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
file(APPEND ${header_file} "#include \"${header}\"\n")
Expand Down
3 changes: 3 additions & 0 deletions cmake/third_party.cmake
Expand Up @@ -27,6 +27,9 @@ set(THIRD_PARTY_CACHE_PATH
set(THIRD_PARTY_BUILD_TYPE Release)
set(third_party_deps)

# Detect the number of processors; NPROC is passed as "make -j${NPROC}" by the
# external-project build commands.
include(ProcessorCount)
ProcessorCount(NPROC)
# ProcessorCount() reports 0 when it cannot determine the count, and GNU make
# rejects "-j0", so fall back to a single job in that case.
if(NPROC EQUAL 0)
  set(NPROC 1)
endif()

# Cache function to avoid repeatedly downloading third_party code.
# This function has 4 parameters, URL / REPOSITOR / TAG / DIR:
# 1. URL: specify download url of 3rd party
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/distributed/CMakeLists.txt
Expand Up @@ -47,3 +47,4 @@ add_subdirectory(ps)
add_subdirectory(test)
add_subdirectory(index_dataset)
add_subdirectory(fleet_executor)
add_subdirectory(auto_parallel)
28 changes: 28 additions & 0 deletions paddle/fluid/distributed/auto_parallel/CMakeLists.txt
@@ -0,0 +1,28 @@
# Build targets for the auto_parallel directory. Every library below lists
# auto_parallel_proto (declared at the bottom of this file) in its DEPS.

# device_mesh: built from device_mesh.cc; device_mesh_test links against it.
cc_library(
device_mesh
SRCS device_mesh.cc
DEPS auto_parallel_proto)
cc_test(
device_mesh_test
SRCS device_mesh_test.cc
DEPS device_mesh)

# process_mesh: built from process_mesh.cc; process_mesh_test links against it.
cc_library(
process_mesh
SRCS process_mesh.cc
DEPS auto_parallel_proto)
cc_test(
process_mesh_test
SRCS process_mesh_test.cc
DEPS process_mesh)

# dist_mapper: built from dist_mapper.cc on top of device_mesh;
# dist_mapper_test links against it.
cc_library(
dist_mapper
SRCS dist_mapper.cc
DEPS device_mesh auto_parallel_proto)
cc_test(
dist_mapper_test
SRCS dist_mapper_test.cc
DEPS dist_mapper)

# Target for auto_parallel.proto.
# NOTE(review): this target is declared after the libraries that depend on it;
# presumably the cc_library helper resolves DEPS lazily -- confirm.
proto_library(auto_parallel_proto SRCS auto_parallel.proto)
120 changes: 120 additions & 0 deletions paddle/fluid/distributed/auto_parallel/auto_parallel.proto
@@ -0,0 +1,120 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

syntax = "proto2";

package paddle.distributed.auto_parallel;

// ProcessMeshProto organizes processes as an n-dimensional array.
message ProcessMeshProto {
// The size of each dimension.
repeated int64 shape = 1;

// The process ids, stored in row-major order.
// There are no duplicate process ids within one process mesh.
repeated int64 process_ids = 2;

// The name of each dimension.
repeated string dim_names = 3;

}

// This proto describes the capability of one device, such as its computation
// and memory. All fields are optional doubles.
message DeviceCapabilityProto {
// Single-precision floating-point throughput.
// NOTE(review): presumably peak FLOP/s -- the exact unit is not stated here.
optional double single_precision_flops = 1;

// Double-precision floating-point throughput (same caveat on the unit).
optional double double_precision_flops = 2;

// The memory size of the device, in bytes.
optional double memory_size_in_bytes = 3;

// The clock rate of the device, in GHz.
optional double clock_rate_in_ghz = 4;
}

// This proto represents a device.
message DeviceProto {
// The global id of this device within the cluster.
optional int64 global_id = 1;

// The local id of this device within the machine.
optional int64 local_id = 2;

// The id of the machine that owns this device.
optional int64 machine_id = 3;

// The type of this device, stored as a string.
// NOTE(review): the set of valid type strings is not defined in this file.
optional string type = 4;

// The capability of this device.
optional DeviceCapabilityProto capability = 5;
}

// This proto describes the capability of the link between two devices.
message LinkCapabilityProto {
optional int64 bandwidth = 1; // Bytes/s
// NOTE(review): the unit of latency is not stated here -- confirm and document.
optional int64 latency = 2;
}

// This proto represents a link from a source device to a target device.
message LinkProto {
// The global id of the source device.
optional int64 source_id = 1;

// The global id of the target device.
optional int64 target_id = 2;

// The type of this link, stored as a string.
optional string type = 3;

// The capability of this link.
optional LinkCapabilityProto capability = 4;
}

// DeviceMeshProto organizes devices as an n-dimensional array.
message DeviceMeshProto {
// The name of this mesh.
optional string name = 1;

// The size of each dimension.
repeated int64 shape = 2;

// The device ids, stored in row-major order.
// There are no duplicate device ids within one device mesh.
repeated int64 device_ids = 3;

// The name of each dimension.
repeated string dim_names = 4;

// The devices of this mesh.
repeated DeviceProto devices = 5;

// The links between devices.
repeated LinkProto links = 6;
}

// Records the mapping between the logical processes and the physical devices.
message DistributedMapperProto {
// The device meshes used by this distributed computation.
// NOTE(review): the original comment said these "may be shared by different
// multiple device meshes"; presumably it meant shared by multiple process
// meshes -- confirm.
repeated DeviceMeshProto device_meshes = 1;

// One mapping entry: a process id, a device mesh name and a list of device ids.
// NOTE(review): presumably device_ids refer to devices of the mesh named by
// device_mesh_name -- confirm against the code that fills this message.
message MapperEntryProto {
optional int64 process_id = 1;
optional string device_mesh_name = 2;
repeated int64 device_ids = 3;
}

// The mapping from process ids to device ids.
// One process may use multiple devices, and one device may be shared by
// multiple processes.
repeated MapperEntryProto process_id_to_device_ids = 2;
}

0 comments on commit 7ab77f9

Please sign in to comment.