Skip to content

Commit

Permalink
merge all phi kernel lib to several big static lib, reduce link comma…
Browse files Browse the repository at this point in the history
…nd (#42185)

* merge all phi lib to several big static lib

* merge all phi lib to several big static lib
  • Loading branch information
zhwesky2010 committed Apr 25, 2022
1 parent 30f65c2 commit e52e6d0
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 88 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Expand Up @@ -20,6 +20,9 @@ else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM)
# CMP0026 is set to OLD so that get_property() can read the LOCATION property of static library targets; see:
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
cmake_policy(SET CMP0026 OLD)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
128 changes: 63 additions & 65 deletions cmake/generic.cmake
Expand Up @@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
endif()
endfunction()

# create_dummy_static_lib(<target> LIBS <lib>... [DEPS <dep>...] LIMIT <n>)
#
# Splits LIBS (after de-duplication) into consecutive groups of at most LIMIT
# libraries, merges every group into a static library named
# <target>_static_<k> (k = 1, 2, ...) through merge_static_libs(), and finally
# declares <target> with cc_library() depending on all generated
# <target>_static_<k> libraries.
#
#   LIBS   libraries to merge.
#   DEPS   optional extra libraries linked to every merged group.
#   LIMIT  maximum number of libraries per merged group; must be a
#          non-negative integer (0 behaves like 1: one library per group).
function(create_dummy_static_lib TARGET_NAME)
  set(options "")
  # LIMIT is a single number, so it is parsed as a one-value keyword.
  set(oneValueArgs LIMIT)
  set(multiValueArgs LIBS DEPS)
  cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Fail early with a readable message instead of a malformed if() expression
  # further down when LIMIT is missing or not a number.
  if(NOT "${merge_LIMIT}" MATCHES "^[0-9]+$")
    message(FATAL_ERROR
      "create_dummy_static_lib(${TARGET_NAME}): LIMIT must be a non-negative integer, got '${merge_LIMIT}'")
  endif()
  set(limit ${merge_LIMIT})

  # list(REMOVE_DUPLICATES) is only meaningful on a non-empty list.
  if(merge_LIBS)
    list(REMOVE_DUPLICATES merge_LIBS)
  endif()
  list(LENGTH merge_LIBS libs_len)

  # A function scope starts with the caller's variables visible, so the
  # accumulators are reset explicitly to avoid picking up stale values.
  set(group "")
  set(merged_targets "")
  set(index 1)
  set(offset 0)

  foreach(lib ${merge_LIBS})
    math(EXPR offset "${offset}+1")
    list(APPEND group ${lib})
    list(LENGTH group group_len)
    # Flush when the group holds `limit` libraries, or when the last library
    # has been consumed so the trailing partial group is not lost.
    if((NOT group_len LESS limit) OR (offset EQUAL libs_len))
      message("Merge and generate static library: ${TARGET_NAME}_static_${index}")
      merge_static_libs(${TARGET_NAME}_static_${index} ${group})
      if(merge_DEPS)
        # Plain (keyword-less) signature on purpose: merge_static_libs() links
        # the same target with the plain signature and the two cannot be mixed.
        target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
      endif()
      list(APPEND merged_targets ${TARGET_NAME}_static_${index})
      set(group "")
      math(EXPR index "${index}+1")
    endif()
  endforeach()

  cc_library(${TARGET_NAME} DEPS ${merged_targets})
endfunction()

function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
Expand All @@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
# also help to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)

if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})

# OSX: use 'libtool' to merge archives
if(APPLE)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
)
endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)

foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)

add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
endif()

add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# LINUX: use "ar" to extract objects and re-add to a common lib
if(LINUX)
set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")

list(APPEND target_OBJS "${objlistfile}")
foreach(lib ${libs})
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
endforeach()

# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})

# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
file(APPEND ${mri_file} "save\nend\n")

add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif(LINUX)
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND ${CMAKE_AR} -M < ${mri_file}
COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
endif()

# Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
if(WIN32)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# The MSVC compiler puts the library in the "/Release/xxxlib" directory by default
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
endif()
endfunction()

function(check_coverage_opt TARGET_NAME SRCS)
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
Expand Down Expand Up @@ -1076,4 +1075,3 @@ function(math_library TARGET)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif()
endfunction()

14 changes: 7 additions & 7 deletions paddle/fluid/distributed/collective/CMakeLists.txt
@@ -1,20 +1,20 @@
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)

if (WITH_DISTRIBUTE)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
endif()

if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()

if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()
2 changes: 1 addition & 1 deletion paddle/fluid/eager/accumulation/CMakeLists.txt
@@ -1 +1 @@
cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi phi_api grad_node_info)
cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi_api grad_node_info)
2 changes: 1 addition & 1 deletion paddle/fluid/eager/api/utils/CMakeLists.txt
@@ -1,3 +1,3 @@
cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi phi_api autograd_meta grad_node_info accumulation_node)
cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi_api autograd_meta grad_node_info accumulation_node)
cc_library(hook_utils SRCS hook_utils.cc DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node)
cc_library(global_utils SRCS global_utils.cc DEPS place tracer)
2 changes: 1 addition & 1 deletion paddle/fluid/eager/pylayer/CMakeLists.txt
@@ -1 +1 @@
cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi phi_api grad_node_info)
cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi_api grad_node_info)
6 changes: 3 additions & 3 deletions paddle/fluid/framework/CMakeLists.txt
Expand Up @@ -206,11 +206,11 @@ ENDIF()
IF(WITH_XPU)
cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
phi phi_utils kernel_factory infershape_utils op_utils)
phi_utils kernel_factory infershape_utils op_utils)
ELSE()
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
phi phi_utils kernel_factory infershape_utils op_utils)
phi_utils kernel_factory infershape_utils op_utils)
ENDIF()

cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
Expand Down Expand Up @@ -418,7 +418,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
cc_library(generator SRCS generator.cc DEPS enforce place)

cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place phi var_type_traits phi phi_api_utils op_info shape_inference)
cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place var_type_traits phi phi_api_utils op_info shape_inference)
cc_test(infershape_utils_test SRCS infershape_utils_test.cc DEPS infershape_utils infermeta_utils meta_tensor)

# Get the current working branch
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/CMakeLists.txt
@@ -1,9 +1,9 @@
cc_library(imperative_flag SRCS flags.cc DEPS gflags flags)
cc_library(var_helper SRCS var_helper.cc DEPS tensor phi_api)
IF(WITH_XPU)
cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper)
cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper)
ELSE()
cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper)
cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper)
ENDIF()
cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api)
add_subdirectory(jit)
Expand Down
8 changes: 4 additions & 4 deletions paddle/fluid/inference/CMakeLists.txt
Expand Up @@ -36,7 +36,7 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
set(utils_modules stringpiece pretty_log string_helper)
set(utils_modules stringpiece pretty_log string_helper benchmark)

add_subdirectory(api)

Expand All @@ -50,9 +50,9 @@ if(WITH_ONNXRUNTIME)
set(STATIC_INFERENCE_API ${STATIC_INFERENCE_API} onnxruntime_predictor)
endif()

#TODO(wilber, T8T9): Do we still need to support windows gpu static library?
# The Windows GPU static library exceeds the limit, so create_static_lib is not used here and a dummy cc_library is created instead
if(WIN32 AND WITH_GPU)
cc_library(paddle_inference DEPS ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules})
cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} ${utils_modules})
else()
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules})
endif()
Expand Down Expand Up @@ -84,7 +84,7 @@ set(SHARED_INFERENCE_SRCS
${PADDLE_CUSTOM_OP_SRCS})

# shared inference library deps
set(SHARED_INFERENCE_DEPS ${fluid_modules} ${phi_modules} analysis_predictor)
set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor ${utils_modules})

if (WITH_CRYPTO)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto)
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -1,8 +1,8 @@
# Compiling with WITH_PYTHON=ON and WITH_TENSORRT=ON failed on windows. Temporarily add paddle_inference_api dependency to solve the problem
if(WIN32)
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api)
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api)
else()
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost)
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost)
endif()
nv_library(tensorrt_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/CMakeLists.txt
Expand Up @@ -27,7 +27,7 @@ set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_contex
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})

cc_library(phi DEPS ${PHI_DEPS})
create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100)

set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h CACHE INTERNAL "phi/extension.h file")
file(WRITE ${phi_extension_header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")
Expand Down
3 changes: 2 additions & 1 deletion paddle/phi/kernels/CMakeLists.txt
Expand Up @@ -36,7 +36,7 @@ set(MANUAL_BUILD_KERNELS ${AUTOTUNE_KERNELS} cross_entropy_kernel adam_kernel ad
matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel
put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel
softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel
triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel)
triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel reduce_mean_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel)
foreach(src ${AUTOTUNE_KERNELS})
kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
endforeach()
Expand All @@ -52,6 +52,7 @@ kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matri
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
Expand Down

0 comments on commit e52e6d0

Please sign in to comment.