Skip to content

Commit

Permalink
Merge branch 'PaddlePaddle:develop' into onecyclelr
Browse files Browse the repository at this point in the history
  • Loading branch information
Asthestarsfalll authored Apr 29, 2022
2 parents af0420f + 683f152 commit 6d48016
Show file tree
Hide file tree
Showing 356 changed files with 13,912 additions and 3,793 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM)
# use to get_property location of static lib
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
cmake_policy(SET CMP0026 OLD)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
2 changes: 2 additions & 0 deletions cmake/external/paddle2onnx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ set(PADDLE2ONNX_OPTIONAL_ARGS
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_STANDARD=14
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
-DWITH_STATIC=OFF
-DMSVC_STATIC_CRT=${MSVC_STATIC_CRT}
-DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ endif()
# ubuntu and centos: use output by XDNN API team
if(NOT DEFINED XPU_XDNN_BASE_URL)
SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220412")
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220425")
else()
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
Expand Down
1 change: 0 additions & 1 deletion cmake/flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ if(NOT APPLE)
set(COMMON_FLAGS
${COMMON_FLAGS}
-Wno-format-truncation # Warning in boost gcc 8.2
-Wno-error=cast-function-type # Warning in boost gcc 8.2
-Wno-error=parentheses # Warning in boost gcc 8.2
-Wno-error=catch-value # Warning in boost gcc 8.2
-Wno-error=nonnull-compare # Warning in boost gcc 8.2
Expand Down
128 changes: 63 additions & 65 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
endif()
endfunction()

function(create_dummy_static_lib TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs LIBS DEPS LIMIT)
cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

list(REMOVE_DUPLICATES merge_LIBS)
set(index 1)
set(offset 1)
# the dummy target would be consisted of limit size libraries
set(limit ${merge_LIMIT})
list(LENGTH merge_LIBS libs_len)
foreach(lib ${merge_LIBS})
list(APPEND merge_list ${lib})
list(LENGTH merge_list listlen)
if ((${listlen} GREATER ${limit}) OR (${offset} EQUAL ${libs_len}))
message("Merge and generate static library: ${TARGET_NAME}_static_${index}")
merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list})
if(merge_DEPS)
target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
endif()
set(merge_list)
list(APPEND ${TARGET_NAME}_list ${TARGET_NAME}_static_${index})
MATH(EXPR index "${index}+1")
endif()
MATH(EXPR offset "${offset}+1")
endforeach()
cc_library(${TARGET_NAME} DEPS ${${TARGET_NAME}_list})
endfunction()

function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
Expand All @@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
# also help to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)

if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy static lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})

# OSX: use 'libtool' to merge archives
if(APPLE)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
)
endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)

foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)

add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
endif()

add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# LINUX: use "ar" to extract objects and re-add to a common lib
if(LINUX)
set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")

list(APPEND target_OBJS "${objlistfile}")
foreach(lib ${libs})
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
endforeach()

# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})

# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})

# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
file(APPEND ${mri_file} "save\nend\n")

add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif(LINUX)
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")

target_link_libraries(${TARGET_NAME} ${libs_deps})
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND ${CMAKE_AR} -M < ${mri_file}
COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
endif()

# Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
if(WIN32)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# msvc compiler will put libarary in directory of "/Release/xxxlib" by default
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
endif()
endfunction()

function(check_coverage_opt TARGET_NAME SRCS)
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
Expand Down Expand Up @@ -1076,4 +1075,3 @@ function(math_library TARGET)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif()
endfunction()

14 changes: 7 additions & 7 deletions paddle/fluid/distributed/collective/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)

if (WITH_DISTRIBUTE)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
endif()

if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()

if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
endif()
endif()
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupHeter.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
HeterClient* client_ =
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
auto dense_cpu_tensor = cpu_tensors[0];
std::vector<int> send_size;
std::vector<int64_t> send_size;
send_size.push_back(dense_cpu_tensor.numel());
int ret = client_->Send(
gid_, {dense_cpu_tensor.name()}, send_size, dense_cpu_tensor.data(),
Expand Down Expand Up @@ -212,7 +212,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
auto dense_cpu_tensor = cpu_tensors[0];
if (gloo_rank_ == 0) {
std::vector<int> send_size;
std::vector<int64_t> send_size;
send_size.push_back(dense_cpu_tensor.numel());
int ret = client_->Send(
gid_, {dense_cpu_tensor.name()}, send_size,
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/distributed/collective/reducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector<Tensor> &outputs) {
while (!queue.empty()) {
egr::GradNodeBase *node = queue.front();
queue.pop();
const std::vector<std::vector<egr::Edge>> &edges = node->GetEdges();
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const egr::Edge &edge = edges[i][j];
const paddle::small_vector<std::vector<egr::GradSlotMeta>,
egr::kSlotSmallVectorSize> &metas =
node->OutputMeta();
for (size_t i = 0; i < metas.size(); i++) {
for (size_t j = 0; j < metas[i].size(); j++) {
const egr::Edge &edge = metas[i][j].GetEdge();
auto next_node_shared = edge.GetMutableGradNode();
if (!next_node_shared || !next_node_shared.get()) {
continue;
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/distributed/ps/service/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
set(BRPC_SRCS ps_client.cc server.cc)
set_source_files_properties(${BRPC_SRCS})


if(WITH_HETERPS)

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context rocksdb)

else()

set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context)

endif()

brpc_library(sendrecv_rpc SRCS
Expand Down
2 changes: 0 additions & 2 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ DEFINE_int32(pserver_sparse_merge_thread, 1, "pserver sparse merge thread num");
DEFINE_int32(pserver_sparse_table_shard_num, 1000,
"sparse table shard for save & load");

DEFINE_int32(heter_world_size, 100, "group size"); // 可配置

namespace paddle {
namespace framework {
class Scope;
Expand Down
17 changes: 12 additions & 5 deletions paddle/fluid/distributed/ps/service/heter_client.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_int32(heter_world_size, 100, "group size"); // group max size
DEFINE_int32(switch_send_recv_timeout_s, 600, "switch_send_recv_timeout_s");

namespace paddle {
namespace distributed {

std::shared_ptr<HeterClient> HeterClient::s_instance_ = nullptr;
std::mutex HeterClient::mtx_;
std::shared_ptr<HeterClient> HeterClient::switch_s_instance_ = nullptr;

int GetMicroId(const platform::DeviceContext& ctx,
const framework::Scope* scope) {
Expand Down Expand Up @@ -222,6 +226,7 @@ int HeterClient::Send(const platform::DeviceContext& ctx,
distributed::MultiVarMsg request;
// 1. set req message_name(string)
request.set_message_name(message_name);
request.set_group_id(0);

// 2. set req send_var_names(<string>)
for (auto& send_var_name : send_var_names) {
Expand Down Expand Up @@ -263,7 +268,7 @@ int HeterClient::Send(const platform::DeviceContext& ctx,
}

int HeterClient::Send(int group_id, const std::vector<std::string>& var_names,
const std::vector<int>& vars_len, void* data_ptr,
const std::vector<int64_t>& vars_size, void* data_ptr,
int64_t data_size) {
OnHeterRpcDone* closure = new OnHeterRpcDone([](void* done) {
auto* closure = reinterpret_cast<OnHeterRpcDone*>(done);
Expand All @@ -282,7 +287,7 @@ int HeterClient::Send(int group_id, const std::vector<std::string>& var_names,
for (auto& send_var_name : var_names) {
request.add_send_var_names(send_var_name);
}
for (auto var_len : vars_len) {
for (auto var_len : vars_size) {
request.add_vars_len(var_len);
}
auto& request_buffer = closure->cntl.request_attachment();
Expand All @@ -301,6 +306,7 @@ int HeterClient::Send(int group_id, const std::vector<std::string>& var_names,
::paddle::distributed::PsService_Stub stub(channel);
stub.SendToSwitch(&closure->cntl, &request, &closure->ps_response, closure);
fut.wait();
delete closure;
return 0;
}

Expand All @@ -325,6 +331,7 @@ int HeterClient::Recv(const platform::DeviceContext& ctx,
distributed::MultiVarMsg request;
// 1. set req message_name(string)
request.set_message_name(message_name);
request.set_group_id(0);

// 2. set req recv_var_names(<string>)
for (auto& recv_var_name : recv_var_names) {
Expand Down Expand Up @@ -396,8 +403,8 @@ int HeterClient::Recv(int group_id, const std::vector<std::string>& var_names,
// save in worker
auto& res_io_buffer = closure->cntl.response_attachment();
butil::IOBufBytesIterator io_buffer_itr(res_io_buffer);
io_buffer_itr.copy_and_forward(reinterpret_cast<void*>(data_ptr),
data_size * sizeof(float));
io_buffer_itr.copy_and_forward(reinterpret_cast<void*>(data_ptr), data_size);
delete closure;
VLOG(4) << "Recv done";
return 0;
}
Expand Down
Loading

0 comments on commit 6d48016

Please sign in to comment.