Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Distributed】Delete scale_and_split_grads function #64154

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def forward_backward_pipeline(
), "comm buffers should be created"
for _, buffers in self._chunk_2_comm_buffers.items():
for buffer in buffers:
- buffer.scale_and_split_grads()
+ buffer.scale_grads()

if self._enable_timer:
self.timers("allreduce_shared_weight_gradients").start()
Expand Down Expand Up @@ -1151,7 +1151,7 @@ def _sync_overlap_grads(self):

for _, buffers in self._chunk_2_comm_buffers.items():
for buffer in buffers:
- buffer.scale_and_split_grads()
+ buffer.scale_grads()

def _backward_step_helper(self, micro_step):
virtual_pp_rank = self._get_virtual_pp_rank(micro_step, forward=False)
Expand Down Expand Up @@ -1854,7 +1854,7 @@ def _sync_overlap_grads(self):

for buffers in self._chunk_2_comm_buffers.values():
for buffer in buffers:
- buffer.scale_and_split_grads()
+ buffer.scale_grads()

def forward_backward_pipeline(
self, data, scaler, forward_only=False, compute_loss=True
Expand Down
9 changes: 0 additions & 9 deletions python/paddle/distributed/fleet/utils/tensor_fusion_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,15 +625,6 @@ def scale_grads(self):

self._reset_params_checked_in()

- @imperative_base.no_grad
- def scale_and_split_grads(self):
- assert self._task is not None, "Task is not initialized. "
- self._task.wait()
- scale_factor = 1.0 / self._comm_group.nranks
- self.grad_storage.scale_(scale_factor)
-
- self._reset_params_checked_in()
-

def obtain_storage(
parameters,
Expand Down
8 changes: 8 additions & 0 deletions test/deprecated/cpp/inference/test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ set(IMG_CLS_RESNET_INSTALL_DIR
set(IMG_CLS_RESNET_MODEL_DIR
"${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model")

+ if(NOT EXISTS
+ ${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model.tgz
+ )
+ inference_download_and_uncompress_without_verify(
+ ${IMG_CLS_RESNET_INSTALL_DIR} ${INFERENCE_URL}
+ "image_classification_resnet.inference.model.tgz")
+ endif()
+
if(WITH_ONNXRUNTIME)
set(MOBILENETV2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/MobileNetV2")
set(MOBILENETV2_MODEL_DIR "${MOBILENETV2_INSTALL_DIR}/MobileNetV2")
Expand Down