PR #10495: Enforce same input/output layout for offloading ops
Imported from GitHub PR openxla/xla#10495

This patch ensures that the host offloader does not introduce layout mismatches
when it removes the offloading custom calls, by constraining layout assignment
to give each such custom call's output the same layout as its input.
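
For illustration, here is a minimal HLO sketch of the failure mode the constraint guards against; the module, instruction names, and the "fixed_call" target with its {1,2,0} operand constraint are hypothetical and simply mirror the new tests. Without the constraint, layout assignment could assign the MoveToHost result the {1,2,0} layout demanded by its user while the parameter keeps {2,1,0}; once the offloader strips the custom call, producer and consumer would be joined with mismatched layouts.

HloModule OffloadLayoutSketch

ENTRY main {
  // The parameter keeps the entry layout {2,1,0}.
  p0 = f32[2,5,5]{2,1,0} parameter(0)
  // Without the constraint, this result could be assigned {1,2,0} to satisfy
  // the user below, diverging from the operand's layout.
  offload = f32[2,5,5] custom-call(p0), custom_call_target="MoveToHost"
  // A user that pins its operand to a non-default layout.
  ROOT consumer = f32[2,5,5]{2,1,0} custom-call(offload), custom_call_target="fixed_call", operand_layout_constraints={f32[2,5,5]{1,2,0}}
}

With the override below, the MoveToHost/MoveToDevice calls are treated as layout-preserving, so a conflicting user layout is resolved by layout assignment around the call rather than by changing the call's own layout, and removing the call later leaves consistent layouts behind.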
Copybara import of the project:

--
ce6aeac3ac5445ab014d36c92a181dbe7afc35e7 by Jaroslav Sevcik <jsevcik@nvidia.com>:

Enforce same input/output layout for offloading ops

Merging this change closes #10495

FUTURE_COPYBARA_INTEGRATE_REVIEW=openxla/xla#10495 from jaro-sevcik:host-offload-call-layout-fix ce6aeac3ac5445ab014d36c92a181dbe7afc35e7
PiperOrigin-RevId: 628707325
jaro-sevcik authored and tensorflower-gardener committed Apr 29, 2024
1 parent 57f8705 commit aeb8034
Showing 4 changed files with 79 additions and 0 deletions.
1 change: 1 addition & 0 deletions third_party/xla/xla/service/gpu/BUILD
@@ -4089,6 +4089,7 @@ cc_library(
"//xla:xla_proto_cc",
"//xla/hlo/ir:hlo",
"//xla/service:computation_layout",
"//xla/service:host_memory_offload_annotations_hdr",
"//xla/service:layout_assignment",
"//xla/service:logical_buffer",
"//xla/stream_executor",
19 changes: 19 additions & 0 deletions third_party/xla/xla/service/gpu/gpu_layout_assignment.cc
@@ -41,6 +41,7 @@ limitations under the License.
#include "xla/service/gpu/matmul_utils.h"
#include "xla/service/gpu/reduction_utils.h"
#include "xla/service/gpu/stream_executor_util.h"
#include "xla/service/host_memory_offload_annotations.h"
#include "xla/service/logical_buffer.h"
#include "xla/shape.h"
#include "xla/shape_layout.h"
@@ -568,5 +569,23 @@ bool GpuLayoutAssignment::PropagateReductionLayoutToOperand(
{/*is_row_reduction=*/true, {1, kept_dimension_size, reduction_size}});
}

bool GpuLayoutAssignment::InstructionCanChangeLayoutInstance(
const HloInstruction* instruction) {
// The host offloading custom calls will eventually be removed by the
// offloader, so we need to make sure that they do not change the layout
// and thus cause layout mismatches after the removal.
const HloCustomCallInstruction* custom_call =
DynCast<HloCustomCallInstruction>(instruction);
if (custom_call != nullptr &&
(custom_call->custom_call_target() ==
host_memory_offload_annotations::kMoveToHostCustomCallTarget ||
custom_call->custom_call_target() ==
host_memory_offload_annotations::kMoveToDeviceCustomCallTarget)) {
return false;
}

return LayoutAssignment::InstructionCanChangeLayoutInstance(instruction);
}

} // namespace gpu
} // namespace xla
3 changes: 3 additions & 0 deletions third_party/xla/xla/service/gpu/gpu_layout_assignment.h
@@ -68,6 +68,9 @@ class GpuLayoutAssignment : public LayoutAssignment {

bool PropagateReductionLayoutToOperand(const HloInstruction* user) override;

bool InstructionCanChangeLayoutInstance(
const HloInstruction* instruction) override;

const se::GpuComputeCapability gpu_version_;
const se::dnn::VersionInfo dnn_version_;
};
56 changes: 56 additions & 0 deletions third_party/xla/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -421,6 +421,62 @@ ENTRY entry {
expect_layout(call_0->operand(1)->shape(), {1, 2, 0});
}

TEST_F(LayoutAssignmentTest, MoveToHostCustomCallConstrained) {
const char* module_str = R"(
HloModule TestModule
ENTRY entry {
Arg_0 = f32[2,5,5]{2,1,0} parameter(0)
custom-call.0 = f32[2,5,5] custom-call(Arg_0), custom_call_target="MoveToHost"
ROOT custom-call.1 = f32[2,5,5]{2,1,0} custom-call(custom-call.0), custom_call_target="fixed_call", operand_layout_constraints={f32[2,5,5]{1,2,0}}
}
)";
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> m,
ParseAndReturnVerifiedModule(module_str));
ComputationLayout computation_layout(
m->entry_computation()->ComputeProgramShape());

GpuLayoutAssignment layout_assignment(
&computation_layout, GetGpuComputeCapability(), GetDnnVersion());

EXPECT_THAT(layout_assignment.Run(m.get()), IsOkAndHolds(true));

const HloInstruction* call_0 = FindInstruction(m.get(), "custom-call.0");
const Layout input_layout = call_0->operand(0)->shape().layout();
const Layout output_layout = call_0->shape().layout();
EXPECT_TRUE(LayoutUtil::Equal(input_layout, output_layout))
<< "Expected the same input/output layouts. Input: " << input_layout
<< ". Output: " << output_layout;
}

TEST_F(LayoutAssignmentTest, MoveToDeviceCustomCallConstrained) {
const char* module_str = R"(
HloModule TestModule
ENTRY entry {
Arg_0 = f32[2,5,5]{2,1,0} parameter(0)
custom-call.0 = f32[2,5,5] custom-call(Arg_0), custom_call_target="MoveToDevice"
ROOT custom-call.1 = f32[2,5,5]{2,1,0} custom-call(custom-call.0), custom_call_target="fixed_call", operand_layout_constraints={f32[2,5,5]{1,2,0}}
}
)";
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> m,
ParseAndReturnVerifiedModule(module_str));
ComputationLayout computation_layout(
m->entry_computation()->ComputeProgramShape());

GpuLayoutAssignment layout_assignment(
&computation_layout, GetGpuComputeCapability(), GetDnnVersion());

EXPECT_THAT(layout_assignment.Run(m.get()), IsOkAndHolds(true));

const HloInstruction* call_0 = FindInstruction(m.get(), "custom-call.0");
const Layout input_layout = call_0->operand(0)->shape().layout();
const Layout output_layout = call_0->shape().layout();
EXPECT_TRUE(LayoutUtil::Equal(input_layout, output_layout))
<< "Expected the same input/output layouts. Input: " << input_layout
<< ". Output: " << output_layout;
}

TEST_F(LayoutAssignmentTest, ConvCuDNNF8) {
if (!GetCudaComputeCapability().IsAtLeast(
se::CudaComputeCapability::HOPPER)) {
