Skip to content

Commit

Permalink
Backport simd fixes to 0.38.0 (#4334)
Browse files Browse the repository at this point in the history
* x64: Fix codegen for the `i8x16.swizzle` instruction (#4318)

This commit fixes a mistake in the `Swizzle` opcode implementation in
the x64 backend of Cranelift. Previously an input register was cast to
a writable register and then modified, which I believe instructions are
not supposed to do. This was discovered as part of my investigation
into #4315.

* x64: Fix codegen for the `select` instruction with v128 (#4317)

This commit fixes a bug in the previous codegen for the `select`
instruction when the operands of the `select` were of the `v128` type.
Previously the `XmmCmove` instruction only stored an `OperandSize` of 32
or 64 for a 32- or 64-bit move, but this was also used for these 128-bit
types, which meant that the wrong move instruction was generated when
they were used. The fix applied here is to store the whole `Type` being
moved so the 128-bit variant can be selected as well.
  • Loading branch information
alexcrichton committed Jun 27, 2022
1 parent 99c6463 commit b3ad5f9
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 31 deletions.
12 changes: 5 additions & 7 deletions cranelift/codegen/src/isa/x64/inst.isle
Expand Up @@ -168,7 +168,7 @@
(dst WritableGpr))

;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize)
(XmmCmove (ty Type)
(cc CC)
(consequent XmmMem)
(alternative Xmm)
Expand Down Expand Up @@ -1876,10 +1876,9 @@

(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_xmm ty cc consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(let ((dst WritableXmm (temp_writable_xmm)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmove size cc consequent alternative dst)
(MInst.XmmCmove ty cc consequent alternative dst)
dst)))

;; Helper for creating `cmove` instructions directly from values. This allows us
Expand Down Expand Up @@ -1932,9 +1931,8 @@
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(tmp WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty))
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
(cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
Expand Down
15 changes: 10 additions & 5 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Expand Up @@ -1112,7 +1112,7 @@ pub(crate) fn emit(
}

Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
Expand All @@ -1130,10 +1130,15 @@ pub(crate) fn emit(
// Jump if cc is *not* set.
one_way_jmp(sink, cc.invert(), next);

let op = if *size == OperandSize::Size64 {
SseOpcode::Movsd
} else {
SseOpcode::Movss
// Pick the SSE move used to copy `consequent` into `dst`, based on the
// type of the value being conditionally moved.
//
// F32 uses `movss`, matching both the previous `OperandSize::Size32`
// behavior and the `movss` mnemonic the pretty-printer shows for F32.
// NOTE(review): `consequent` is an `XmmMem`, so it may be a memory
// operand; `movsd` there would load 8 bytes for a 4-byte value.
let op = match *ty {
    types::F64 => SseOpcode::Movsd,
    types::F32 => SseOpcode::Movss,
    types::F32X4 => SseOpcode::Movaps,
    types::F64X2 => SseOpcode::Movapd,
    ty => {
        // Every remaining type is expected to be a 128-bit vector; these
        // are moved with the integer-domain full-register move.
        debug_assert!(ty.is_vector() && ty.bytes() == 16);
        SseOpcode::Movdqa
    }
};
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
inst.emit(&[], sink, info, state);
Expand Down
25 changes: 14 additions & 11 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Expand Up @@ -617,14 +617,14 @@ impl Inst {
}
}

pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove {
size,
ty,
cc,
consequent: src,
alternative: dst.to_reg(),
Expand Down Expand Up @@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
}

Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
dst,
..
} => {
let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
let consequent = consequent.pretty_print(size.to_bytes(), allocs);
let size = u8::try_from(ty.bytes()).unwrap();
let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
let consequent = consequent.pretty_print(size, allocs);
format!(
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
cc.invert().to_string(),
if *size == OperandSize::Size64 {
"sd"
} else {
"ss"
match *ty {
types::F64 => "sd",
types::F32 => "ss",
types::F32X4 => "aps",
types::F64X2 => "apd",
_ => "dqa",
},
consequent,
dst,
Expand Down
13 changes: 5 additions & 8 deletions cranelift/codegen/src/isa/x64/lower.rs
Expand Up @@ -2315,11 +2315,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
debug_assert!(ty == types::F32 || ty == types::F64);
emit_moves(ctx, dst, rhs, ty);
ctx.emit(Inst::xmm_cmove(
if ty == types::F64 {
OperandSize::Size64
} else {
OperandSize::Size32
},
ty,
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
Expand Down Expand Up @@ -2602,17 +2598,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));

// Use the `zero_mask` on a writable `swizzle_mask`.
let swizzle_mask = Writable::from_reg(swizzle_mask);
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddusb,
RegMem::from(zero_mask),
swizzle_mask,
swizzle_mask_tmp,
));

// Shuffle `dst` using the fixed-up `swizzle_mask`.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(swizzle_mask),
RegMem::from(swizzle_mask_tmp),
dst,
));
}
Expand Down
19 changes: 19 additions & 0 deletions tests/misc_testsuite/simd/v128-select.wast
@@ -0,0 +1,19 @@
;; Exercises `select` with `v128` operands.

(module
  ;; Returns the first v128 argument when the i32 condition is non-zero,
  ;; otherwise the second.
  (func (export "select") (param v128 v128 i32) (result v128)
    (select
      (local.get 0)
      (local.get 1)
      (local.get 2)))
)

;; Condition is zero: the second operand is selected.
(assert_return
  (invoke "select"
    (v128.const i64x2 1 1)
    (v128.const i64x2 2 2)
    (i32.const 0))
  (v128.const i64x2 2 2))

;; Condition is non-zero: the first operand is selected.
(assert_return
  (invoke "select"
    (v128.const i64x2 1 1)
    (v128.const i64x2 2 2)
    (i32.const 1))
  (v128.const i64x2 1 1))

0 comments on commit b3ad5f9

Please sign in to comment.