Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport simd fixes to 0.38.0 #4334

Merged
merged 2 commits into from Jun 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 5 additions & 7 deletions cranelift/codegen/src/isa/x64/inst.isle
Expand Up @@ -168,7 +168,7 @@
(dst WritableGpr))

;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize)
(XmmCmove (ty Type)
(cc CC)
(consequent XmmMem)
(alternative Xmm)
Expand Down Expand Up @@ -1876,10 +1876,9 @@

(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_xmm ty cc consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(let ((dst WritableXmm (temp_writable_xmm)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmove size cc consequent alternative dst)
(MInst.XmmCmove ty cc consequent alternative dst)
dst)))

;; Helper for creating `cmove` instructions directly from values. This allows us
Expand Down Expand Up @@ -1932,9 +1931,8 @@
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(tmp WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty))
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
(cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
Expand Down
15 changes: 10 additions & 5 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Expand Up @@ -1112,7 +1112,7 @@ pub(crate) fn emit(
}

Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
Expand All @@ -1130,10 +1130,15 @@ pub(crate) fn emit(
// Jump if cc is *not* set.
one_way_jmp(sink, cc.invert(), next);

let op = if *size == OperandSize::Size64 {
SseOpcode::Movsd
} else {
SseOpcode::Movss
let op = match *ty {
types::F64 => SseOpcode::Movsd,
types::F32 => SseOpcode::Movsd,
types::F32X4 => SseOpcode::Movaps,
types::F64X2 => SseOpcode::Movapd,
ty => {
debug_assert!(ty.is_vector() && ty.bytes() == 16);
SseOpcode::Movdqa
}
};
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
inst.emit(&[], sink, info, state);
Expand Down
25 changes: 14 additions & 11 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Expand Up @@ -617,14 +617,14 @@ impl Inst {
}
}

pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove {
size,
ty,
cc,
consequent: src,
alternative: dst.to_reg(),
Expand Down Expand Up @@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
}

Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
dst,
..
} => {
let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
let consequent = consequent.pretty_print(size.to_bytes(), allocs);
let size = u8::try_from(ty.bytes()).unwrap();
let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
let consequent = consequent.pretty_print(size, allocs);
format!(
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
cc.invert().to_string(),
if *size == OperandSize::Size64 {
"sd"
} else {
"ss"
match *ty {
types::F64 => "sd",
types::F32 => "ss",
types::F32X4 => "aps",
types::F64X2 => "apd",
_ => "dqa",
},
consequent,
dst,
Expand Down
13 changes: 5 additions & 8 deletions cranelift/codegen/src/isa/x64/lower.rs
Expand Up @@ -2315,11 +2315,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
debug_assert!(ty == types::F32 || ty == types::F64);
emit_moves(ctx, dst, rhs, ty);
ctx.emit(Inst::xmm_cmove(
if ty == types::F64 {
OperandSize::Size64
} else {
OperandSize::Size32
},
ty,
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
Expand Down Expand Up @@ -2602,17 +2598,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));

// Use the `zero_mask` on a writable `swizzle_mask`.
let swizzle_mask = Writable::from_reg(swizzle_mask);
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddusb,
RegMem::from(zero_mask),
swizzle_mask,
swizzle_mask_tmp,
));

// Shuffle `dst` using the fixed-up `swizzle_mask`.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(swizzle_mask),
RegMem::from(swizzle_mask_tmp),
dst,
));
}
Expand Down
19 changes: 19 additions & 0 deletions tests/misc_testsuite/simd/v128-select.wast
@@ -0,0 +1,19 @@
;; Module under test: exposes the untyped `select` instruction applied to
;; 128-bit vector operands.
(module
  ;; Takes two v128 values and an i32 condition, and returns whichever v128
  ;; `select` picks: the first when the condition is non-zero, otherwise the
  ;; second.
  (func (export "select") (param v128 v128 i32) (result v128)
    (select
      (local.get 0)
      (local.get 1)
      (local.get 2)))
)

;; Condition is 0, so `select` must yield the second operand (all lanes = 2).
(assert_return (invoke "select"
(v128.const i64x2 1 1)
(v128.const i64x2 2 2)
(i32.const 0))
(v128.const i64x2 2 2))

;; Condition is non-zero, so `select` must yield the first operand (all lanes = 1).
(assert_return (invoke "select"
(v128.const i64x2 1 1)
(v128.const i64x2 2 2)
(i32.const 1))
(v128.const i64x2 1 1))