wazevo(amd64): ensures reg operand for align sensitive instructions (tetratelabs#2066)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
mathetake committed Feb 16, 2024
1 parent 25698af commit a2b2f35
Showing 2 changed files with 33 additions and 11 deletions.
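
Background for the change below: most legacy SSE instructions require a 128-bit memory operand to be 16-byte aligned (an unaligned access faults), and, as the added comments note, the backend has no way to guarantee that alignment. The commit therefore stops folding loads into these instructions: getOperand_Mem_Reg calls become getOperand_Reg, and the VMinPseudo/VMaxPseudo lowerings switch to a new register-only helper, lowerVbBinOpUnaligned. The Go sketch below is only a toy illustration of that pattern; the operand type, ensureReg, and the emitted assembly strings are hypothetical and are not wazero's internal API.

    package main

    import "fmt"

    // operandKind distinguishes a v128 value already in an XMM register from one
    // that still sits in memory at an address whose alignment is unknown.
    type operandKind int

    const (
    	kindReg operandKind = iota
    	kindMem
    )

    type operand struct {
    	kind operandKind
    	loc  string
    }

    // ensureReg mirrors the idea behind getOperand_Reg in this commit: if the value
    // is in memory, load it with an alignment-insensitive MOVDQU into a temporary
    // register instead of folding the memory operand into the SSE instruction,
    // whose m128 form would require 16-byte alignment.
    func ensureReg(op operand, emit func(string)) operand {
    	if op.kind == kindMem {
    		emit(fmt.Sprintf("movdqu %s, %%xmm7", op.loc))
    		return operand{kind: kindReg, loc: "%xmm7"}
    	}
    	return op
    }

    func main() {
    	emit := func(asm string) { fmt.Println(asm) }

    	// A v128 value that may be only 8-byte aligned (hypothetical location).
    	src := operand{kind: kindMem, loc: "16(%rsp)"}

    	src = ensureReg(src, emit)
    	// Safe: PABSD with a register source never faults on alignment.
    	emit(fmt.Sprintf("pabsd %s, %%xmm0", src.loc))
    }
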
39 changes: 30 additions & 9 deletions internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -548,7 +548,8 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		m.lowerVImul(instr)
 	case ssa.OpcodeVIneg:
 		x, lane := instr.ArgWithLane()
-		rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+		// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+		rn := m.getOperand_Reg(m.c.ValueDefinition(x))
 		rd := m.c.VRegOf(instr.Return())
 		var vecOp sseOpcode
 		switch lane {
@@ -618,7 +619,8 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 
 	case ssa.OpcodeVFneg:
 		x, lane := instr.ArgWithLane()
-		rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+		// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+		rn := m.getOperand_Reg(m.c.ValueDefinition(x))
 		rd := m.c.VRegOf(instr.Return())
 
 		tmp := m.c.AllocateVReg(ssa.TypeV128)
@@ -659,7 +661,8 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 
	case ssa.OpcodeVSqrt:
 		x, lane := instr.ArgWithLane()
-		rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+		// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+		rn := m.getOperand_Reg(m.c.ValueDefinition(x))
 		rd := m.c.VRegOf(instr.Return())
 
 		var vecOp sseOpcode
@@ -794,7 +797,8 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		default:
 			panic("BUG: unexpected lane type")
 		}
-		m.lowerVbBinOp(vecOp, y, x, instr.Return())
+		// There's no way to ensure 128-bit alignment, so use lowerVbBinOpUnaligned.
+		m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return())
 
 	case ssa.OpcodeVMaxPseudo:
 		x, y, lane := instr.Arg2WithLane()
@@ -807,7 +811,8 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		default:
 			panic("BUG: unexpected lane type")
 		}
-		m.lowerVbBinOp(vecOp, y, x, instr.Return())
+		// There's no way to ensure 128-bit alignment, so use lowerVbBinOpUnaligned.
+		m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return())
 
 	case ssa.OpcodeVIshl:
 		x, y, lane := instr.Arg2WithLane()
@@ -867,13 +872,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 
 	case ssa.OpcodeFvpromoteLow:
 		x := instr.Arg()
-		src := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+		// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+		src := m.getOperand_Reg(m.c.ValueDefinition(x))
 		dst := m.c.VRegOf(instr.Return())
 		m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtps2pd, src, dst))
 
 	case ssa.OpcodeFvdemote:
 		x := instr.Arg()
-		src := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+		// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+		src := m.getOperand_Reg(m.c.ValueDefinition(x))
 		dst := m.c.VRegOf(instr.Return())
 		m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtpd2ps, src, dst))
 
@@ -1456,7 +1463,8 @@ func (m *machine) lowerXmmRmR(instr *ssa.Instruction) {
 	}
 
 	xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y)
-	rn := m.getOperand_Mem_Reg(yDef)
+	// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+	rn := m.getOperand_Reg(yDef)
 	rm := m.getOperand_Reg(xDef)
 	rd := m.c.VRegOf(instr.Return())
 
@@ -2960,14 +2968,27 @@ func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) {
 	m.copyTo(tmpY, m.c.VRegOf(ret))
 }
 
+func (m *machine) lowerVbBinOpUnaligned(op sseOpcode, x, y, ret ssa.Value) {
+	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
+	rm := m.getOperand_Reg(m.c.ValueDefinition(y))
+	rd := m.c.VRegOf(ret)
+
+	tmp := m.copyToTmp(rn.reg())
+
+	binOp := m.allocateInstr()
+	binOp.asXmmRmR(op, rm, tmp)
+	m.insert(binOp)
+
+	m.copyTo(tmp, rd)
+}
 
 func (m *machine) lowerVbBinOp(op sseOpcode, x, y, ret ssa.Value) {
 	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
 	rm := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
 	rd := m.c.VRegOf(ret)
 
 	tmp := m.copyToTmp(rn.reg())
 
 	// op between rn, rm.
 	binOp := m.allocateInstr()
 	binOp.asXmmRmR(op, rm, tmp)
 	m.insert(binOp)
5 changes: 3 additions & 2 deletions internal/engine/wazevo/backend/isa/amd64/machine_vec.go
@@ -561,7 +561,8 @@ func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed b
 	switch lane {
 	case ssa.VecLaneF32x4:
 		if signed {
-			xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+			// There's no way to ensure 128-bit alignment, so use getOperand_Reg.
+			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
 			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
 		} else {
 			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
@@ -850,7 +851,7 @@ func (m *machine) lowerVIabs(instr *ssa.Instruction) {
 	case ssa.VecLaneI32x4:
 		vecOp = sseOpcodePabsd
 	}
-	rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
 
 	i := m.allocateInstr()
 	i.asXmmUnaryRmR(vecOp, rn, rd)
