Skip to content

Commit

Permalink
Merge #2787
Browse files Browse the repository at this point in the history
2787: Fix singlepass codegen regression r=ptitSeb a=ptitSeb

# Description
Issue was with the conversion Hash -> vec of the used GPR and SIMD registers before mass Push/Pop. Because a register might be allocated/deallocated in between, the order of the resulting vector might change.
The fix use a simple `sort` on the vector before push/pop elements, to be sure the order is deterministic.
The PR also contains a few ARM64 machine change, so the test can be run also on Aarch64 Linux & macOS (tested to be working too).

Fixes #2782 

Co-authored-by: ptitSeb <sebastien.chev@gmail.com>
  • Loading branch information
bors[bot] and ptitSeb committed Feb 14, 2022
2 parents 4fdf76b + c2a9982 commit 2b396e0
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 118 deletions.
24 changes: 17 additions & 7 deletions lib/compiler-singlepass/src/arm64_decl.rs
Expand Up @@ -4,6 +4,7 @@ use crate::common_decl::{MachineState, MachineValue, RegisterIndex};
use crate::location::CombinedRegister;
use crate::location::Reg as AbstractReg;
use std::collections::BTreeMap;
use std::slice::Iter;
use wasmer_compiler::CallingConvention;
use wasmer_types::Type;

Expand Down Expand Up @@ -98,7 +99,13 @@ impl AbstractReg for GPR {
self as usize
}
fn from_index(n: usize) -> Result<GPR, ()> {
const REGS: [GPR; 32] = [
match n {
0..=31 => Ok(GPR::iterator().nth(n).unwrap().clone()),
_ => Err(()),
}
}
fn iterator() -> Iter<'static, GPR> {
static GPRS: [GPR; 32] = [
GPR::X0,
GPR::X1,
GPR::X2,
Expand Down Expand Up @@ -132,7 +139,7 @@ impl AbstractReg for GPR {
GPR::X30,
GPR::XzrSp,
];
REGS.get(n).cloned().ok_or(())
GPRS.iter()
}
}

Expand All @@ -147,7 +154,13 @@ impl AbstractReg for NEON {
self as usize
}
fn from_index(n: usize) -> Result<NEON, ()> {
const REGS: [NEON; 32] = [
match n {
0..=31 => Ok(NEON::iterator().nth(n).unwrap().clone()),
_ => Err(()),
}
}
fn iterator() -> Iter<'static, NEON> {
const NEONS: [NEON; 32] = [
NEON::V0,
NEON::V1,
NEON::V2,
Expand Down Expand Up @@ -181,10 +194,7 @@ impl AbstractReg for NEON {
NEON::V30,
NEON::V31,
];
match n {
0..=31 => Ok(REGS[n]),
_ => Err(()),
}
NEONS.iter()
}
}

Expand Down
11 changes: 6 additions & 5 deletions lib/compiler-singlepass/src/codegen.rs
Expand Up @@ -737,8 +737,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
.collect();

// Save used GPRs. Preserve correct stack alignment
let mut used_stack = self.machine.push_used_gpr();
let used_gprs = self.machine.get_used_gprs();
let mut used_stack = self.machine.push_used_gpr(&used_gprs);
for r in used_gprs.iter() {
let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone();
if content == MachineValue::Undefined {
Expand All @@ -752,7 +752,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
// Save used SIMD registers.
let used_simds = self.machine.get_used_simd();
if used_simds.len() > 0 {
used_stack += self.machine.push_used_simd();
used_stack += self.machine.push_used_simd(&used_simds);

for r in used_simds.iter().rev() {
let content =
Expand Down Expand Up @@ -842,7 +842,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
self.state.stack_values.push(MachineValue::Undefined);
}
}
self.machine.move_location(params_size[i], *param, loc);
self.machine
.move_location_for_native(params_size[i], *param, loc);
}
_ => {
return Err(CodegenError {
Expand Down Expand Up @@ -914,14 +915,14 @@ impl<'a, M: Machine> FuncGen<'a, M> {

// Restore SIMDs.
if !used_simds.is_empty() {
self.machine.pop_used_simd();
self.machine.pop_used_simd(&used_simds);
for _ in 0..used_simds.len() {
self.state.stack_values.pop().unwrap();
}
}

// Restore GPRs.
self.machine.pop_used_gpr();
self.machine.pop_used_gpr(&used_gprs);
for _ in used_gprs.iter().rev() {
self.state.stack_values.pop().unwrap();
}
Expand Down
46 changes: 45 additions & 1 deletion lib/compiler-singlepass/src/emitter_arm64.rs
Expand Up @@ -380,6 +380,30 @@ impl EmitterARM64 for Assembler {
assert!((disp & 0x3) == 0 && (disp < 0x4000));
dynasm!(self ; ldr S(reg), [X(addr), disp]);
}
(Size::S64, Location::SIMD(reg), Location::Memory2(addr, r2, mult, offs)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
let r2 = r2.into_index() as u32;
assert!(offs == 0);
let mult = mult as u32;
match mult {
0 => dynasm!(self ; ldr D(reg), [X(addr)]),
1 => dynasm!(self ; ldr D(reg), [X(addr), X(r2)]),
_ => dynasm!(self ; ldr D(reg), [X(addr), X(r2), LSL mult]),
};
}
(Size::S32, Location::SIMD(reg), Location::Memory2(addr, r2, mult, offs)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
let r2 = r2.into_index() as u32;
assert!(offs == 0);
let mult = mult as u32;
match mult {
0 => dynasm!(self ; ldr S(reg), [X(addr)]),
1 => dynasm!(self ; ldr S(reg), [X(addr), X(r2)]),
_ => dynasm!(self ; ldr S(reg), [X(addr), X(r2), LSL mult]),
};
}
_ => panic!("singlepass can't emit LDR {:?}, {:?}, {:?}", sz, reg, addr),
}
}
Expand All @@ -401,6 +425,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; stur D(reg), [X(addr), offset]);
}
(Size::S32, Location::SIMD(reg)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
dynasm!(self ; stur S(reg), [X(addr), offset]);
}
_ => panic!(
"singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
Expand All @@ -425,6 +454,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; ldur D(reg), [X(addr), offset]);
}
(Size::S32, Location::SIMD(reg)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
dynasm!(self ; ldur S(reg), [X(addr), offset]);
}
_ => panic!(
"singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
Expand Down Expand Up @@ -1363,7 +1397,8 @@ impl EmitterARM64 for Assembler {
let dst = dst.into_index() as u32;
dynasm!(self ; ror X(dst), X(src1), X(src2));
}
(Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => {
(Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst))
| (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => {
let src1 = src1.into_index() as u32;
let imm = imm as u32;
let dst = dst.into_index() as u32;
Expand All @@ -1372,6 +1407,15 @@ impl EmitterARM64 for Assembler {
}
dynasm!(self ; ror X(dst), X(src1), imm);
}
(Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst))
| (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => {
let src1 = src1.into_index() as u32;
let dst = dst.into_index() as u32;
if imm == 0 || imm > 31 {
unreachable!();
}
dynasm!(self ; ror W(dst), W(src1), imm as u32);
}
(Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
let src1 = src1.into_index() as u32;
let src2 = src2.into_index() as u32;
Expand Down
2 changes: 2 additions & 0 deletions lib/compiler-singlepass/src/location.rs
Expand Up @@ -2,6 +2,7 @@ use crate::common_decl::RegisterIndex;
use crate::machine::*;
use std::fmt::Debug;
use std::hash::Hash;
use std::slice::Iter;

#[allow(dead_code)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
Expand Down Expand Up @@ -42,6 +43,7 @@ pub trait Reg: Copy + Clone + Eq + PartialEq + Debug + Hash + Ord {
fn is_reserved(self) -> bool;
fn into_index(self) -> usize;
fn from_index(i: usize) -> Result<Self, ()>;
fn iterator() -> Iter<'static, Self>;
}

pub trait Descriptor<R: Reg, S: Reg> {
Expand Down
15 changes: 10 additions & 5 deletions lib/compiler-singlepass/src/machine.rs
Expand Up @@ -83,9 +83,9 @@ pub trait Machine {
/// reserve a GPR
fn reserve_gpr(&mut self, gpr: Self::GPR);
/// Push used gpr to the stack. Return the bytes taken on the stack
fn push_used_gpr(&mut self) -> usize;
fn push_used_gpr(&mut self, grps: &Vec<Self::GPR>) -> usize;
/// Pop used gpr to the stack
fn pop_used_gpr(&mut self);
fn pop_used_gpr(&mut self, grps: &Vec<Self::GPR>);
/// Picks an unused SIMD register.
///
/// This method does not mark the register as used
Expand All @@ -101,9 +101,9 @@ pub trait Machine {
/// Releases a temporary XMM register.
fn release_simd(&mut self, simd: Self::SIMD);
/// Push used simd regs to the stack. Return bytes taken on the stack
fn push_used_simd(&mut self) -> usize;
fn push_used_simd(&mut self, simds: &Vec<Self::SIMD>) -> usize;
/// Pop used simd regs to the stack
fn pop_used_simd(&mut self);
fn pop_used_simd(&mut self, simds: &Vec<Self::SIMD>);
/// Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example)
fn round_stack_adjust(&self, value: usize) -> usize;
/// Set the source location of the Wasm to the given offset.
Expand Down Expand Up @@ -140,7 +140,12 @@ pub trait Machine {
/// GPR Reg used for local pointer on the stack
fn local_pointer(&self) -> Self::GPR;
/// push a value on the stack for a native call
fn push_location_for_native(&mut self, loc: Location<Self::GPR, Self::SIMD>);
fn move_location_for_native(
&mut self,
size: Size,
loc: Location<Self::GPR, Self::SIMD>,
dest: Location<Self::GPR, Self::SIMD>,
);
/// Determine whether a local should be allocated on the stack.
fn is_local_on_stack(&self, idx: usize) -> bool;
/// Determine a local's location.
Expand Down

0 comments on commit 2b396e0

Please sign in to comment.