
Fix singlepass codegen regression #2787

Merged
merged 8 commits on Feb 14, 2022
11 changes: 6 additions & 5 deletions lib/compiler-singlepass/src/codegen.rs
@@ -737,8 +737,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
.collect();

// Save used GPRs. Preserve correct stack alignment
- let mut used_stack = self.machine.push_used_gpr();
let used_gprs = self.machine.get_used_gprs();
+ let mut used_stack = self.machine.push_used_gpr(&used_gprs);
for r in used_gprs.iter() {
let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone();
if content == MachineValue::Undefined {
@@ -752,7 +752,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
// Save used SIMD registers.
let used_simds = self.machine.get_used_simd();
if used_simds.len() > 0 {
- used_stack += self.machine.push_used_simd();
+ used_stack += self.machine.push_used_simd(&used_simds);

for r in used_simds.iter().rev() {
let content =
@@ -842,7 +842,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
self.state.stack_values.push(MachineValue::Undefined);
}
}
- self.machine.move_location(params_size[i], *param, loc);
+ self.machine
+     .move_location_for_native(params_size[i], *param, loc);
}
_ => {
return Err(CodegenError {
@@ -914,14 +915,14 @@ impl<'a, M: Machine> FuncGen<'a, M> {

// Restore SIMDs.
if !used_simds.is_empty() {
- self.machine.pop_used_simd();
+ self.machine.pop_used_simd(&used_simds);
for _ in 0..used_simds.len() {
self.state.stack_values.pop().unwrap();
}
}

// Restore GPRs.
- self.machine.pop_used_gpr();
+ self.machine.pop_used_gpr(&used_gprs);
for _ in used_gprs.iter().rev() {
self.state.stack_values.pop().unwrap();
}
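The codegen.rs side of the fix follows one pattern throughout: the used-register list is computed once, and that same list is passed to the push, the stack-bookkeeping loop, and the matching pop, so the save and restore paths can no longer act on different register sets. Below is a minimal sketch of that invariant in plain Rust; `Gpr`, `Machine`, `push_used`, and `pop_used` are illustrative stand-ins, not the crate's real types.

```rust
// Illustrative stand-ins only; the real code works with Machine::GPR
// and emits actual push/pop instructions.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Gpr {
    X0,
    X2,
}

struct Machine {
    stack: Vec<Gpr>,
}

impl Machine {
    // Push every register in `used` and report the bytes consumed,
    // rounding to register pairs to keep 16-byte alignment, as the
    // ARM64 backend does.
    fn push_used(&mut self, used: &[Gpr]) -> usize {
        for r in used {
            self.stack.push(*r);
        }
        ((used.len() + 1) / 2) * 16
    }

    // Pop in exact reverse order of the *same* list, so the restore
    // side can never disagree with the save side about what was pushed.
    fn pop_used(&mut self, used: &[Gpr]) {
        for r in used.iter().rev() {
            assert_eq!(self.stack.pop(), Some(*r));
        }
    }
}

fn main() {
    let mut m = Machine { stack: Vec::new() };
    let used = vec![Gpr::X0, Gpr::X2]; // captured once, reused for both calls
    let bytes = m.push_used(&used);
    // ... the native call would be emitted here ...
    m.pop_used(&used);
    assert_eq!(bytes, 16);
    assert!(m.stack.is_empty());
}
```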
10 changes: 10 additions & 0 deletions lib/compiler-singlepass/src/emitter_arm64.rs
@@ -401,6 +401,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; stur D(reg), [X(addr), offset]);
}
+ (Size::S32, Location::SIMD(reg)) => {
+     let reg = reg.into_index() as u32;
+     let addr = addr.into_index() as u32;
+     dynasm!(self ; stur S(reg), [X(addr), offset]);
+ }
_ => panic!(
"singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
@@ -425,6 +430,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; ldur D(reg), [X(addr), offset]);
}
+ (Size::S32, Location::SIMD(reg)) => {
+     let reg = reg.into_index() as u32;
+     let addr = addr.into_index() as u32;
+     dynasm!(self ; ldur S(reg), [X(addr), offset]);
+ }
_ => panic!(
"singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
15 changes: 10 additions & 5 deletions lib/compiler-singlepass/src/machine.rs
@@ -83,9 +83,9 @@ pub trait Machine {
/// reserve a GPR
fn reserve_gpr(&mut self, gpr: Self::GPR);
/// Push used gpr to the stack. Return the bytes taken on the stack
- fn push_used_gpr(&mut self) -> usize;
+ fn push_used_gpr(&mut self, grps: &Vec<Self::GPR>) -> usize;
/// Pop used gpr to the stack
- fn pop_used_gpr(&mut self);
+ fn pop_used_gpr(&mut self, grps: &Vec<Self::GPR>);
/// Picks an unused SIMD register.
///
/// This method does not mark the register as used
@@ -101,9 +101,9 @@
/// Releases a temporary XMM register.
fn release_simd(&mut self, simd: Self::SIMD);
/// Push used simd regs to the stack. Return bytes taken on the stack
- fn push_used_simd(&mut self) -> usize;
+ fn push_used_simd(&mut self, simds: &Vec<Self::SIMD>) -> usize;
/// Pop used simd regs to the stack
- fn pop_used_simd(&mut self);
+ fn pop_used_simd(&mut self, simds: &Vec<Self::SIMD>);
/// Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example)
fn round_stack_adjust(&self, value: usize) -> usize;
/// Set the source location of the Wasm to the given offset.
@@ -140,7 +140,12 @@ pub trait Machine {
/// GPR Reg used for local pointer on the stack
fn local_pointer(&self) -> Self::GPR;
/// push a value on the stack for a native call
- fn push_location_for_native(&mut self, loc: Location<Self::GPR, Self::SIMD>);
+ fn move_location_for_native(
+     &mut self,
+     size: Size,
+     loc: Location<Self::GPR, Self::SIMD>,
+     dest: Location<Self::GPR, Self::SIMD>,
+ );
/// Determine whether a local should be allocated on the stack.
fn is_local_on_stack(&self, idx: usize) -> bool;
/// Determine a local's location.
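The trait docs above note that `round_stack_adjust` must return a multiple of 16 bytes on ARM64. A one-line sketch of such a rounding helper follows; this is an assumption about the shape of the implementation, not the crate's actual body.

```rust
/// Round a stack adjustment up to the next multiple of 16 bytes,
/// since AArch64 requires SP to stay 16-byte aligned.
fn round_stack_adjust(value: usize) -> usize {
    (value + 15) & !15
}

fn main() {
    assert_eq!(round_stack_adjust(8), 16);
    assert_eq!(round_stack_adjust(16), 16);
    assert_eq!(round_stack_adjust(24), 32);
}
```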
34 changes: 21 additions & 13 deletions lib/compiler-singlepass/src/machine_arm64.rs
@@ -1175,8 +1175,7 @@ impl Machine for MachineARM64 {
self.used_gprs.insert(gpr);
}

- fn push_used_gpr(&mut self) -> usize {
-     let used_gprs = self.get_used_gprs();
+ fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
if used_gprs.len() % 2 == 1 {
self.emit_push(Size::S64, Location::GPR(GPR::XzrSp));
}
@@ -1185,8 +1184,7 @@
}
((used_gprs.len() + 1) / 2) * 16
}
- fn pop_used_gpr(&mut self) {
-     let used_gprs = self.get_used_gprs();
+ fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
for r in used_gprs.iter().rev() {
self.emit_pop(Size::S64, Location::GPR(*r));
}
@@ -1237,8 +1235,7 @@ impl Machine for MachineARM64 {
assert_eq!(self.used_simd.remove(&simd), true);
}

- fn push_used_simd(&mut self) -> usize {
-     let used_neons = self.get_used_simd();
+ fn push_used_simd(&mut self, used_neons: &Vec<NEON>) -> usize {
let stack_adjust = if used_neons.len() & 1 == 1 {
(used_neons.len() * 8) as u32 + 8
} else {
@@ -1255,8 +1252,7 @@
}
stack_adjust as usize
}
- fn pop_used_simd(&mut self) {
-     let used_neons = self.get_used_simd();
+ fn pop_used_simd(&mut self, used_neons: &Vec<NEON>) {
for (i, r) in used_neons.iter().enumerate() {
self.assembler.emit_ldr(
Size::S64,
@@ -1407,13 +1403,17 @@ impl Machine for MachineARM64 {
);
}
// push a value on the stack for a native call
- fn push_location_for_native(&mut self, loc: Location) {
+ fn move_location_for_native(&mut self, size: Size, loc: Location, dest: Location) {
match loc {
- Location::Imm64(_) => {
-     self.move_location(Size::S64, loc, Location::GPR(GPR::X17));
-     self.emit_push(Size::S64, Location::GPR(GPR::X17));
+ Location::Imm64(_)
+ | Location::Imm32(_)
+ | Location::Imm8(_)
+ | Location::Memory(_, _)
+ | Location::Memory2(_, _, _, _) => {
+     self.move_location(size, loc, Location::GPR(GPR::X17));
+     self.move_location(size, Location::GPR(GPR::X17), dest);
}
- _ => self.emit_push(Size::S64, loc),
+ _ => self.move_location(size, loc, dest),
}
}

@@ -1965,6 +1965,14 @@ impl Machine for MachineARM64 {
self.assembler.emit_fmax(sz, input, input, tmp);
self.move_location(sz, tmp, output);
}
+ (Size::S32, Location::Memory(_, _), _) | (Size::S64, Location::Memory(_, _), _) => {
+     let src = self.location_to_neon(sz, input, &mut tempn, ImmType::None, true);
+     let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false);
+     self.assembler.emit_fmax(sz, src, src, tmp);
+     if tmp != output {
+         self.move_location(sz, tmp, output);
+     }
+ }
_ => panic!(
"singlepass can't emit canonicalize_nan {:?} {:?} {:?}",
sz, input, output
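Two details in the ARM64 half are worth spelling out. First, `move_location_for_native` routes immediates and memory operands through the scratch register X17 because AArch64 has no immediate-to-memory or memory-to-memory move; a value must pass through a register. Second, the new `canonicalize_nan` arm relies on `fmax` of a value with itself to pass ordered values through unchanged while quieting NaNs. A software model of the intended result is sketched below in plain Rust; the exact NaN bit pattern the backend produces is not shown in this diff, so 0x7fc00000, the usual Wasm canonical f32 quiet NaN, is an assumption.

```rust
/// Software model of NaN canonicalization: ordered values pass through,
/// any NaN is replaced by a single canonical quiet-NaN bit pattern so
/// that Wasm results are deterministic.
fn canonicalize_nan_f32(x: f32) -> f32 {
    if x.is_nan() {
        f32::from_bits(0x7fc0_0000) // assumed canonical quiet NaN
    } else {
        x
    }
}

fn main() {
    assert_eq!(canonicalize_nan_f32(1.5), 1.5);
    let weird_nan = f32::from_bits(0x7f80_0001); // NaN with a nonstandard payload
    assert_eq!(canonicalize_nan_f32(weird_nan).to_bits(), 0x7fc0_0000);
}
```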
53 changes: 19 additions & 34 deletions lib/compiler-singlepass/src/machine_x64.rs
@@ -1626,15 +1626,13 @@ impl Machine for MachineX86_64 {
self.used_gprs.insert(gpr);
}

- fn push_used_gpr(&mut self) -> usize {
-     let used_gprs = self.get_used_gprs();
+ fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
for r in used_gprs.iter() {
Review thread on this line:

Contributor: HashSet still has a randomized iteration order so this makes the codegen of singlepass non-deterministic (but still correct).

Contributor Author: True, but do we need to generate the same code on each run?

Contributor: We do want to have deterministic reproducible builds for a given input. See #2173.

Contributor Author: Fair enough. I'll remove the hashmaps.
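A minimal sketch of the determinism point, using illustrative types rather than the crate's real ones: iterating a `HashSet` directly yields a different order in each process, while walking a fixed, architecture-defined register list and filtering by membership is stable across runs.

```rust
use std::collections::HashSet;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum Gpr {
    Rax,
    Rcx,
    Rdx,
    Rbx,
}

// Every register in a fixed, architecture-defined order.
const ALL_GPRS: [Gpr; 4] = [Gpr::Rax, Gpr::Rcx, Gpr::Rdx, Gpr::Rbx];

// Deterministic: walk the fixed order and keep only the used registers,
// instead of iterating the HashSet itself.
fn used_in_fixed_order(used: &HashSet<Gpr>) -> Vec<Gpr> {
    ALL_GPRS.iter().copied().filter(|r| used.contains(r)).collect()
}

fn main() {
    let used: HashSet<Gpr> = [Gpr::Rdx, Gpr::Rax].into_iter().collect();
    // Identical output on every run.
    assert_eq!(used_in_fixed_order(&used), vec![Gpr::Rax, Gpr::Rdx]);
}
```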

self.assembler.emit_push(Size::S64, Location::GPR(*r));
}
used_gprs.len() * 8
}
- fn pop_used_gpr(&mut self) {
-     let used_gprs = self.get_used_gprs();
+ fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
for r in used_gprs.iter().rev() {
self.assembler.emit_pop(Size::S64, Location::GPR(*r));
}
@@ -1682,8 +1680,7 @@ impl Machine for MachineX86_64 {
assert_eq!(self.used_simd.remove(&simd), true);
}

- fn push_used_simd(&mut self) -> usize {
-     let used_xmms = self.get_used_simd();
+ fn push_used_simd(&mut self, used_xmms: &Vec<XMM>) -> usize {
self.adjust_stack((used_xmms.len() * 8) as u32);

for (i, r) in used_xmms.iter().enumerate() {
@@ -1696,8 +1693,7 @@ impl Machine for MachineX86_64 {

used_xmms.len() * 8
}
- fn pop_used_simd(&mut self) {
-     let used_xmms = self.get_used_simd();
+ fn pop_used_simd(&mut self, used_xmms: &Vec<XMM>) {
for (i, r) in used_xmms.iter().enumerate() {
self.move_location(
Size::S64,
@@ -1806,34 +1802,23 @@ impl Machine for MachineX86_64 {
);
}
// push a value on the stack for a native call
- fn push_location_for_native(&mut self, loc: Location) {
+ fn move_location_for_native(&mut self, _size: Size, loc: Location, dest: Location) {
match loc {
- Location::Imm64(_) => {
-     // x86_64 does not support `mov imm64, mem`. We must first place the immdiate value
-     // into a register and then write the register to the memory. Now the problem is
-     // that there might not be any registers available to clobber. In order to make
-     // this work out we spill a register thus retaining both the original value of the
-     // register and producing the required data at the top of the stack.
-     //
-     // FIXME(#2723): figure out how to not require spilling a register here. It should
-     // definitely be possible to `pick_gpr`/`pick_temp_gpr` to grab an otherwise unused
-     // register and just clobber its value here.
-     self.assembler.emit_push(Size::S64, Location::GPR(GPR::R9));
-     self.move_location(Size::S64, loc, Location::GPR(GPR::R9));
-     self.assembler.emit_xchg(
-         Size::S64,
-         Location::GPR(GPR::R9),
-         Location::Memory(GPR::RSP, 0),
-     );
- }
- Location::SIMD(_) => {
-     // Dummy value slot to be filled with `mov`.
-     self.assembler.emit_push(Size::S64, Location::GPR(GPR::RAX));
-
-     // XMM registers can be directly stored to memory.
-     self.move_location(Size::S64, loc, Location::Memory(GPR::RSP, 0));
+ Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
+     let tmp = self.pick_temp_gpr();
+     if let Some(x) = tmp {
+         self.assembler.emit_mov(Size::S64, loc, Location::GPR(x));
+         self.assembler.emit_mov(Size::S64, Location::GPR(x), dest);
+     } else {
+         self.assembler
+             .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest);
+         self.assembler
+             .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
+         self.assembler
+             .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest);
+     }
}
- _ => self.assembler.emit_push(Size::S64, loc),
+ _ => self.assembler.emit_mov(Size::S64, loc, dest),
}
}

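The fallback branch above is the subtle part: when `pick_temp_gpr` finds no free register, the code first parks RAX's live value in `dest`, then loads the source into RAX, then exchanges RAX with `dest`. After the `xchg`, `dest` holds the source value and RAX holds its original value again, so the move completes without the stack spill that the deleted `push`/`xchg` sequence needed. A step-by-step model with plain variables standing in for a register and a memory slot:

```rust
// Model the three emitted instructions with plain variables:
//   mov  dest, rax   ; park rax's live value in dest
//   mov  rax, src    ; bring the source value into rax
//   xchg rax, dest   ; swap: dest gets src, rax gets its old value back
fn move_via_rax(rax: &mut u64, dest: &mut u64, src: u64) {
    *dest = *rax;              // mov dest, rax
    *rax = src;                // mov rax, src
    std::mem::swap(rax, dest); // xchg rax, dest
}

fn main() {
    let mut rax = 0xDEAD_BEEF; // live value that must survive
    let mut dest = 0;
    move_via_rax(&mut rax, &mut dest, 42);
    assert_eq!(dest, 42);          // the move happened...
    assert_eq!(rax, 0xDEAD_BEEF);  // ...and RAX was preserved, no stack slot used
}
```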