Skip to content

Commit

Permalink
Try #2787:
Browse files Browse the repository at this point in the history
  • Loading branch information
bors[bot] committed Feb 11, 2022
2 parents db0dc29 + d9770b2 commit d8eaef5
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 57 deletions.
11 changes: 6 additions & 5 deletions lib/compiler-singlepass/src/codegen.rs
Expand Up @@ -737,8 +737,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
.collect();

// Save used GPRs. Preserve correct stack alignment
let mut used_stack = self.machine.push_used_gpr();
let used_gprs = self.machine.get_used_gprs();
let mut used_stack = self.machine.push_used_gpr(&used_gprs);
for r in used_gprs.iter() {
let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone();
if content == MachineValue::Undefined {
Expand All @@ -752,7 +752,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
// Save used SIMD registers.
let used_simds = self.machine.get_used_simd();
if used_simds.len() > 0 {
used_stack += self.machine.push_used_simd();
used_stack += self.machine.push_used_simd(&used_simds);

for r in used_simds.iter().rev() {
let content =
Expand Down Expand Up @@ -842,7 +842,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
self.state.stack_values.push(MachineValue::Undefined);
}
}
self.machine.move_location(params_size[i], *param, loc);
self.machine
.move_location_for_native(params_size[i], *param, loc);
}
_ => {
return Err(CodegenError {
Expand Down Expand Up @@ -914,14 +915,14 @@ impl<'a, M: Machine> FuncGen<'a, M> {

// Restore SIMDs.
if !used_simds.is_empty() {
self.machine.pop_used_simd();
self.machine.pop_used_simd(&used_simds);
for _ in 0..used_simds.len() {
self.state.stack_values.pop().unwrap();
}
}

// Restore GPRs.
self.machine.pop_used_gpr();
self.machine.pop_used_gpr(&used_gprs);
for _ in used_gprs.iter().rev() {
self.state.stack_values.pop().unwrap();
}
Expand Down
10 changes: 10 additions & 0 deletions lib/compiler-singlepass/src/emitter_arm64.rs
Expand Up @@ -401,6 +401,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; stur D(reg), [X(addr), offset]);
}
(Size::S32, Location::SIMD(reg)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
dynasm!(self ; stur S(reg), [X(addr), offset]);
}
_ => panic!(
"singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
Expand All @@ -425,6 +430,11 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; ldur D(reg), [X(addr), offset]);
}
(Size::S32, Location::SIMD(reg)) => {
let reg = reg.into_index() as u32;
let addr = addr.into_index() as u32;
dynasm!(self ; ldur S(reg), [X(addr), offset]);
}
_ => panic!(
"singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
sz, reg, addr, offset
Expand Down
15 changes: 10 additions & 5 deletions lib/compiler-singlepass/src/machine.rs
Expand Up @@ -83,9 +83,9 @@ pub trait Machine {
/// reserve a GPR
fn reserve_gpr(&mut self, gpr: Self::GPR);
/// Push used gpr to the stack. Return the bytes taken on the stack
fn push_used_gpr(&mut self) -> usize;
fn push_used_gpr(&mut self, grps: &Vec<Self::GPR>) -> usize;
/// Pop used gpr to the stack
fn pop_used_gpr(&mut self);
fn pop_used_gpr(&mut self, grps: &Vec<Self::GPR>);
/// Picks an unused SIMD register.
///
/// This method does not mark the register as used
Expand All @@ -101,9 +101,9 @@ pub trait Machine {
/// Releases a temporary XMM register.
fn release_simd(&mut self, simd: Self::SIMD);
/// Push used simd regs to the stack. Return bytes taken on the stack
fn push_used_simd(&mut self) -> usize;
fn push_used_simd(&mut self, simds: &Vec<Self::SIMD>) -> usize;
/// Pop used simd regs to the stack
fn pop_used_simd(&mut self);
fn pop_used_simd(&mut self, simds: &Vec<Self::SIMD>);
/// Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example)
fn round_stack_adjust(&self, value: usize) -> usize;
/// Set the source location of the Wasm to the given offset.
Expand Down Expand Up @@ -140,7 +140,12 @@ pub trait Machine {
/// GPR Reg used for local pointer on the stack
fn local_pointer(&self) -> Self::GPR;
/// push a value on the stack for a native call
fn push_location_for_native(&mut self, loc: Location<Self::GPR, Self::SIMD>);
fn move_location_for_native(
&mut self,
size: Size,
loc: Location<Self::GPR, Self::SIMD>,
dest: Location<Self::GPR, Self::SIMD>,
);
/// Determine whether a local should be allocated on the stack.
fn is_local_on_stack(&self, idx: usize) -> bool;
/// Determine a local's location.
Expand Down
34 changes: 21 additions & 13 deletions lib/compiler-singlepass/src/machine_arm64.rs
Expand Up @@ -1175,8 +1175,7 @@ impl Machine for MachineARM64 {
self.used_gprs.insert(gpr);
}

fn push_used_gpr(&mut self) -> usize {
let used_gprs = self.get_used_gprs();
fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
if used_gprs.len() % 2 == 1 {
self.emit_push(Size::S64, Location::GPR(GPR::XzrSp));
}
Expand All @@ -1185,8 +1184,7 @@ impl Machine for MachineARM64 {
}
((used_gprs.len() + 1) / 2) * 16
}
fn pop_used_gpr(&mut self) {
let used_gprs = self.get_used_gprs();
fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
for r in used_gprs.iter().rev() {
self.emit_pop(Size::S64, Location::GPR(*r));
}
Expand Down Expand Up @@ -1237,8 +1235,7 @@ impl Machine for MachineARM64 {
assert_eq!(self.used_simd.remove(&simd), true);
}

fn push_used_simd(&mut self) -> usize {
let used_neons = self.get_used_simd();
fn push_used_simd(&mut self, used_neons: &Vec<NEON>) -> usize {
let stack_adjust = if used_neons.len() & 1 == 1 {
(used_neons.len() * 8) as u32 + 8
} else {
Expand All @@ -1255,8 +1252,7 @@ impl Machine for MachineARM64 {
}
stack_adjust as usize
}
fn pop_used_simd(&mut self) {
let used_neons = self.get_used_simd();
fn pop_used_simd(&mut self, used_neons: &Vec<NEON>) {
for (i, r) in used_neons.iter().enumerate() {
self.assembler.emit_ldr(
Size::S64,
Expand Down Expand Up @@ -1407,13 +1403,17 @@ impl Machine for MachineARM64 {
);
}
// push a value on the stack for a native call
fn push_location_for_native(&mut self, loc: Location) {
fn move_location_for_native(&mut self, size: Size, loc: Location, dest: Location) {
match loc {
Location::Imm64(_) => {
self.move_location(Size::S64, loc, Location::GPR(GPR::X17));
self.emit_push(Size::S64, Location::GPR(GPR::X17));
Location::Imm64(_)
| Location::Imm32(_)
| Location::Imm8(_)
| Location::Memory(_, _)
| Location::Memory2(_, _, _, _) => {
self.move_location(size, loc, Location::GPR(GPR::X17));
self.move_location(size, Location::GPR(GPR::X17), dest);
}
_ => self.emit_push(Size::S64, loc),
_ => self.move_location(size, loc, dest),
}
}

Expand Down Expand Up @@ -1965,6 +1965,14 @@ impl Machine for MachineARM64 {
self.assembler.emit_fmax(sz, input, input, tmp);
self.move_location(sz, tmp, output);
}
(Size::S32, Location::Memory(_, _), _) | (Size::S64, Location::Memory(_, _), _) => {
let src = self.location_to_neon(sz, input, &mut tempn, ImmType::None, true);
let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false);
self.assembler.emit_fmax(sz, src, src, tmp);
if tmp != output {
self.move_location(sz, tmp, output);
}
}
_ => panic!(
"singlepass can't emit canonicalize_nan {:?} {:?} {:?}",
sz, input, output
Expand Down
53 changes: 19 additions & 34 deletions lib/compiler-singlepass/src/machine_x64.rs
Expand Up @@ -1626,15 +1626,13 @@ impl Machine for MachineX86_64 {
self.used_gprs.insert(gpr);
}

fn push_used_gpr(&mut self) -> usize {
let used_gprs = self.get_used_gprs();
fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
for r in used_gprs.iter() {
self.assembler.emit_push(Size::S64, Location::GPR(*r));
}
used_gprs.len() * 8
}
fn pop_used_gpr(&mut self) {
let used_gprs = self.get_used_gprs();
fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
for r in used_gprs.iter().rev() {
self.assembler.emit_pop(Size::S64, Location::GPR(*r));
}
Expand Down Expand Up @@ -1682,8 +1680,7 @@ impl Machine for MachineX86_64 {
assert_eq!(self.used_simd.remove(&simd), true);
}

fn push_used_simd(&mut self) -> usize {
let used_xmms = self.get_used_simd();
fn push_used_simd(&mut self, used_xmms: &Vec<XMM>) -> usize {
self.adjust_stack((used_xmms.len() * 8) as u32);

for (i, r) in used_xmms.iter().enumerate() {
Expand All @@ -1696,8 +1693,7 @@ impl Machine for MachineX86_64 {

used_xmms.len() * 8
}
fn pop_used_simd(&mut self) {
let used_xmms = self.get_used_simd();
fn pop_used_simd(&mut self, used_xmms: &Vec<XMM>) {
for (i, r) in used_xmms.iter().enumerate() {
self.move_location(
Size::S64,
Expand Down Expand Up @@ -1806,34 +1802,23 @@ impl Machine for MachineX86_64 {
);
}
// push a value on the stack for a native call
fn push_location_for_native(&mut self, loc: Location) {
fn move_location_for_native(&mut self, _size: Size, loc: Location, dest: Location) {
match loc {
Location::Imm64(_) => {
// x86_64 does not support `mov imm64, mem`. We must first place the immdiate value
// into a register and then write the register to the memory. Now the problem is
// that there might not be any registers available to clobber. In order to make
// this work out we spill a register thus retaining both the original value of the
// register and producing the required data at the top of the stack.
//
// FIXME(#2723): figure out how to not require spilling a register here. It should
// definitely be possible to `pick_gpr`/`pick_temp_gpr` to grab an otherwise unused
// register and just clobber its value here.
self.assembler.emit_push(Size::S64, Location::GPR(GPR::R9));
self.move_location(Size::S64, loc, Location::GPR(GPR::R9));
self.assembler.emit_xchg(
Size::S64,
Location::GPR(GPR::R9),
Location::Memory(GPR::RSP, 0),
);
}
Location::SIMD(_) => {
// Dummy value slot to be filled with `mov`.
self.assembler.emit_push(Size::S64, Location::GPR(GPR::RAX));

// XMM registers can be directly stored to memory.
self.move_location(Size::S64, loc, Location::Memory(GPR::RSP, 0));
Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
let tmp = self.pick_temp_gpr();
if let Some(x) = tmp {
self.assembler.emit_mov(Size::S64, loc, Location::GPR(x));
self.assembler.emit_mov(Size::S64, Location::GPR(x), dest);
} else {
self.assembler
.emit_mov(Size::S64, Location::GPR(GPR::RAX), dest);
self.assembler
.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
self.assembler
.emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest);
}
}
_ => self.assembler.emit_push(Size::S64, loc),
_ => self.assembler.emit_mov(Size::S64, loc, dest),
}
}

Expand Down

0 comments on commit d8eaef5

Please sign in to comment.