From 17b3a94d36d2657c5328ac7f912a73261ed1c224 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 7 Feb 2022 14:20:00 +0100 Subject: [PATCH 1/5] Singlepass: added some more emiter --- lib/compiler-singlepass/src/emitter_arm64.rs | 10 ++++++++++ lib/compiler-singlepass/src/machine_arm64.rs | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 08505560d94..c5d31f9c764 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -401,6 +401,11 @@ impl EmitterARM64 for Assembler { let addr = addr.into_index() as u32; dynasm!(self ; stur D(reg), [X(addr), offset]); } + (Size::S32, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; stur S(reg), [X(addr), offset]); + } _ => panic!( "singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}", sz, reg, addr, offset @@ -425,6 +430,11 @@ impl EmitterARM64 for Assembler { let addr = addr.into_index() as u32; dynasm!(self ; ldur D(reg), [X(addr), offset]); } + (Size::S32, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldur S(reg), [X(addr), offset]); + } _ => panic!( "singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}", sz, reg, addr, offset diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 9b678b7e453..bee3434127a 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1965,6 +1965,14 @@ impl Machine for MachineARM64 { self.assembler.emit_fmax(sz, input, input, tmp); self.move_location(sz, tmp, output); } + (Size::S32, Location::Memory(_, _), _) | (Size::S64, Location::Memory(_, _), _) => { + let src = self.location_to_neon(sz, input, &mut tempn, ImmType::None, true); + let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false); + self.assembler.emit_fmax(sz, src, src, tmp); + if tmp != output { + self.move_location(sz, tmp, output); + } + } _ => panic!( "singlepass can't emit canonicalize_nan {:?} {:?} {:?}", sz, input, output From a9fab001e86875e1aed9fb0ba29dd78cea77c5bb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 11 Feb 2022 10:23:47 +0100 Subject: [PATCH 2/5] Fix a singlepass codegen regression, but forcing a sort on gprs/simds list before push/pop --- lib/compiler-singlepass/src/codegen.rs | 3 +- lib/compiler-singlepass/src/machine.rs | 7 ++- lib/compiler-singlepass/src/machine_arm64.rs | 26 ++++++---- lib/compiler-singlepass/src/machine_x64.rs | 53 +++++++++----------- 4 files changed, 48 insertions(+), 41 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index c9f98b30b64..0854b1f7720 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -842,7 +842,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(MachineValue::Undefined); } } - self.machine.move_location(params_size[i], *param, loc); + self.machine + .move_location_for_native(params_size[i], *param, loc); } _ => { return Err(CodegenError { diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index a68daa91ec5..82e8ce84e9f 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -140,7 +140,12 @@ pub trait Machine { /// GPR Reg used for local pointer on the stack fn local_pointer(&self) -> Self::GPR; /// push a value on the stack for a native call - fn push_location_for_native(&mut self, loc: Location); + fn move_location_for_native( + &mut self, + size: Size, + loc: Location, + dest: Location, + ); /// Determine whether a local should be allocated on the stack. fn is_local_on_stack(&self, idx: usize) -> bool; /// Determine a local's location. diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index bee3434127a..b6b71ca7130 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1176,7 +1176,8 @@ impl Machine for MachineARM64 { } fn push_used_gpr(&mut self) -> usize { - let used_gprs = self.get_used_gprs(); + let mut used_gprs = self.get_used_gprs(); + used_gprs.sort(); if used_gprs.len() % 2 == 1 { self.emit_push(Size::S64, Location::GPR(GPR::XzrSp)); } @@ -1186,7 +1187,8 @@ impl Machine for MachineARM64 { ((used_gprs.len() + 1) / 2) * 16 } fn pop_used_gpr(&mut self) { - let used_gprs = self.get_used_gprs(); + let mut used_gprs = self.get_used_gprs(); + used_gprs.sort(); for r in used_gprs.iter().rev() { self.emit_pop(Size::S64, Location::GPR(*r)); } @@ -1238,7 +1240,8 @@ impl Machine for MachineARM64 { } fn push_used_simd(&mut self) -> usize { - let used_neons = self.get_used_simd(); + let mut used_neons = self.get_used_simd(); + used_neons.sort(); let stack_adjust = if used_neons.len() & 1 == 1 { (used_neons.len() * 8) as u32 + 8 } else { @@ -1256,7 +1259,8 @@ impl Machine for MachineARM64 { stack_adjust as usize } fn pop_used_simd(&mut self) { - let used_neons = self.get_used_simd(); + let mut used_neons = self.get_used_simd(); + used_neons.sort(); for (i, r) in used_neons.iter().enumerate() { self.assembler.emit_ldr( Size::S64, @@ -1407,13 +1411,17 @@ impl Machine for MachineARM64 { ); } // push a value on the stack for a native call - fn push_location_for_native(&mut self, loc: Location) { + fn move_location_for_native(&mut self, size: Size, loc: Location, dest: Location) { match loc { - Location::Imm64(_) => { - self.move_location(Size::S64, loc, Location::GPR(GPR::X17)); - self.emit_push(Size::S64, Location::GPR(GPR::X17)); + Location::Imm64(_) + | Location::Imm32(_) + | Location::Imm8(_) + | Location::Memory(_, _) + | Location::Memory2(_, _, _, _) => { + self.move_location(size, loc, Location::GPR(GPR::X17)); + self.move_location(size, Location::GPR(GPR::X17), dest); } - _ => self.emit_push(Size::S64, loc), + _ => self.move_location(size, loc, dest), } } diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 2d1c0f5f50d..5e06d38da5f 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1627,14 +1627,16 @@ impl Machine for MachineX86_64 { } fn push_used_gpr(&mut self) -> usize { - let used_gprs = self.get_used_gprs(); + let mut used_gprs = self.get_used_gprs(); + used_gprs.sort(); for r in used_gprs.iter() { self.assembler.emit_push(Size::S64, Location::GPR(*r)); } used_gprs.len() * 8 } fn pop_used_gpr(&mut self) { - let used_gprs = self.get_used_gprs(); + let mut used_gprs = self.get_used_gprs(); + used_gprs.sort(); for r in used_gprs.iter().rev() { self.assembler.emit_pop(Size::S64, Location::GPR(*r)); } @@ -1683,7 +1685,8 @@ impl Machine for MachineX86_64 { } fn push_used_simd(&mut self) -> usize { - let used_xmms = self.get_used_simd(); + let mut used_xmms = self.get_used_simd(); + used_xmms.sort(); self.adjust_stack((used_xmms.len() * 8) as u32); for (i, r) in used_xmms.iter().enumerate() { @@ -1697,7 +1700,8 @@ impl Machine for MachineX86_64 { used_xmms.len() * 8 } fn pop_used_simd(&mut self) { - let used_xmms = self.get_used_simd(); + let mut used_xmms = self.get_used_simd(); + used_xmms.sort(); for (i, r) in used_xmms.iter().enumerate() { self.move_location( Size::S64, @@ -1806,34 +1810,23 @@ impl Machine for MachineX86_64 { ); } // push a value on the stack for a native call - fn push_location_for_native(&mut self, loc: Location) { + fn move_location_for_native(&mut self, _size: Size, loc: Location, dest: Location) { match loc { - Location::Imm64(_) => { - // x86_64 does not support `mov imm64, mem`. We must first place the immdiate value - // into a register and then write the register to the memory. Now the problem is - // that there might not be any registers available to clobber. In order to make - // this work out we spill a register thus retaining both the original value of the - // register and producing the required data at the top of the stack. - // - // FIXME(#2723): figure out how to not require spilling a register here. It should - // definitely be possible to `pick_gpr`/`pick_temp_gpr` to grab an otherwise unused - // register and just clobber its value here. - self.assembler.emit_push(Size::S64, Location::GPR(GPR::R9)); - self.move_location(Size::S64, loc, Location::GPR(GPR::R9)); - self.assembler.emit_xchg( - Size::S64, - Location::GPR(GPR::R9), - Location::Memory(GPR::RSP, 0), - ); - } - Location::SIMD(_) => { - // Dummy value slot to be filled with `mov`. - self.assembler.emit_push(Size::S64, Location::GPR(GPR::RAX)); - - // XMM registers can be directly stored to memory. - self.move_location(Size::S64, loc, Location::Memory(GPR::RSP, 0)); + Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { + let tmp = self.pick_temp_gpr(); + if let Some(x) = tmp { + self.assembler.emit_mov(Size::S64, loc, Location::GPR(x)); + self.assembler.emit_mov(Size::S64, Location::GPR(x), dest); + } else { + self.assembler + .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest); + self.assembler + .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + self.assembler + .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest); + } } - _ => self.assembler.emit_push(Size::S64, loc), + _ => self.assembler.emit_mov(Size::S64, loc, dest), } } From b3b1c8b5f576a19b6c0eb86d17b029d39200d0d7 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 11 Feb 2022 13:27:42 +0100 Subject: [PATCH 3/5] Removed the sort on gprs/simds list before push/pop, by using a single vec for the push/pop --- lib/compiler-singlepass/src/codegen.rs | 8 ++++---- lib/compiler-singlepass/src/machine.rs | 8 ++++---- lib/compiler-singlepass/src/machine_arm64.rs | 16 ++++------------ lib/compiler-singlepass/src/machine_x64.rs | 16 ++++------------ 4 files changed, 16 insertions(+), 32 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 0854b1f7720..2e68a8fc0d9 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -737,8 +737,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { .collect(); // Save used GPRs. Preserve correct stack alignment - let mut used_stack = self.machine.push_used_gpr(); let used_gprs = self.machine.get_used_gprs(); + let mut used_stack = self.machine.push_used_gpr(&used_gprs); for r in used_gprs.iter() { let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone(); if content == MachineValue::Undefined { @@ -752,7 +752,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Save used SIMD registers. let used_simds = self.machine.get_used_simd(); if used_simds.len() > 0 { - used_stack += self.machine.push_used_simd(); + used_stack += self.machine.push_used_simd(&used_simds); for r in used_simds.iter().rev() { let content = @@ -915,14 +915,14 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Restore SIMDs. if !used_simds.is_empty() { - self.machine.pop_used_simd(); + self.machine.pop_used_simd(&used_simds); for _ in 0..used_simds.len() { self.state.stack_values.pop().unwrap(); } } // Restore GPRs. - self.machine.pop_used_gpr(); + self.machine.pop_used_gpr(&used_gprs); for _ in used_gprs.iter().rev() { self.state.stack_values.pop().unwrap(); } diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 82e8ce84e9f..1ad5d04254e 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -83,9 +83,9 @@ pub trait Machine { /// reserve a GPR fn reserve_gpr(&mut self, gpr: Self::GPR); /// Push used gpr to the stack. Return the bytes taken on the stack - fn push_used_gpr(&mut self) -> usize; + fn push_used_gpr(&mut self, grps: &Vec) -> usize; /// Pop used gpr to the stack - fn pop_used_gpr(&mut self); + fn pop_used_gpr(&mut self, grps: &Vec); /// Picks an unused SIMD register. /// /// This method does not mark the register as used @@ -101,9 +101,9 @@ pub trait Machine { /// Releases a temporary XMM register. fn release_simd(&mut self, simd: Self::SIMD); /// Push used simd regs to the stack. Return bytes taken on the stack - fn push_used_simd(&mut self) -> usize; + fn push_used_simd(&mut self, simds: &Vec) -> usize; /// Pop used simd regs to the stack - fn pop_used_simd(&mut self); + fn pop_used_simd(&mut self, simds: &Vec); /// Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example) fn round_stack_adjust(&self, value: usize) -> usize; /// Set the source location of the Wasm to the given offset. diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index b6b71ca7130..7ea534a0e10 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1175,9 +1175,7 @@ impl Machine for MachineARM64 { self.used_gprs.insert(gpr); } - fn push_used_gpr(&mut self) -> usize { - let mut used_gprs = self.get_used_gprs(); - used_gprs.sort(); + fn push_used_gpr(&mut self, used_gprs: &Vec) -> usize { if used_gprs.len() % 2 == 1 { self.emit_push(Size::S64, Location::GPR(GPR::XzrSp)); } @@ -1186,9 +1184,7 @@ impl Machine for MachineARM64 { } ((used_gprs.len() + 1) / 2) * 16 } - fn pop_used_gpr(&mut self) { - let mut used_gprs = self.get_used_gprs(); - used_gprs.sort(); + fn pop_used_gpr(&mut self, used_gprs: &Vec) { for r in used_gprs.iter().rev() { self.emit_pop(Size::S64, Location::GPR(*r)); } @@ -1239,9 +1235,7 @@ impl Machine for MachineARM64 { assert_eq!(self.used_simd.remove(&simd), true); } - fn push_used_simd(&mut self) -> usize { - let mut used_neons = self.get_used_simd(); - used_neons.sort(); + fn push_used_simd(&mut self, used_neons: &Vec) -> usize { let stack_adjust = if used_neons.len() & 1 == 1 { (used_neons.len() * 8) as u32 + 8 } else { @@ -1258,9 +1252,7 @@ impl Machine for MachineARM64 { } stack_adjust as usize } - fn pop_used_simd(&mut self) { - let mut used_neons = self.get_used_simd(); - used_neons.sort(); + fn pop_used_simd(&mut self, used_neons: &Vec) { for (i, r) in used_neons.iter().enumerate() { self.assembler.emit_ldr( Size::S64, diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 5e06d38da5f..a0598858155 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1626,17 +1626,13 @@ impl Machine for MachineX86_64 { self.used_gprs.insert(gpr); } - fn push_used_gpr(&mut self) -> usize { - let mut used_gprs = self.get_used_gprs(); - used_gprs.sort(); + fn push_used_gpr(&mut self, used_gprs: &Vec) -> usize { for r in used_gprs.iter() { self.assembler.emit_push(Size::S64, Location::GPR(*r)); } used_gprs.len() * 8 } - fn pop_used_gpr(&mut self) { - let mut used_gprs = self.get_used_gprs(); - used_gprs.sort(); + fn pop_used_gpr(&mut self, used_gprs: &Vec) { for r in used_gprs.iter().rev() { self.assembler.emit_pop(Size::S64, Location::GPR(*r)); } @@ -1684,9 +1680,7 @@ impl Machine for MachineX86_64 { assert_eq!(self.used_simd.remove(&simd), true); } - fn push_used_simd(&mut self) -> usize { - let mut used_xmms = self.get_used_simd(); - used_xmms.sort(); + fn push_used_simd(&mut self, used_xmms: &Vec) -> usize { self.adjust_stack((used_xmms.len() * 8) as u32); for (i, r) in used_xmms.iter().enumerate() { @@ -1699,9 +1693,7 @@ impl Machine for MachineX86_64 { used_xmms.len() * 8 } - fn pop_used_simd(&mut self) { - let mut used_xmms = self.get_used_simd(); - used_xmms.sort(); + fn pop_used_simd(&mut self, used_xmms: &Vec) { for (i, r) in used_xmms.iter().enumerate() { self.move_location( Size::S64, From 5ebae6f34eced53890961d02c3da566ab857b40d Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 11 Feb 2022 16:20:22 +0100 Subject: [PATCH 4/5] Remove HashSet on SinglePass register collection and use a simple Bitmap now, to avoid risk of non deterministic runs --- lib/compiler-singlepass/src/arm64_decl.rs | 24 ++++-- lib/compiler-singlepass/src/location.rs | 2 + lib/compiler-singlepass/src/machine_arm64.rs | 79 +++++++++++++++----- lib/compiler-singlepass/src/machine_x64.rs | 79 +++++++++++++++----- lib/compiler-singlepass/src/x64_decl.rs | 27 ++++--- 5 files changed, 156 insertions(+), 55 deletions(-) diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs index 400f705daaf..6aab5dbc13e 100644 --- a/lib/compiler-singlepass/src/arm64_decl.rs +++ b/lib/compiler-singlepass/src/arm64_decl.rs @@ -4,6 +4,7 @@ use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; use crate::location::CombinedRegister; use crate::location::Reg as AbstractReg; use std::collections::BTreeMap; +use std::slice::Iter; use wasmer_compiler::CallingConvention; use wasmer_types::Type; @@ -98,7 +99,13 @@ impl AbstractReg for GPR { self as usize } fn from_index(n: usize) -> Result { - const REGS: [GPR; 32] = [ + match n { + 0..=31 => Ok(GPR::iterator().nth(n).unwrap().clone()), + _ => Err(()), + } + } + fn iterator() -> Iter<'static, GPR> { + static GPRS: [GPR; 32] = [ GPR::X0, GPR::X1, GPR::X2, @@ -132,7 +139,7 @@ impl AbstractReg for GPR { GPR::X30, GPR::XzrSp, ]; - REGS.get(n).cloned().ok_or(()) + GPRS.iter() } } @@ -147,7 +154,13 @@ impl AbstractReg for NEON { self as usize } fn from_index(n: usize) -> Result { - const REGS: [NEON; 32] = [ + match n { + 0..=31 => Ok(NEON::iterator().nth(n).unwrap().clone()), + _ => Err(()), + } + } + fn iterator() -> Iter<'static, NEON> { + const NEONS: [NEON; 32] = [ NEON::V0, NEON::V1, NEON::V2, @@ -181,10 +194,7 @@ impl AbstractReg for NEON { NEON::V30, NEON::V31, ]; - match n { - 0..=31 => Ok(REGS[n]), - _ => Err(()), - } + NEONS.iter() } } diff --git a/lib/compiler-singlepass/src/location.rs b/lib/compiler-singlepass/src/location.rs index 61817068a49..ca55828fcb3 100644 --- a/lib/compiler-singlepass/src/location.rs +++ b/lib/compiler-singlepass/src/location.rs @@ -2,6 +2,7 @@ use crate::common_decl::RegisterIndex; use crate::machine::*; use std::fmt::Debug; use std::hash::Hash; +use std::slice::Iter; #[allow(dead_code)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] @@ -42,6 +43,7 @@ pub trait Reg: Copy + Clone + Eq + PartialEq + Debug + Hash + Ord { fn is_reserved(self) -> bool; fn into_index(self) -> usize; fn from_index(i: usize) -> Result; + fn iterator() -> Iter<'static, Self>; } pub trait Descriptor { diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 7ea534a0e10..f176450ca6e 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3,9 +3,9 @@ use crate::arm64_decl::{GPR, NEON}; use crate::common_decl::*; use crate::emitter_arm64::*; use crate::location::Location as AbstractLocation; +use crate::location::Reg; use crate::machine::*; use dynasmrt::{aarch64::Aarch64Relocation, VecAssembler}; -use std::collections::HashSet; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, Relocation, @@ -19,8 +19,8 @@ type Location = AbstractLocation; pub struct MachineARM64 { assembler: Assembler, - used_gprs: HashSet, - used_simd: HashSet, + used_gprs: u32, + used_simd: u32, trap_table: TrapTable, /// Map from byte offset into wasm function to range of native instructions. /// @@ -57,8 +57,8 @@ impl MachineARM64 { pub fn new() -> Self { MachineARM64 { assembler: Assembler::new(0), - used_gprs: HashSet::new(), - used_simd: HashSet::new(), + used_gprs: 0, + used_simd: 0, trap_table: TrapTable::default(), instructions_address_map: vec![], src_loc: 0, @@ -1103,6 +1103,29 @@ impl MachineARM64 { self.emit_label(end); self.restore_fpcr(old_fpcr); } + + fn used_gprs_contains(&self, r: &GPR) -> bool { + self.used_gprs & (1 << r.into_index()) != 0 + } + fn used_simd_contains(&self, r: &NEON) -> bool { + self.used_simd & (1 << r.into_index()) != 0 + } + fn used_gprs_insert(&mut self, r: GPR) { + self.used_gprs |= 1 << r.into_index(); + } + fn used_simd_insert(&mut self, r: NEON) { + self.used_simd |= 1 << r.into_index(); + } + fn used_gprs_remove(&mut self, r: &GPR) -> bool { + let ret = self.used_gprs_contains(r); + self.used_gprs &= !(1 << r.into_index()); + ret + } + fn used_simd_remove(&mut self, r: &NEON) -> bool { + let ret = self.used_simd_contains(r); + self.used_simd &= !(1 << r.into_index()); + ret + } } impl Machine for MachineARM64 { @@ -1123,18 +1146,36 @@ impl Machine for MachineARM64 { } fn get_used_gprs(&self) -> Vec { - self.used_gprs.iter().cloned().collect() + GPR::iterator() + .filter_map(|x| { + if self.used_gprs & (1 << x.into_index()) != 0 { + Some(x) + } else { + None + } + }) + .cloned() + .collect() } fn get_used_simd(&self) -> Vec { - self.used_simd.iter().cloned().collect() + NEON::iterator() + .filter_map(|x| { + if self.used_simd & (1 << x.into_index()) != 0 { + Some(x) + } else { + None + } + }) + .cloned() + .collect() } fn pick_gpr(&self) -> Option { use GPR::*; static REGS: &[GPR] = &[X9, X10, X11, X12, X13, X14, X15]; for r in REGS { - if !self.used_gprs.contains(r) { + if !self.used_gprs_contains(r) { return Some(*r); } } @@ -1146,7 +1187,7 @@ impl Machine for MachineARM64 { use GPR::*; static REGS: &[GPR] = &[X8, X7, X6, X5, X4, X3, X2, X1]; for r in REGS { - if !self.used_gprs.contains(r) { + if !self.used_gprs_contains(r) { return Some(*r); } } @@ -1156,23 +1197,23 @@ impl Machine for MachineARM64 { fn acquire_temp_gpr(&mut self) -> Option { let gpr = self.pick_temp_gpr(); if let Some(x) = gpr { - self.used_gprs.insert(x); + self.used_gprs_insert(x); } gpr } fn release_gpr(&mut self, gpr: GPR) { - assert!(self.used_gprs.remove(&gpr)); + assert!(self.used_gprs_remove(&gpr)); } fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR { - assert!(!self.used_gprs.contains(&gpr)); - self.used_gprs.insert(gpr); + assert!(!self.used_gprs_contains(&gpr)); + self.used_gprs_insert(gpr); gpr } fn reserve_gpr(&mut self, gpr: GPR) { - self.used_gprs.insert(gpr); + self.used_gprs_insert(gpr); } fn push_used_gpr(&mut self, used_gprs: &Vec) -> usize { @@ -1198,7 +1239,7 @@ impl Machine for MachineARM64 { use NEON::*; static REGS: &[NEON] = &[V8, V9, V10, V11, V12]; for r in REGS { - if !self.used_simd.contains(r) { + if !self.used_simd_contains(r) { return Some(*r); } } @@ -1210,7 +1251,7 @@ impl Machine for MachineARM64 { use NEON::*; static REGS: &[NEON] = &[V0, V1, V2, V3, V4, V5, V6, V7]; for r in REGS { - if !self.used_simd.contains(r) { + if !self.used_simd_contains(r) { return Some(*r); } } @@ -1221,18 +1262,18 @@ impl Machine for MachineARM64 { fn acquire_temp_simd(&mut self) -> Option { let simd = self.pick_temp_simd(); if let Some(x) = simd { - self.used_simd.insert(x); + self.used_simd_insert(x); } simd } fn reserve_simd(&mut self, simd: NEON) { - self.used_simd.insert(simd); + self.used_simd_insert(simd); } // Releases a temporary NEON register. fn release_simd(&mut self, simd: NEON) { - assert_eq!(self.used_simd.remove(&simd), true); + assert_eq!(self.used_simd_remove(&simd), true); } fn push_used_simd(&mut self, used_neons: &Vec) -> usize { diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index a0598858155..c5595b989b8 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1,11 +1,11 @@ use crate::common_decl::*; use crate::emitter_x64::*; use crate::location::Location as AbstractLocation; +use crate::location::Reg; use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; use dynasmrt::{x64::X64Relocation, VecAssembler}; -use std::collections::HashSet; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, InstructionAddressMap, @@ -19,8 +19,8 @@ type Location = AbstractLocation; pub struct MachineX86_64 { assembler: Assembler, - used_gprs: HashSet, - used_simd: HashSet, + used_gprs: u32, + used_simd: u32, trap_table: TrapTable, /// Map from byte offset into wasm function to range of native instructions. /// @@ -34,8 +34,8 @@ impl MachineX86_64 { pub fn new() -> Self { MachineX86_64 { assembler: Assembler::new(0), - used_gprs: HashSet::new(), - used_simd: HashSet::new(), + used_gprs: 0, + used_simd: 0, trap_table: TrapTable::default(), instructions_address_map: vec![], src_loc: 0, @@ -1554,6 +1554,29 @@ impl MachineX86_64 { fn emit_relaxed_atomic_xchg(&mut self, sz: Size, src: Location, dst: Location) { self.emit_relaxed_binop(Assembler::emit_xchg, sz, src, dst); } + + fn used_gprs_contains(&self, r: &GPR) -> bool { + self.used_gprs & (1 << r.into_index()) != 0 + } + fn used_simd_contains(&self, r: &XMM) -> bool { + self.used_simd & (1 << r.into_index()) != 0 + } + fn used_gprs_insert(&mut self, r: GPR) { + self.used_gprs |= 1 << r.into_index(); + } + fn used_simd_insert(&mut self, r: XMM) { + self.used_simd |= 1 << r.into_index(); + } + fn used_gprs_remove(&mut self, r: &GPR) -> bool { + let ret = self.used_gprs_contains(r); + self.used_gprs &= !(1 << r.into_index()); + ret + } + fn used_simd_remove(&mut self, r: &XMM) -> bool { + let ret = self.used_simd_contains(r); + self.used_simd &= !(1 << r.into_index()); + ret + } } impl Machine for MachineX86_64 { @@ -1574,18 +1597,36 @@ impl Machine for MachineX86_64 { } fn get_used_gprs(&self) -> Vec { - self.used_gprs.iter().cloned().collect() + GPR::iterator() + .filter_map(|x| { + if self.used_gprs & (1 << x.into_index()) != 0 { + Some(x) + } else { + None + } + }) + .cloned() + .collect() } fn get_used_simd(&self) -> Vec { - self.used_simd.iter().cloned().collect() + XMM::iterator() + .filter_map(|x| { + if self.used_simd & (1 << x.into_index()) != 0 { + Some(x) + } else { + None + } + }) + .cloned() + .collect() } fn pick_gpr(&self) -> Option { use GPR::*; static REGS: &[GPR] = &[RSI, RDI, R8, R9, R10, R11]; for r in REGS { - if !self.used_gprs.contains(r) { + if !self.used_gprs_contains(r) { return Some(*r); } } @@ -1597,7 +1638,7 @@ impl Machine for MachineX86_64 { use GPR::*; static REGS: &[GPR] = &[RAX, RCX, RDX]; for r in REGS { - if !self.used_gprs.contains(r) { + if !self.used_gprs_contains(r) { return Some(*r); } } @@ -1607,23 +1648,23 @@ impl Machine for MachineX86_64 { fn acquire_temp_gpr(&mut self) -> Option { let gpr = self.pick_temp_gpr(); if let Some(x) = gpr { - self.used_gprs.insert(x); + self.used_gprs_insert(x); } gpr } fn release_gpr(&mut self, gpr: GPR) { - assert!(self.used_gprs.remove(&gpr)); + assert!(self.used_gprs_remove(&gpr)); } fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR { - assert!(!self.used_gprs.contains(&gpr)); - self.used_gprs.insert(gpr); + assert!(!self.used_gprs_contains(&gpr)); + self.used_gprs_insert(gpr); gpr } fn reserve_gpr(&mut self, gpr: GPR) { - self.used_gprs.insert(gpr); + self.used_gprs_insert(gpr); } fn push_used_gpr(&mut self, used_gprs: &Vec) -> usize { @@ -1643,7 +1684,7 @@ impl Machine for MachineX86_64 { use XMM::*; static REGS: &[XMM] = &[XMM3, XMM4, XMM5, XMM6, XMM7]; for r in REGS { - if !self.used_simd.contains(r) { + if !self.used_simd_contains(r) { return Some(*r); } } @@ -1655,7 +1696,7 @@ impl Machine for MachineX86_64 { use XMM::*; static REGS: &[XMM] = &[XMM0, XMM1, XMM2]; for r in REGS { - if !self.used_simd.contains(r) { + if !self.used_simd_contains(r) { return Some(*r); } } @@ -1666,18 +1707,18 @@ impl Machine for MachineX86_64 { fn acquire_temp_simd(&mut self) -> Option { let simd = self.pick_temp_simd(); if let Some(x) = simd { - self.used_simd.insert(x); + self.used_simd_insert(x); } simd } fn reserve_simd(&mut self, simd: XMM) { - self.used_simd.insert(simd); + self.used_simd_insert(simd); } // Releases a temporary XMM register. fn release_simd(&mut self, simd: XMM) { - assert_eq!(self.used_simd.remove(&simd), true); + assert_eq!(self.used_simd_remove(&simd), true); } fn push_used_simd(&mut self, used_xmms: &Vec) -> usize { diff --git a/lib/compiler-singlepass/src/x64_decl.rs b/lib/compiler-singlepass/src/x64_decl.rs index 14f63ff5fe2..b9626f947bd 100644 --- a/lib/compiler-singlepass/src/x64_decl.rs +++ b/lib/compiler-singlepass/src/x64_decl.rs @@ -4,6 +4,7 @@ use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; use crate::location::CombinedRegister; use crate::location::Reg as AbstractReg; use std::collections::BTreeMap; +use std::slice::Iter; use wasmer_compiler::CallingConvention; use wasmer_types::Type; @@ -67,7 +68,13 @@ impl AbstractReg for GPR { self as usize } fn from_index(n: usize) -> Result { - const REGS: [GPR; 16] = [ + match n { + 0..=15 => Ok(GPR::iterator().nth(n).unwrap().clone()), + _ => Err(()), + } + } + fn iterator() -> Iter<'static, GPR> { + static GPRS: [GPR; 16] = [ GPR::RAX, GPR::RCX, GPR::RDX, @@ -85,10 +92,7 @@ impl AbstractReg for GPR { GPR::R14, GPR::R15, ]; - match n { - 0..=15 => Ok(REGS[n]), - _ => Err(()), - } + GPRS.iter() } } @@ -107,7 +111,13 @@ impl AbstractReg for XMM { self as usize } fn from_index(n: usize) -> Result { - const REGS: [XMM; 16] = [ + match n { + 0..=15 => Ok(XMM::iterator().nth(n).unwrap().clone()), + _ => Err(()), + } + } + fn iterator() -> Iter<'static, XMM> { + static XMMS: [XMM; 16] = [ XMM::XMM0, XMM::XMM1, XMM::XMM2, @@ -125,10 +135,7 @@ impl AbstractReg for XMM { XMM::XMM14, XMM::XMM15, ]; - match n { - 0..=15 => Ok(REGS[n]), - _ => Err(()), - } + XMMS.iter() } } From c2a998224be3dbfd490400b2f31631ce2857d1e9 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 14 Feb 2022 17:50:20 +0100 Subject: [PATCH 5/5] Some more aarch64 emitter, after some fuzz testing --- lib/compiler-singlepass/src/emitter_arm64.rs | 36 +++++++++++++- lib/compiler-singlepass/src/machine_arm64.rs | 49 ++++++++++++++++++-- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index c5d31f9c764..bb899c0a33e 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -380,6 +380,30 @@ impl EmitterARM64 for Assembler { assert!((disp & 0x3) == 0 && (disp < 0x4000)); dynasm!(self ; ldr S(reg), [X(addr), disp]); } + (Size::S64, Location::SIMD(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + assert!(offs == 0); + let mult = mult as u32; + match mult { + 0 => dynasm!(self ; ldr D(reg), [X(addr)]), + 1 => dynasm!(self ; ldr D(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldr D(reg), [X(addr), X(r2), LSL mult]), + }; + } + (Size::S32, Location::SIMD(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + assert!(offs == 0); + let mult = mult as u32; + match mult { + 0 => dynasm!(self ; ldr S(reg), [X(addr)]), + 1 => dynasm!(self ; ldr S(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldr S(reg), [X(addr), X(r2), LSL mult]), + }; + } _ => panic!("singlepass can't emit LDR {:?}, {:?}, {:?}", sz, reg, addr), } } @@ -1373,7 +1397,8 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; ror X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let imm = imm as u32; let dst = dst.into_index() as u32; @@ -1382,6 +1407,15 @@ impl EmitterARM64 for Assembler { } dynasm!(self ; ror X(dst), X(src1), imm); } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; ror W(dst), W(src1), imm as u32); + } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let src2 = src2.into_index() as u32; diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index f176450ca6e..98254944b47 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -163,12 +163,51 @@ impl MachineARM64 { tmp }; if read_val { - let offsize = if sz == Size::S32 { - ImmType::OffsetWord - } else { - ImmType::OffsetDWord + let offsize = match sz { + Size::S8 => ImmType::OffsetByte, + Size::S16 => ImmType::OffsetHWord, + Size::S32 => ImmType::OffsetWord, + Size::S64 => ImmType::OffsetDWord, }; - if self.compatible_imm(val as i64, offsize) { + if sz == Size::S8 { + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldrb( + sz, + Location::GPR(tmp), + Location::Memory(reg, val as _), + ); + } else { + if reg == tmp { + unreachable!(); + } + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); + self.assembler.emit_ldrb( + sz, + Location::GPR(tmp), + Location::Memory2(reg, tmp, Multiplier::One, 0), + ); + } + } else if sz == Size::S16 { + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldrh( + sz, + Location::GPR(tmp), + Location::Memory(reg, val as _), + ); + } else { + if reg == tmp { + unreachable!(); + } + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); + self.assembler.emit_ldrh( + sz, + Location::GPR(tmp), + Location::Memory2(reg, tmp, Multiplier::One, 0), + ); + } + } else if self.compatible_imm(val as i64, offsize) { self.assembler.emit_ldr( sz, Location::GPR(tmp),