From 1e1032ef28657f92f75cd96dad4d9be32b442afb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 10 Dec 2021 10:04:32 +0100 Subject: [PATCH 01/34] improvement(compiler) Added basic arm64 declaration for singlepass --- lib/compiler-singlepass/src/arm64_decl.rs | 294 ++++++++++++++++++++++ lib/compiler-singlepass/src/lib.rs | 1 + lib/compiler-singlepass/src/location.rs | 5 - lib/compiler-singlepass/src/x64_decl.rs | 29 --- 4 files changed, 295 insertions(+), 34 deletions(-) create mode 100644 lib/compiler-singlepass/src/arm64_decl.rs diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs new file mode 100644 index 00000000000..15d651cb4f4 --- /dev/null +++ b/lib/compiler-singlepass/src/arm64_decl.rs @@ -0,0 +1,294 @@ +//! ARM64 structures. + +use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; +use crate::location::CombinedRegister; +use crate::location::Reg as AbstractReg; +use std::collections::BTreeMap; +use wasmer_compiler::CallingConvention; +use wasmer_types::Type; + +/// General-purpose registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum GPR { + X0 = 0, + X1 = 1, + X2 = 2, + X3 = 3, + X4 = 4, + X5 = 5, + X6 = 6, + X7 = 7, + X8 = 8, + X9 = 9, + X10 = 10, + X11 = 11, + X12 = 12, + X13 = 13, + X14 = 14, + X15 = 15, + X16 = 16, + X17 = 17, + X18 = 18, + X19 = 19, + X20 = 20, + X21 = 21, + X22 = 22, + X23 = 23, + X24 = 24, + X25 = 25, + X26 = 26, + X27 = 27, + X28 = 28, + X29 = 29, + X30 = 30, + XzrSp = 31, +} + +/// NEON registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[allow(dead_code)] +pub enum NEON { + V0 = 0, + V1 = 1, + V2 = 2, + V3 = 3, + V4 = 4, + V5 = 5, + V6 = 6, + V7 = 7, + V8 = 8, + V9 = 9, + V10 = 10, + V11 = 11, + V12 = 12, + V13 = 13, + V14 = 14, + V15 = 15, + V16 = 16, + V17 = 17, + V18 = 18, + V19 = 19, + V20 = 20, + V21 = 21, + V22 = 22, + V23 = 23, + V24 = 24, + V25 = 25, + V26 = 26, + V27 = 27, + V28 = 28, + V29 = 29, + V30 = 30, + V31 = 31, +} + +impl AbstractReg for GPR { + fn is_callee_save(self) -> bool { + self as usize > 18 + } + fn is_reserved(self) -> bool { + match self.into_index() { + 0..=16 | 19..=27 => false, + _ => true, + } + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [GPR; 32] = [ + GPR::X0, + GPR::X1, + GPR::X2, + GPR::X3, + GPR::X4, + GPR::X5, + GPR::X6, + GPR::X7, + GPR::X8, + GPR::X9, + GPR::X10, + GPR::X11, + GPR::X12, + GPR::X13, + GPR::X14, + GPR::X15, + GPR::X16, + GPR::X17, + GPR::X18, + GPR::X19, + GPR::X20, + GPR::X21, + GPR::X22, + GPR::X23, + GPR::X24, + GPR::X25, + GPR::X26, + GPR::X27, + GPR::X28, + GPR::X29, + GPR::X30, + GPR::XzrSp, + ]; + match n { + 0..=31 => Ok(REGS[n]), + _ => Err(()), + } + } +} + +impl AbstractReg for NEON { + fn is_callee_save(self) -> bool { + self as usize > 16 + } + fn is_reserved(self) -> bool { + false + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [NEON; 32] = [ + NEON::V0, + NEON::V1, + NEON::V2, + NEON::V3, + NEON::V4, + NEON::V5, + NEON::V6, + NEON::V7, + NEON::V8, + NEON::V9, + NEON::V10, + NEON::V11, + NEON::V12, + NEON::V13, + NEON::V14, + NEON::V15, + NEON::V16, + NEON::V17, + NEON::V18, + NEON::V19, + NEON::V20, + NEON::V21, + NEON::V22, + NEON::V23, + NEON::V24, + NEON::V25, + NEON::V26, + NEON::V27, + NEON::V28, + NEON::V29, + NEON::V30, + NEON::V31, + + ]; + match n { + 0..=15 => Ok(REGS[n]), + _ => Err(()), + } + } +} 
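A quick aside on the `AbstractReg` contract implemented above: `into_index` and `from_index` are meant to round-trip. Note that `NEON::from_index` accepts only `0..=15` even though `REGS` lists all of `V0`..`V31`, so the DWARF mapping further down (`_from_dwarf_regnum`, regnums 64..=95) can only ever recover `V0`..`V15`. A minimal, hypothetical check (illustrative only, not part of the patch):

    #[test]
    fn reg_index_roundtrip() {
        for n in 0..=31 {
            assert_eq!(GPR::from_index(n).unwrap().into_index(), n);
        }
        for n in 0..=15 {
            assert_eq!(NEON::from_index(n).unwrap().into_index(), n);
        }
        assert!(GPR::from_index(32).is_err());
        assert!(NEON::from_index(16).is_err()); // V16..V31 unreachable here
    }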
+ +/// A machine register under the x86-64 architecture. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ARM64Register { + /// General-purpose registers. + GPR(GPR), + /// NEON (floating point/SIMD) registers. + NEON(NEON), +} + +impl CombinedRegister for ARM64Register { + /// Returns the index of the register. + fn to_index(&self) -> RegisterIndex { + match *self { + ARM64Register::GPR(x) => RegisterIndex(x as usize), + ARM64Register::NEON(x) => RegisterIndex(x as usize + 64), + } + } + /// Convert from a GPR register + fn from_gpr(x: u16) -> Self { + ARM64Register::GPR(GPR::from_index(x as usize).unwrap()) + } + /// Convert from an SIMD register + fn from_simd(x: u16) -> Self { + ARM64Register::NEON(NEON::from_index(x as usize).unwrap()) + } + + /// Converts a DWARF regnum to ARM64Register. + fn _from_dwarf_regnum(x: u16) -> Option { + Some(match x { + 0..=31 => ARM64Register::GPR(GPR::from_index(x as usize).unwrap()), + 64..=95 => ARM64Register::NEON(NEON::from_index(x as usize - 64).unwrap()), + _ => return None, + }) + } +} + +/// An allocator that allocates registers for function arguments according to the System V ABI. +#[derive(Default)] +pub struct ArgumentRegisterAllocator { + n_gprs: usize, + n_neons: usize, +} + +impl ArgumentRegisterAllocator { + /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type. + pub fn next(&mut self, ty: Type, calling_convention: CallingConvention) -> Option { + match calling_convention { + CallingConvention::SystemV => { + static GPR_SEQ: &'static [GPR] = + &[GPR::X0, GPR::X1, GPR::X2, GPR::X3, GPR::X4, GPR::X5, GPR::X6, GPR::X7]; + static NEON_SEQ: &'static [NEON] = &[ + NEON::V0, + NEON::V1, + NEON::V2, + NEON::V3, + NEON::V4, + NEON::V5, + NEON::V6, + NEON::V7, + ]; + match ty { + Type::I32 | Type::I64 => { + if self.n_gprs < GPR_SEQ.len() { + let gpr = GPR_SEQ[self.n_gprs]; + self.n_gprs += 1; + Some(ARM64Register::GPR(gpr)) + } else { + None + } + } + Type::F32 | Type::F64 => { + if self.n_neons < NEON_SEQ.len() { + let neon = NEON_SEQ[self.n_neons]; + self.n_neons += 1; + Some(ARM64Register::NEON(neon)) + } else { + None + } + } + _ => todo!( + "ArgumentRegisterAllocator::next: Unsupported type: {:?}", + ty + ), + } + } + _ => unimplemented!(), + } + } +} + +/// Create a new `MachineState` with default values. 
+pub fn new_machine_state() -> MachineState { + MachineState { + stack_values: vec![], + register_values: vec![MachineValue::Undefined; 32 + 32], + prev_frame: BTreeMap::new(), + wasm_stack: vec![], + wasm_inst_offset: std::usize::MAX, + } +} diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs index 42cde289d36..aa90688e90f 100644 --- a/lib/compiler-singlepass/src/lib.rs +++ b/lib/compiler-singlepass/src/lib.rs @@ -18,6 +18,7 @@ mod location; mod machine; mod machine_x64; mod x64_decl; +mod arm64_decl; pub use crate::compiler::SinglepassCompiler; pub use crate::config::Singlepass; diff --git a/lib/compiler-singlepass/src/location.rs b/lib/compiler-singlepass/src/location.rs index bc5277d2cad..61817068a49 100644 --- a/lib/compiler-singlepass/src/location.rs +++ b/lib/compiler-singlepass/src/location.rs @@ -71,9 +71,4 @@ pub trait CombinedRegister: Copy + Clone + Eq + PartialEq + Debug { fn from_gpr(x: u16) -> Self; /// Convert from an SIMD register fn from_simd(x: u16) -> Self; - /// Returns the instruction prefix for move to stack - /// for example `movq %this_reg, ?(%rsp)` on x86_64 - /// To build an instruction, append the memory location as a 32-bit - /// offset to the stack pointer to this prefix. - fn _prefix_mov_to_stack(&self) -> Option<&'static [u8]>; } diff --git a/lib/compiler-singlepass/src/x64_decl.rs b/lib/compiler-singlepass/src/x64_decl.rs index 96590527562..14f63ff5fe2 100644 --- a/lib/compiler-singlepass/src/x64_decl.rs +++ b/lib/compiler-singlepass/src/x64_decl.rs @@ -166,35 +166,6 @@ impl CombinedRegister for X64Register { _ => return None, }) } - - /// Returns the instruction prefix for `movq %this_reg, ?(%rsp)`. - /// - /// To build an instruction, append the memory location as a 32-bit - /// offset to the stack pointer to this prefix. - fn _prefix_mov_to_stack(&self) -> Option<&'static [u8]> { - Some(match *self { - X64Register::GPR(gpr) => match gpr { - GPR::RDI => &[0x48, 0x89, 0xbc, 0x24], - GPR::RSI => &[0x48, 0x89, 0xb4, 0x24], - GPR::RDX => &[0x48, 0x89, 0x94, 0x24], - GPR::RCX => &[0x48, 0x89, 0x8c, 0x24], - GPR::R8 => &[0x4c, 0x89, 0x84, 0x24], - GPR::R9 => &[0x4c, 0x89, 0x8c, 0x24], - _ => return None, - }, - X64Register::XMM(xmm) => match xmm { - XMM::XMM0 => &[0x66, 0x0f, 0xd6, 0x84, 0x24], - XMM::XMM1 => &[0x66, 0x0f, 0xd6, 0x8c, 0x24], - XMM::XMM2 => &[0x66, 0x0f, 0xd6, 0x94, 0x24], - XMM::XMM3 => &[0x66, 0x0f, 0xd6, 0x9c, 0x24], - XMM::XMM4 => &[0x66, 0x0f, 0xd6, 0xa4, 0x24], - XMM::XMM5 => &[0x66, 0x0f, 0xd6, 0xac, 0x24], - XMM::XMM6 => &[0x66, 0x0f, 0xd6, 0xb4, 0x24], - XMM::XMM7 => &[0x66, 0x0f, 0xd6, 0xbc, 0x24], - _ => return None, - }, - }) - } } /// An allocator that allocates registers for function arguments according to the System V ABI. From d317e1ec918b1e0b175ea6e1693dc6c280be5f3b Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 10 Dec 2021 10:09:08 +0100 Subject: [PATCH 02/34] Fixed linting --- lib/compiler-singlepass/src/arm64_decl.rs | 599 +++++++++++----------- lib/compiler-singlepass/src/lib.rs | 2 +- 2 files changed, 306 insertions(+), 295 deletions(-) diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs index 15d651cb4f4..44eb3017f17 100644 --- a/lib/compiler-singlepass/src/arm64_decl.rs +++ b/lib/compiler-singlepass/src/arm64_decl.rs @@ -1,294 +1,305 @@ -//! ARM64 structures. 
- -use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; -use crate::location::CombinedRegister; -use crate::location::Reg as AbstractReg; -use std::collections::BTreeMap; -use wasmer_compiler::CallingConvention; -use wasmer_types::Type; - -/// General-purpose registers. -#[repr(u8)] -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum GPR { - X0 = 0, - X1 = 1, - X2 = 2, - X3 = 3, - X4 = 4, - X5 = 5, - X6 = 6, - X7 = 7, - X8 = 8, - X9 = 9, - X10 = 10, - X11 = 11, - X12 = 12, - X13 = 13, - X14 = 14, - X15 = 15, - X16 = 16, - X17 = 17, - X18 = 18, - X19 = 19, - X20 = 20, - X21 = 21, - X22 = 22, - X23 = 23, - X24 = 24, - X25 = 25, - X26 = 26, - X27 = 27, - X28 = 28, - X29 = 29, - X30 = 30, - XzrSp = 31, -} - -/// NEON registers. -#[repr(u8)] -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] -#[allow(dead_code)] -pub enum NEON { - V0 = 0, - V1 = 1, - V2 = 2, - V3 = 3, - V4 = 4, - V5 = 5, - V6 = 6, - V7 = 7, - V8 = 8, - V9 = 9, - V10 = 10, - V11 = 11, - V12 = 12, - V13 = 13, - V14 = 14, - V15 = 15, - V16 = 16, - V17 = 17, - V18 = 18, - V19 = 19, - V20 = 20, - V21 = 21, - V22 = 22, - V23 = 23, - V24 = 24, - V25 = 25, - V26 = 26, - V27 = 27, - V28 = 28, - V29 = 29, - V30 = 30, - V31 = 31, -} - -impl AbstractReg for GPR { - fn is_callee_save(self) -> bool { - self as usize > 18 - } - fn is_reserved(self) -> bool { - match self.into_index() { - 0..=16 | 19..=27 => false, - _ => true, - } - } - fn into_index(self) -> usize { - self as usize - } - fn from_index(n: usize) -> Result { - const REGS: [GPR; 32] = [ - GPR::X0, - GPR::X1, - GPR::X2, - GPR::X3, - GPR::X4, - GPR::X5, - GPR::X6, - GPR::X7, - GPR::X8, - GPR::X9, - GPR::X10, - GPR::X11, - GPR::X12, - GPR::X13, - GPR::X14, - GPR::X15, - GPR::X16, - GPR::X17, - GPR::X18, - GPR::X19, - GPR::X20, - GPR::X21, - GPR::X22, - GPR::X23, - GPR::X24, - GPR::X25, - GPR::X26, - GPR::X27, - GPR::X28, - GPR::X29, - GPR::X30, - GPR::XzrSp, - ]; - match n { - 0..=31 => Ok(REGS[n]), - _ => Err(()), - } - } -} - -impl AbstractReg for NEON { - fn is_callee_save(self) -> bool { - self as usize > 16 - } - fn is_reserved(self) -> bool { - false - } - fn into_index(self) -> usize { - self as usize - } - fn from_index(n: usize) -> Result { - const REGS: [NEON; 32] = [ - NEON::V0, - NEON::V1, - NEON::V2, - NEON::V3, - NEON::V4, - NEON::V5, - NEON::V6, - NEON::V7, - NEON::V8, - NEON::V9, - NEON::V10, - NEON::V11, - NEON::V12, - NEON::V13, - NEON::V14, - NEON::V15, - NEON::V16, - NEON::V17, - NEON::V18, - NEON::V19, - NEON::V20, - NEON::V21, - NEON::V22, - NEON::V23, - NEON::V24, - NEON::V25, - NEON::V26, - NEON::V27, - NEON::V28, - NEON::V29, - NEON::V30, - NEON::V31, - - ]; - match n { - 0..=15 => Ok(REGS[n]), - _ => Err(()), - } - } -} - -/// A machine register under the x86-64 architecture. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum ARM64Register { - /// General-purpose registers. - GPR(GPR), - /// NEON (floating point/SIMD) registers. - NEON(NEON), -} - -impl CombinedRegister for ARM64Register { - /// Returns the index of the register. 
- fn to_index(&self) -> RegisterIndex { - match *self { - ARM64Register::GPR(x) => RegisterIndex(x as usize), - ARM64Register::NEON(x) => RegisterIndex(x as usize + 64), - } - } - /// Convert from a GPR register - fn from_gpr(x: u16) -> Self { - ARM64Register::GPR(GPR::from_index(x as usize).unwrap()) - } - /// Convert from an SIMD register - fn from_simd(x: u16) -> Self { - ARM64Register::NEON(NEON::from_index(x as usize).unwrap()) - } - - /// Converts a DWARF regnum to ARM64Register. - fn _from_dwarf_regnum(x: u16) -> Option { - Some(match x { - 0..=31 => ARM64Register::GPR(GPR::from_index(x as usize).unwrap()), - 64..=95 => ARM64Register::NEON(NEON::from_index(x as usize - 64).unwrap()), - _ => return None, - }) - } -} - -/// An allocator that allocates registers for function arguments according to the System V ABI. -#[derive(Default)] -pub struct ArgumentRegisterAllocator { - n_gprs: usize, - n_neons: usize, -} - -impl ArgumentRegisterAllocator { - /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type. - pub fn next(&mut self, ty: Type, calling_convention: CallingConvention) -> Option { - match calling_convention { - CallingConvention::SystemV => { - static GPR_SEQ: &'static [GPR] = - &[GPR::X0, GPR::X1, GPR::X2, GPR::X3, GPR::X4, GPR::X5, GPR::X6, GPR::X7]; - static NEON_SEQ: &'static [NEON] = &[ - NEON::V0, - NEON::V1, - NEON::V2, - NEON::V3, - NEON::V4, - NEON::V5, - NEON::V6, - NEON::V7, - ]; - match ty { - Type::I32 | Type::I64 => { - if self.n_gprs < GPR_SEQ.len() { - let gpr = GPR_SEQ[self.n_gprs]; - self.n_gprs += 1; - Some(ARM64Register::GPR(gpr)) - } else { - None - } - } - Type::F32 | Type::F64 => { - if self.n_neons < NEON_SEQ.len() { - let neon = NEON_SEQ[self.n_neons]; - self.n_neons += 1; - Some(ARM64Register::NEON(neon)) - } else { - None - } - } - _ => todo!( - "ArgumentRegisterAllocator::next: Unsupported type: {:?}", - ty - ), - } - } - _ => unimplemented!(), - } - } -} - -/// Create a new `MachineState` with default values. -pub fn new_machine_state() -> MachineState { - MachineState { - stack_values: vec![], - register_values: vec![MachineValue::Undefined; 32 + 32], - prev_frame: BTreeMap::new(), - wasm_stack: vec![], - wasm_inst_offset: std::usize::MAX, - } -} +//! ARM64 structures. + +use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; +use crate::location::CombinedRegister; +use crate::location::Reg as AbstractReg; +use std::collections::BTreeMap; +use wasmer_compiler::CallingConvention; +use wasmer_types::Type; + +/// General-purpose registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum GPR { + X0 = 0, + X1 = 1, + X2 = 2, + X3 = 3, + X4 = 4, + X5 = 5, + X6 = 6, + X7 = 7, + X8 = 8, + X9 = 9, + X10 = 10, + X11 = 11, + X12 = 12, + X13 = 13, + X14 = 14, + X15 = 15, + X16 = 16, + X17 = 17, + X18 = 18, + X19 = 19, + X20 = 20, + X21 = 21, + X22 = 22, + X23 = 23, + X24 = 24, + X25 = 25, + X26 = 26, + X27 = 27, + X28 = 28, + X29 = 29, + X30 = 30, + XzrSp = 31, +} + +/// NEON registers. 
+#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[allow(dead_code)] +pub enum NEON { + V0 = 0, + V1 = 1, + V2 = 2, + V3 = 3, + V4 = 4, + V5 = 5, + V6 = 6, + V7 = 7, + V8 = 8, + V9 = 9, + V10 = 10, + V11 = 11, + V12 = 12, + V13 = 13, + V14 = 14, + V15 = 15, + V16 = 16, + V17 = 17, + V18 = 18, + V19 = 19, + V20 = 20, + V21 = 21, + V22 = 22, + V23 = 23, + V24 = 24, + V25 = 25, + V26 = 26, + V27 = 27, + V28 = 28, + V29 = 29, + V30 = 30, + V31 = 31, +} + +impl AbstractReg for GPR { + fn is_callee_save(self) -> bool { + self as usize > 18 + } + fn is_reserved(self) -> bool { + match self.into_index() { + 0..=16 | 19..=27 => false, + _ => true, + } + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [GPR; 32] = [ + GPR::X0, + GPR::X1, + GPR::X2, + GPR::X3, + GPR::X4, + GPR::X5, + GPR::X6, + GPR::X7, + GPR::X8, + GPR::X9, + GPR::X10, + GPR::X11, + GPR::X12, + GPR::X13, + GPR::X14, + GPR::X15, + GPR::X16, + GPR::X17, + GPR::X18, + GPR::X19, + GPR::X20, + GPR::X21, + GPR::X22, + GPR::X23, + GPR::X24, + GPR::X25, + GPR::X26, + GPR::X27, + GPR::X28, + GPR::X29, + GPR::X30, + GPR::XzrSp, + ]; + match n { + 0..=31 => Ok(REGS[n]), + _ => Err(()), + } + } +} + +impl AbstractReg for NEON { + fn is_callee_save(self) -> bool { + self as usize > 16 + } + fn is_reserved(self) -> bool { + false + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [NEON; 32] = [ + NEON::V0, + NEON::V1, + NEON::V2, + NEON::V3, + NEON::V4, + NEON::V5, + NEON::V6, + NEON::V7, + NEON::V8, + NEON::V9, + NEON::V10, + NEON::V11, + NEON::V12, + NEON::V13, + NEON::V14, + NEON::V15, + NEON::V16, + NEON::V17, + NEON::V18, + NEON::V19, + NEON::V20, + NEON::V21, + NEON::V22, + NEON::V23, + NEON::V24, + NEON::V25, + NEON::V26, + NEON::V27, + NEON::V28, + NEON::V29, + NEON::V30, + NEON::V31, + ]; + match n { + 0..=15 => Ok(REGS[n]), + _ => Err(()), + } + } +} + +/// A machine register under the x86-64 architecture. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ARM64Register { + /// General-purpose registers. + GPR(GPR), + /// NEON (floating point/SIMD) registers. + NEON(NEON), +} + +impl CombinedRegister for ARM64Register { + /// Returns the index of the register. + fn to_index(&self) -> RegisterIndex { + match *self { + ARM64Register::GPR(x) => RegisterIndex(x as usize), + ARM64Register::NEON(x) => RegisterIndex(x as usize + 64), + } + } + /// Convert from a GPR register + fn from_gpr(x: u16) -> Self { + ARM64Register::GPR(GPR::from_index(x as usize).unwrap()) + } + /// Convert from an SIMD register + fn from_simd(x: u16) -> Self { + ARM64Register::NEON(NEON::from_index(x as usize).unwrap()) + } + + /// Converts a DWARF regnum to ARM64Register. + fn _from_dwarf_regnum(x: u16) -> Option { + Some(match x { + 0..=31 => ARM64Register::GPR(GPR::from_index(x as usize).unwrap()), + 64..=95 => ARM64Register::NEON(NEON::from_index(x as usize - 64).unwrap()), + _ => return None, + }) + } +} + +/// An allocator that allocates registers for function arguments according to the System V ABI. +#[derive(Default)] +pub struct ArgumentRegisterAllocator { + n_gprs: usize, + n_neons: usize, +} + +impl ArgumentRegisterAllocator { + /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type. 
+    pub fn next(
+        &mut self,
+        ty: Type,
+        calling_convention: CallingConvention,
+    ) -> Option<ARM64Register> {
+        match calling_convention {
+            CallingConvention::SystemV => {
+                static GPR_SEQ: &'static [GPR] = &[
+                    GPR::X0,
+                    GPR::X1,
+                    GPR::X2,
+                    GPR::X3,
+                    GPR::X4,
+                    GPR::X5,
+                    GPR::X6,
+                    GPR::X7,
+                ];
+                static NEON_SEQ: &'static [NEON] = &[
+                    NEON::V0,
+                    NEON::V1,
+                    NEON::V2,
+                    NEON::V3,
+                    NEON::V4,
+                    NEON::V5,
+                    NEON::V6,
+                    NEON::V7,
+                ];
+                match ty {
+                    Type::I32 | Type::I64 => {
+                        if self.n_gprs < GPR_SEQ.len() {
+                            let gpr = GPR_SEQ[self.n_gprs];
+                            self.n_gprs += 1;
+                            Some(ARM64Register::GPR(gpr))
+                        } else {
+                            None
+                        }
+                    }
+                    Type::F32 | Type::F64 => {
+                        if self.n_neons < NEON_SEQ.len() {
+                            let neon = NEON_SEQ[self.n_neons];
+                            self.n_neons += 1;
+                            Some(ARM64Register::NEON(neon))
+                        } else {
+                            None
+                        }
+                    }
+                    _ => todo!(
+                        "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
+                        ty
+                    ),
+                }
+            }
+            _ => unimplemented!(),
+        }
+    }
+}
+
+/// Create a new `MachineState` with default values.
+pub fn new_machine_state() -> MachineState {
+    MachineState {
+        stack_values: vec![],
+        register_values: vec![MachineValue::Undefined; 32 + 32],
+        prev_frame: BTreeMap::new(),
+        wasm_stack: vec![],
+        wasm_inst_offset: std::usize::MAX,
+    }
+}
diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs
index aa90688e90f..f79fb6b7d46 100644
--- a/lib/compiler-singlepass/src/lib.rs
+++ b/lib/compiler-singlepass/src/lib.rs
@@ -9,6 +9,7 @@
 //! runtime performance.
 
 mod address_map;
+mod arm64_decl;
 mod codegen;
 mod common_decl;
 mod compiler;
@@ -18,7 +19,6 @@ mod location;
 mod machine;
 mod machine_x64;
 mod x64_decl;
-mod arm64_decl;
 
 pub use crate::compiler::SinglepassCompiler;
 pub use crate::config::Singlepass;

From 69478cd57504691982c263fc914bc1ebe9700694 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 13 Dec 2021 10:53:03 +0100
Subject: [PATCH 03/34] improvement(compiler) Added skeleton for arm64
 singlepass

---
 Makefile                                     |    5 +
 lib/compiler-singlepass/src/compiler.rs      |   99 +-
 lib/compiler-singlepass/src/emitter_arm64.rs |  350 +++
 lib/compiler-singlepass/src/lib.rs           |    2 +
 lib/compiler-singlepass/src/machine.rs       |   39 +
 lib/compiler-singlepass/src/machine_arm64.rs | 2410 ++++++++++++++++++
 lib/compiler-singlepass/src/machine_x64.rs   |   42 +-
 7 files changed, 2865 insertions(+), 82 deletions(-)
 create mode 100644 lib/compiler-singlepass/src/emitter_arm64.rs
 create mode 100644 lib/compiler-singlepass/src/machine_arm64.rs
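This commit wires the build (Makefile) and the compiler dispatch (compiler.rs), and lands two new files: the `EmitterARM64` trait over a dynasm-rt `VecAssembler<Aarch64Relocation>`, and the `MachineARM64` skeleton, most of whose `Machine` methods still end in `unimplemented!()`. The emitter can already assemble small sequences; a rough usage sketch (assumes the items defined in emitter_arm64.rs below):

    let mut a = Assembler::new(0);
    a.emit_mov_imm(Location::GPR(GPR::X0), 42); // mov w0, #42
    a.emit_add(
        Size::S64,
        Location::GPR(GPR::X0), // src1
        Location::GPR(GPR::X1), // src2
        Location::GPR(GPR::X2), // dst: add x2, x0, x1
    );
    a.emit_ret();
    let code: Vec<u8> = a.finalize().unwrap();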
diff --git a/Makefile b/Makefile
index 2df94e8e675..afc2b6c9fb1 100644
--- a/Makefile
+++ b/Makefile
@@ -179,6 +179,11 @@ ifneq ($(ENABLE_SINGLEPASS), 0)
 		ifeq ($(IS_AMD64), 1)
 			compilers += singlepass
 		endif
+		ifeq ($(IS_AARCH64), 1)
+			ifneq ($(IS_WINDOWS), 1)
+				compilers += singlepass
+			endif
+		endif
 	endif
 endif
diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs
index df9e9a8d63b..95c86b3f834 100644
--- a/lib/compiler-singlepass/src/compiler.rs
+++ b/lib/compiler-singlepass/src/compiler.rs
@@ -4,9 +4,11 @@
 use crate::codegen::FuncGen;
 use crate::config::Singlepass;
+use crate::machine::Machine;
 use crate::machine::{
     gen_import_call_trampoline, gen_std_dynamic_import_trampoline, gen_std_trampoline,
     CodegenError,
 };
+use crate::machine_arm64::MachineARM64;
 use crate::machine_x64::MachineX86_64;
 use loupe::MemoryUsage;
 #[cfg(feature = "rayon")]
@@ -58,17 +60,18 @@ impl Compiler for SinglepassCompiler {
         _module_translation: &ModuleTranslationState,
         function_body_inputs: PrimaryMap<LocalFunctionIndex, FunctionBodyData<'_>>,
     ) -> Result<Compilation, CompileError> {
-        /*if target.triple().operating_system == OperatingSystem::Windows {
-            return Err(CompileError::UnsupportedTarget(
-                OperatingSystem::Windows.to_string(),
-            ));
-        }*/
-        if target.triple().architecture != Architecture::X86_64 {
-            return Err(CompileError::UnsupportedTarget(
-                target.triple().architecture.to_string(),
-            ));
+        match target.triple().architecture {
+            Architecture::X86_64 => {}
+            Architecture::Aarch64(_) => {}
+            _ => {
+                return Err(CompileError::UnsupportedTarget(
+                    target.triple().architecture.to_string(),
+                ))
+            }
         }
-        if !target.cpu_features().contains(CpuFeature::AVX) {
+        if target.triple().architecture == Architecture::X86_64
+            && !target.cpu_features().contains(CpuFeature::AVX)
+        {
             return Err(CompileError::UnsupportedTarget(
                 "x86_64 without AVX".to_string(),
             ));
@@ -126,30 +129,53 @@ impl Compiler for SinglepassCompiler {
                     }
                 }
 
-                let machine = match target.triple().architecture {
-                    Architecture::X86_64 => MachineX86_64::new(),
-                    _ => unimplemented!(),
-                };
-                let mut generator = FuncGen::new(
-                    module,
-                    &self.config,
-                    &vmoffsets,
-                    &memory_styles,
-                    &table_styles,
-                    i,
-                    &locals,
-                    machine,
-                    calling_convention,
-                )
-                .map_err(to_compile_error)?;
+                match target.triple().architecture {
+                    Architecture::X86_64 => {
+                        let machine = MachineX86_64::new();
+                        let mut generator = FuncGen::new(
+                            module,
+                            &self.config,
+                            &vmoffsets,
+                            &memory_styles,
+                            &table_styles,
+                            i,
+                            &locals,
+                            machine,
+                            calling_convention,
+                        )
+                        .map_err(to_compile_error)?;
+                        while generator.has_control_frames() {
+                            generator.set_srcloc(reader.original_position() as u32);
+                            let op = reader.read_operator()?;
+                            generator.feed_operator(op).map_err(to_compile_error)?;
+                        }
 
-                while generator.has_control_frames() {
-                    generator.set_srcloc(reader.original_position() as u32);
-                    let op = reader.read_operator()?;
-                    generator.feed_operator(op).map_err(to_compile_error)?;
-                }
+                        Ok(generator.finalize(&input))
+                    }
+                    Architecture::Aarch64(_) => {
+                        let machine = MachineARM64::new();
+                        let mut generator = FuncGen::new(
+                            module,
+                            &self.config,
+                            &vmoffsets,
+                            &memory_styles,
+                            &table_styles,
+                            i,
+                            &locals,
+                            machine,
+                            calling_convention,
+                        )
+                        .map_err(to_compile_error)?;
+                        while generator.has_control_frames() {
+                            generator.set_srcloc(reader.original_position() as u32);
+                            let op = reader.read_operator()?;
+                            generator.feed_operator(op).map_err(to_compile_error)?;
+                        }
 
-                Ok(generator.finalize(&input))
+                        Ok(generator.finalize(&input))
+                    }
+                    _ => unimplemented!(),
+                }
             })
             .collect::<Result<Vec<_>, CompileError>>()?
            .into_iter()

@@ -253,15 +279,6 @@ mod tests {
     fn errors_for_unsupported_targets() {
         let compiler = SinglepassCompiler::new(Singlepass::default());
 
-        // Compile for win64
-        /*let win64 = Target::new(triple!("x86_64-pc-windows-msvc"), CpuFeature::for_host());
-        let (mut info, translation, inputs) = dummy_compilation_ingredients();
-        let result = compiler.compile_module(&win64, &mut info, &translation, inputs);
-        match result.unwrap_err() {
-            CompileError::UnsupportedTarget(name) => assert_eq!(name, "windows"),
-            error => panic!("Unexpected error: {:?}", error),
-        };*/
-
         // Compile for 32bit Linux
         let linux32 = Target::new(triple!("i686-unknown-linux-gnu"), CpuFeature::for_host());
         let (mut info, translation, inputs) = dummy_compilation_ingredients();
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
new file mode 100644
index 00000000000..d38f646538e
--- /dev/null
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -0,0 +1,350 @@
+pub use crate::arm64_decl::{GPR, NEON};
+use crate::common_decl::Size;
+use crate::location::Location as AbstractLocation;
+pub use crate::location::{Multiplier, Reg};
+pub use crate::machine::{Label, Offset};
+use dynasm::dynasm;
+use dynasmrt::{
+    aarch64::Aarch64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi,
+    VecAssembler,
+};
+
+type Assembler = VecAssembler<Aarch64Relocation>;
+
+/// Force `dynasm!` to use the correct arch (aarch64) when cross-compiling.
+/// `dynasm!` proc-macro tries to auto-detect it by default by looking at the
+/// `target_arch`, but it sees the `target_arch` of the proc-macro itself, which
+/// is always equal to host, even when cross-compiling.
+macro_rules! dynasm {
+    ($a:expr ; $($tt:tt)*) => {
+        dynasm::dynasm!(
+            $a
+            ; .arch aarch64
+            ; $($tt)*
+        )
+    };
+}
+
+pub type Location = AbstractLocation<GPR, NEON>;
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[allow(dead_code)]
+pub enum Condition {
+    Eq,
+    Ne,
+    Cs,
+    Cc,
+    Mi,
+    Pl,
+    Vs,
+    Vc,
+    Hi,
+    Ls,
+    Ge,
+    Lt,
+    Gt,
+    Le,
+    Uncond,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[allow(dead_code)]
+pub enum NeonOrMemory {
+    NEON(NEON),
+    Memory(GPR, i32),
+}
+
+#[derive(Copy, Clone, Debug)]
+#[allow(dead_code)]
+pub enum GPROrMemory {
+    GPR(GPR),
+    Memory(GPR, i32),
+}
+
+pub trait EmitterARM64 {
+    fn get_label(&mut self) -> Label;
+    fn get_offset(&self) -> Offset;
+    fn get_jmp_instr_size(&self) -> u8;
+
+    fn finalize_function(&mut self);
+
+    fn emit_str(&mut self, sz: Size, src: Location, dst: Location);
+    fn emit_ldr(&mut self, sz: Size, src: Location, dst: Location);
+
+    fn emit_mov_imm(&mut self, dst: Location, val: u64);
+
+    fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+    fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+
+    fn emit_push(&mut self, sz: Size, src: Location);
+    fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location);
+    fn emit_pop(&mut self, sz: Size, dst: Location);
+    fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location);
+
+    fn emit_label(&mut self, label: Label);
+    fn emit_b_label(&mut self, label: Label);
+    fn emit_bcond_label(&mut self, condition: Condition, label: Label);
+    fn emit_call_label(&mut self, label: Label);
+    fn emit_call_register(&mut self, reg: GPR);
+    fn emit_ret(&mut self);
+
+    fn emit_udf(&mut self);
+
+    fn arch_supports_canonicalize_nan(&self) -> bool {
+        true
+    }
+
+    fn arch_requires_indirect_call_trampoline(&self) -> bool {
+        false
+    }
+
+    fn
arch_emit_indirect_call_with_trampoline(&mut self, _loc: Location) { + unimplemented!() + } +} + +impl EmitterARM64 for Assembler { + fn get_label(&mut self) -> DynamicLabel { + self.new_dynamic_label() + } + + fn get_offset(&self) -> AssemblyOffset { + self.offset() + } + + fn get_jmp_instr_size(&self) -> u8 { + 4 // relative jump, not full 32bits capable + } + + fn finalize_function(&mut self) { + dynasm!( + self + ; const_neg_one_32: + ; .dword -1 + ; const_zero_32: + ; .dword 0 + ; const_pos_one_32: + ; .dword 1 + ); + } + + fn emit_str(&mut self, sz: Size, reg: Location, addr: Location) { + match (sz, reg, addr) { + (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; str X(reg), [X(addr), disp]); + } + (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; str W(reg), [X(addr), disp]); + } + (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; strh W(reg), [X(addr), disp]); + } + (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; strb W(reg), [X(addr), disp]); + } + (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; str D(reg), [X(addr), disp]); + } + _ => unreachable!(), + } + } + fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location) { + match (sz, reg, addr) { + (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; ldr X(reg), [X(addr), disp]); + } + (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; ldr W(reg), [X(addr), disp]); + } + (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; ldrh W(reg), [X(addr), disp]); + } + (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; ldrb W(reg), [X(addr), disp]); + } + (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + dynasm!(self ; ldr D(reg), [X(addr), disp]); + } + _ => unreachable!(), + } + } + + fn emit_mov_imm(&mut self, dst: Location, val: u64) { + match dst { + Location::GPR(dst) => { + let dst = dst.into_index() as u32; + dynasm!(self ; mov W(dst), val) + } + _ => panic!("singlepass can't emit MOVW {:?}", dst), + } + } + + fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as 
u32;
+                dynasm!(self ; add X(dst), X(src1), X(src2));
+            }
+            (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2 as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; add X(dst), X(src1), src2);
+            }
+            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; add W(dst), W(src1), W(src2));
+            }
+            _ => panic!(
+                "singlepass can't emit ADD {:?} {:?} {:?} {:?}",
+                sz, src1, src2, dst
+            ),
+        }
+    }
+    fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
+        match (sz, src1, src2, dst) {
+            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; sub X(dst), X(src1), X(src2));
+            }
+            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; sub W(dst), W(src1), W(src2));
+            }
+            _ => panic!(
+                "singlepass can't emit SUB {:?} {:?} {:?} {:?}",
+                sz, src1, src2, dst
+            ),
+        }
+    }
+
+    fn emit_push(&mut self, sz: Size, src: Location) {
+        match (sz, src) {
+            (Size::S64, Location::GPR(src)) => {
+                let src = src.into_index() as u32;
+                dynasm!(self ; str X(src), [sp, -16]!);
+            }
+            (Size::S64, Location::SIMD(src)) => {
+                let src = src.into_index() as u32;
+                dynasm!(self ; str Q(src), [sp, -16]!);
+            }
+            _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src),
+        }
+    }
+    fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location) {
+        match (sz, src1, src2) {
+            (Size::S64, Location::GPR(src1), Location::GPR(src2)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                dynasm!(self ; stp X(src1), X(src2), [sp, -16]!);
+            }
+            _ => panic!(
+                "singlepass can't emit DOUBLE PUSH {:?} {:?} {:?}",
+                sz, src1, src2
+            ),
+        }
+    }
+    fn emit_pop(&mut self, sz: Size, dst: Location) {
+        match (sz, dst) {
+            (Size::S64, Location::GPR(dst)) => {
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; ldr X(dst), [sp], 16);
+            }
+            (Size::S64, Location::SIMD(dst)) => {
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; ldr Q(dst), [sp], 16);
+            }
+            _ => panic!("singlepass can't emit POP {:?} {:?}", sz, dst),
+        }
+    }
+    fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location) {
+        match (sz, dst1, dst2) {
+            (Size::S64, Location::GPR(dst1), Location::GPR(dst2)) => {
+                let dst1 = dst1.into_index() as u32;
+                let dst2 = dst2.into_index() as u32;
+                dynasm!(self ; ldp X(dst1), X(dst2), [sp], 16);
+            }
+            _ => panic!(
+                "singlepass can't emit DOUBLE POP {:?} {:?} {:?}",
+                sz, dst1, dst2
+            ),
+        }
+    }
+
+    fn emit_label(&mut self, label: Label) {
+        dynasm!(self ; =>label);
+    }
+    fn emit_b_label(&mut self, label: Label) {
+        dynasm!(self ; b =>label);
+    }
+    fn emit_bcond_label(&mut self, condition: Condition, label: Label) {
+        match condition {
+            Condition::Eq => dynasm!(self ; b.eq => label),
+            Condition::Ne => dynasm!(self ; b.ne => label),
+            Condition::Cs => dynasm!(self ; b.cs => label),
+            Condition::Cc => dynasm!(self ; b.cc => label),
+            Condition::Mi => dynasm!(self ; b.mi => label),
+            Condition::Pl => dynasm!(self ; b.pl => label),
+            Condition::Vs => dynasm!(self ; b.vs => label),
+            Condition::Vc => dynasm!(self ;
b.vc => label), + Condition::Hi => dynasm!(self ; b.hi => label), + Condition::Ls => dynasm!(self ; b.ls => label), + Condition::Ge => dynasm!(self ; b.ge => label), + Condition::Lt => dynasm!(self ; b.lt => label), + Condition::Gt => dynasm!(self ; b.gt => label), + Condition::Le => dynasm!(self ; b.le => label), + Condition::Uncond => dynasm!(self ; b => label), + } + } + fn emit_call_label(&mut self, label: Label) { + dynasm!(self ; bl =>label); + } + fn emit_call_register(&mut self, reg: GPR) { + dynasm!(self ; blr X(reg.into_index() as u32)); + } + fn emit_ret(&mut self) { + dynasm!(self ; ret); + } + + fn emit_udf(&mut self) { + dynasm!(self ; udf 0); + } +} diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs index f79fb6b7d46..48329748abf 100644 --- a/lib/compiler-singlepass/src/lib.rs +++ b/lib/compiler-singlepass/src/lib.rs @@ -14,9 +14,11 @@ mod codegen; mod common_decl; mod compiler; mod config; +mod emitter_arm64; mod emitter_x64; mod location; mod machine; +mod machine_arm64; mod machine_x64; mod x64_decl; diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 67b940749c9..7f5c18a3952 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -2199,3 +2199,42 @@ pub fn gen_import_call_trampoline( }; machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) } + +// Constants for the bounds of truncation operations. These are the least or +// greatest exact floats in either f32 or f64 representation less-than (for +// least) or greater-than (for greatest) the i32 or i64 or u32 or u64 +// min (for least) or max (for greatest), when rounding towards zero. + +/// Greatest Exact Float (32 bits) less-than i32::MIN when rounding towards zero. +pub const GEF32_LT_I32_MIN: f32 = -2147483904.0; +/// Least Exact Float (32 bits) greater-than i32::MAX when rounding towards zero. +pub const LEF32_GT_I32_MAX: f32 = 2147483648.0; +/// Greatest Exact Float (32 bits) less-than i64::MIN when rounding towards zero. +pub const GEF32_LT_I64_MIN: f32 = -9223373136366403584.0; +/// Least Exact Float (32 bits) greater-than i64::MAX when rounding towards zero. +pub const LEF32_GT_I64_MAX: f32 = 9223372036854775808.0; +/// Greatest Exact Float (32 bits) less-than u32::MIN when rounding towards zero. +pub const GEF32_LT_U32_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u32::MAX when rounding towards zero. +pub const LEF32_GT_U32_MAX: f32 = 4294967296.0; +/// Greatest Exact Float (32 bits) less-than u64::MIN when rounding towards zero. +pub const GEF32_LT_U64_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u64::MAX when rounding towards zero. +pub const LEF32_GT_U64_MAX: f32 = 18446744073709551616.0; + +/// Greatest Exact Float (64 bits) less-than i32::MIN when rounding towards zero. +pub const GEF64_LT_I32_MIN: f64 = -2147483649.0; +/// Least Exact Float (64 bits) greater-than i32::MAX when rounding towards zero. +pub const LEF64_GT_I32_MAX: f64 = 2147483648.0; +/// Greatest Exact Float (64 bits) less-than i64::MIN when rounding towards zero. +pub const GEF64_LT_I64_MIN: f64 = -9223372036854777856.0; +/// Least Exact Float (64 bits) greater-than i64::MAX when rounding towards zero. +pub const LEF64_GT_I64_MAX: f64 = 9223372036854775808.0; +/// Greatest Exact Float (64 bits) less-than u32::MIN when rounding towards zero. 
+pub const GEF64_LT_U32_MIN: f64 = -1.0;
+/// Least Exact Float (64 bits) greater-than u32::MAX when rounding towards zero.
+pub const LEF64_GT_U32_MAX: f64 = 4294967296.0;
+/// Greatest Exact Float (64 bits) less-than u64::MIN when rounding towards zero.
+pub const GEF64_LT_U64_MIN: f64 = -1.0;
+/// Least Exact Float (64 bits) greater-than u64::MAX when rounding towards zero.
+pub const LEF64_GT_U64_MAX: f64 = 18446744073709551616.0;
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
new file mode 100644
index 00000000000..d3d5519d13d
--- /dev/null
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -0,0 +1,2410 @@
+use crate::arm64_decl::new_machine_state;
+use crate::arm64_decl::{ARM64Register, ArgumentRegisterAllocator, GPR, NEON};
+use crate::common_decl::*;
+use crate::emitter_arm64::*;
+use crate::location::Location as AbstractLocation;
+use crate::machine::*;
+use dynasmrt::{aarch64::Aarch64Relocation, VecAssembler};
+use std::collections::HashSet;
+use wasmer_compiler::wasmparser::Type as WpType;
+use wasmer_compiler::{
+    CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, InstructionAddressMap,
+    Relocation, RelocationKind, RelocationTarget, SectionBody, SourceLoc, TrapInformation,
+};
+use wasmer_types::{FunctionIndex, FunctionType, Type};
+use wasmer_vm::{TrapCode, VMOffsets};
+
+type Assembler = VecAssembler<Aarch64Relocation>;
+type Location = AbstractLocation<GPR, NEON>;
+
+pub struct MachineARM64 {
+    assembler: Assembler,
+    used_gprs: HashSet<GPR>,
+    used_simd: HashSet<NEON>,
+    trap_table: TrapTable,
+    /// Map from byte offset into wasm function to range of native instructions.
+    ///
+    // Ordered by increasing InstructionAddressMap::srcloc.
+    instructions_address_map: Vec<InstructionAddressMap>,
+    /// The source location for the current operator.
+    src_loc: u32,
+}
+
+impl MachineARM64 {
+    pub fn new() -> Self {
+        MachineARM64 {
+            assembler: Assembler::new(0),
+            used_gprs: HashSet::new(),
+            used_simd: HashSet::new(),
+            trap_table: TrapTable::default(),
+            instructions_address_map: vec![],
+            src_loc: 0,
+        }
+    }
+    pub fn emit_relaxed_binop(
+        &mut self,
+        op: fn(&mut Assembler, Size, Location, Location),
+        sz: Size,
+        src: Location,
+        dst: Location,
+    ) {
+        unimplemented!();
+    }
+    /// I32 binary operation with both operands popped from the virtual stack.
+    fn emit_binop_i32(
+        &mut self,
+        f: fn(&mut Assembler, Size, Location, Location),
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        unimplemented!();
+    }
+    /// I64 binary operation with both operands popped from the virtual stack.
+    fn emit_binop_i64(
+        &mut self,
+        f: fn(&mut Assembler, Size, Location, Location),
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        if loc_a != ret {
+            let tmp = self.acquire_temp_gpr().unwrap();
+            self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp));
+            self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp));
+            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret);
+            self.release_gpr(tmp);
+        } else {
+            self.emit_relaxed_binop(f, Size::S64, loc_b, ret);
+        }
+    }
+    /// I64 comparison with a dynamic second operand.
+    fn emit_cmpop_i64_dynamic_b(
+        &mut self,
+        c: Condition,
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        unimplemented!();
+    }
+    /// I64 shift with both operands popped from the virtual stack.
+    fn emit_shift_i64(
+        &mut self,
+        f: fn(&mut Assembler, Size, Location, Location),
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        unimplemented!();
+    }
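`emit_binop_i64` above is the one helper with a real body so far: when the result location differs from the first operand, it stages the value through a scratch register so the destination is never clobbered mid-operation. Once the relaxed mov/binop helpers are filled in, a stack-slot `i64.add` should reduce to roughly the following (illustrative offsets; x0 stands in for the acquired temporary):

    // emit_binop_i64(Assembler::emit_add, loc_a, loc_b, ret), loc_a != ret
    //   ldr  x0, [x27, #a_off]    ; emit_relaxed_mov: loc_a -> tmp
    //   add  x0, x0, <loc_b>      ; emit_relaxed_binop applies `f`
    //   str  x0, [x27, #ret_off]  ; emit_relaxed_mov: tmp -> ret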
+    /// I32 comparison with a dynamic second operand.
+    fn emit_cmpop_i32_dynamic_b(
+        &mut self,
+        c: Condition,
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        unimplemented!();
+    }
+    /// I32 shift with both operands popped from the virtual stack.
+    fn emit_shift_i32(
+        &mut self,
+        f: fn(&mut Assembler, Size, Location, Location),
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+    ) {
+        unimplemented!();
+    }
+
+    fn memory_op<F: FnOnce(&mut Self, GPR)>(
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        check_alignment: bool,
+        value_size: usize,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+        cb: F,
+    ) {
+        unimplemented!();
+    }
+
+    fn emit_compare_and_swap<F: FnOnce(&mut Self, GPR, GPR)>(
+        &mut self,
+        loc: Location,
+        target: Location,
+        ret: Location,
+        memarg: &MemoryImmediate,
+        value_size: usize,
+        memory_sz: Size,
+        stack_sz: Size,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+        cb: F,
+    ) {
+        unimplemented!();
+    }
+
+    // Checks for underflow/overflow/nan.
+    fn emit_f32_int_conv_check(
+        &mut self,
+        reg: NEON,
+        lower_bound: f32,
+        upper_bound: f32,
+        underflow_label: Label,
+        overflow_label: Label,
+        nan_label: Label,
+        succeed_label: Label,
+    ) {
+        unimplemented!();
+    }
+
+    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32.
+    fn emit_f32_int_conv_check_trap(&mut self, reg: NEON, lower_bound: f32, upper_bound: f32) {
+        unimplemented!();
+    }
+    fn emit_f32_int_conv_check_sat<
+        F1: FnOnce(&mut Self),
+        F2: FnOnce(&mut Self),
+        F3: FnOnce(&mut Self),
+        F4: FnOnce(&mut Self),
+    >(
+        &mut self,
+        reg: NEON,
+        lower_bound: f32,
+        upper_bound: f32,
+        underflow_cb: F1,
+        overflow_cb: F2,
+        nan_cb: Option<F3>,
+        convert_cb: F4,
+    ) {
+        unimplemented!();
+    }
+    // Checks for underflow/overflow/nan.
+    fn emit_f64_int_conv_check(
+        &mut self,
+        reg: NEON,
+        lower_bound: f64,
+        upper_bound: f64,
+        underflow_label: Label,
+        overflow_label: Label,
+        nan_label: Label,
+        succeed_label: Label,
+    ) {
+        unimplemented!();
+    }
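These checks consume the `GEF*`/`LEF*` bounds added to machine.rs earlier in this patch. A worked instance for i32-from-f32: `i32::MIN` (-2147483648) is exactly representable as an f32 and the next f32 below it is -2147483904.0 (`GEF32_LT_I32_MIN`), while `i32::MAX` is not representable and the least f32 above it is 2147483648.0 (`LEF32_GT_I32_MAX`). The saturating variant's callback flow then models as plain Rust (a sketch of the logic, not of the emitted code):

    fn i32_trunc_sat_f32(x: f32) -> i32 {
        if x.is_nan() {
            0 // nan_cb
        } else if x <= GEF32_LT_I32_MIN {
            i32::MIN // underflow_cb
        } else if x >= LEF32_GT_I32_MAX {
            i32::MAX // overflow_cb
        } else {
            x as i32 // convert_cb
        }
    }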
+    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64; returns offset/len for trap_overflow and trap_badconv.
+    fn emit_f64_int_conv_check_trap(&mut self, reg: NEON, lower_bound: f64, upper_bound: f64) {
+        unimplemented!();
+    }
+    fn emit_f64_int_conv_check_sat<
+        F1: FnOnce(&mut Self),
+        F2: FnOnce(&mut Self),
+        F3: FnOnce(&mut Self),
+        F4: FnOnce(&mut Self),
+    >(
+        &mut self,
+        reg: NEON,
+        lower_bound: f64,
+        upper_bound: f64,
+        underflow_cb: F1,
+        overflow_cb: F2,
+        nan_cb: Option<F3>,
+        convert_cb: F4,
+    ) {
+        unimplemented!();
+    }
+
+    fn convert_i64_f64_u_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f64_u_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f64_s_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f64_s_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f64_s_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f64_s_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f64_u_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f64_u_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f32_u_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f32_u_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f32_s_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i64_f32_s_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f32_s_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f32_s_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f32_u_s(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+    fn convert_i32_f32_u_u(&mut self, loc: Location, ret: Location) {
+        unimplemented!();
+    }
+}
+
+impl Machine for MachineARM64 {
+    type GPR = GPR;
+    type SIMD = NEON;
+    fn assembler_get_offset(&self) -> Offset {
+        self.assembler.get_offset()
+    }
+    fn index_from_gpr(&self, x: GPR) -> RegisterIndex {
+        RegisterIndex(x as usize)
+    }
+    fn index_from_simd(&self, x: NEON) -> RegisterIndex {
+        RegisterIndex(x as usize + 32)
+    }
+
+    fn get_vmctx_reg(&self) -> GPR {
+        GPR::X28
+    }
+
+    fn get_used_gprs(&self) -> Vec<GPR> {
+        self.used_gprs.iter().cloned().collect()
+    }
+
+    fn get_used_simd(&self) -> Vec<NEON> {
+        self.used_simd.iter().cloned().collect()
+    }
+
+    fn pick_gpr(&self) -> Option<GPR> {
+        use GPR::*;
+        static REGS: &[GPR] = &[
+            X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15,
+        ];
+        for r in REGS {
+            if !self.used_gprs.contains(r) {
+                return Some(*r);
+            }
+        }
+        None
+    }
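Value registers are picked from X0..X15 while temporaries come from the overlapping X0..X7 range (see `pick_temp_gpr` just below), so correctness rests entirely on the shared `used_gprs` set: everything acquired must be released. A usage sketch:

    let mut m = MachineARM64::new();
    let tmp = m.acquire_temp_gpr().unwrap(); // picks X0, marks it used
    // ... emit code through `tmp` ...
    m.release_gpr(tmp); // otherwise pick_gpr/pick_temp_gpr skip X0 forever
                        // (release_gpr asserts against double-release)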
+    // Picks an unused general purpose register for internal temporary use.
+    fn pick_temp_gpr(&self) -> Option<GPR> {
+        use GPR::*;
+        static REGS: &[GPR] = &[X0, X1, X2, X3, X4, X5, X6, X7];
+        for r in REGS {
+            if !self.used_gprs.contains(r) {
+                return Some(*r);
+            }
+        }
+        None
+    }
+
+    fn acquire_temp_gpr(&mut self) -> Option<GPR> {
+        let gpr = self.pick_temp_gpr();
+        if let Some(x) = gpr {
+            self.used_gprs.insert(x);
+        }
+        gpr
+    }
+
+    fn release_gpr(&mut self, gpr: GPR) {
+        assert!(self.used_gprs.remove(&gpr));
+    }
+
+    fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR {
+        assert!(!self.used_gprs.contains(&gpr));
+        self.used_gprs.insert(gpr);
+        gpr
+    }
+
+    fn reserve_gpr(&mut self, gpr: GPR) {
+        self.used_gprs.insert(gpr);
+    }
+
+    fn push_used_gpr(&mut self) {
+        let used_gprs = self.get_used_gprs();
+        for r in used_gprs.iter() {
+            self.assembler.emit_push(Size::S64, Location::GPR(*r));
+        }
+    }
+    fn pop_used_gpr(&mut self) {
+        let used_gprs = self.get_used_gprs();
+        for r in used_gprs.iter().rev() {
+            self.assembler.emit_pop(Size::S64, Location::GPR(*r));
+        }
+    }
+
+    // Picks an unused NEON register.
+    fn pick_simd(&self) -> Option<NEON> {
+        use NEON::*;
+        static REGS: &[NEON] = &[V8, V9, V10, V11, V12];
+        for r in REGS {
+            if !self.used_simd.contains(r) {
+                return Some(*r);
+            }
+        }
+        None
+    }
+
+    // Picks an unused NEON register for internal temporary use.
+    fn pick_temp_simd(&self) -> Option<NEON> {
+        use NEON::*;
+        static REGS: &[NEON] = &[V0, V1, V2, V3, V4, V5, V6, V7];
+        for r in REGS {
+            if !self.used_simd.contains(r) {
+                return Some(*r);
+            }
+        }
+        None
+    }
+
+    // Acquires a temporary NEON register.
+    fn acquire_temp_simd(&mut self) -> Option<NEON> {
+        let simd = self.pick_temp_simd();
+        if let Some(x) = simd {
+            self.used_simd.insert(x);
+        }
+        simd
+    }
+
+    fn reserve_simd(&mut self, simd: NEON) {
+        self.used_simd.insert(simd);
+    }
+
+    // Releases a temporary NEON register.
+    fn release_simd(&mut self, simd: NEON) {
+        assert_eq!(self.used_simd.remove(&simd), true);
+    }
+
+    fn push_used_simd(&mut self) {
+        let used_neons = self.get_used_simd();
+        self.adjust_stack((used_neons.len() * 8) as u32);
+
+        for (i, r) in used_neons.iter().enumerate() {
+            self.assembler.emit_str(
+                Size::S64,
+                Location::SIMD(*r),
+                Location::Memory(GPR::XzrSp, (i * 8) as i32),
+            );
+        }
+    }
+    fn pop_used_simd(&mut self) {
+        let used_neons = self.get_used_simd();
+        for (i, r) in used_neons.iter().enumerate() {
+            self.assembler.emit_ldr(
+                Size::S64,
+                Location::SIMD(*r),
+                Location::Memory(GPR::XzrSp, (i * 8) as i32),
+            );
+        }
+        self.assembler.emit_add(
+            Size::S64,
+            Location::GPR(GPR::XzrSp),
+            Location::Imm32((used_neons.len() * 8) as u32),
+            Location::GPR(GPR::XzrSp),
+        );
+    }
+
+    /// Set the source location of the Wasm to the given offset.
+    fn set_srcloc(&mut self, offset: u32) {
+        self.src_loc = offset;
+    }
+    /// Marks each address in the code range emitted by `f` with the trap code `code`.
+    fn mark_address_range_with_trap_code(&mut self, code: TrapCode, begin: usize, end: usize) {
+        for i in begin..end {
+            self.trap_table.offset_to_code.insert(i, code);
+        }
+        self.mark_instruction_address_end(begin);
+    }
+
+    /// Marks one address as trappable with trap code `code`.
+    fn mark_address_with_trap_code(&mut self, code: TrapCode) {
+        let offset = self.assembler.get_offset().0;
+        self.trap_table.offset_to_code.insert(offset, code);
+        self.mark_instruction_address_end(offset);
+    }
+    /// Marks the instruction as trappable with trap code `code`; returns the "begin" offset.
+    fn mark_instruction_with_trap_code(&mut self, code: TrapCode) -> usize {
+        let offset = self.assembler.get_offset().0;
+        self.trap_table.offset_to_code.insert(offset, code);
+        offset
+    }
+    /// Pushes the instruction to the address map, calculating the offset from a
+    /// provided beginning address.
+    fn mark_instruction_address_end(&mut self, begin: usize) {
+        self.instructions_address_map.push(InstructionAddressMap {
+            srcloc: SourceLoc::new(self.src_loc),
+            code_offset: begin,
+            code_len: self.assembler.get_offset().0 - begin,
+        });
+    }
+
+    /// Insert a StackOverflow (at offset 0)
+    fn insert_stackoverflow(&mut self) {
+        let offset = 0;
+        self.trap_table
+            .offset_to_code
+            .insert(offset, TrapCode::StackOverflow);
+        self.mark_instruction_address_end(offset);
+    }
+
+    /// Get all current TrapInformation
+    fn collect_trap_information(&self) -> Vec<TrapInformation> {
+        self.trap_table
+            .offset_to_code
+            .clone()
+            .into_iter()
+            .map(|(offset, code)| TrapInformation {
+                code_offset: offset as u32,
+                trap_code: code,
+            })
+            .collect()
+    }
+
+    fn instructions_address_map(&self) -> Vec<InstructionAddressMap> {
+        self.instructions_address_map.clone()
+    }
+
+    // Memory location for a local on the stack
+    fn local_on_stack(&mut self, stack_offset: i32) -> Location {
+        Location::Memory(GPR::X27, -stack_offset)
+    }
+
+    // Adjust stack for locals
+    fn adjust_stack(&mut self, delta_stack_offset: u32) {
+        self.assembler.emit_sub(
+            Size::S64,
+            Location::GPR(GPR::XzrSp),
+            Location::Imm32(delta_stack_offset),
+            Location::GPR(GPR::XzrSp),
+        );
+    }
+    // restore stack
+    fn restore_stack(&mut self, delta_stack_offset: u32) {
+        self.assembler.emit_add(
+            Size::S64,
+            Location::GPR(GPR::XzrSp),
+            Location::Imm32(delta_stack_offset),
+            Location::GPR(GPR::XzrSp),
+        );
+    }
+    fn push_callee_saved(&mut self) {}
+    fn pop_callee_saved(&mut self) {}
+    fn pop_stack_locals(&mut self, delta_stack_offset: u32) {
+        self.assembler.emit_add(
+            Size::S64,
+            Location::GPR(GPR::XzrSp),
+            Location::Imm32(delta_stack_offset),
+            Location::GPR(GPR::XzrSp),
+        );
+    }
+    // push a value on the stack for a native call
+    fn push_location_for_native(&mut self, loc: Location) {
+        match loc {
+            Location::Imm64(_) => {
+                self.reserve_unused_temp_gpr(GPR::X4);
+                self.move_location(Size::S64, loc, Location::GPR(GPR::X4));
+                self.assembler.emit_push(Size::S64, Location::GPR(GPR::X4));
+                self.release_gpr(GPR::X4);
+            }
+            _ => self.assembler.emit_push(Size::S64, loc),
+        }
+    }
+
+    // Zero a location that is 32bits
+    fn zero_location(&mut self, size: Size, location: Location) {
+        match location {
+            Location::GPR(_) => self.assembler.emit_mov_imm(location, 0u64),
+            _ => unreachable!(),
+        }
+    }
+
+    // GPR Reg used for local pointer on the stack
+    fn local_pointer(&self) -> GPR {
+        GPR::X27
+    }
+
+    // Determine whether a local should be allocated on the stack.
+    fn is_local_on_stack(&self, idx: usize) -> bool {
+        idx > 7
+    }
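Locals 0..=7 live in callee-saved registers and so survive calls for free; anything past index 7 is `is_local_on_stack` and gets addressed at a negative offset from the X27 frame pointer, as `get_local_location` just below spells out:

    // Placement implied by is_local_on_stack/get_local_location:
    //   idx 0..=7 -> X18..=X25 (callee-saved GPRs)
    //   idx >= 8  -> [X27 - ((idx - 3) * 8 + callee_saved_regs_size)]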
+    // Determine a local's location.
+    fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
+        // Use callee-saved registers for the first locals.
+        match idx {
+            0 => Location::GPR(GPR::X18),
+            1 => Location::GPR(GPR::X19),
+            2 => Location::GPR(GPR::X20),
+            3 => Location::GPR(GPR::X21),
+            4 => Location::GPR(GPR::X22),
+            5 => Location::GPR(GPR::X23),
+            6 => Location::GPR(GPR::X24),
+            7 => Location::GPR(GPR::X25),
+            _ => Location::Memory(GPR::X27, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
+        }
+    }
+    // Move a local to the stack
+    fn move_local(&mut self, stack_offset: i32, location: Location) {
+        unimplemented!();
+    }
+
+    // List of register to save, depending on the CallingConvention
+    fn list_to_save(&self, calling_convention: CallingConvention) -> Vec<Location> {
+        vec![]
+    }
+
+    // Get param location
+    fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location {
+        match calling_convention {
+            _ => match idx {
+                0 => Location::GPR(GPR::X0),
+                1 => Location::GPR(GPR::X1),
+                2 => Location::GPR(GPR::X2),
+                3 => Location::GPR(GPR::X3),
+                4 => Location::GPR(GPR::X4),
+                5 => Location::GPR(GPR::X5),
+                6 => Location::GPR(GPR::X6),
+                7 => Location::GPR(GPR::X7),
+                _ => Location::Memory(GPR::X27, (16 + (idx - 8) * 8) as i32),
+            },
+        }
+    }
+    // move a location to another
+    fn move_location(&mut self, size: Size, source: Location, dest: Location) {
+        unimplemented!();
+    }
+    // move a location to another
+    fn move_location_extend(
+        &mut self,
+        size_val: Size,
+        signed: bool,
+        source: Location,
+        size_op: Size,
+        dest: Location,
+    ) {
+        unimplemented!();
+    }
+    fn load_address(&mut self, size: Size, reg: Location, mem: Location) {
+        unimplemented!();
+    }
+    // Init the stack loc counter
+    fn init_stack_loc(&mut self, init_stack_loc_cnt: u64, last_stack_loc: Location) {
+        unimplemented!();
+    }
+    // Restore save_area
+    fn restore_saved_area(&mut self, saved_area_offset: i32) {
+        unimplemented!();
+    }
+    // Pop a location
+    fn pop_location(&mut self, location: Location) {
+        self.assembler.emit_pop(Size::S64, location);
+    }
+    // Create a new `MachineState` with default values.
+    fn new_machine_state(&self) -> MachineState {
+        new_machine_state()
+    }
+
+    // assembler finalize
+    fn assembler_finalize(self) -> Vec<u8> {
+        self.assembler.finalize().unwrap()
+    }
+
+    fn get_offset(&self) -> Offset {
+        self.assembler.get_offset()
+    }
+
+    fn finalize_function(&mut self) {
+        self.assembler.finalize_function();
+    }
+
+    fn emit_function_prolog(&mut self) {
+        self.assembler.emit_double_push(
+            Size::S64,
+            Location::GPR(GPR::X27),
+            Location::GPR(GPR::X30),
+        ); // save LR too
+        self.move_location(
+            Size::S64,
+            Location::GPR(GPR::XzrSp),
+            Location::GPR(GPR::X27),
+        );
+    }
+
+    fn emit_function_epilog(&mut self) {
+        self.move_location(
+            Size::S64,
+            Location::GPR(GPR::X27),
+            Location::GPR(GPR::XzrSp),
+        );
+        self.assembler
+            .emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30));
+    }
+
+    fn emit_function_return_value(&mut self, ty: WpType, canonicalize: bool, loc: Location) {
+        if canonicalize {
+            self.canonicalize_nan(
+                match ty {
+                    WpType::F32 => Size::S32,
+                    WpType::F64 => Size::S64,
+                    _ => unreachable!(),
+                },
+                loc,
+                Location::GPR(GPR::X0),
+            );
+        } else {
+            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(GPR::X0));
+        }
+    }
+
+    fn emit_function_return_float(&mut self) {
+        self.move_location(Size::S64, Location::GPR(GPR::X0), Location::SIMD(NEON::V0));
+    }
+
+    fn arch_supports_canonicalize_nan(&self) -> bool {
+        self.assembler.arch_supports_canonicalize_nan()
+    }
+    fn canonicalize_nan(&mut self, sz: Size, input: Location, output: Location) {
+        unimplemented!();
+    }
+
+    fn emit_illegal_op(&mut self) {
+        self.assembler.emit_udf();
+    }
+    fn get_label(&mut self) -> Label {
+        self.assembler.new_dynamic_label()
+    }
+    fn emit_label(&mut self, label: Label) {
+        self.assembler.emit_label(label);
+    }
+    fn get_grp_for_call(&self) -> GPR {
+        GPR::X26
+    }
+    fn emit_call_register(&mut self, reg: GPR) {
+        self.assembler.emit_call_register(reg);
+    }
+    fn emit_call_label(&mut self, label: Label) {
+        self.assembler.emit_call_label(label);
+    }
+    fn get_gpr_for_ret(&self) -> GPR {
+        GPR::X26
+    }
+    fn get_simd_for_ret(&self) -> NEON {
+        NEON::V0
+    }
+
+    fn arch_requires_indirect_call_trampoline(&self) -> bool {
+        self.assembler.arch_requires_indirect_call_trampoline()
+    }
+
+    fn arch_emit_indirect_call_with_trampoline(&mut self, location: Location) {
+        self.assembler
+            .arch_emit_indirect_call_with_trampoline(location);
+    }
+
+    fn emit_call_location(&mut self, location: Location) {
+        unimplemented!();
+    }
+
+    fn location_address(&mut self, size: Size, source: Location, dest: Location) {
+        unimplemented!();
+    }
+    // logic
+    fn location_and(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
+        unimplemented!();
+    }
+    fn location_xor(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
+        unimplemented!();
+    }
+    fn location_or(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
+        unimplemented!();
+    }
+    fn location_test(&mut self, size: Size, source: Location, dest: Location) {
+        unimplemented!();
+    }
+    // math
+    fn location_add(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
+        unimplemented!();
+    }
+    fn location_sub(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
+        unimplemented!();
+    }
+    fn location_cmp(&mut self, size: Size, source: Location, dest: Location) {
+        unimplemented!();
+    }
+    // (un)conditional jmp
+    fn jmp_unconditionnal(&mut self, label: Label) {
+        self.assembler.emit_b_label(label);
+    }
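The jump helpers that follow map the machine-neutral names one-to-one onto AArch64 `b.cond` forms: unsigned comparisons use HI/CS/LS, and `jmp_on_overflow` branches on carry (CS), which appears to mirror the x86-64 backend's carry-based overflow checks (`jc`) rather than AArch64's V flag. For reference:

    // jmp_on_equal      -> b.eq  (Z == 1)
    // jmp_on_different  -> b.ne  (Z == 0)
    // jmp_on_above      -> b.hi  (unsigned >)
    // jmp_on_aboveequal -> b.cs  (C == 1; unsigned >=, alias b.hs)
    // jmp_on_belowequal -> b.ls  (unsigned <=)
    // jmp_on_overflow   -> b.cs  (carry out of adds/subs)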
label: Label) {
+ self.assembler.emit_bcond_label(Condition::Eq, label);
+ }
+ fn jmp_on_different(&mut self, label: Label) {
+ self.assembler.emit_bcond_label(Condition::Ne, label);
+ }
+ fn jmp_on_above(&mut self, label: Label) {
+ self.assembler.emit_bcond_label(Condition::Hi, label);
+ }
+ fn jmp_on_aboveequal(&mut self, label: Label) {
+ self.assembler.emit_bcond_label(Condition::Cs, label);
+ }
+ fn jmp_on_belowequal(&mut self, label: Label) {
+ self.assembler.emit_bcond_label(Condition::Ls, label);
+ }
+ fn jmp_on_overflow(&mut self, label: Label) {
+ self.assembler.emit_bcond_label(Condition::Cs, label);
+ }
+
+ // jmp table
+ fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) {
+ unimplemented!();
+ }
+
+ fn align_for_loop(&mut self) {
+ // nothing to do on ARM64
+ }
+
+ fn emit_ret(&mut self) {
+ self.assembler.emit_ret();
+ }
+
+ fn emit_push(&mut self, size: Size, loc: Location) {
+ self.assembler.emit_push(size, loc);
+ }
+ fn emit_pop(&mut self, size: Size, loc: Location) {
+ self.assembler.emit_pop(size, loc);
+ }
+
+ fn emit_memory_fence(&mut self) {
+ // nothing on ARM64
+ }
+
+ fn location_neg(
+ &mut self,
+ size_val: Size, // size of src
+ signed: bool,
+ source: Location,
+ size_op: Size,
+ dest: Location,
+ ) {
+ unimplemented!();
+ }
+
+ fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) {
+ unimplemented!();
+ }
+
+ // relaxed binop based...
+ fn emit_relaxed_mov(&mut self, sz: Size, src: Location, dst: Location) {
+ unimplemented!();
+ }
+ fn emit_relaxed_cmp(&mut self, sz: Size, src: Location, dst: Location) {
+ unimplemented!();
+ }
+ fn emit_relaxed_zero_extension(
+ &mut self,
+ sz_src: Size,
+ src: Location,
+ sz_dst: Size,
+ dst: Location,
+ ) {
+ unimplemented!();
+ }
+ fn emit_relaxed_sign_extension(
+ &mut self,
+ sz_src: Size,
+ src: Location,
+ sz_dst: Size,
+ dst: Location,
+ ) {
+ unimplemented!();
+ }
+
+ fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_udiv32(
+ &mut self,
+ loc_a: Location,
+ loc_b: Location,
+ ret: Location,
+ integer_division_by_zero: Label,
+ ) -> usize {
+ unimplemented!();
+ }
+ fn emit_binop_sdiv32(
+ &mut self,
+ loc_a: Location,
+ loc_b: Location,
+ ret: Location,
+ integer_division_by_zero: Label,
+ ) -> usize {
+ unimplemented!();
+ }
+ fn emit_binop_urem32(
+ &mut self,
+ loc_a: Location,
+ loc_b: Location,
+ ret: Location,
+ integer_division_by_zero: Label,
+ ) -> usize {
+ unimplemented!();
+ }
+ fn emit_binop_srem32(
+ &mut self,
+ loc_a: Location,
+ loc_b: Location,
+ ret: Location,
+ integer_division_by_zero: Label,
+ ) -> usize {
+ unimplemented!();
+ }
+ fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn i32_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn i32_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn i32_cmp_lt_s(&mut self,
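/* A natural lowering for these i32 comparisons is CMP followed by CSET; all of the stubs below still panic at this stage. */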
loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_clz(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i32_ctz(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i32_popcnt(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i32_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_load_8s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_load_16u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_load_16s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_load_16u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_save( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_save_8( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + 
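/* heap_access_oob is the trap label taken on an out-of-bounds memory access, per the shared Machine trait signature. */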
heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_save_16( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_save( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_save_8( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_save_16( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Add with i32 + fn i32_atomic_add( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Add with u8 + fn i32_atomic_add_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Add with u16 + fn i32_atomic_add_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with i32 + fn i32_atomic_sub( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with u8 + fn i32_atomic_sub_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with u16 + fn i32_atomic_sub_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with i32 + fn i32_atomic_and( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with u8 + fn i32_atomic_and_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with u16 + fn i32_atomic_and_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Or with i32 + fn i32_atomic_or( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + 
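/* A plausible ARM64 lowering for the atomic read-modify-write stubs is an LDAXR/STLXR retry loop (or LSE atomics where available); this patch leaves them all unimplemented. */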
heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Or with u8
+ fn i32_atomic_or_8u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Or with u16
+ fn i32_atomic_or_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Xor with i32
+ fn i32_atomic_xor(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Xor with u8
+ fn i32_atomic_xor_8u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Xor with u16
+ fn i32_atomic_xor_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Exchange with i32
+ fn i32_atomic_xchg(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Exchange with u8
+ fn i32_atomic_xchg_8u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Exchange with u16
+ fn i32_atomic_xchg_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Compare-Exchange with i32
+ fn i32_atomic_cmpxchg(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Compare-Exchange with u8
+ fn i32_atomic_cmpxchg_8u(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i32 atomic Compare-Exchange with u16
+ fn i32_atomic_cmpxchg_16u(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+
+ fn move_with_reloc(
+ &mut self,
+ reloc_target: RelocationTarget,
+ relocations: &mut Vec<Relocation>,
+ ) {
+ unimplemented!();
+ }
+
+ fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ unimplemented!();
+ }
+ fn emit_binop_udiv64(
+ &mut self,
+
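/* The usize returned by the division helpers is the code offset of the trapping instruction, used for trap metadata; this mirrors the x64 backend. */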
loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + ) -> usize { + unimplemented!(); + } + fn emit_binop_sdiv64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + ) -> usize { + unimplemented!(); + } + fn emit_binop_urem64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + ) -> usize { + unimplemented!(); + } + fn emit_binop_srem64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + ) -> usize { + unimplemented!(); + } + fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_clz(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i64_ctz(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i64_popcnt(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn i64_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_load_8s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_load_16u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + 
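/* The narrow i64 loads would map onto LDRB/LDRSB and LDRH/LDRSH once implemented. */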
unimplemented!(); + } + fn i64_load_16s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_load_32u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_load_32s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_load_16u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_load_32u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_save( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_save_8( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_save_16( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_save_32( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_save( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_save_8( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_save_16( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i64_atomic_save_32( + &mut self, + value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Add with i64 + fn i64_atomic_add( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: 
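/* static offset added to the memory base; meaning inferred from the x64 backend */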
i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Add with u8 + fn i64_atomic_add_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Add with u16 + fn i64_atomic_add_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Add with u32 + fn i64_atomic_add_32u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Sub with i64 + fn i64_atomic_sub( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Sub with u8 + fn i64_atomic_sub_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Sub with u16 + fn i64_atomic_sub_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Sub with u32 + fn i64_atomic_sub_32u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic And with i64 + fn i64_atomic_and( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic And with u8 + fn i64_atomic_and_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic And with u16 + fn i64_atomic_and_16u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic And with u32 + fn i64_atomic_and_32u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Or with i64 + fn i64_atomic_or( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Or with u8 + fn i64_atomic_or_8u( + &mut self, + loc: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + // i64 atomic Or with u16 + fn 
i64_atomic_or_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Or with u32
+ fn i64_atomic_or_32u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Xor with i64
+ fn i64_atomic_xor(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Xor with u8
+ fn i64_atomic_xor_8u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Xor with u16
+ fn i64_atomic_xor_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Xor with u32
+ fn i64_atomic_xor_32u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Exchange with i64
+ fn i64_atomic_xchg(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Exchange with u8
+ fn i64_atomic_xchg_8u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Exchange with u16
+ fn i64_atomic_xchg_16u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Exchange with u32
+ fn i64_atomic_xchg_32u(
+ &mut self,
+ loc: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Compare-Exchange with i64
+ fn i64_atomic_cmpxchg(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Compare-Exchange with u8
+ fn i64_atomic_cmpxchg_8u(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Compare-Exchange with u16
+ fn i64_atomic_cmpxchg_16u(
+ &mut self,
+ new: Location,
+ cmp: Location,
+ target: Location,
+ memarg: &MemoryImmediate,
+ ret: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
+ ) {
+ unimplemented!();
+ }
+ // i64 atomic Compare-Exchange with u32
+
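/* cmpxchg is the compare-and-swap form: ret receives the old value and the store happens only when it equals cmp. */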
fn i64_atomic_cmpxchg_32u( + &mut self, + new: Location, + cmp: Location, + target: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + + fn f32_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn f32_save( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + canonicalize: bool, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn f64_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + fn f64_save( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + canonicalize: bool, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + unimplemented!(); + } + + fn convert_f64_i64(&mut self, loc: Location, signed: bool, ret: Location) { + unimplemented!(); + } + fn convert_f64_i32(&mut self, loc: Location, signed: bool, ret: Location) { + unimplemented!(); + } + fn convert_f32_i64(&mut self, loc: Location, signed: bool, ret: Location) { + unimplemented!(); + } + fn convert_f32_i32(&mut self, loc: Location, signed: bool, ret: Location) { + unimplemented!(); + } + fn convert_i64_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + match (signed, sat) { + (false, true) => self.convert_i64_f64_u_s(loc, ret), + (false, false) => self.convert_i64_f64_u_u(loc, ret), + (true, true) => self.convert_i64_f64_s_s(loc, ret), + (true, false) => self.convert_i64_f64_s_u(loc, ret), + } + } + fn convert_i32_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + match (signed, sat) { + (false, true) => self.convert_i32_f64_u_s(loc, ret), + (false, false) => self.convert_i32_f64_u_u(loc, ret), + (true, true) => self.convert_i32_f64_s_s(loc, ret), + (true, false) => self.convert_i32_f64_s_u(loc, ret), + } + } + fn convert_i64_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + match (signed, sat) { + (false, true) => self.convert_i64_f32_u_s(loc, ret), + (false, false) => self.convert_i64_f32_u_u(loc, ret), + (true, true) => self.convert_i64_f32_s_s(loc, ret), + (true, false) => self.convert_i64_f32_s_u(loc, ret), + } + } + fn convert_i32_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + match (signed, sat) { + (false, true) => self.convert_i32_f32_u_s(loc, ret), + (false, false) => self.convert_i32_f32_u_u(loc, ret), + (true, true) => self.convert_i32_f32_s_s(loc, ret), + (true, false) => self.convert_i32_f32_s_u(loc, ret), + } + } + fn convert_f64_f32(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn convert_f32_f64(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_neg(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_abs(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + unimplemented!(); + } + fn f64_sqrt(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_trunc(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_ceil(&mut self, 
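/* ceil/floor/trunc/nearest each have a single-instruction ARM64 counterpart (FRINTP/FRINTM/FRINTZ/FRINTN), so no libcall should be needed here (assumption). */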
loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_floor(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_nearest(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_neg(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_abs(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + unimplemented!(); + } + fn f32_sqrt(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_trunc(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_ceil(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_floor(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_nearest(&mut self, loc: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + unimplemented!(); + } + + fn gen_std_trampoline( + &self, + sig: &FunctionType, + calling_convention: CallingConvention, + ) -> FunctionBody { + unimplemented!(); + } + // Generates dynamic import function call trampoline for a function type. 
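/* All three trampoline generators still panic on ARM64 at this point in the series. */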
+ fn gen_std_dynamic_import_trampoline( + &self, + vmoffsets: &VMOffsets, + sig: &FunctionType, + calling_convention: CallingConvention, + ) -> FunctionBody { + unimplemented!(); + } + // Singlepass calls import functions through a trampoline. + fn gen_import_call_trampoline( + &self, + vmoffsets: &VMOffsets, + index: FunctionIndex, + sig: &FunctionType, + calling_convention: CallingConvention, + ) -> CustomSection { + unimplemented!(); + } +} diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 3f428fe19fe..e935ee864b8 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1,8 +1,7 @@ use crate::common_decl::*; use crate::emitter_x64::*; use crate::location::Location as AbstractLocation; -use crate::machine::Machine; -use crate::machine::{MemoryImmediate, TrapTable}; +use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; use dynasmrt::{x64::X64Relocation, VecAssembler}; @@ -6916,42 +6915,3 @@ impl Machine for MachineX86_64 { } } } - -// Constants for the bounds of truncation operations. These are the least or -// greatest exact floats in either f32 or f64 representation less-than (for -// least) or greater-than (for greatest) the i32 or i64 or u32 or u64 -// min (for least) or max (for greatest), when rounding towards zero. - -/// Greatest Exact Float (32 bits) less-than i32::MIN when rounding towards zero. -const GEF32_LT_I32_MIN: f32 = -2147483904.0; -/// Least Exact Float (32 bits) greater-than i32::MAX when rounding towards zero. -const LEF32_GT_I32_MAX: f32 = 2147483648.0; -/// Greatest Exact Float (32 bits) less-than i64::MIN when rounding towards zero. -const GEF32_LT_I64_MIN: f32 = -9223373136366403584.0; -/// Least Exact Float (32 bits) greater-than i64::MAX when rounding towards zero. -const LEF32_GT_I64_MAX: f32 = 9223372036854775808.0; -/// Greatest Exact Float (32 bits) less-than u32::MIN when rounding towards zero. -const GEF32_LT_U32_MIN: f32 = -1.0; -/// Least Exact Float (32 bits) greater-than u32::MAX when rounding towards zero. -const LEF32_GT_U32_MAX: f32 = 4294967296.0; -/// Greatest Exact Float (32 bits) less-than u64::MIN when rounding towards zero. -const GEF32_LT_U64_MIN: f32 = -1.0; -/// Least Exact Float (32 bits) greater-than u64::MAX when rounding towards zero. -const LEF32_GT_U64_MAX: f32 = 18446744073709551616.0; - -/// Greatest Exact Float (64 bits) less-than i32::MIN when rounding towards zero. -const GEF64_LT_I32_MIN: f64 = -2147483649.0; -/// Least Exact Float (64 bits) greater-than i32::MAX when rounding towards zero. -const LEF64_GT_I32_MAX: f64 = 2147483648.0; -/// Greatest Exact Float (64 bits) less-than i64::MIN when rounding towards zero. -const GEF64_LT_I64_MIN: f64 = -9223372036854777856.0; -/// Least Exact Float (64 bits) greater-than i64::MAX when rounding towards zero. -const LEF64_GT_I64_MAX: f64 = 9223372036854775808.0; -/// Greatest Exact Float (64 bits) less-than u32::MIN when rounding towards zero. -const GEF64_LT_U32_MIN: f64 = -1.0; -/// Least Exact Float (64 bits) greater-than u32::MAX when rounding towards zero. -const LEF64_GT_U32_MAX: f64 = 4294967296.0; -/// Greatest Exact Float (64 bits) less-than u64::MIN when rounding towards zero. -const GEF64_LT_U64_MIN: f64 = -1.0; -/// Least Exact Float (64 bits) greater-than u64::MAX when rounding towards zero. 
-const LEF64_GT_U64_MAX: f64 = 18446744073709551616.0; From 26d334f4acf356219e9afcadfcd340f8c16917bb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 13 Dec 2021 12:42:41 +0100 Subject: [PATCH 04/34] improv(compiler) Enabled aarch64 tests --- Makefile | 3 +++ lib/compiler-singlepass/src/compiler.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index afc2b6c9fb1..38eb1d967f8 100644 --- a/Makefile +++ b/Makefile @@ -255,6 +255,9 @@ ifeq ($(ENABLE_SINGLEPASS), 1) ifeq ($(IS_AMD64), 1) compilers_engines += singlepass-universal endif + ifeq ($(IS_AARCH64), 1) + compilers_engines += singlepass-universal + endif endif endif diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 95c86b3f834..960afe818d0 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -69,7 +69,7 @@ impl Compiler for SinglepassCompiler { )) } } - if target.triple().architecture != Architecture::X86_64 + if target.triple().architecture == Architecture::X86_64 && !target.cpu_features().contains(CpuFeature::AVX) { return Err(CompileError::UnsupportedTarget( From 900220826e55b0539d63ed29a63511ab3d77dddc Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 13 Dec 2021 16:34:44 +0100 Subject: [PATCH 05/34] improv(compiler) Added some machine_arm64 methods --- lib/compiler-singlepass/src/emitter_arm64.rs | 220 +++++++++--- lib/compiler-singlepass/src/machine_arm64.rs | 350 +++++++++++++------ 2 files changed, 409 insertions(+), 161 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index d38f646538e..6ee7a6d9d8f 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -70,17 +70,20 @@ pub trait EmitterARM64 { fn emit_str(&mut self, sz: Size, src: Location, dst: Location); fn emit_ldr(&mut self, sz: Size, src: Location, dst: Location); + fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); + fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); + fn emit_strbd(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldrai(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_stpbd(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); + fn emit_ldpai(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); + + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location); fn emit_mov_imm(&mut self, dst: Location, val: u64); fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); - fn emit_push(&mut self, sz: Size, src: Location); - fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location); - fn emit_pop(&mut self, sz: Size, dst: Location); - fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location); - fn emit_label(&mut self, label: Label); fn emit_b_label(&mut self, label: Label); fn emit_bcond_label(&mut self, condition: Condition, label: Label); @@ -198,6 +201,146 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } + fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; stur X(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let 
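/* STUR/LDUR take a signed 9-bit unscaled offset, hence the i32 offset parameter on these helpers. */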
reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; stur W(reg), [X(addr), offset]); + } + (Size::S64, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; stur D(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldur X(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldur W(reg), [X(addr), offset]); + } + (Size::S64, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldur D(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + + fn emit_strbd(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; str X(reg), [X(addr), -(offset as i32)]!); + } + (Size::S64, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; str D(reg), [X(addr), -(offset as i32)]!); + } + _ => unreachable!(), + } + } + fn emit_ldrai(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldr X(reg), [X(addr)], offset); + } + (Size::S64, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldr D(reg), [X(addr)], offset); + } + _ => unreachable!(), + } + } + + fn emit_stpbd(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + match (sz, reg1, reg2) { + (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { + let reg1 = reg1.into_index() as u32; + let reg2 = reg2.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; stp X(reg1), X(reg2), [X(addr), -(offset as i32)]!); + } + _ => unreachable!(), + } + } + fn emit_ldpai(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + match (sz, reg1, reg2) { + (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { + let reg1 = reg1.into_index() as u32; + let reg2 = reg2.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldp X(reg1), X(reg2), [X(addr)], offset); + } + _ => unreachable!(), + } + } + + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) { + match(sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov X(dst), X(src)); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov W(dst), W(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov V(dst).D[0], V(src).D[0]); + } + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov V(dst).S[0], V(src).S[0]); + } + (Size::S64, 
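/* the remaining arms move between general and vector registers through lane 0, i.e. V.D[0]/V.S[0] */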
Location::GPR(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov V(dst).D[0], X(src)); + } + (Size::S32, Location::GPR(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov V(dst).S[0], W(src)); + } + (Size::S64, Location::SIMD(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov X(dst), V(src).D[0]); + } + (Size::S32, Location::SIMD(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mov W(dst), V(src).S[0]); + } + _ => unreachable!(), + } + } fn emit_mov_imm(&mut self, dst: Location, val: u64) { match dst { @@ -229,6 +372,16 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add W(dst), W(src1), W(src2)); } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add W(dst), W(src1), imm as u32); + } _ => panic!( "singlepass can't emit ADD {:?} {:?} {:?} {:?}", sz, src1, src2, dst @@ -249,62 +402,19 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; sub W(dst), W(src1), W(src2)); } - _ => panic!( - "singlepass can't emit ADD {:?} {:?} {:?} {:?}", - sz, src1, src2, dst - ), - } - } - - fn emit_push(&mut self, sz: Size, src: Location) { - match (sz, src) { - (Size::S64, Location::GPR(src)) => { - let src = src.into_index() as u32; - dynasm!(self ; str X(src), [sp, -16]!); - } - (Size::S64, Location::SIMD(src)) => { - let src = src.into_index() as u32; - dynasm!(self ; str Q(src), [sp, -16]!); - } - _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src), - } - } - fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location) { - match (sz, src1, src2) { - (Size::S64, Location::GPR(src1), Location::GPR(src2)) => { + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; - let src2 = src2.into_index() as u32; - dynasm!(self ; stp X(src1), X(src2), [sp, -16]!); - } - _ => panic!( - "singlepass can't emit DOUBLE PUSH {:?} {:?} {:?}", - sz, src1, src2 - ), - } - } - fn emit_pop(&mut self, sz: Size, dst: Location) { - match (sz, dst) { - (Size::S64, Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; ldr X(dst), [sp], 16); + dynasm!(self ; sub X(dst), X(src1), imm as u32); } - (Size::S64, Location::SIMD(dst)) => { + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - dynasm!(self ; ldr Q(dst), [sp], 16); - } - _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, dst), - } - } - fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location) { - match (sz, dst1, dst2) { - (Size::S64, Location::GPR(dst1), Location::GPR(dst2)) => { - let dst1 = dst1.into_index() as u32; - let dst2 = dst2.into_index() as u32; - dynasm!(self ; ldp X(dst1), X(dst2), [sp], 16); + dynasm!(self ; sub W(dst), W(src1), imm as u32); } _ => panic!( - "singlepass can't emit DOUBLE PUSH {:?} {:?} {:?}", - sz, dst1, dst2 + "singlepass 
can't emit SUB {:?} {:?} {:?} {:?}",
+ sz, src1, src2, dst
+ ),
+ }
+ }
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index d3d5519d13d..62fe0e47c0c 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -28,6 +28,8 @@ pub struct MachineARM64 {
 instructions_address_map: Vec<InstructionAddressMap>,
 /// The source location for the current operator.
 src_loc: u32,
+ /// Is the last push only half way into a 16-byte stack slot? (pushes reserve 16 bytes and fill them 8 at a time)
+ pushed: bool,
 }
 
 impl MachineARM64 {
@@ -39,24 +41,25 @@
 trap_table: TrapTable::default(),
 instructions_address_map: vec![],
 src_loc: 0,
+ pushed: false,
 }
 }
- pub fn emit_relaxed_binop(
+ fn emit_relaxed_binop(
 &mut self,
- op: fn(&mut Assembler, Size, Location, Location),
- sz: Size,
- src: Location,
- dst: Location,
+ _op: fn(&mut Assembler, Size, Location, Location),
+ _sz: Size,
+ _src: Location,
+ _dst: Location,
 ) {
 unimplemented!();
 }
 /// I32 binary operation with both operands popped from the virtual stack.
 fn emit_binop_i32(
 &mut self,
- f: fn(&mut Assembler, Size, Location, Location),
- loc_a: Location,
- loc_b: Location,
- ret: Location,
+ _f: fn(&mut Assembler, Size, Location, Location),
+ _loc_a: Location,
+ _loc_b: Location,
+ _ret: Location,
 ) {
 unimplemented!();
 }
@@ -81,73 +84,73 @@
 /// I64 comparison with a dynamic second operand.
 fn emit_cmpop_i64_dynamic_b(
 &mut self,
- c: Condition,
- loc_a: Location,
- loc_b: Location,
- ret: Location,
+ _c: Condition,
+ _loc_a: Location,
+ _loc_b: Location,
+ _ret: Location,
 ) {
 unimplemented!();
 }
 /// I64 shift with both operands popped from the virtual stack.
 fn emit_shift_i64(
 &mut self,
- f: fn(&mut Assembler, Size, Location, Location),
- loc_a: Location,
- loc_b: Location,
- ret: Location,
+ _f: fn(&mut Assembler, Size, Location, Location),
+ _loc_a: Location,
+ _loc_b: Location,
+ _ret: Location,
 ) {
 unimplemented!();
 }
 /// I32 comparison with a dynamic second operand.
 fn emit_cmpop_i32_dynamic_b(
 &mut self,
- c: Condition,
- loc_a: Location,
- loc_b: Location,
- ret: Location,
+ _c: Condition,
+ _loc_a: Location,
+ _loc_b: Location,
+ _ret: Location,
 ) {
 unimplemented!();
 }
 /// I32 shift with both operands popped from the virtual stack.
 fn emit_shift_i32(
 &mut self,
- f: fn(&mut Assembler, Size, Location, Location),
- loc_a: Location,
- loc_b: Location,
- ret: Location,
+ _f: fn(&mut Assembler, Size, Location, Location),
+ _loc_a: Location,
+ _loc_b: Location,
+ _ret: Location,
 ) {
 unimplemented!();
 }
 fn memory_op<F: FnOnce(&mut Self, GPR)>(
 &mut self,
- addr: Location,
- memarg: &MemoryImmediate,
- check_alignment: bool,
- value_size: usize,
- need_check: bool,
- imported_memories: bool,
- offset: i32,
- heap_access_oob: Label,
- cb: F,
+ _addr: Location,
+ _memarg: &MemoryImmediate,
+ _check_alignment: bool,
+ _value_size: usize,
+ _need_check: bool,
+ _imported_memories: bool,
+ _offset: i32,
+ _heap_access_oob: Label,
+ _cb: F,
 ) {
 unimplemented!();
 }
 fn emit_compare_and_swap<F: FnOnce(&mut Self, GPR, GPR)>(
 &mut self,
- loc: Location,
- target: Location,
- ret: Location,
- memarg: &MemoryImmediate,
- value_size: usize,
- memory_sz: Size,
- stack_sz: Size,
- need_check: bool,
- imported_memories: bool,
- offset: i32,
- heap_access_oob: Label,
- cb: F,
+ _loc: Location,
+ _target: Location,
+ _ret: Location,
+ _memarg: &MemoryImmediate,
+ _value_size: usize,
+ _memory_sz: Size,
+ _stack_sz: Size,
+ _need_check: bool,
+ _imported_memories: bool,
+ _offset: i32,
+ _heap_access_oob: Label,
+ _cb: F,
 ) {
 unimplemented!();
 }
@@ -155,13 +158,13 @@
 // Checks for underflow/overflow/nan.
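/* The float-to-int range-check scaffolding below mirrors the x64 backend; every body in this patch still panics via unimplemented!(). */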
fn emit_f32_int_conv_check( &mut self, - reg: NEON, - lower_bound: f32, - upper_bound: f32, - underflow_label: Label, - overflow_label: Label, - nan_label: Label, - succeed_label: Label, + _reg: NEON, + _lower_bound: f32, + _upper_bound: f32, + _underflow_label: Label, + _overflow_label: Label, + _nan_label: Label, + _succeed_label: Label, ) { unimplemented!(); } @@ -177,26 +180,26 @@ impl MachineARM64 { F4: FnOnce(&mut Self), >( &mut self, - reg: NEON, - lower_bound: f32, - upper_bound: f32, - underflow_cb: F1, - overflow_cb: F2, - nan_cb: Option, - convert_cb: F4, + _reg: NEON, + _lower_bound: f32, + _upper_bound: f32, + _underflow_cb: F1, + _overflow_cb: F2, + _nan_cb: Option, + _convert_cb: F4, ) { unimplemented!(); } // Checks for underflow/overflow/nan. fn emit_f64_int_conv_check( &mut self, - reg: NEON, - lower_bound: f64, - upper_bound: f64, - underflow_label: Label, - overflow_label: Label, - nan_label: Label, - succeed_label: Label, + _reg: NEON, + _lower_bound: f64, + _upper_bound: f64, + _underflow_label: Label, + _overflow_label: Label, + _nan_label: Label, + _succeed_label: Label, ) { unimplemented!(); } @@ -211,65 +214,143 @@ impl MachineARM64 { F4: FnOnce(&mut Self), >( &mut self, - reg: NEON, - lower_bound: f64, - upper_bound: f64, - underflow_cb: F1, - overflow_cb: F2, - nan_cb: Option, - convert_cb: F4, + _reg: NEON, + _lower_bound: f64, + _upper_bound: f64, + _underflow_cb: F1, + _overflow_cb: F2, + _nan_cb: Option, + _convert_cb: F4, ) { unimplemented!(); } - fn convert_i64_f64_u_s(&mut self, loc: Location, ret: Location) { + fn convert_i64_f64_u_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f64_u_u(&mut self, loc: Location, ret: Location) { + fn convert_i64_f64_u_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f64_s_s(&mut self, loc: Location, ret: Location) { + fn convert_i64_f64_s_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f64_s_u(&mut self, loc: Location, ret: Location) { + fn convert_i64_f64_s_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i32_f64_s_s(&mut self, loc: Location, ret: Location) { + fn convert_i32_f64_s_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i32_f64_s_u(&mut self, loc: Location, ret: Location) { + fn convert_i32_f64_s_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i32_f64_u_s(&mut self, loc: Location, ret: Location) { + fn convert_i32_f64_u_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i32_f64_u_u(&mut self, loc: Location, ret: Location) { + fn convert_i32_f64_u_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f32_u_s(&mut self, loc: Location, ret: Location) { + fn convert_i64_f32_u_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f32_u_u(&mut self, loc: Location, ret: Location) { + fn convert_i64_f32_u_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f32_s_s(&mut self, loc: Location, ret: Location) { + fn convert_i64_f32_s_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i64_f32_s_u(&mut self, loc: Location, ret: Location) { + fn convert_i64_f32_s_u(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_i32_f32_s_s(&mut self, loc: Location, ret: Location) { + fn convert_i32_f32_s_s(&mut self, _loc: Location, _ret: Location) { unimplemented!(); 
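/* Suffix convention, per the dispatch in convert_i32_f32: the first letter is the integer's signedness, the second is saturating (s) versus trapping (u). */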
 }
- fn convert_i32_f32_s_u(&mut self, loc: Location, ret: Location) {
+ fn convert_i32_f32_s_u(&mut self, _loc: Location, _ret: Location) {
 unimplemented!();
 }
- fn convert_i32_f32_u_s(&mut self, loc: Location, ret: Location) {
+ fn convert_i32_f32_u_s(&mut self, _loc: Location, _ret: Location) {
 unimplemented!();
 }
- fn convert_i32_f32_u_u(&mut self, loc: Location, ret: Location) {
+ fn convert_i32_f32_u_u(&mut self, _loc: Location, _ret: Location) {
 unimplemented!();
 }
+
+ fn offset_is_ok(&self, size: Size, offset: i32) -> bool {
+ if offset < 0 {
+ return false;
+ }
+ let shift = match size {
+ Size::S8 => 0,
+ Size::S16 => 1,
+ Size::S32 => 2,
+ Size::S64 => 3,
+ };
+ if offset >= 0x1000 << shift {
+ return false;
+ }
+ if (offset >> shift) << shift != offset {
+ return false;
+ }
+ true
+ }
+
+ fn emit_push(&mut self, sz: Size, src: Location) {
+ match (sz, src) {
+ (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => {
+ let offset = if self.pushed {
+ 8
+ } else {
+ self.assembler.emit_sub(Size::S64, Location::GPR(GPR::XzrSp), Location::Imm8(16), Location::GPR(GPR::XzrSp));
+ 0
+ };
+ self.assembler.emit_str(Size::S64, src, Location::Memory(GPR::XzrSp, offset));
+ self.pushed = !self.pushed;
+ }
+ _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src),
+ }
+ }
+ fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location) {
+ if !self.pushed {
+ match (sz, src1, src2) {
+ (Size::S64, Location::GPR(_), Location::GPR(_)) => {
+ self.assembler.emit_stpbd(Size::S64, src1, src2, GPR::XzrSp, 16);
+ }
+ _ => {
+ self.emit_push(sz, src1);
+ self.emit_push(sz, src2);
+ }
+ }
+ } else {
+ self.emit_push(sz, src1);
+ self.emit_push(sz, src2);
+ }
+ }
+ fn emit_pop(&mut self, sz: Size, dst: Location) {
+ match (sz, dst) {
+ (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => {
+ let offset = if self.pushed {
+ 0
+ } else {
+ 8
+ };
+ self.assembler.emit_ldr(Size::S64, dst, Location::Memory(GPR::XzrSp, offset));
+ if self.pushed {
+ self.assembler.emit_add(Size::S64, Location::GPR(GPR::XzrSp), Location::Imm8(16), Location::GPR(GPR::XzrSp));
+ }
+ self.pushed = !self.pushed;
+ }
+ _ => panic!("singlepass can't emit POP {:?} {:?}", sz, dst),
+ }
+ }
+ fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location) {
+ if !self.pushed {
+ match (sz, dst1, dst2) {
+ (Size::S64, Location::GPR(_), Location::GPR(_)) => {
+ self.assembler.emit_ldpai(Size::S64, dst1, dst2, GPR::XzrSp, 16);
+ }
+ _ => {
+ self.emit_pop(sz, dst2);
+ self.emit_pop(sz, dst1);
+ }
+ }
+ } else {
+ self.emit_pop(sz, dst2);
+ self.emit_pop(sz, dst1);
+ }
+ }
 }
 
 impl Machine for MachineARM64 {
@@ -347,13 +428,13 @@ impl Machine for MachineARM64 {
 fn push_used_gpr(&mut self) {
 let used_gprs = self.get_used_gprs();
 for r in used_gprs.iter() {
- self.assembler.emit_push(Size::S64, Location::GPR(*r));
+ self.emit_push(Size::S64, Location::GPR(*r));
 }
 }
 fn pop_used_gpr(&mut self) {
 let used_gprs = self.get_used_gprs();
 for r in used_gprs.iter().rev() {
- self.assembler.emit_pop(Size::S64, Location::GPR(*r));
+ self.emit_pop(Size::S64, Location::GPR(*r));
 }
 }
@@ -420,10 +501,17 @@
 Location::Memory(GPR::XzrSp, (i * 8) as i32),
 );
 }
+ let delta = if (used_neons.len() * 8) < 256 {
+ Location::Imm8((used_neons.len() * 8) as u8)
+ } else {
+ let tmp = self.pick_temp_gpr().unwrap();
+ self.assembler.emit_mov_imm(Location::GPR(tmp), (used_neons.len() * 8) as u64);
+ Location::GPR(tmp)
+ };
 self.assembler.emit_add(
 Size::S64,
 Location::GPR(GPR::XzrSp),
- Location::Imm32((used_neons.len() * 8) as u32),
+ delta,
 Location::GPR(GPR::XzrSp),
 );
 }
@@ -495,29 +583,50 @@
 // Adjust stack for locals
 fn adjust_stack(&mut self, delta_stack_offset: u32) {
+ let delta = if
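/* ARM64 ADD/SUB immediates are limited, so 256 is used as a conservative cut-off before materializing the delta in a scratch register */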
delta_stack_offset < 256 { + Location::Imm8(delta_stack_offset as u8) + } else { + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + Location::GPR(tmp) + }; self.assembler.emit_sub( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm32(delta_stack_offset), + delta, Location::GPR(GPR::XzrSp), ); } // restore stack fn restore_stack(&mut self, delta_stack_offset: u32) { + let delta = if delta_stack_offset < 256 { + Location::Imm8(delta_stack_offset as u8) + } else { + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + Location::GPR(tmp) + }; self.assembler.emit_add( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm32(delta_stack_offset), + delta, Location::GPR(GPR::XzrSp), ); } fn push_callee_saved(&mut self) {} fn pop_callee_saved(&mut self) {} fn pop_stack_locals(&mut self, delta_stack_offset: u32) { + let delta = if delta_stack_offset < 256 { + Location::Imm8(delta_stack_offset as u8) + } else { + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + Location::GPR(tmp) + }; self.assembler.emit_add( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm32(delta_stack_offset), + delta, Location::GPR(GPR::XzrSp), ); } @@ -527,10 +636,10 @@ impl Machine for MachineARM64 { Location::Imm64(_) => { self.reserve_unused_temp_gpr(GPR::X4); self.move_location(Size::S64, loc, Location::GPR(GPR::X4)); - self.assembler.emit_push(Size::S64, Location::GPR(GPR::X4)); + self.emit_push(Size::S64, Location::GPR(GPR::X4)); self.release_gpr(GPR::X4); } - _ => self.assembler.emit_push(Size::S64, loc), + _ => self.emit_push(Size::S64, loc), } } @@ -569,7 +678,14 @@ impl Machine for MachineARM64 { } // Move a local to the stack fn move_local(&mut self, stack_offset: i32, location: Location) { - unimplemented!(); + if stack_offset<256 { + self.assembler.emit_stur(Size::S64, location, GPR::X27, -stack_offset); + } else { + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(tmp), stack_offset as u64); + self.assembler.emit_sub(Size::S64, Location::GPR(GPR::X27), Location::GPR(tmp), Location::GPR(tmp)); + self.assembler.emit_str(Size::S64, location, Location::GPR(tmp)); + } } // List of register to save, depending on the CallingConvention @@ -595,7 +711,30 @@ impl Machine for MachineARM64 { } // move a location to another fn move_location(&mut self, size: Size, source: Location, dest: Location) { - unimplemented!(); + match source { + Location::GPR(_) | Location::SIMD(_) => { + match dest { + Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest), + Location::Memory(addr, offs) => { + if self.offset_is_ok(size, offs) { + self.assembler.emit_str(size, source, dest); + } else { + let tmp = self.pick_temp_gpr().unwrap(); + if offs < 0 { + self.assembler.emit_mov_imm(Location::GPR(tmp), (-offs) as u64); + self.assembler.emit_sub(Size::S64, Location::GPR(addr), Location::GPR(tmp), Location::GPR(tmp)); + } else { + self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); + self.assembler.emit_add(Size::S64, Location::GPR(addr), Location::GPR(tmp), Location::GPR(tmp)); + } + self.assembler.emit_str(size, source,Location::GPR(tmp)); + } + } + _ => unimplemented!(), + } + } + _ => unimplemented!(), + } } // move a location to another fn move_location_extend( @@ -621,7 +760,7 @@ impl Machine for MachineARM64 { } // Pop a location fn pop_location(&mut 
self, location: Location) { - self.assembler.emit_pop(Size::S64, location); + self.emit_pop(Size::S64, location); } // Create a new `MachineState` with default values. fn new_machine_state(&self) -> MachineState { @@ -642,7 +781,7 @@ impl Machine for MachineARM64 { } fn emit_function_prolog(&mut self) { - self.assembler.emit_double_push( + self.emit_double_push( Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30), @@ -660,8 +799,7 @@ impl Machine for MachineARM64 { Location::GPR(GPR::X27), Location::GPR(GPR::XzrSp), ); - self.assembler - .emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30)); + self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30)); } fn emit_function_return_value(&mut self, ty: WpType, canonicalize: bool, loc: Location) { @@ -793,10 +931,10 @@ impl Machine for MachineARM64 { } fn emit_push(&mut self, size: Size, loc: Location) { - self.assembler.emit_push(size, loc); + self.emit_push(size, loc); } fn emit_pop(&mut self, size: Size, loc: Location) { - self.assembler.emit_pop(size, loc); + self.emit_pop(size, loc); } fn emit_memory_fence(&mut self) { From 19a9fa74d55879b4c1946bc29624ba6cb5a8f7eb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 14 Dec 2021 15:31:28 +0100 Subject: [PATCH 06/34] improv(compiler) Added some more machine_arm64 and emitter_arm64 methods (now 30 tests pass) --- lib/compiler-singlepass/src/emitter_arm64.rs | 361 ++++++++++++- lib/compiler-singlepass/src/machine.rs | 43 +- lib/compiler-singlepass/src/machine_arm64.rs | 510 ++++++++++++++++--- 3 files changed, 818 insertions(+), 96 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 6ee7a6d9d8f..93379bec9f1 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -8,6 +8,11 @@ use dynasmrt::{ aarch64::Aarch64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, VecAssembler, }; +use wasmer_compiler::{ + CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, SectionBody, +}; +use wasmer_types::{FunctionIndex, FunctionType, Type}; +use wasmer_vm::VMOffsets; type Assembler = VecAssembler; @@ -30,20 +35,36 @@ pub type Location = AbstractLocation; #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[allow(dead_code)] pub enum Condition { + // meaning for cmp or sub + /// Equal Eq, + /// Not equal Ne, + /// Unsigned higher or same (or carry set) Cs, + /// Unsigned lower (or carry clear) Cc, + /// Negative. The mnemonic stands for "minus" Mi, + /// Positive or zero. The mnemonic stands for "plus" Pl, + /// Signed overflow. The mnemonic stands for "V set" Vs, + /// No signed overflow. 
The mnemonic stands for "V clear" Vc, + /// Unsigned higher Hi, + /// Unsigned lower or same Ls, + /// Signed greater than or equal Ge, + /// Signed less than Lt, + /// Signed greater than Gt, + /// Signed less than or equal Le, + /// Always executed Uncond, } @@ -68,22 +89,37 @@ pub trait EmitterARM64 { fn finalize_function(&mut self); - fn emit_str(&mut self, sz: Size, src: Location, dst: Location); - fn emit_ldr(&mut self, sz: Size, src: Location, dst: Location); + fn emit_str(&mut self, sz: Size, reg: Location, addr: Location); + fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location); fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); fn emit_strbd(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); fn emit_ldrai(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); fn emit_stpbd(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); fn emit_ldpai(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); - + + fn emit_ldrb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location); + fn emit_movz(&mut self, reg: Location, val: u32); + fn emit_movk(&mut self, reg: Location, val: u32, shift: u32); + fn emit_mov_imm(&mut self, dst: Location, val: u64); fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_add2(&mut self, sz: Size, src: Location, dst: Location); + fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location); + + fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location); + fn emit_tst(&mut self, sz: Size, src: Location, dst: Location); + fn emit_label(&mut self, label: Label); fn emit_b_label(&mut self, label: Label); fn emit_bcond_label(&mut self, condition: Condition, label: Label); @@ -296,8 +332,79 @@ impl EmitterARM64 for Assembler { } } + fn emit_ldrb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrb W(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrb W(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrh W(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrh W(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrsb X(reg), [X(addr), offset]); + } + 
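            // Note on the load variants being added here: ldrb/ldrh zero-extend
            // the loaded 8/16-bit value into the destination register, while
            // ldrsb/ldrsh/ldrsw sign-extend it (into W or X depending on Size).
            // In Rust terms, for a loaded byte 0xFF:
            //   0xFFu8 as u32       == 0x0000_00FF  (255)          // ldrb
            //   0xFFu8 as i8 as i32 == -1 (bits 0xFFFF_FFFF)       // ldrsb into a W register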
(Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrsb W(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrsh X(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrsh W(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; ldrsw X(reg), [X(addr), offset]); + } + _ => unreachable!(), + } + } + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) { - match(sz, src, dst) { + match (sz, src, dst) { (Size::S64, Location::GPR(src), Location::GPR(dst)) => { let src = src.into_index() as u32; let dst = dst.into_index() as u32; @@ -338,6 +445,37 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; mov W(dst), V(src).S[0]); } + (Size::S32, Location::Imm32(val), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; mov W(dst), val as u64); + } + (Size::S64, Location::Imm32(val), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; mov W(dst), val as u64); + } + (Size::S64, Location::Imm64(val), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; mov X(dst), val); + } + _ => panic!("singlepass can't emit MOV {:?}, {:?}, {:?}", sz, src, dst), + } + } + + fn emit_movz(&mut self, reg: Location, val: u32) { + match reg { + Location::GPR(reg) => { + let reg = reg.into_index() as u32; + dynasm!(self ; movz W(reg), val); + } + _ => unreachable!(), + } + } + fn emit_movk(&mut self, reg: Location, val: u32, shift: u32) { + match reg { + Location::GPR(reg) => { + let reg = reg.into_index() as u32; + dynasm!(self ; movk X(reg), val, LSL shift); + } _ => unreachable!(), } } @@ -372,12 +510,14 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add W(dst), W(src1), W(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; add X(dst), X(src1), imm as u32); } - (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; add W(dst), W(src1), imm as u32); @@ -418,9 +558,104 @@ impl EmitterARM64 for Assembler { ), } } + fn emit_add2(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(dst), X(src)); + } + (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { + let src = src as 
u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(dst), src); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add W(dst), W(dst), W(src)); + } + (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(dst), imm as u32); + } + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; add W(dst), W(dst), imm as u32); + } + _ => panic!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub X(dst), X(dst), X(src)); + } + (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { + let src = src as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub X(dst), X(dst), src); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub W(dst), W(dst), W(src)); + } + (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; sub X(dst), X(dst), imm as u32); + } + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; sub W(dst), W(dst), imm as u32); + } + _ => panic!("singlepass can't emit SUB {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; cmp X(dst), X(src)); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; cmp W(dst), W(src)); + } + _ => unreachable!(), + } + } + + fn emit_tst(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; tst X(dst), X(src)); + } + (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; tst X(dst), src as u64); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; tst W(dst), W(src)); + } + (Size::S32, Location::Imm32(src), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; tst W(dst), src); + } + _ => unreachable!(), + } + } fn emit_label(&mut self, label: Label) { - self.emit_label(label); + dynasm!(self ; => label); } fn emit_b_label(&mut self, label: Label) { dynasm!(self ; b =>label); @@ -458,3 +693,115 @@ impl EmitterARM64 for Assembler { dynasm!(self ; udf 0); } } + +pub fn gen_std_trampoline_arm64( + sig: &FunctionType, + calling_convention: CallingConvention, +) -> FunctionBody { + let mut a = Assembler::new(0); + + let fptr = GPR::X19; + let args = GPR::X20; + + dynasm!(a + ; .arch aarch64 + ; sub sp, sp, 32 + ; stp x29, x30, [sp] + ; stp X(fptr as u32), X(args as u32), [sp, 16] + ; mov x29, sp + ; mov X(fptr as u32), x1 + ; mov X(args as u32), x2 + ); + + let 
stack_args = sig.params().len().saturating_sub(8); + let mut stack_offset = stack_args as u32 * 8; + if stack_args > 0 { + if stack_offset % 16 != 0 { + stack_offset += 8; + assert!(stack_offset % 16 == 0); + } + dynasm!(a ; .arch aarch64 ; sub sp, sp, stack_offset); + } + + // Move arguments to their locations. + // `callee_vmctx` is already in the first argument register, so no need to move. + for (i, param) in sig.params().iter().enumerate() { + let sz = match *param { + Type::I32 => Size::S32, + Type::I64 => Size::S64, + _ => unimplemented!(), + }; + match i { + 0..=6 => { + a.emit_ldr( + sz, + Location::GPR(GPR::from_index(i + 1).unwrap()), + Location::Memory(args, (i * 16) as i32), + ); + } + _ => { + a.emit_ldr( + sz, + Location::GPR(GPR::X18), + Location::Memory(args, (i * 16) as i32), + ); + a.emit_str( + sz, + Location::GPR(GPR::X18), + Location::Memory(GPR::XzrSp, (i as i32 - 7) * 8), + ) + } + } + } + + dynasm!(a ; .arch aarch64 ; blr X(fptr as u32)); + + // Write return value. + if !sig.results().is_empty() { + a.emit_stur(Size::S64, Location::GPR(GPR::X0), args, 0); + } + + // Restore stack. + dynasm!(a + ; .arch aarch64 + ; ldp X(fptr as u32), X(args as u32), [x29, 16] + ; ldp x29, x30, [x29] + ; add sp, sp, 32 + stack_offset as u32 + ; ret + ); + + FunctionBody { + body: a.finalize().unwrap().to_vec(), + unwind_info: None, + } +} +// Generates dynamic import function call trampoline for a function type. +pub fn gen_std_dynamic_import_trampoline_arm64( + vmoffsets: &VMOffsets, + sig: &FunctionType, + calling_convention: CallingConvention, +) -> FunctionBody { + let mut a = Assembler::new(0); + dynasm!(a ; .arch aarch64 ; ret); + FunctionBody { + body: a.finalize().unwrap().to_vec(), + unwind_info: None, + } +} +// Singlepass calls import functions through a trampoline. +pub fn gen_import_call_trampoline_arm64( + vmoffsets: &VMOffsets, + index: FunctionIndex, + sig: &FunctionType, + calling_convention: CallingConvention, +) -> CustomSection { + let mut a = Assembler::new(0); + dynasm!(a ; .arch aarch64 ; ret); + let section_body = SectionBody::new_with_vec(a.finalize().unwrap().to_vec()); + + CustomSection { + protection: CustomSectionProtection::ReadExecute, + bytes: section_body, + relocations: vec![], + } +} diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 7f5c18a3952..419b3612b06 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -1,5 +1,6 @@ use crate::common_decl::*; use crate::location::{Location, Reg}; +use crate::machine_arm64::MachineARM64; use crate::machine_x64::MachineX86_64; use dynasmrt::{AssemblyOffset, DynamicLabel}; use std::collections::BTreeMap; @@ -2166,11 +2167,17 @@ pub fn gen_std_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_std_trampoline(sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_std_trampoline(sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_std_trampoline(sig, calling_convention) + } } /// Generates dynamic import function call trampoline for a function type. 
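// On argument placement in gen_std_trampoline_arm64 above: x0 already carries
// the VMContext, so wasm params 0..=6 are loaded into x1..x7 and anything
// beyond spills to the freshly reserved stack area; each incoming value is
// read from the args array at stride 16, one 128-bit slot per value. A hedged
// model of the placement (Dst and place_param are illustrative names):
//
//   enum Dst { Reg(usize), Stack(i32) }              // Reg(n) means xn
//   fn place_param(i: usize) -> Dst {
//       if i <= 6 { Dst::Reg(i + 1) } else { Dst::Stack((i as i32 - 7) * 8) }
//   }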
pub fn gen_std_dynamic_import_trampoline( @@ -2179,11 +2186,17 @@ pub fn gen_std_dynamic_import_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } } /// Singlepass calls import functions through a trampoline. pub fn gen_import_call_trampoline( @@ -2193,11 +2206,17 @@ pub fn gen_import_call_trampoline( target: &Target, calling_convention: CallingConvention, ) -> CustomSection { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } } // Constants for the bounds of truncation operations. These are the least or diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 62fe0e47c0c..017224bede7 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -4,6 +4,7 @@ use crate::common_decl::*; use crate::emitter_arm64::*; use crate::location::Location as AbstractLocation; use crate::machine::*; +use dynasm::dynasm; use dynasmrt::{aarch64::Aarch64Relocation, VecAssembler}; use std::collections::HashSet; use wasmer_compiler::wasmparser::Type as WpType; @@ -46,22 +47,70 @@ impl MachineARM64 { } fn emit_relaxed_binop( &mut self, - _op: fn(&mut Assembler, Size, Location, Location), - _sz: Size, - _src: Location, - _dst: Location, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, ) { - unimplemented!(); + match (src, dst) { + (Location::GPR(_), Location::GPR(_)) => { + op(&mut self.assembler, sz, src, dst); + } + (Location::Memory(_, _), Location::Memory(_, _)) => { + let temp_src = self.acquire_temp_gpr().unwrap(); + let temp_dst = self.acquire_temp_gpr().unwrap(); + self.move_location(sz, src, Location::GPR(temp_src)); + self.move_location(sz, dst, Location::GPR(temp_dst)); + op( + &mut self.assembler, + sz, + Location::GPR(temp_src), + Location::GPR(temp_dst), + ); + self.release_gpr(temp_dst); + self.release_gpr(temp_src); + } + /*(Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => { + } + (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,*/ + (Location::Imm64(_), Location::Memory(_, _)) + | (Location::Imm64(_), Location::GPR(_)) + | (Location::Imm32(_), Location::Memory(_, _)) + | (Location::Imm32(_), Location::GPR(_)) => { + let temp = self.acquire_temp_gpr().unwrap(); + self.move_location(sz, src, Location::GPR(temp)); + op(&mut self.assembler, sz, Location::GPR(temp), dst); + self.release_gpr(temp); + } + (_, 
Location::SIMD(_)) => { + let temp = self.acquire_temp_gpr().unwrap(); + self.move_location(sz, src, Location::GPR(temp)); + op(&mut self.assembler, sz, Location::GPR(temp), dst); + self.release_gpr(temp); + } + _ => panic!( + "singlepass can't emit relaxed_binop {:?} {:?} => {:?}", + sz, src, dst + ), + }; } /// I32 binary operation with both operands popped from the virtual stack. fn emit_binop_i32( &mut self, - _f: fn(&mut Assembler, Size, Location, Location), - _loc_a: Location, - _loc_b: Location, - _ret: Location, + f: fn(&mut Assembler, Size, Location, Location), + loc_a: Location, + loc_b: Location, + ret: Location, ) { - unimplemented!(); + if loc_a != ret { + let tmp = self.acquire_temp_gpr().unwrap(); + self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp)); + self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp)); + self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); + } else { + self.emit_relaxed_binop(f, Size::S32, loc_b, ret); + } } /// I64 binary operation with both operands popped from the virtual stack. fn emit_binop_i64( @@ -124,17 +173,133 @@ impl MachineARM64 { fn memory_op( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _check_alignment: bool, - _value_size: usize, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, - _cb: F, + addr: Location, + memarg: &MemoryImmediate, + check_alignment: bool, + value_size: usize, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + cb: F, ) { - unimplemented!(); + let tmp_addr = self.acquire_temp_gpr().unwrap(); + + // Reusing `tmp_addr` for temporary indirection here, since it's not used before the last reference to `{base,bound}_loc`. + let (base_loc, bound_loc) = if imported_memories { + // Imported memories require one level of indirection. + self.emit_relaxed_binop( + Assembler::emit_mov, + Size::S64, + Location::Memory(self.get_vmctx_reg(), offset), + Location::GPR(tmp_addr), + ); + (Location::Memory(tmp_addr, 0), Location::Memory(tmp_addr, 8)) + } else { + ( + Location::Memory(self.get_vmctx_reg(), offset), + Location::Memory(self.get_vmctx_reg(), offset + 8), + ) + }; + + let tmp_base = self.acquire_temp_gpr().unwrap(); + let tmp_bound = self.acquire_temp_gpr().unwrap(); + + // Load base into temporary register. + self.assembler + .emit_ldr(Size::S64, Location::GPR(tmp_base), base_loc); + + // Load bound into temporary register, if needed. + if need_check { + self.assembler + .emit_ldr(Size::S64, Location::GPR(tmp_bound), bound_loc); + + // Wasm -> Effective. + // Assuming we never underflow - should always be true on Linux/macOS and Windows >=8, + // since the first page from 0x0 to 0x1000 is not accepted by mmap. + self.assembler.emit_add( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp_base), + Location::GPR(tmp_bound), + ); + if value_size < 256 { + self.assembler.emit_sub( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp_bound), + Location::Imm8(value_size as u8), + ); + } else { + // reusing tmp_base + self.assembler + .emit_mov_imm(Location::GPR(tmp_base), value_size as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp_base), + Location::GPR(tmp_bound), + ); + } + } + + // Load effective address. + // `base_loc` and `bound_loc` becomes INVALID after this line, because `tmp_addr` + // might be reused. + self.assembler + .emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); + + // Add offset to memory address. 
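        // The rest of memory_op amounts to this check, as a hedged pseudo-Rust
        // model (`mem_size` and `wasm_addr` are illustrative names for the
        // values loaded above):
        //
        //   let real_addr = base + wasm_addr + memarg.offset; // 32-bit add first; a carry (Cs) traps
        //   let limit = base + mem_size - value_size;         // tmp_bound, inclusive
        //   if need_check && real_addr > limit { /* branch Hi to heap_access_oob */ }
        //   if check_alignment && real_addr & (align - 1) != 0 { /* branch Ne to heap_access_oob */ }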
+        if memarg.offset != 0 {
+            self.assembler.emit_add(
+                Size::S32,
+                Location::Imm32(memarg.offset),
+                Location::GPR(tmp_addr),
+                Location::GPR(tmp_addr),
+            );
+
+            // Trap if offset calculation overflowed.
+            self.assembler
+                .emit_bcond_label(Condition::Cs, heap_access_oob);
+        }
+
+        // Wasm linear memory -> real memory
+        self.assembler.emit_add(
+            Size::S64,
+            Location::GPR(tmp_base),
+            Location::GPR(tmp_addr),
+            Location::GPR(tmp_addr),
+        );
+
+        if need_check {
+            // Trap if the end address of the requested area is above that of the linear memory.
+            self.assembler
+                .emit_cmp(Size::S64, Location::GPR(tmp_bound), Location::GPR(tmp_addr));
+
+            // `tmp_bound` is inclusive. So trap only if `tmp_addr > tmp_bound`.
+            self.assembler
+                .emit_bcond_label(Condition::Hi, heap_access_oob);
+        }
+
+        self.release_gpr(tmp_bound);
+        self.release_gpr(tmp_base);
+
+        let align = memarg.align;
+        if check_alignment && align != 1 {
+            self.assembler.emit_tst(
+                Size::S64,
+                Location::Imm32((align - 1).into()),
+                Location::GPR(tmp_addr),
+            );
+            self.assembler
+                .emit_bcond_label(Condition::Ne, heap_access_oob);
+        }
+        let begin = self.assembler.get_offset().0;
+        cb(self, tmp_addr);
+        let end = self.assembler.get_offset().0;
+        self.mark_address_range_with_trap_code(TrapCode::HeapAccessOutOfBounds, begin, end);
+
+        self.release_gpr(tmp_addr);
     }

     fn emit_compare_and_swap(
@@ -275,15 +440,21 @@ impl MachineARM64 {
     }

     fn offset_is_ok(&self, size: Size, offset: i32) -> bool {
-        if offset<0 { return false;}
+        if offset < 0 {
+            return false;
+        }
         let shift = match size {
             Size::S8 => 0,
             Size::S16 => 1,
             Size::S32 => 2,
             Size::S64 => 3,
         };
-        if offset >= 0x1000<<shift { return false; }
-        if (offset>>shift)<<shift != offset { return false;}
+        if offset >= 0x1000 << shift {
+            return false;
+        }
+        if (offset >> shift) << shift != offset {
+            return false;
+        }
         return true;
     }

@@ -293,10 +464,16 @@ impl MachineARM64 {
                 let offset = if self.pushed {
                     8
                 } else {
-                    self.assembler.emit_sub(Size::S64, Location::GPR(GPR::XzrSp), Location::GPR(GPR::XzrSp), Location::Imm8(16));
+                    self.assembler.emit_sub(
+                        Size::S64,
+                        Location::GPR(GPR::XzrSp),
+                        Location::Imm8(16),
+                        Location::GPR(GPR::XzrSp),
+                    );
                     0
                 };
-                self.assembler.emit_str(Size::S64, src, Location::Memory(GPR::XzrSp, offset));
+                self.assembler
+                    .emit_str(Size::S64, src, Location::Memory(GPR::XzrSp, offset));
                 self.pushed = !self.pushed;
             }
             _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src),
@@ -306,7 +483,8 @@ impl MachineARM64 {
         if !self.pushed {
             match (sz, src1, src2) {
                 (Size::S64, Location::GPR(_), Location::GPR(_)) => {
-                    self.assembler.emit_stpbd(Size::S64, src1, src2, GPR::XzrSp, 16);
+                    self.assembler
+                        .emit_stpdb(Size::S64, src1, src2, GPR::XzrSp, 16);
                 }
                 _ => {
                     self.emit_push(sz, src1);
@@ -321,14 +499,16 @@ impl MachineARM64 {
     fn emit_pop(&mut self, sz: Size, dst: Location) {
         match (sz, dst) {
             (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => {
-                let offset = if self.pushed {
-                    0
-                } else {
-                    8
-                };
-                self.assembler.emit_ldr(Size::S64, dst, Location::Memory(GPR::XzrSp, offset));
+                let offset = if self.pushed { 0 } else { 8 };
+                self.assembler
+                    .emit_ldr(Size::S64, dst, Location::Memory(GPR::XzrSp, offset));
                 if self.pushed {
-                    self.assembler.emit_add(Size::S64, Location::GPR(GPR::XzrSp), Location::GPR(GPR::XzrSp), Location::Imm8(16));
+                    self.assembler.emit_add(
+                        Size::S64,
+                        Location::GPR(GPR::XzrSp),
+                        Location::Imm8(16),
+                        Location::GPR(GPR::XzrSp),
+                    );
                 }
                 self.pushed = !self.pushed;
             }
@@ -339,7 +519,8 @@ impl MachineARM64 {
         if !self.pushed {
             match (sz, dst1, dst2) {
                 (Size::S64, Location::GPR(_), Location::GPR(_)) => {
-
self.assembler.emit_ldpai(Size::S64, dst1, dst2, GPR::XzrSp, 16); + self.assembler + .emit_ldpai(Size::S64, dst1, dst2, GPR::XzrSp, 16); } _ => { self.emit_pop(sz, dst2); @@ -505,7 +686,8 @@ impl Machine for MachineARM64 { Location::Imm8((used_neons.len() * 8) as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), (used_neons.len() * 8) as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (used_neons.len() * 8) as u64); Location::GPR(tmp) }; self.assembler.emit_add( @@ -587,7 +769,8 @@ impl Machine for MachineARM64 { Location::Imm8(delta_stack_offset as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); Location::GPR(tmp) }; self.assembler.emit_sub( @@ -603,7 +786,8 @@ impl Machine for MachineARM64 { Location::Imm8(delta_stack_offset as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); Location::GPR(tmp) }; self.assembler.emit_add( @@ -620,7 +804,8 @@ impl Machine for MachineARM64 { Location::Imm8(delta_stack_offset as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); Location::GPR(tmp) }; self.assembler.emit_add( @@ -678,13 +863,21 @@ impl Machine for MachineARM64 { } // Move a local to the stack fn move_local(&mut self, stack_offset: i32, location: Location) { - if stack_offset<256 { - self.assembler.emit_stur(Size::S64, location, GPR::X27, -stack_offset); + if stack_offset < 256 { + self.assembler + .emit_stur(Size::S64, location, GPR::X27, -stack_offset); } else { let tmp = self.pick_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), stack_offset as u64); - self.assembler.emit_sub(Size::S64, Location::GPR(GPR::X27), Location::GPR(tmp), Location::GPR(tmp)); - self.assembler.emit_str(Size::S64, location, Location::GPR(tmp)); + self.assembler + .emit_mov_imm(Location::GPR(tmp), stack_offset as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X27), + Location::GPR(tmp), + Location::GPR(tmp), + ); + self.assembler + .emit_str(Size::S64, location, Location::GPR(tmp)); } } @@ -712,28 +905,86 @@ impl Machine for MachineARM64 { // move a location to another fn move_location(&mut self, size: Size, source: Location, dest: Location) { match source { - Location::GPR(_) | Location::SIMD(_) => { - match dest { - Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest), - Location::Memory(addr, offs) => { - if self.offset_is_ok(size, offs) { - self.assembler.emit_str(size, source, dest); + Location::GPR(_) | Location::SIMD(_) => match dest { + Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest), + Location::Memory(addr, offs) => { + if self.offset_is_ok(size, offs) { + self.assembler.emit_str(size, source, dest); + } else if offs > -256 && offs < 256 { + self.assembler.emit_stur(size, dest, addr, offs); + } else { + let tmp = self.pick_temp_gpr().unwrap(); + if offs < 0 { + self.assembler + .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); } else { - let 
tmp = self.pick_temp_gpr().unwrap(); - if offs < 0 { - self.assembler.emit_mov_imm(Location::GPR(tmp), (-offs) as u64); - self.assembler.emit_sub(Size::S64, Location::GPR(addr), Location::GPR(tmp), Location::GPR(tmp)); - } else { - self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); - self.assembler.emit_add(Size::S64, Location::GPR(addr), Location::GPR(tmp), Location::GPR(tmp)); - } - self.assembler.emit_str(size, source,Location::GPR(tmp)); + self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); } + self.assembler.emit_str(size, source, Location::GPR(tmp)); } - _ => unimplemented!(), } - } - _ => unimplemented!(), + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Imm8(_) | Location::Imm32(_) | Location::Imm64(_) => match dest { + Location::GPR(_) => self.assembler.emit_mov(size, source, dest), + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Memory(addr, offs) => match dest { + Location::GPR(_) => { + if self.offset_is_ok(size, offs) { + self.assembler.emit_ldr(size, dest, source); + } else if offs > -256 && offs < 256 { + self.assembler.emit_ldur(size, dest, addr, offs); + } else { + let tmp = self.pick_temp_gpr().unwrap(); + if offs < 0 { + self.assembler + .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } else { + self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } + self.assembler.emit_ldr(size, source, Location::GPR(tmp)); + } + } + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), } } // move a location to another @@ -756,7 +1007,25 @@ impl Machine for MachineARM64 { } // Restore save_area fn restore_saved_area(&mut self, saved_area_offset: i32) { - unimplemented!(); + if saved_area_offset < 256 { + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X27), + Location::Imm8(saved_area_offset as u8), + Location::GPR(GPR::XzrSp), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), saved_area_offset as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X27), + Location::GPR(tmp), + Location::GPR(GPR::XzrSp), + ); + self.release_gpr(tmp); + } } // Pop a location fn pop_location(&mut self, location: Location) { @@ -781,11 +1050,7 @@ impl Machine for MachineARM64 { } fn emit_function_prolog(&mut self) { - self.emit_double_push( - Size::S64, - Location::GPR(GPR::X27), - Location::GPR(GPR::X30), - ); // save LR too + self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30)); // save LR too self.move_location( Size::S64, Location::GPR(GPR::XzrSp), @@ -983,7 +1248,7 @@ impl Machine for MachineARM64 { } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - unimplemented!(); + self.emit_binop_i32(Assembler::emit_add2, loc_a, loc_b, ret); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { unimplemented!(); @@ -1100,7 +1365,19 @@ impl Machine for MachineARM64 { offset: i32, 
heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldur(Size::S32, ret, addr, 0); + }, + ); } fn i32_load_8u( &mut self, @@ -1112,7 +1389,19 @@ impl Machine for MachineARM64 { offset: i32, heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrb(Size::S32, ret, addr, 0); + }, + ); } fn i32_load_8s( &mut self, @@ -1124,7 +1413,19 @@ impl Machine for MachineARM64 { offset: i32, heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrsb(Size::S32, ret, addr, 0); + }, + ); } fn i32_load_16u( &mut self, @@ -1136,7 +1437,19 @@ impl Machine for MachineARM64 { offset: i32, heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrh(Size::S32, ret, addr, 0); + }, + ); } fn i32_load_16s( &mut self, @@ -1148,7 +1461,19 @@ impl Machine for MachineARM64 { offset: i32, heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrsh(Size::S32, ret, addr, 0); + }, + ); } fn i32_atomic_load( &mut self, @@ -1561,7 +1886,38 @@ impl Machine for MachineARM64 { reloc_target: RelocationTarget, relocations: &mut Vec, ) { - unimplemented!(); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw0, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 0); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw1, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 16); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw2, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 32); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw3, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48); } fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -2524,7 +2880,7 @@ impl Machine for MachineARM64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { - unimplemented!(); + gen_std_trampoline_arm64(sig, calling_convention) } // Generates dynamic import function call trampoline for a function type. fn gen_std_dynamic_import_trampoline( @@ -2533,7 +2889,7 @@ impl Machine for MachineARM64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { - unimplemented!(); + gen_std_dynamic_import_trampoline_arm64(vmoffsets, sig, calling_convention) } // Singlepass calls import functions through a trampoline. 
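    // Background for this trampoline: singlepass passes every argument in
    // integer registers internally, while the native AAPCS64 convention wants
    // f32/f64 arguments in NEON registers, and an imported function needs its
    // own vmctx. The arm64 trampoline therefore reshuffles float arguments if
    // needed, then loads the function pointer and target vmctx out of the
    // VMFunctionImport record inside the caller's VMContext and jumps to it
    // (the full implementation lands in the next commit). Hedged shape of the
    // record, field names illustrative:
    //
    //   #[repr(C)]
    //   struct VMFunctionImportShape { body: *const u8, vmctx: *mut u8 } // at offset, offset + 8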
    fn gen_import_call_trampoline(
@@ -2543,6 +2899,6 @@ impl Machine for MachineARM64 {
         sig: &FunctionType,
         calling_convention: CallingConvention,
     ) -> CustomSection {
-        unimplemented!();
+        gen_import_call_trampoline_arm64(vmoffsets, index, sig, calling_convention)
     }
 }

From 72eead32932071c2bfe00bed4a421b84654443fa Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Thu, 16 Dec 2021 13:03:54 +0100
Subject: [PATCH 07/34] improv(compiler) Calls are now working properly (and 43 tests pass)

---
 lib/compiler-singlepass/src/codegen.rs       |  37 +-
 lib/compiler-singlepass/src/emitter_arm64.rs | 338 ++++++++++++++++++-
 lib/compiler-singlepass/src/emitter_x64.rs   |   2 +-
 lib/compiler-singlepass/src/machine.rs       |   2 +
 lib/compiler-singlepass/src/machine_arm64.rs | 120 +++++--
 lib/compiler-singlepass/src/machine_x64.rs   |   5 +
 6 files changed, 442 insertions(+), 62 deletions(-)

diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs
index 6bdbc4375b0..043f6c1173a 100644
--- a/lib/compiler-singlepass/src/codegen.rs
+++ b/lib/compiler-singlepass/src/codegen.rs
@@ -290,6 +290,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             ret.push(loc);
         }

+        let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset);
         if delta_stack_offset != 0 {
             self.machine.adjust_stack(delta_stack_offset as u32);
         }
@@ -335,7 +336,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             }
             self.state.wasm_stack.pop().unwrap();
         }
-
+        let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset);
         if delta_stack_offset != 0 {
             self.machine.restore_stack(delta_stack_offset as u32);
         }
@@ -376,6 +377,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             self.state.wasm_stack.pop().unwrap();
         }

+        let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset);
         if delta_stack_offset != 0 {
             self.machine.adjust_stack(delta_stack_offset as u32);
         }
@@ -421,6 +423,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             // Wasm state popping is deferred to `release_locations_only_osr_state`.
         }

+        let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset);
         if delta_stack_offset != 0 {
             self.machine.pop_stack_locals(delta_stack_offset as u32);
         }
@@ -457,6 +460,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             }
         }

+        let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset);
         if delta_stack_offset != 0 {
             self.machine.pop_stack_locals(delta_stack_offset as u32);
         }
@@ -504,6 +508,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         static_area_size += num_mem_slots * 8;

         // Allocate save area, without actually writing to it.
+        static_area_size = self.machine.round_stack_adjust(static_area_size);
         self.machine.adjust_stack(static_area_size as _);

         // Save callee-saved registers.
@@ -517,7 +522,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             }
         }

-        // Save R15 for vmctx use.
+        // Save the Reg used for vmctx.
         self.stack_offset.0 += 8;
         self.machine.move_local(
             self.stack_offset.0 as i32,
@@ -741,6 +746,15 @@ impl<'a, M: Machine> FuncGen<'a, M> {

         let mut stack_offset: usize = 0;

+        while self
+            .machine
+            .round_stack_adjust(used_gprs.len() * 8 + used_simds.len() * 8)
+            != used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset
+        {
+            // on ARM64, a push uses a two-word (16-byte) slot, because the stack has to stay 16-byte aligned
+            stack_offset += 8;
+        }
+
         // Calculate stack offset.
         for (i, _param) in params.iter().enumerate() {
             if let Location::Memory(_, _) =
                 self.machine.get_param_location(1 + i, calling_convention)
             {
                 stack_offset += 8;
             }
         }

         // Align stack to 16 bytes.
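        // round_stack_adjust, threaded through all the hunks above, is an
        // identity on x86-64 and rounds up to a 16-byte multiple on arm64,
        // where SP must stay 16-byte aligned at all times. An equivalent
        // closed form of the arm64 version (a sketch, not the trait method
        // itself):
        //
        //   fn round_stack_adjust_16(value: usize) -> usize { (value + 15) & !15 }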
- if (self.get_stack_offset() + used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset) + if (self.get_stack_offset() + + self + .machine + .round_stack_adjust(used_gprs.len() * 8 + used_simds.len() * 8) + + stack_offset) % 16 != 0 { @@ -831,10 +849,12 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.machine.get_param_location(0, calling_convention), ); // vmctx - if (self.state.stack_values.len() % 2) != 1 { - return Err(CodegenError { - message: "emit_call_native: explicit shadow takes one slot".to_string(), - }); + if self.machine.round_stack_adjust(8) == 8 { + if (self.state.stack_values.len() % 2) != 1 { + return Err(CodegenError { + message: "emit_call_native: explicit shadow takes one slot".to_string(), + }); + } } if stack_padding > 0 { @@ -945,9 +965,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { } fn emit_head(&mut self) -> Result<(), CodegenError> { - // TODO: Patchpoint is not emitted for now, and ARM trampoline is not prepended. - - // Normal x86 entry prologue. self.machine.emit_function_prolog(); // Initialize locals. diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 93379bec9f1..ec0de5388f6 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -1,4 +1,4 @@ -pub use crate::arm64_decl::{GPR, NEON}; +pub use crate::arm64_decl::{ARM64Register, ArgumentRegisterAllocator, GPR, NEON}; use crate::common_decl::Size; use crate::location::Location as AbstractLocation; pub use crate::location::{Multiplier, Reg}; @@ -82,6 +82,16 @@ pub enum GPROrMemory { Memory(GPR, i32), } +fn is_immediate_64bit_encodable(value: u64) -> bool { + let offset = value.trailing_zeros() & 0b11_0000; + let masked = 0xffff & (value >> offset); + if (masked << offset) == value { + true + } else { + false + } +} + pub trait EmitterARM64 { fn get_label(&mut self) -> Label; fn get_offset(&self) -> Offset; @@ -93,10 +103,10 @@ pub trait EmitterARM64 { fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location); fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); - fn emit_strbd(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); - fn emit_ldrai(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); - fn emit_stpbd(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); - fn emit_ldpai(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); + fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); + fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); fn emit_ldrb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); @@ -123,6 +133,7 @@ pub trait EmitterARM64 { fn emit_label(&mut self, label: Label); fn emit_b_label(&mut self, label: Label); fn emit_bcond_label(&mut self, condition: Condition, label: Label); + fn emit_b_register(&mut self, reg: GPR); fn emit_call_label(&mut self, label: Label); fn emit_call_register(&mut self, reg: GPR); fn emit_ret(&mut self); @@ -278,7 +289,7 @@ impl EmitterARM64 for Assembler { } } - fn emit_strbd(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + fn emit_strdb(&mut self, sz: Size, reg: 
Location, addr: GPR, offset: u32) { match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -293,7 +304,7 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } - fn emit_ldrai(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -309,7 +320,7 @@ impl EmitterARM64 for Assembler { } } - fn emit_stpbd(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { match (sz, reg1, reg2) { (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { let reg1 = reg1.into_index() as u32; @@ -320,7 +331,7 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } - fn emit_ldpai(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { match (sz, reg1, reg2) { (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { let reg1 = reg1.into_index() as u32; @@ -484,7 +495,23 @@ impl EmitterARM64 for Assembler { match dst { Location::GPR(dst) => { let dst = dst.into_index() as u32; - dynasm!(self ; mov W(dst), val) + if is_immediate_64bit_encodable(val) { + dynasm!(self ; mov W(dst), val); + } else { + dynasm!(self ; movz W(dst), (val&0xffff) as u32); + let val = val >> 16; + if val != 0 { + dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 16); + let val = val >> 16; + if val != 0 { + dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 32); + let val = val >> 16; + if val != 0 { + dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 48); + } + } + } + } } _ => panic!("singlepass can't emit MOVW {:?}", dst), } @@ -679,6 +706,9 @@ impl EmitterARM64 for Assembler { Condition::Uncond => dynasm!(self ; b => label), } } + fn emit_b_register(&mut self, reg: GPR) { + dynasm!(self ; br X(reg.into_index() as u32)); + } fn emit_call_label(&mut self, label: Label) { dynasm!(self ; bl =>label); } @@ -700,8 +730,8 @@ pub fn gen_std_trampoline_arm64( ) -> FunctionBody { let mut a = Assembler::new(0); - let fptr = GPR::X19; - let args = GPR::X20; + let fptr = GPR::X26; + let args = GPR::X8; dynasm!(a ; .arch aarch64 @@ -727,8 +757,8 @@ pub fn gen_std_trampoline_arm64( // `callee_vmctx` is already in the first argument register, so no need to move. for (i, param) in sig.params().iter().enumerate() { let sz = match *param { - Type::I32 => Size::S32, - Type::I64 => Size::S64, + Type::I32 | Type::F32 => Size::S32, + Type::I64 | Type::F64 => Size::S64, _ => unimplemented!(), }; match i { @@ -782,7 +812,132 @@ pub fn gen_std_dynamic_import_trampoline_arm64( calling_convention: CallingConvention, ) -> FunctionBody { let mut a = Assembler::new(0); - dynasm!(a ; .arch aarch64 ; ret); + // Allocate argument array. 
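    // Stack layout built by this trampoline, relative to SP once both
    // adjustments below have run (a hedged summary of the code that follows):
    //
    //   [sp + i*16]                one 128-bit slot per value, i in 0..max(params, results);
    //                              upper halves are zeroed, and the result is read back from slot 0
    //   [sp + stack_offset - 16]   saved x30 (LR) and x20 (scratch), stored by emit_stpdb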
+ let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 16; + // Save LR and X20, as scratch register + a.emit_stpdb( + Size::S64, + Location::GPR(GPR::X30), + Location::GPR(GPR::X20), + GPR::XzrSp, + 16, + ); + + if stack_offset < 256 + 16 { + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8((stack_offset - 16) as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64); + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X20), + Location::GPR(GPR::XzrSp), + ); + } + + // Copy arguments. + if !sig.params().is_empty() { + let mut argalloc = ArgumentRegisterAllocator::default(); + argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext + + let mut stack_param_count: usize = 0; + + for (i, ty) in sig.params().iter().enumerate() { + let source_loc = match argalloc.next(*ty, calling_convention) { + Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr), + Some(ARM64Register::NEON(neon)) => Location::SIMD(neon), + None => { + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X20), + Location::Memory(GPR::XzrSp, (stack_offset + stack_param_count * 8) as _), + ); + stack_param_count += 1; + Location::GPR(GPR::X20) + } + }; + a.emit_str( + Size::S64, + source_loc, + Location::Memory(GPR::XzrSp, (i * 16) as _), + ); + + // Zero upper 64 bits. + a.emit_str( + Size::S64, + Location::GPR(GPR::XzrSp), // XZR here + Location::Memory(GPR::XzrSp, (i * 16 + 8) as _), // XSP here + ); + } + } + + match calling_convention { + _ => { + // Load target address. + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X20), + Location::Memory( + GPR::X0, + vmoffsets.vmdynamicfunction_import_context_address() as i32, + ), + ); + // Load values array. + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(0), + Location::GPR(GPR::X1), + ); + } + }; + + // Call target. + a.emit_call_register(GPR::X20); + + // Fetch return value. + if !sig.results().is_empty() { + assert_eq!(sig.results().len(), 1); + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X0), + Location::Memory(GPR::XzrSp, 0), + ); + } + + // Release values array. + if stack_offset < 256 + 16 { + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32((stack_offset - 16) as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64); + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X20), + Location::GPR(GPR::XzrSp), + ); + } + a.emit_ldpia( + Size::S64, + Location::GPR(GPR::X30), + Location::GPR(GPR::X20), + GPR::XzrSp, + 16, + ); + + // Return. + a.emit_ret(); + FunctionBody { body: a.finalize().unwrap().to_vec(), unwind_info: None, @@ -796,7 +951,158 @@ pub fn gen_import_call_trampoline_arm64( calling_convention: CallingConvention, ) -> CustomSection { let mut a = Assembler::new(0); - dynasm!(a ; .arch aarch64 ; ret); + + // Singlepass internally treats all arguments as integers + // For the standard System V calling convention requires + // floating point arguments to be passed in NEON registers. + // Translation is expensive, so only do it if needed. + if sig + .params() + .iter() + .any(|&x| x == Type::F32 || x == Type::F64) + { + match calling_convention { + _ => { + let mut param_locations: Vec = vec![]; + + // Allocate stack space for arguments. 
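            // This shuffle only runs when the signature actually contains
            // f32/f64 params, per the guard above:
            //
            //   sig.params().iter().any(|&x| x == Type::F32 || x == Type::F64)
            //
            // For pure-integer signatures the registers are already where
            // AAPCS64 wants them and the trampoline falls straight through to
            // the tail jump. The code below first parks x1..x7 on the stack so
            // no argument is overwritten before it reaches its final GPR or
            // NEON home.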
+                let stack_offset: i32 = if sig.params().len() > 5 {
+                    5 * 8
+                } else {
+                    (sig.params().len() as i32) * 8
+                };
+                let stack_offset = if stack_offset & 15 != 0 {
+                    stack_offset + 8
+                } else {
+                    stack_offset
+                };
+                if stack_offset > 0 {
+                    if stack_offset < 256 {
+                        a.emit_sub(
+                            Size::S64,
+                            Location::GPR(GPR::XzrSp),
+                            Location::Imm8(stack_offset as u8),
+                            Location::GPR(GPR::XzrSp),
+                        );
+                    } else {
+                        a.emit_mov_imm(Location::GPR(GPR::X16), stack_offset as u64);
+                        a.emit_sub(
+                            Size::S64,
+                            Location::GPR(GPR::XzrSp),
+                            Location::GPR(GPR::X16),
+                            Location::GPR(GPR::XzrSp),
+                        );
+                    }
+                }
+
+                // Store all arguments to the stack to prevent overwrite.
+                for i in 0..sig.params().len() {
+                    let loc = match i {
+                        0..=6 => {
+                            static PARAM_REGS: &[GPR] = &[
+                                GPR::X1,
+                                GPR::X2,
+                                GPR::X3,
+                                GPR::X4,
+                                GPR::X5,
+                                GPR::X6,
+                                GPR::X7,
+                            ];
+                            let loc = Location::Memory(GPR::XzrSp, (i * 8) as i32);
+                            a.emit_str(Size::S64, Location::GPR(PARAM_REGS[i]), loc);
+                            loc
+                        }
+                        _ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 5) * 8) as i32),
+                    };
+                    param_locations.push(loc);
+                }
+
+                // Copy arguments.
+                let mut argalloc = ArgumentRegisterAllocator::default();
+                argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
+                let mut caller_stack_offset: i32 = 0;
+                for (i, ty) in sig.params().iter().enumerate() {
+                    let prev_loc = param_locations[i];
+                    let targ = match argalloc.next(*ty, calling_convention) {
+                        Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr),
+                        Some(ARM64Register::NEON(neon)) => Location::SIMD(neon),
+                        None => {
+                            // No register can be allocated. Put this argument on the stack.
+                            a.emit_ldr(Size::S64, Location::GPR(GPR::X20), prev_loc);
+                            a.emit_str(
+                                Size::S64,
+                                Location::GPR(GPR::X20),
+                                Location::Memory(
+                                    GPR::XzrSp,
+                                    stack_offset + 8 + caller_stack_offset,
+                                ),
+                            );
+                            caller_stack_offset += 8;
+                            continue;
+                        }
+                    };
+                    a.emit_ldr(Size::S64, targ, prev_loc);
+                }
+
+                // Restore stack pointer.
+                if stack_offset > 0 {
+                    if stack_offset < 256 {
+                        a.emit_add(
+                            Size::S64,
+                            Location::GPR(GPR::XzrSp),
+                            Location::Imm8(stack_offset as u8),
+                            Location::GPR(GPR::XzrSp),
+                        );
+                    } else {
+                        a.emit_mov_imm(Location::GPR(GPR::X16), stack_offset as u64);
+                        a.emit_add(
+                            Size::S64,
+                            Location::GPR(GPR::XzrSp),
+                            Location::GPR(GPR::X16),
+                            Location::GPR(GPR::XzrSp),
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    // Emits a tail call trampoline that loads the address of the target import function
+    // from Ctx and jumps to it.
+
+    let offset = vmoffsets.vmctx_vmfunction_import(index);
+    // for ldr, offset needs to be a multiple of 8, which often is not
+    // so use ldur, but then offset is limited to -256 .. +255.
+ let offset = if offset > 255 {
+ a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
+ a.emit_add(
+ Size::S64,
+ Location::GPR(GPR::X0),
+ Location::GPR(GPR::X16),
+ Location::GPR(GPR::X0),
+ );
+ 0
+ } else {
+ offset
+ };
+ match calling_convention {
+ _ => {
+ a.emit_ldur(
+ Size::S64,
+ Location::GPR(GPR::X16),
+ GPR::X0,
+ offset as i32, // function pointer
+ );
+ a.emit_ldur(
+ Size::S64,
+ Location::GPR(GPR::X0),
+ GPR::X0,
+ offset as i32 + 8, // target vmctx
+ );
+ }
+ }
+ a.emit_b_register(GPR::X16);
+
 let section_body = SectionBody::new_with_vec(a.finalize().unwrap().to_vec());

 CustomSection {
diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs
index 71f1b9128ba..6a8c8bce746 100644
--- a/lib/compiler-singlepass/src/emitter_x64.rs
+++ b/lib/compiler-singlepass/src/emitter_x64.rs
@@ -281,7 +281,7 @@ pub trait EmitterX64 {
 }

 fn arch_supports_canonicalize_nan(&self) -> bool {
- true
+ false // no need to canonicalize, the default form is already the canonical one
 }

 fn arch_requires_indirect_call_trampoline(&self) -> bool {
diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs
index 419b3612b06..1113ebe878c 100644
--- a/lib/compiler-singlepass/src/machine.rs
+++ b/lib/compiler-singlepass/src/machine.rs
@@ -104,6 +104,8 @@ pub trait Machine {
 fn push_used_simd(&mut self);
 /// Pop used simd regs to the stack
 fn pop_used_simd(&mut self);
+ /// Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64, for example)
+ fn round_stack_adjust(&self, value: usize) -> usize;
 /// Set the source location of the Wasm to the given offset.
 fn set_srcloc(&mut self, offset: u32);
 /// Marks each address in the code range emitted by `f` with the trap code `code`.
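The new `round_stack_adjust` hook is the ARM64-motivated piece here: AAPCS64 requires SP to stay 16-byte aligned, while singlepass otherwise works in 8-byte slots. A minimal standalone sketch of the rounding the ARM64 implementation below performs (the x86-64 implementation simply returns the value unchanged):

    // Round a stack adjustment up to the next multiple of 16 bytes.
    fn round_stack_adjust(value: usize) -> usize {
        if value & 0xf != 0 {
            ((value >> 4) + 1) << 4
        } else {
            value
        }
    }

    fn main() {
        assert_eq!(round_stack_adjust(8), 16); // one 8-byte slot gets padded
        assert_eq!(round_stack_adjust(16), 16); // already aligned
        assert_eq!(round_stack_adjust(24), 32);
    }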
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index 017224bede7..e2d3d8cf06a 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -462,7 +462,7 @@ impl MachineARM64 {
 match (sz, src) {
 (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => {
 let offset = if self.pushed {
- 8
+ 0
 } else {
 self.assembler.emit_sub(
 Size::S64,
@@ -470,10 +470,9 @@ impl MachineARM64 {
 Location::Imm8(16),
 Location::GPR(GPR::XzrSp),
 );
- 0
+ 8
 };
- self.assembler
- .emit_str(Size::S64, src, Location::Memory(GPR::XzrSp, offset));
+ self.assembler.emit_stur(Size::S64, src, GPR::XzrSp, offset);
 self.pushed = !self.pushed;
 }
 _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src),
@@ -484,7 +483,7 @@ impl MachineARM64 {
 match (sz, src1, src2) {
 (Size::S64, Location::GPR(_), Location::GPR(_)) => {
 self.assembler
- .emit_stpbd(Size::S64, src1, src2, GPR::XzrSp, 16);
+ .emit_stpdb(Size::S64, src1, src2, GPR::XzrSp, 16);
 }
 _ => {
 self.emit_push(sz, src1);
@@ -499,9 +498,8 @@ impl MachineARM64 {
 fn emit_pop(&mut self, sz: Size, dst: Location) {
 match (sz, dst) {
 (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => {
- let offset = if self.pushed { 0 } else { 8 };
- self.assembler
- .emit_ldr(Size::S64, dst, Location::Memory(GPR::XzrSp, offset));
+ let offset = if self.pushed { 8 } else { 0 };
+ self.assembler.emit_ldur(Size::S64, dst, GPR::XzrSp, offset);
 if self.pushed {
 self.assembler.emit_add(
 Size::S64,
@@ -520,7 +518,7 @@ impl MachineARM64 {
 match (sz, dst1, dst2) {
 (Size::S64, Location::GPR(_), Location::GPR(_)) => {
 self.assembler
- .emit_ldpai(Size::S64, dst1, dst2, GPR::XzrSp, 16);
+ .emit_ldpia(Size::S64, dst1, dst2, GPR::XzrSp, 16);
 }
 _ => {
 self.emit_pop(sz, dst2);
@@ -663,7 +661,12 @@ impl Machine for MachineARM64 {

 fn push_used_simd(&mut self) {
 let used_neons = self.get_used_simd();
- self.adjust_stack((used_neons.len() * 8) as u32);
+ let stack_adjust = if used_neons.len() & 1 == 1 {
+ (used_neons.len() * 8) as u32 + 8
+ } else {
+ (used_neons.len() * 8) as u32
+ };
+ self.adjust_stack(stack_adjust);

 for (i, r) in used_neons.iter().enumerate() {
 self.assembler.emit_str(
@@ -682,12 +685,17 @@ impl Machine for MachineARM64 {
 Location::Memory(GPR::XzrSp, (i * 8) as i32),
 );
 }
- let delta = if (used_neons.len() * 8) < 256 {
- Location::Imm8((used_neons.len() * 8) as u8)
+ let stack_adjust = if used_neons.len() & 1 == 1 {
+ (used_neons.len() * 8) as u32 + 8
+ } else {
+ (used_neons.len() * 8) as u32
+ };
+ let delta = if stack_adjust < 256 {
+ Location::Imm8(stack_adjust as u8)
 } else {
 let tmp = self.pick_temp_gpr().unwrap();
 self.assembler
- .emit_mov_imm(Location::GPR(tmp), (used_neons.len() * 8) as u64);
+ .emit_mov_imm(Location::GPR(tmp), stack_adjust as u64);
 Location::GPR(tmp)
 };
 self.assembler.emit_add(
@@ -758,9 +766,18 @@ impl Machine for MachineARM64 {
 self.instructions_address_map.clone()
 }

+ // Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64, for example)
+ fn round_stack_adjust(&self, value: usize) -> usize {
+ if value & 0xf != 0 {
+ ((value >> 4) + 1) << 4
+ } else {
+ value
+ }
+ }
+
 // Memory location for a local on the stack
 fn local_on_stack(&mut self, stack_offset: i32) -> Location {
- Location::Memory(GPR::X27, -stack_offset)
+ Location::Memory(GPR::X29, -stack_offset)
 }

 // Adjust stack for locals
@@ -800,12 +817,17 @@ impl Machine for MachineARM64 {
 fn push_callee_saved(&mut self) {}
 fn pop_callee_saved(&mut self)
{} fn pop_stack_locals(&mut self, delta_stack_offset: u32) { - let delta = if delta_stack_offset < 256 { - Location::Imm8(delta_stack_offset as u8) + let real_delta = if delta_stack_offset & 15 != 0 { + delta_stack_offset + 8 + } else { + delta_stack_offset + }; + let delta = if real_delta < 256 { + Location::Imm8(real_delta as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + .emit_mov_imm(Location::GPR(tmp), real_delta as u64); Location::GPR(tmp) }; self.assembler.emit_add( @@ -838,7 +860,7 @@ impl Machine for MachineARM64 { // GPR Reg used for local pointer on the stack fn local_pointer(&self) -> GPR { - GPR::X27 + GPR::X29 } // Determine whether a local should be allocated on the stack. @@ -858,21 +880,21 @@ impl Machine for MachineARM64 { 5 => Location::GPR(GPR::X23), 6 => Location::GPR(GPR::X24), 7 => Location::GPR(GPR::X25), - _ => Location::Memory(GPR::X27, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)), + _ => Location::Memory(GPR::X29, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)), } } // Move a local to the stack fn move_local(&mut self, stack_offset: i32, location: Location) { if stack_offset < 256 { self.assembler - .emit_stur(Size::S64, location, GPR::X27, -stack_offset); + .emit_stur(Size::S64, location, GPR::X29, -stack_offset); } else { let tmp = self.pick_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), stack_offset as u64); self.assembler.emit_sub( Size::S64, - Location::GPR(GPR::X27), + Location::GPR(GPR::X29), Location::GPR(tmp), Location::GPR(tmp), ); @@ -898,7 +920,7 @@ impl Machine for MachineARM64 { 5 => Location::GPR(GPR::X5), 6 => Location::GPR(GPR::X6), 7 => Location::GPR(GPR::X7), - _ => Location::Memory(GPR::X27, (16 + (idx - 8) * 8) as i32), + _ => Location::Memory(GPR::X29, (16 + (idx - 8) * 8) as i32), }, } } @@ -940,13 +962,27 @@ impl Machine for MachineARM64 { size, source, dest ), }, - Location::Imm8(_) | Location::Imm32(_) | Location::Imm64(_) => match dest { + Location::Imm8(_) => match dest { Location::GPR(_) => self.assembler.emit_mov(size, source, dest), _ => panic!( "singlepass can't emit move_location {:?} {:?} => {:?}", size, source, dest ), }, + Location::Imm32(val) => match dest { + Location::GPR(_) => self.assembler.emit_mov_imm(dest, val as u64), + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Imm64(val) => match dest { + Location::GPR(_) => self.assembler.emit_mov_imm(dest, val), + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, Location::Memory(addr, offs) => match dest { Location::GPR(_) => { if self.offset_is_ok(size, offs) { @@ -1007,20 +1043,27 @@ impl Machine for MachineARM64 { } // Restore save_area fn restore_saved_area(&mut self, saved_area_offset: i32) { - if saved_area_offset < 256 { + let real_delta = if saved_area_offset & 15 != 0 { + self.pushed = true; + saved_area_offset + 8 + } else { + self.pushed = false; + saved_area_offset + }; + if real_delta < 256 { self.assembler.emit_sub( Size::S64, - Location::GPR(GPR::X27), - Location::Imm8(saved_area_offset as u8), + Location::GPR(GPR::X29), + Location::Imm8(real_delta as u8), Location::GPR(GPR::XzrSp), ); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), saved_area_offset as u64); + .emit_mov_imm(Location::GPR(tmp), real_delta as u64); self.assembler.emit_sub( Size::S64, - 
Location::GPR(GPR::X27),
+ Location::GPR(GPR::X29),
 Location::GPR(tmp),
 Location::GPR(GPR::XzrSp),
 );
@@ -1050,21 +1093,28 @@ impl Machine for MachineARM64 {
 }

 fn emit_function_prolog(&mut self) {
- self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30)); // save LR too
- self.move_location(
+ self.emit_double_push(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); // save LR too
+ self.emit_double_push(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
+ // cannot use mov, because XSP is XZR there. Need to use ADD with #0
+ self.assembler.emit_add(
 Size::S64,
 Location::GPR(GPR::XzrSp),
- Location::GPR(GPR::X27),
+ Location::Imm8(0),
+ Location::GPR(GPR::X29),
 );
 }

 fn emit_function_epilog(&mut self) {
- self.move_location(
+ // cannot use mov, because XSP is XZR there. Need to use ADD with #0
+ self.assembler.emit_add(
 Size::S64,
- Location::GPR(GPR::X27),
+ Location::GPR(GPR::X29),
+ Location::Imm8(0),
 Location::GPR(GPR::XzrSp),
 );
- self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X30));
+ self.pushed = false; // SP is restored, consider it aligned
+ self.emit_double_pop(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
+ self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30));
 }

 fn emit_function_return_value(&mut self, ty: WpType, canonicalize: bool, loc: Location) {
@@ -1113,7 +1163,7 @@ impl Machine for MachineARM64 {
 self.assembler.emit_call_label(label);
 }
 fn get_gpr_for_ret(&self) -> GPR {
- GPR::X26
+ GPR::X0
 }
 fn get_simd_for_ret(&self) -> NEON {
 NEON::V0
diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs
index e935ee864b8..59a28f9f4ff 100644
--- a/lib/compiler-singlepass/src/machine_x64.rs
+++ b/lib/compiler-singlepass/src/machine_x64.rs
@@ -1774,6 +1774,11 @@ impl Machine for MachineX86_64 {
 Location::Memory(GPR::RBP, -stack_offset)
 }

+ // Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64, for example; a no-op on x86-64)
+ fn round_stack_adjust(&self, value: usize) -> usize {
+ value
+ }
+
 // Adjust stack for locals
 fn adjust_stack(&mut self, delta_stack_offset: u32) {
 self.assembler.emit_sub(

From a57c422ad9c09063b39f13a293bed91986518e15 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Thu, 16 Dec 2021 18:53:11 +0100
Subject: [PATCH 08/34] improv(compiler) Some work on register allocation (60
 tests pass now)

---
 lib/compiler-singlepass/src/emitter_arm64.rs | 123 ++++++++++-
 lib/compiler-singlepass/src/machine_arm64.rs | 208 +++++++++++++------
 2 files changed, 264 insertions(+), 67 deletions(-)

diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index ec0de5388f6..6fe232c009c 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -123,6 +123,9 @@ pub trait EmitterARM64 {

 fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
 fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+ fn emit_mul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+ fn emit_adds(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+ fn emit_subs(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

 fn emit_add2(&mut self, sz: Size, src: Location, dst: Location);
 fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location);
@@ -139,6 +142,7 @@ pub trait EmitterARM64 {

 fn emit_ret(&mut self);
 fn emit_udf(&mut self);
+ fn
emit_dmb(&mut self); fn arch_supports_canonicalize_nan(&self) -> bool { true @@ -184,18 +188,27 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } dynasm!(self ; str X(reg), [X(addr), disp]); } (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x3) != 0 { + unreachable!(); + } dynasm!(self ; str W(reg), [X(addr), disp]); } (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x1) != 0 { + unreachable!(); + } dynasm!(self ; strh W(reg), [X(addr), disp]); } (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { @@ -208,6 +221,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } dynasm!(self ; str D(reg), [X(addr), disp]); } _ => unreachable!(), @@ -219,18 +235,27 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } dynasm!(self ; ldr X(reg), [X(addr), disp]); } (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x3) != 0 { + unreachable!(); + } dynasm!(self ; ldr W(reg), [X(addr), disp]); } (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x1) != 0 { + unreachable!(); + } dynasm!(self ; ldrh W(reg), [X(addr), disp]); } (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { @@ -243,6 +268,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } dynasm!(self ; ldr D(reg), [X(addr), disp]); } _ => unreachable!(), @@ -580,7 +608,95 @@ impl EmitterARM64 for Assembler { dynasm!(self ; sub W(dst), W(src1), imm as u32); } _ => panic!( - "singlepass can't emit ADD {:?} {:?} {:?} {:?}", + "singlepass can't emit SUB {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_mul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mul X(dst), X(src1), X(src2)); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; mul W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit MUL {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_adds(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + 
dynasm!(self ; adds X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit ADD.S {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_subs(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; subs X(dst), X(src1), X(src2)); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; subs W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; subs X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; subs W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit SUB.S {:?} {:?} {:?} {:?}", sz, src1, src2, dst ), } @@ -722,11 +838,14 @@ impl EmitterARM64 for Assembler { fn emit_udf(&mut self) { dynasm!(self ; udf 0); } + fn emit_dmb(&mut self) { + dynasm!(self ; dmb ish); + } } pub fn gen_std_trampoline_arm64( sig: &FunctionType, - calling_convention: CallingConvention, + _calling_convention: CallingConvention, ) -> FunctionBody { let mut a = Assembler::new(0); diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index e2d3d8cf06a..a13cbba90f2 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1,18 +1,17 @@ use crate::arm64_decl::new_machine_state; -use crate::arm64_decl::{ARM64Register, ArgumentRegisterAllocator, GPR, NEON}; +use crate::arm64_decl::{GPR, NEON}; use crate::common_decl::*; use crate::emitter_arm64::*; use crate::location::Location as AbstractLocation; use crate::machine::*; -use dynasm::dynasm; use dynasmrt::{aarch64::Aarch64Relocation, VecAssembler}; use std::collections::HashSet; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ - CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, InstructionAddressMap, - Relocation, 
RelocationKind, RelocationTarget, SectionBody, SourceLoc, TrapInformation,
+ CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, Relocation,
+ RelocationKind, RelocationTarget, SourceLoc, TrapInformation,
 };
-use wasmer_types::{FunctionIndex, FunctionType, Type};
+use wasmer_types::{FunctionIndex, FunctionType};
 use wasmer_vm::{TrapCode, VMOffsets};

 type Assembler = VecAssembler<Aarch64Relocation>;
@@ -51,48 +50,87 @@ impl MachineARM64 {
 sz: Size,
 src: Location,
 dst: Location,
- ) {
- match (src, dst) {
- (Location::GPR(_), Location::GPR(_)) => {
- op(&mut self.assembler, sz, src, dst);
- }
- (Location::Memory(_, _), Location::Memory(_, _)) => {
- let temp_src = self.acquire_temp_gpr().unwrap();
- let temp_dst = self.acquire_temp_gpr().unwrap();
- self.move_location(sz, src, Location::GPR(temp_src));
- self.move_location(sz, dst, Location::GPR(temp_dst));
- op(
- &mut self.assembler,
- sz,
- Location::GPR(temp_src),
- Location::GPR(temp_dst),
- );
- self.release_gpr(temp_dst);
- self.release_gpr(temp_src);
+ putback: bool,
+ ) {
+ let mut temps = vec![];
+ let src = self.location_to_reg(sz, src, &mut temps, false);
+ let dest = self.location_to_reg(sz, dst, &mut temps, false);
+ op(&mut self.assembler, sz, src, dest);
+ if dst != dest && putback {
+ self.move_location(sz, dest, dst);
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
+ }
+ fn location_to_reg(
+ &mut self,
+ sz: Size,
+ src: Location,
+ temps: &mut Vec<GPR>,
+ allow_imm8: bool,
+ ) -> Location {
+ match src {
+ Location::GPR(_) => src,
+ Location::Imm8(val) => {
+ if allow_imm8 {
+ src
+ } else {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ temps.push(tmp.clone());
+ self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64);
+ Location::GPR(tmp)
+ }
 }
- /*(Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => {
+ Location::Imm32(val) => {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ temps.push(tmp.clone());
+ self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64);
+ Location::GPR(tmp)
 }
- (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,*/
- (Location::Imm64(_), Location::Memory(_, _))
- | (Location::Imm64(_), Location::GPR(_))
- | (Location::Imm32(_), Location::Memory(_, _))
- | (Location::Imm32(_), Location::GPR(_)) => {
- let temp = self.acquire_temp_gpr().unwrap();
- self.move_location(sz, src, Location::GPR(temp));
- op(&mut self.assembler, sz, Location::GPR(temp), dst);
- self.release_gpr(temp);
+ Location::Imm64(val) => {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ temps.push(tmp.clone());
+ self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64);
+ Location::GPR(tmp)
 }
- (_, Location::SIMD(_)) => {
- let temp = self.acquire_temp_gpr().unwrap();
- self.move_location(sz, src, Location::GPR(temp));
- op(&mut self.assembler, sz, Location::GPR(temp), dst);
- self.release_gpr(temp);
+ Location::Memory(reg, val) => {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ temps.push(tmp.clone());
+ if val > -256 && val < 256 {
+ self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val);
+ } else {
+ self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64);
+ self.assembler.emit_ldr(
+ sz,
+ Location::GPR(tmp),
+ Location::Memory2(reg, tmp, Multiplier::One, 0),
+ );
+ }
+ Location::GPR(tmp)
 }
- _ => panic!(
- "singlepass can't emit relaxed_binop {:?} {:?} => {:?}",
- sz, src, dst
- ),
- };
+ _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src),
+ }
+ }
+ fn emit_relaxed_binop3(
+ &mut self,
+ op: fn(&mut Assembler, Size, Location, Location, Location),
+ sz: Size,
+ src1: Location,
+ src2: Location,
+ dst: Location,
+ ) {
+ let mut temps = vec![];
+ let src1 = self.location_to_reg(sz, src1, &mut temps, false);
+ let src2 = self.location_to_reg(sz, src2, &mut temps, true);
+ let dest = self.location_to_reg(sz, dst, &mut temps, false);
+ op(&mut self.assembler, sz, src1, src2, dest);
+ if dst != dest {
+ self.move_location(sz, dest, dst);
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
 }
 /// I32 binary operation with both operands popped from the virtual stack.
 fn emit_binop_i32(
@@ -105,11 +143,11 @@ impl MachineARM64 {
 if loc_a != ret {
 let tmp = self.acquire_temp_gpr().unwrap();
 self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp));
- self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp));
+ self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp), true);
 self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret);
 self.release_gpr(tmp);
 } else {
- self.emit_relaxed_binop(f, Size::S32, loc_b, ret);
+ self.emit_relaxed_binop(f, Size::S32, loc_b, ret, true);
 }
 }
 /// I64 binary operation with both operands popped from the virtual stack.
@@ -123,11 +161,11 @@ impl MachineARM64 {
 if loc_a != ret {
 let tmp = self.acquire_temp_gpr().unwrap();
 self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp));
- self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp));
+ self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp), true);
 self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret);
 self.release_gpr(tmp);
 } else {
- self.emit_relaxed_binop(f, Size::S64, loc_b, ret);
+ self.emit_relaxed_binop(f, Size::S64, loc_b, ret, true);
 }
 }
 /// I64 comparison with.
@@ -193,6 +231,7 @@ impl MachineARM64 {
 Size::S64,
 Location::Memory(self.get_vmctx_reg(), offset),
 Location::GPR(tmp_addr),
+ true,
 );
 (Location::Memory(tmp_addr, 0), Location::Memory(tmp_addr, 8))
 } else {
@@ -559,9 +598,7 @@ impl Machine for MachineARM64 {

 fn pick_gpr(&self) -> Option<GPR> {
 use GPR::*;
- static REGS: &[GPR] = &[
- X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15,
- ];
+ static REGS: &[GPR] = &[X6, X7, X9, X10, X11, X12, X13, X14, X15, X22, X23, X24, X25];
 for r in REGS {
 if !self.used_gprs.contains(r) {
 return Some(*r);
@@ -573,7 +610,7 @@ impl Machine for MachineARM64 {

 // Picks an unused general purpose register for internal temporary use.
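+ // Note: this pool is disjoint from the allocatable set in pick_gpr above,
+ // so temporaries handed out here can never alias an allocated value register.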
 fn pick_temp_gpr(&self) -> Option<GPR> {
 use GPR::*;
- static REGS: &[GPR] = &[X0, X1, X2, X3, X4, X5, X6, X7];
+ static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X8];
 for r in REGS {
 if !self.used_gprs.contains(r) {
 return Some(*r);
@@ -1179,7 +1216,15 @@ impl Machine for MachineARM64 {
 }

 fn emit_call_location(&mut self, location: Location) {
- unimplemented!();
+ let mut temps = vec![];
+ let loc = self.location_to_reg(Size::S64, location, &mut temps, false);
+ match loc {
+ Location::GPR(reg) => self.assembler.emit_call_register(reg),
+ _ => unreachable!(),
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
 }

 fn location_address(&mut self, size: Size, source: Location, dest: Location) {
@@ -1199,17 +1244,41 @@ impl Machine for MachineARM64 {
 unimplemented!();
 }
 // math
- fn location_add(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
- unimplemented!();
+ fn location_add(&mut self, size: Size, source: Location, dest: Location, flags: bool) {
+ let mut temps = vec![];
+ let src = self.location_to_reg(size, source, &mut temps, true);
+ let dst = self.location_to_reg(size, dest, &mut temps, false);
+ if flags {
+ self.assembler.emit_adds(size, dst, src, dst);
+ } else {
+ self.assembler.emit_add(size, dst, src, dst);
+ }
+ if dst != dest {
+ self.move_location(size, dst, dest);
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
 }
- fn location_sub(&mut self, size: Size, source: Location, dest: Location, _flags: bool) {
- unimplemented!();
+ fn location_sub(&mut self, size: Size, source: Location, dest: Location, flags: bool) {
+ let mut temps = vec![];
+ let src = self.location_to_reg(size, source, &mut temps, true);
+ let dst = self.location_to_reg(size, dest, &mut temps, false);
+ if flags {
+ self.assembler.emit_subs(size, dst, src, dst);
+ } else {
+ self.assembler.emit_sub(size, dst, src, dst);
+ }
+ if dst != dest {
+ self.move_location(size, dst, dest);
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
 }
 fn location_cmp(&mut self, size: Size, source: Location, dest: Location) {
- unimplemented!();
+ self.emit_relaxed_binop(Assembler::emit_cmp, size, source, dest, false);
 }
- // (un)conditionnal jmp
- // (un)conditionnal jmp
 fn jmp_unconditionnal(&mut self, label: Label) {
 self.assembler.emit_b_label(label);
 }
@@ -1253,7 +1322,7 @@ impl Machine for MachineARM64 {
 }

 fn emit_memory_fence(&mut self) {
- // nothing on x86_64
+ self.assembler.emit_dmb();
 }

 fn location_neg(
 &mut self,
 size_val: Size, // size of src
 signed: bool,
 source: Location,
 size_op: Size,
 dest: Location,
 ) {
 unimplemented!();
 }

 fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) {
- unimplemented!();
+ let tmp = self.acquire_temp_gpr().unwrap();
+ self.assembler
+ .emit_mov_imm(Location::GPR(tmp), imm32 as u64);
+ self.assembler.emit_mul(
+ size,
+ Location::GPR(gpr),
+ Location::GPR(tmp),
+ Location::GPR(gpr),
+ );
+ self.release_gpr(tmp);
 }

 // relaxed binop based...
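+ // These "relaxed" helpers accept any Location: operands are routed through
+ // location_to_reg, so immediates and memory operands are first materialized
+ // in temporary registers before the plain register-to-register op is emitted.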
fn emit_relaxed_mov(&mut self, sz: Size, src: Location, dst: Location) { - unimplemented!(); + self.emit_relaxed_binop(Assembler::emit_mov, sz, src, dst, true); } fn emit_relaxed_cmp(&mut self, sz: Size, src: Location, dst: Location) { - unimplemented!(); + self.emit_relaxed_binop(Assembler::emit_cmp, sz, src, dst, false); } fn emit_relaxed_zero_extension( &mut self, @@ -1298,10 +1376,10 @@ impl Machine for MachineARM64 { } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_add2, loc_a, loc_b, ret); + self.emit_relaxed_binop3(Assembler::emit_add, Size::S32, loc_a, loc_b, ret); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - unimplemented!(); + self.emit_relaxed_binop3(Assembler::emit_sub, Size::S32, loc_a, loc_b, ret); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { unimplemented!(); From ae1925e9570f051d603c59ed16076cd05763b342 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 10:08:35 +0100 Subject: [PATCH 09/34] improv(compiler) Cleaned up warnings --- lib/compiler-singlepass/src/machine_arm64.rs | 1751 ++++++++---------- 1 file changed, 806 insertions(+), 945 deletions(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index a13cbba90f2..4aa3dbe9568 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -133,7 +133,7 @@ impl MachineARM64 { } } /// I32 binary operation with both operands popped from the virtual stack. - fn emit_binop_i32( + /*fn emit_binop_i32( &mut self, f: fn(&mut Assembler, Size, Location, Location), loc_a: Location, @@ -149,9 +149,9 @@ impl MachineARM64 { } else { self.emit_relaxed_binop(f, Size::S32, loc_b, ret, true); } - } + }*/ /// I64 binary operation with both operands popped from the virtual stack. - fn emit_binop_i64( + /*fn emit_binop_i64( &mut self, f: fn(&mut Assembler, Size, Location, Location), loc_a: Location, @@ -167,9 +167,9 @@ impl MachineARM64 { } else { self.emit_relaxed_binop(f, Size::S64, loc_b, ret, true); } - } + }*/ /// I64 comparison with. - fn emit_cmpop_i64_dynamic_b( + /*fn emit_cmpop_i64_dynamic_b( &mut self, _c: Condition, _loc_a: Location, @@ -177,9 +177,9 @@ impl MachineARM64 { _ret: Location, ) { unimplemented!(); - } + }*/ /// I64 shift with both operands popped from the virtual stack. - fn emit_shift_i64( + /*fn emit_shift_i64( &mut self, _f: fn(&mut Assembler, Size, Location, Location), _loc_a: Location, @@ -187,9 +187,9 @@ impl MachineARM64 { _ret: Location, ) { unimplemented!(); - } + }*/ /// I32 comparison with. - fn emit_cmpop_i32_dynamic_b( + /*fn emit_cmpop_i32_dynamic_b( &mut self, _c: Condition, _loc_a: Location, @@ -197,9 +197,9 @@ impl MachineARM64 { _ret: Location, ) { unimplemented!(); - } + }*/ /// I32 shift with both operands popped from the virtual stack. - fn emit_shift_i32( + /*fn emit_shift_i32( &mut self, _f: fn(&mut Assembler, Size, Location, Location), _loc_a: Location, @@ -207,7 +207,7 @@ impl MachineARM64 { _ret: Location, ) { unimplemented!(); - } + }*/ fn memory_op( &mut self, @@ -341,7 +341,7 @@ impl MachineARM64 { self.release_gpr(tmp_addr); } - fn emit_compare_and_swap( + /*fn emit_compare_and_swap( &mut self, _loc: Location, _target: Location, @@ -357,126 +357,7 @@ impl MachineARM64 { _cb: F, ) { unimplemented!(); - } - - // Checks for underflow/overflow/nan. 
- fn emit_f32_int_conv_check( - &mut self, - _reg: NEON, - _lower_bound: f32, - _upper_bound: f32, - _underflow_label: Label, - _overflow_label: Label, - _nan_label: Label, - _succeed_label: Label, - ) { - unimplemented!(); - } - - // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32. - fn emit_f32_int_conv_check_trap(&mut self, reg: NEON, lower_bound: f32, upper_bound: f32) { - unimplemented!(); - } - fn emit_f32_int_conv_check_sat< - F1: FnOnce(&mut Self), - F2: FnOnce(&mut Self), - F3: FnOnce(&mut Self), - F4: FnOnce(&mut Self), - >( - &mut self, - _reg: NEON, - _lower_bound: f32, - _upper_bound: f32, - _underflow_cb: F1, - _overflow_cb: F2, - _nan_cb: Option, - _convert_cb: F4, - ) { - unimplemented!(); - } - // Checks for underflow/overflow/nan. - fn emit_f64_int_conv_check( - &mut self, - _reg: NEON, - _lower_bound: f64, - _upper_bound: f64, - _underflow_label: Label, - _overflow_label: Label, - _nan_label: Label, - _succeed_label: Label, - ) { - unimplemented!(); - } - // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64.. return offset/len for trap_overflow and trap_badconv - fn emit_f64_int_conv_check_trap(&mut self, reg: NEON, lower_bound: f64, upper_bound: f64) { - unimplemented!(); - } - fn emit_f64_int_conv_check_sat< - F1: FnOnce(&mut Self), - F2: FnOnce(&mut Self), - F3: FnOnce(&mut Self), - F4: FnOnce(&mut Self), - >( - &mut self, - _reg: NEON, - _lower_bound: f64, - _upper_bound: f64, - _underflow_cb: F1, - _overflow_cb: F2, - _nan_cb: Option, - _convert_cb: F4, - ) { - unimplemented!(); - } - - fn convert_i64_f64_u_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f64_u_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f64_s_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f64_s_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f64_s_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f64_s_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f64_u_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f64_u_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f32_u_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f32_u_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f32_s_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i64_f32_s_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f32_s_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f32_s_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f32_u_s(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } - fn convert_i32_f32_u_u(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); - } + }*/ fn offset_is_ok(&self, size: Size, offset: i32) -> bool { if offset < 0 { @@ -888,7 +769,7 @@ impl Machine for MachineARM64 { } // Zero a location that is 32bits - fn zero_location(&mut self, size: Size, location: Location) { + fn zero_location(&mut self, _size: Size, location: Location) { match location { Location::GPR(_) => self.assembler.emit_mov_imm(location, 0u64), _ => unreachable!(), @@ -941,7 +822,7 @@ impl Machine for MachineARM64 { } // List of register to save, depending on 
the CallingConvention - fn list_to_save(&self, calling_convention: CallingConvention) -> Vec { + fn list_to_save(&self, _calling_convention: CallingConvention) -> Vec { vec![] } @@ -1063,19 +944,19 @@ impl Machine for MachineARM64 { // move a location to another fn move_location_extend( &mut self, - size_val: Size, - signed: bool, - source: Location, - size_op: Size, - dest: Location, + _size_val: Size, + _signed: bool, + _source: Location, + _size_op: Size, + _dest: Location, ) { unimplemented!(); } - fn load_address(&mut self, size: Size, reg: Location, mem: Location) { + fn load_address(&mut self, _size: Size, _reg: Location, _mem: Location) { unimplemented!(); } // Init the stack loc counter - fn init_stack_loc(&mut self, init_stack_loc_cnt: u64, last_stack_loc: Location) { + fn init_stack_loc(&mut self, _init_stack_loc_cnt: u64, _last_stack_loc: Location) { unimplemented!(); } // Restore save_area @@ -1177,7 +1058,7 @@ impl Machine for MachineARM64 { fn arch_supports_canonicalize_nan(&self) -> bool { self.assembler.arch_supports_canonicalize_nan() } - fn canonicalize_nan(&mut self, sz: Size, input: Location, output: Location) { + fn canonicalize_nan(&mut self, _sz: Size, _input: Location, _output: Location) { unimplemented!(); } @@ -1227,20 +1108,20 @@ impl Machine for MachineARM64 { } } - fn location_address(&mut self, size: Size, source: Location, dest: Location) { + fn location_address(&mut self, _size: Size, _source: Location, _dest: Location) { unimplemented!(); } // logic - fn location_and(&mut self, size: Size, source: Location, dest: Location, _flags: bool) { + fn location_and(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { unimplemented!(); } - fn location_xor(&mut self, size: Size, source: Location, dest: Location, _flags: bool) { + fn location_xor(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { unimplemented!(); } - fn location_or(&mut self, size: Size, source: Location, dest: Location, _flags: bool) { + fn location_or(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { unimplemented!(); } - fn location_test(&mut self, size: Size, source: Location, dest: Location) { + fn location_test(&mut self, _size: Size, _source: Location, _dest: Location) { unimplemented!(); } // math @@ -1302,7 +1183,7 @@ impl Machine for MachineARM64 { } // jmp table - fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) { + fn emit_jmp_to_jumptable(&mut self, _label: Label, _cond: Location) { unimplemented!(); } @@ -1327,11 +1208,11 @@ impl Machine for MachineARM64 { fn location_neg( &mut self, - size_val: Size, // size of src - signed: bool, - source: Location, - size_op: Size, - dest: Location, + _size_val: Size, // size of src + _signed: bool, + _source: Location, + _size_op: Size, + _dest: Location, ) { unimplemented!(); } @@ -1358,19 +1239,19 @@ impl Machine for MachineARM64 { } fn emit_relaxed_zero_extension( &mut self, - sz_src: Size, - src: Location, - sz_dst: Size, - dst: Location, + _sz_src: Size, + _src: Location, + _sz_dst: Size, + _dst: Location, ) { unimplemented!(); } fn emit_relaxed_sign_extension( &mut self, - sz_src: Size, - src: Location, - sz_dst: Size, - dst: Location, + _sz_src: Size, + _src: Location, + _sz_dst: Size, + _dst: Location, ) { unimplemented!(); } @@ -1382,105 +1263,105 @@ impl Machine for MachineARM64 { self.emit_relaxed_binop3(Assembler::emit_sub, Size::S32, loc_a, loc_b, ret); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - 
unimplemented!(); + self.emit_relaxed_binop3(Assembler::emit_mul, Size::S32, loc_a, loc_b, ret); } fn emit_binop_udiv32( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_sdiv32( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_urem32( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_srem32( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } - fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_and32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_or32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_xor32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_gt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_le_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_lt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_ge_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_gt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_le_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_lt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_clz(&mut self, loc: Location, ret: Location) { + fn 
i32_clz(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i32_ctz(&mut self, loc: Location, ret: Location) { + fn i32_ctz(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i32_popcnt(&mut self, loc: Location, ret: Location) { + fn i32_popcnt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_shl(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_shr(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_sar(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_rol(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i32_ror(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } fn i32_load( @@ -1605,406 +1486,406 @@ impl Machine for MachineARM64 { } fn i32_atomic_load( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_atomic_load_8u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_atomic_load_16u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_save( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_save_8( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_save_16( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); 
} fn i32_atomic_save( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_atomic_save_8( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i32_atomic_save_16( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Add with i32 fn i32_atomic_add( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Add with u8 fn i32_atomic_add_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Add with u16 fn i32_atomic_add_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Sub with i32 fn i32_atomic_sub( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Sub with u8 fn i32_atomic_sub_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, - ) { - unimplemented!(); - } - // i32 atomic Sub with u16 + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with u16 fn i32_atomic_sub_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - 
imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic And with i32 fn i32_atomic_and( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic And with u8 fn i32_atomic_and_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic And with u16 fn i32_atomic_and_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Or with i32 fn i32_atomic_or( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Or with u8 fn i32_atomic_or_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Or with u16 fn i32_atomic_or_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Xor with i32 fn i32_atomic_xor( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Xor with u8 fn i32_atomic_xor_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + 
_memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Xor with u16 fn i32_atomic_xor_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with i32 fn i32_atomic_xchg( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with u8 fn i32_atomic_xchg_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with u16 fn i32_atomic_xchg_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with i32 fn i32_atomic_cmpxchg( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with u8 fn i32_atomic_cmpxchg_8u( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i32 atomic Exchange with u16 fn i32_atomic_cmpxchg_16u( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } @@ -2048,958 +1929,938 @@ impl Machine for MachineARM64 { self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48); } - fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn 
emit_binop_add64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_sub64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_mul64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } fn emit_binop_udiv64( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_sdiv64( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_urem64( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, ) -> usize { unimplemented!(); } fn emit_binop_srem64( &mut self, - loc_a: Location, - loc_b: Location, - ret: Location, - integer_division_by_zero: Label, - ) -> usize { - unimplemented!(); + _loc_a: Location, + _loc_b: Location, + _ret: Location, + _integer_division_by_zero: Label, + ) -> usize { + unimplemented!(); } - fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_and64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_or64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn emit_binop_xor64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_gt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_le_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_lt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_ge_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_gt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_le_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_lt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { 
unimplemented!(); } - fn i64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_clz(&mut self, loc: Location, ret: Location) { + fn i64_clz(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i64_ctz(&mut self, loc: Location, ret: Location) { + fn i64_ctz(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i64_popcnt(&mut self, loc: Location, ret: Location) { + fn i64_popcnt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_shl(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_shr(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_sar(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_rol(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn i64_ror(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } fn i64_load( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_8u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_8s( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_16u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_16s( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_32u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: 
bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_load_32s( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_load( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_load_8u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_load_16u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_load_32u( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_save( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_save_8( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_save_16( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_save_32( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + 
_imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_save( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_save_8( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_save_16( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn i64_atomic_save_32( &mut self, - value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Add with i64 fn i64_atomic_add( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Add with u8 fn i64_atomic_add_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Add with u16 fn i64_atomic_add_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Add with u32 fn i64_atomic_add_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Sub with i64 fn i64_atomic_sub( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: 
i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Sub with u8 fn i64_atomic_sub_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Sub with u16 fn i64_atomic_sub_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Sub with u32 fn i64_atomic_sub_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic And with i64 fn i64_atomic_and( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic And with u8 fn i64_atomic_and_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic And with u16 fn i64_atomic_and_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic And with u32 fn i64_atomic_and_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Or with i64 fn i64_atomic_or( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + 
_ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Or with u8 fn i64_atomic_or_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Or with u16 fn i64_atomic_or_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Or with u32 fn i64_atomic_or_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic xor with i64 fn i64_atomic_xor( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic xor with u8 fn i64_atomic_xor_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic xor with u16 fn i64_atomic_xor_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic xor with u32 fn i64_atomic_xor_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with i64 fn i64_atomic_xchg( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: 
Label, ) { unimplemented!(); } // i64 atomic Exchange with u8 fn i64_atomic_xchg_8u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with u16 fn i64_atomic_xchg_16u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with u32 fn i64_atomic_xchg_32u( &mut self, - loc: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with i64 fn i64_atomic_cmpxchg( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with u8 fn i64_atomic_cmpxchg_8u( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with u16 fn i64_atomic_cmpxchg_16u( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } // i64 atomic Exchange with u32 fn i64_atomic_cmpxchg_32u( &mut self, - new: Location, - cmp: Location, - target: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn f32_load( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, 
+ _heap_access_oob: Label, ) { unimplemented!(); } fn f32_save( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - canonicalize: bool, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _canonicalize: bool, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn f64_load( &mut self, - addr: Location, - memarg: &MemoryImmediate, - ret: Location, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } fn f64_save( &mut self, - target_value: Location, - memarg: &MemoryImmediate, - target_addr: Location, - canonicalize: bool, - need_check: bool, - imported_memories: bool, - offset: i32, - heap_access_oob: Label, + _target_value: Location, + _memarg: &MemoryImmediate, + _target_addr: Location, + _canonicalize: bool, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, ) { unimplemented!(); } - fn convert_f64_i64(&mut self, loc: Location, signed: bool, ret: Location) { + fn convert_f64_i64(&mut self, _loc: Location, _signed: bool, _ret: Location) { unimplemented!(); } - fn convert_f64_i32(&mut self, loc: Location, signed: bool, ret: Location) { + fn convert_f64_i32(&mut self, _loc: Location, _signed: bool, _ret: Location) { unimplemented!(); } - fn convert_f32_i64(&mut self, loc: Location, signed: bool, ret: Location) { + fn convert_f32_i64(&mut self, _loc: Location, _signed: bool, _ret: Location) { unimplemented!(); } - fn convert_f32_i32(&mut self, loc: Location, signed: bool, ret: Location) { + fn convert_f32_i32(&mut self, _loc: Location, _signed: bool, _ret: Location) { unimplemented!(); } - fn convert_i64_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { - match (signed, sat) { - (false, true) => self.convert_i64_f64_u_s(loc, ret), - (false, false) => self.convert_i64_f64_u_u(loc, ret), - (true, true) => self.convert_i64_f64_s_s(loc, ret), - (true, false) => self.convert_i64_f64_s_u(loc, ret), - } + fn convert_i64_f64(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { + unimplemented!(); } - fn convert_i32_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { - match (signed, sat) { - (false, true) => self.convert_i32_f64_u_s(loc, ret), - (false, false) => self.convert_i32_f64_u_u(loc, ret), - (true, true) => self.convert_i32_f64_s_s(loc, ret), - (true, false) => self.convert_i32_f64_s_u(loc, ret), - } + fn convert_i32_f64(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { + unimplemented!(); } - fn convert_i64_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { - match (signed, sat) { - (false, true) => self.convert_i64_f32_u_s(loc, ret), - (false, false) => self.convert_i64_f32_u_u(loc, ret), - (true, true) => self.convert_i64_f32_s_s(loc, ret), - (true, false) => self.convert_i64_f32_s_u(loc, ret), - } + fn convert_i64_f32(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { + unimplemented!(); } - fn convert_i32_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { - match (signed, sat) { - (false, true) => self.convert_i32_f32_u_s(loc, ret), - (false, false) => 
self.convert_i32_f32_u_u(loc, ret), - (true, true) => self.convert_i32_f32_s_s(loc, ret), - (true, false) => self.convert_i32_f32_s_u(loc, ret), - } + fn convert_i32_f32(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { + unimplemented!(); } - fn convert_f64_f32(&mut self, loc: Location, ret: Location) { + fn convert_f64_f32(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn convert_f32_f64(&mut self, loc: Location, ret: Location) { + fn convert_f32_f64(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_neg(&mut self, loc: Location, ret: Location) { + fn f64_neg(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_abs(&mut self, loc: Location, ret: Location) { + fn f64_abs(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + fn emit_i64_copysign(&mut self, _tmp1: GPR, _tmp2: GPR) { unimplemented!(); } - fn f64_sqrt(&mut self, loc: Location, ret: Location) { + fn f64_sqrt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_trunc(&mut self, loc: Location, ret: Location) { + fn f64_trunc(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_ceil(&mut self, loc: Location, ret: Location) { + fn f64_ceil(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_floor(&mut self, loc: Location, ret: Location) { + fn f64_floor(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_nearest(&mut self, loc: Location, ret: Location) { + fn f64_nearest(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_ge(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_gt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_le(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_lt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_min(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_max(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_add(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_sub(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_mul(&mut self, _loc_a: Location, _loc_b: 
Location, _ret: Location) { unimplemented!(); } - fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f64_div(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_neg(&mut self, loc: Location, ret: Location) { + fn f32_neg(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_abs(&mut self, loc: Location, ret: Location) { + fn f32_abs(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + fn emit_i32_copysign(&mut self, _tmp1: GPR, _tmp2: GPR) { unimplemented!(); } - fn f32_sqrt(&mut self, loc: Location, ret: Location) { + fn f32_sqrt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_trunc(&mut self, loc: Location, ret: Location) { + fn f32_trunc(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_ceil(&mut self, loc: Location, ret: Location) { + fn f32_ceil(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_floor(&mut self, loc: Location, ret: Location) { + fn f32_floor(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_nearest(&mut self, loc: Location, ret: Location) { + fn f32_nearest(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_ge(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_gt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_le(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_lt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_min(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_max(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_add(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_sub(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_mul(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } - fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + fn f32_div(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); } From 67e5f9c0922bbaf7ec90a0c1865f685b88d9e18e Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 10:28:33 +0100 
Subject: [PATCH 10/34] improv(compiler) Added emit_relaxed_ldr64 utility (65 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 13 +++++++++ lib/compiler-singlepass/src/machine_arm64.rs | 28 +++++++++++++++++--- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 6fe232c009c..2d2063c55b7 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -264,6 +264,19 @@ impl EmitterARM64 for Assembler { let disp = disp as u32; dynasm!(self ; ldrb W(reg), [X(addr), disp]); } + (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]); + } (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 4aa3dbe9568..17588e9b3ac 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -132,6 +132,28 @@ impl MachineARM64 { self.release_gpr(r); } } + fn emit_relaxed_ldr64(&mut self, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if offset & 0x7 == 0 { + self.assembler.emit_ldr(Size::S64, dst, src); + } else if offset > -256 && offset < 256 { + self.assembler.emit_ldur(Size::S64, dst, addr, offset); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldr( + Size::S64, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } /// I32 binary operation with both operands popped from the virtual stack. /*fn emit_binop_i32( &mut self, @@ -245,13 +267,11 @@ impl MachineARM64 { let tmp_bound = self.acquire_temp_gpr().unwrap(); // Load base into temporary register. - self.assembler - .emit_ldr(Size::S64, Location::GPR(tmp_base), base_loc); + self.emit_relaxed_ldr64(Location::GPR(tmp_base), base_loc); // Load bound into temporary register, if needed. if need_check { - self.assembler - .emit_ldr(Size::S64, Location::GPR(tmp_bound), bound_loc); + self.emit_relaxed_ldr64(Location::GPR(tmp_bound), bound_loc); // Wasm -> Effective. 
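            // Both loads above go through emit_relaxed_ldr64 instead of a bare
            // LDR because an AArch64 LDR with an immediate offset only encodes a
            // non-negative, 8-byte-scaled displacement. The helper keeps LDR for
            // aligned offsets, falls back to LDUR (signed 9-bit, unscaled) for
            // small unaligned ones, and otherwise materializes the offset in a
            // scratch register and uses register-indexed addressing.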
// Assuming we never underflow - should always be true on Linux/macOS and Windows >=8, From 4a656640e24f65e90d342aae20f9b9d457524506 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 11:16:46 +0100 Subject: [PATCH 11/34] improv(compiler) Fixed emit_mov_imm for 64bits values (67 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 2d2063c55b7..d84ce5d3f43 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -82,16 +82,6 @@ pub enum GPROrMemory { Memory(GPR, i32), } -fn is_immediate_64bit_encodable(value: u64) -> bool { - let offset = value.trailing_zeros() & 0b11_0000; - let masked = 0xffff & (value >> offset); - if (masked << offset) == value { - true - } else { - false - } -} - pub trait EmitterARM64 { fn get_label(&mut self) -> Label; fn get_offset(&self) -> Offset; @@ -536,8 +526,10 @@ impl EmitterARM64 for Assembler { match dst { Location::GPR(dst) => { let dst = dst.into_index() as u32; - if is_immediate_64bit_encodable(val) { - dynasm!(self ; mov W(dst), val); + let offset = val.trailing_zeros() & 48; + let masked = 0xffff & (val >> offset); + if (masked << offset) == val { + dynasm!(self ; movz X(dst), masked as u32, LSL offset); } else { dynasm!(self ; movz W(dst), (val&0xffff) as u32); let val = val >> 16; From 2f6264ac237f601262da266bfc5e163ff956f666 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 11:32:53 +0100 Subject: [PATCH 12/34] improv(compiler) Added emit_jmp_to_jumptable (68 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 21 ++++++++++++++++++++ lib/compiler-singlepass/src/machine_arm64.rs | 21 ++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index d84ce5d3f43..91406fc8543 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -117,6 +117,8 @@ pub trait EmitterARM64 { fn emit_adds(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_subs(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_add_lsl(&mut self, sz: Size, src1: Location, src2: Location, lsl: u32, dst: Location); + fn emit_add2(&mut self, sz: Size, src: Location, dst: Location); fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location); @@ -124,6 +126,7 @@ pub trait EmitterARM64 { fn emit_tst(&mut self, sz: Size, src: Location, dst: Location); fn emit_label(&mut self, label: Label); + fn emit_load_label(&mut self, reg: GPR, label: Label); fn emit_b_label(&mut self, label: Label); fn emit_bcond_label(&mut self, condition: Condition, label: Label); fn emit_b_register(&mut self, reg: GPR); @@ -706,6 +709,20 @@ impl EmitterARM64 for Assembler { ), } } + fn emit_add_lsl(&mut self, sz: Size, src1: Location, src2: Location, lsl: u32, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(src1), X(src2), LSL lsl); + } + _ => panic!( + "singlepass can't emit ADD {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } fn emit_add2(&mut self, sz: Size, src: Location, dst: 
Location) { match (sz, src, dst) { (Size::S64, Location::GPR(src), Location::GPR(dst)) => { @@ -805,6 +822,10 @@ impl EmitterARM64 for Assembler { fn emit_label(&mut self, label: Label) { dynasm!(self ; => label); } + fn emit_load_label(&mut self, reg: GPR, label: Label) { + let reg = reg.into_index() as u32; + dynasm!(self ; adr X(reg), =>label); + } fn emit_b_label(&mut self, label: Label) { dynasm!(self ; b =>label); } diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 17588e9b3ac..7cf93172df2 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1203,8 +1203,25 @@ impl Machine for MachineARM64 { } // jmp table - fn emit_jmp_to_jumptable(&mut self, _label: Label, _cond: Location) { - unimplemented!(); + fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) { + let tmp1 = self.pick_temp_gpr().unwrap(); + self.reserve_gpr(tmp1); + let tmp2 = self.pick_temp_gpr().unwrap(); + self.reserve_gpr(tmp2); + + self.assembler.emit_load_label(tmp1, label); + self.move_location(Size::S32, cond, Location::GPR(tmp2)); + + self.assembler.emit_add_lsl( + Size::S64, + Location::GPR(tmp1), + Location::GPR(tmp2), + 2, + Location::GPR(tmp2), + ); + self.assembler.emit_b_register(tmp2); + self.release_gpr(tmp2); + self.release_gpr(tmp1); } fn align_for_loop(&mut self) { From 962511d6170e3a86c18a82799771fa2bd3c285af Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 12:02:37 +0100 Subject: [PATCH 13/34] improv(compiler) Fixed stack alignment on native function call (69 tests passes now) --- lib/compiler-singlepass/src/codegen.rs | 35 ++++++++++---------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 043f6c1173a..5403bf73b89 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -746,15 +746,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { let mut stack_offset: usize = 0; - while self - .machine - .round_stack_adjust(used_gprs.len() * 8 + used_simds.len() * 8) - != used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset - { - // on ARM64, push use 2 bytes slot, because stack as to stay 16bytes aligned - stack_offset += 8; - } - // Calculate stack offset. for (i, _param) in params.iter().enumerate() { if let Location::Memory(_, _) = @@ -765,17 +756,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { } // Align stack to 16 bytes. - if (self.get_stack_offset() - + self - .machine - .round_stack_adjust(used_gprs.len() * 8 + used_simds.len() * 8) - + stack_offset) - % 16 - != 0 - { - self.machine.adjust_stack(8); - stack_offset += 8; - self.state.stack_values.push(MachineValue::Undefined); + if self.machine.round_stack_adjust(8) == 8 { + if (self.get_stack_offset() + used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset) + % 16 + != 0 + { + self.machine.adjust_stack(8); + stack_offset += 8; + self.state.stack_values.push(MachineValue::Undefined); + } } let mut call_movs: Vec<(Location, M::GPR)> = vec![]; @@ -883,8 +872,10 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Restore stack. 
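        // The round_stack_adjust(8) == 8 test above singles out targets with
        // native 8-byte stack slots (x86-64): on ARM64 every adjustment is
        // rounded up to 16 bytes to keep SP aligned, so the manual 8-byte
        // padding is skipped there, and the restore below rounds the total the
        // same way so it releases exactly what was reserved.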
if stack_offset + stack_padding > 0 { - self.machine - .restore_stack((stack_offset + stack_padding) as u32); + self.machine.restore_stack( + self.machine + .round_stack_adjust(stack_offset + stack_padding) as u32, + ); if (stack_offset % 8) != 0 { return Err(CodegenError { message: "emit_call_native: Bad restoring stack alignement".to_string(), From e9b80d48b64503a5933dbb95ab6a9aef89c9fb0f Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 17 Dec 2021 14:45:38 +0100 Subject: [PATCH 14/34] improv(compiler) Added logic and shift operations (70 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 333 ++++++++++++++++++- lib/compiler-singlepass/src/machine_arm64.rs | 327 ++++++++++++++---- 2 files changed, 594 insertions(+), 66 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 91406fc8543..808fb30c470 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -4,6 +4,7 @@ use crate::location::Location as AbstractLocation; pub use crate::location::{Multiplier, Reg}; pub use crate::machine::{Label, Offset}; use dynasm::dynasm; +pub use dynasmrt::aarch64::{encode_logical_immediate_32bit, encode_logical_immediate_64bit}; use dynasmrt::{ aarch64::Aarch64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, VecAssembler, @@ -125,6 +126,15 @@ pub trait EmitterARM64 { fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location); fn emit_tst(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lsl(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_lsr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_asr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_ror(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + + fn emit_or(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_label(&mut self, label: Label); fn emit_load_label(&mut self, reg: GPR, label: Label); fn emit_b_label(&mut self, label: Label); @@ -561,12 +571,6 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { - let src1 = src1.into_index() as u32; - let src2 = src2 as u32; - let dst = dst.into_index() as u32; - dynasm!(self ; add X(dst), X(src1), src2); - } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let src2 = src2.into_index() as u32; @@ -579,12 +583,24 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add X(dst), X(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add X(dst), X(src1), imm); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; add W(dst), W(src1), imm as u32); } + (Size::S32, Location::GPR(src1), 
Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; add W(dst), W(src1), imm); + } _ => panic!( "singlepass can't emit ADD {:?} {:?} {:?} {:?}", sz, src1, src2, dst @@ -615,6 +631,16 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; sub W(dst), W(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub X(dst), X(src1), imm); + } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub W(dst), W(src1), imm); + } _ => panic!( "singlepass can't emit SUB {:?} {:?} {:?} {:?}", sz, src1, src2, dst @@ -718,8 +744,8 @@ impl EmitterARM64 for Assembler { dynasm!(self ; add X(dst), X(src1), X(src2), LSL lsl); } _ => panic!( - "singlepass can't emit ADD {:?} {:?} {:?} {:?}", - sz, src1, src2, dst + "singlepass can't emit ADD {:?} {:?} {:?} {:?} LSL {:?}", + sz, src1, src2, dst, lsl ), } } @@ -748,6 +774,10 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add W(dst), W(dst), imm as u32); } + (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; add W(dst), W(dst), imm); + } _ => panic!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst), } } @@ -819,6 +849,293 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } + + fn emit_lsl(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit LSL {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_asr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self 
; asr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if src2 == 0 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; asr W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; asr W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit ASR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_lsr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if src2 == 0 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsr W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsr W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit LSR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_ror(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ror X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if src2 == 0 { + unreachable!(); + } + dynasm!(self ; ror X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), 
Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ror W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } + dynasm!(self ; ror X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ror W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit ROR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_or(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; orr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; orr X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; orr W(dst), W(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; orr W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit OR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; and X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; and X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; and W(dst), W(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; and W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit 
AND {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; eor X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; eor X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; eor W(dst), W(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; eor W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit EOR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_label(&mut self, label: Label) { dynasm!(self ; => label); } diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 7cf93172df2..19a7db80dcb 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -32,6 +32,23 @@ pub struct MachineARM64 { pushed: bool, } +#[allow(dead_code)] +enum ImmType { + None, + Bits8, + Shift32, + Shift32No0, + Shift64, + Shift64No0, + Logical32, + Logical64, + UnscaledOffset, + OffsetByte, + OffsetWord, + OffsetDWord, + OffsetQuad, +} + impl MachineARM64 { pub fn new() -> Self { MachineARM64 { @@ -53,8 +70,8 @@ impl MachineARM64 { putback: bool, ) { let mut temps = vec![]; - let src = self.location_to_reg(sz, src, &mut temps, false); - let dest = self.location_to_reg(sz, dst, &mut temps, false); + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None); op(&mut self.assembler, sz, src, dest); if dst != dest && putback { self.move_location(sz, dest, dst); @@ -63,17 +80,36 @@ impl MachineARM64 { self.release_gpr(r); } } + + fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { + match ty { + ImmType::None => false, + ImmType::Bits8 => imm >= 0 && imm < 256, + ImmType::Shift32 => imm >= 0 && imm < 32, + ImmType::Shift32No0 => imm > 0 && imm < 32, + ImmType::Shift64 => imm >= 0 && imm < 64, + ImmType::Shift64No0 => imm > 0 && imm < 64, + ImmType::Logical32 => encode_logical_immediate_32bit(imm as u32).is_some(), + ImmType::Logical64 => encode_logical_immediate_64bit(imm as u64).is_some(), + ImmType::UnscaledOffset => imm > -256 && imm < 256, + ImmType::OffsetByte => imm >= 0 && imm < 0x1000, + ImmType::OffsetWord => imm & 1 == 0 && imm >= 0 && imm < 0x2000, + ImmType::OffsetDWord => imm & 3 == 0 && imm >= 0 && imm < 0x4000, + ImmType::OffsetQuad => imm & 7 == 0 && imm >= 0 && imm < 0x8000, + } + } + fn location_to_reg( &mut self, sz: Size, src: Location, temps: &mut Vec, - allow_imm8: bool, + allow_imm: ImmType, ) -> Location { match src { Location::GPR(_) => src, Location::Imm8(val) => { - 
if allow_imm8 { + if self.compatible_imm(val as i64, allow_imm) { src } else { let tmp = self.acquire_temp_gpr().unwrap(); @@ -83,21 +119,29 @@ impl MachineARM64 { } } Location::Imm32(val) => { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - Location::GPR(tmp) + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } } Location::Imm64(val) => { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - Location::GPR(tmp) + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } } Location::Memory(reg, val) => { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); - if val > -256 && val < 256 { + if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val); } else { self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); @@ -119,11 +163,12 @@ impl MachineARM64 { src1: Location, src2: Location, dst: Location, + allow_imm: ImmType, ) { let mut temps = vec![]; - let src1 = self.location_to_reg(sz, src1, &mut temps, false); - let src2 = self.location_to_reg(sz, src2, &mut temps, true); - let dest = self.location_to_reg(sz, dst, &mut temps, false); + let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None); + let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None); op(&mut self.assembler, sz, src1, src2, dest); if dst != dest { self.move_location(sz, dest, dst); @@ -135,9 +180,9 @@ impl MachineARM64 { fn emit_relaxed_ldr64(&mut self, dst: Location, src: Location) { match src { Location::Memory(addr, offset) => { - if offset & 0x7 == 0 { + if self.compatible_imm(offset as i64, ImmType::OffsetQuad) { self.assembler.emit_ldr(Size::S64, dst, src); - } else if offset > -256 && offset < 256 { + } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { self.assembler.emit_ldur(Size::S64, dst, addr, offset); } else { let tmp = self.acquire_temp_gpr().unwrap(); @@ -1118,7 +1163,7 @@ impl Machine for MachineARM64 { fn emit_call_location(&mut self, location: Location) { let mut temps = vec![]; - let loc = self.location_to_reg(Size::S64, location, &mut temps, false); + let loc = self.location_to_reg(Size::S64, location, &mut temps, ImmType::None); match loc { Location::GPR(reg) => self.assembler.emit_call_register(reg), _ => unreachable!(), @@ -1147,8 +1192,8 @@ impl Machine for MachineARM64 { // math fn location_add(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let src = self.location_to_reg(size, source, &mut temps, true); - let dst = self.location_to_reg(size, dest, &mut temps, false); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None); if flags { self.assembler.emit_adds(size, dst, src, dst); } else { @@ -1163,8 +1208,8 @@ impl Machine for MachineARM64 { } fn location_sub(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let 
src = self.location_to_reg(size, source, &mut temps, true); - let dst = self.location_to_reg(size, dest, &mut temps, false); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None); if flags { self.assembler.emit_subs(size, dst, src, dst); } else { @@ -1294,13 +1339,34 @@ impl Machine for MachineARM64 { } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_binop3(Assembler::emit_add, Size::S32, loc_a, loc_b, ret); + self.emit_relaxed_binop3( + Assembler::emit_add, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Bits8, + ); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_binop3(Assembler::emit_sub, Size::S32, loc_a, loc_b, ret); + self.emit_relaxed_binop3( + Assembler::emit_sub, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Bits8, + ); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_binop3(Assembler::emit_mul, Size::S32, loc_a, loc_b, ret); + self.emit_relaxed_binop3( + Assembler::emit_mul, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } fn emit_binop_udiv32( &mut self, @@ -1338,14 +1404,35 @@ impl Machine for MachineARM64 { ) -> usize { unimplemented!(); } - fn emit_binop_and32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_and, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); } - fn emit_binop_or32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_or, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); } - fn emit_binop_xor32(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_eor, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); } fn i32_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); @@ -1386,20 +1473,71 @@ impl Machine for MachineARM64 { fn i32_popcnt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i32_shl(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsl, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); } - fn i32_shr(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsr, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); } - fn i32_sar(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_asr, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); } - fn i32_rol(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + 
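// Note: AArch64 has no rotate-left instruction, so rol(a, b) is lowered
// as ror(a, 32 - (b & 31)); the subtraction is folded into the immediate
// when b is a constant and emitted as a SUB from 32 otherwise.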
let mut temps = vec![]; + let src2 = match loc_b { + Location::Imm8(imm) => Location::Imm8(32 - (imm & 31)), + Location::Imm32(imm) => Location::Imm8(32 - (imm & 31) as u8), + Location::Imm64(imm) => Location::Imm8(32 - (imm & 31) as u8), + _ => { + let tmp1 = + self.location_to_reg(Size::S32, Location::Imm32(32), &mut temps, ImmType::None); + let tmp2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None); + self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp2); + tmp2 + } + }; + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S32, + loc_a, + src2, + ret, + ImmType::Shift32No0, + ); + for r in temps { + self.release_gpr(r); + } } - fn i32_ror(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); } fn i32_load( &mut self, @@ -2011,14 +2149,35 @@ impl Machine for MachineARM64 { ) -> usize { unimplemented!(); } - fn emit_binop_and64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_and, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Logical64, + ); } - fn emit_binop_or64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_or, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Logical64, + ); } - fn emit_binop_xor64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_eor, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Logical64, + ); } fn i64_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { unimplemented!(); @@ -2059,20 +2218,72 @@ impl Machine for MachineARM64 { fn i64_popcnt(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn i64_shl(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsl, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); } - fn i64_shr(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsr, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); } - fn i64_sar(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_asr, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); } - fn i64_rol(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + // there is no ROL on ARM64. 
We use ROR with (64 - value) instead
+ let mut temps = vec![];
+ let src2 = match loc_b {
+ Location::Imm8(imm) => Location::Imm8(64 - (imm & 63)),
+ Location::Imm32(imm) => Location::Imm8(64 - (imm & 63) as u8),
+ Location::Imm64(imm) => Location::Imm8(64 - (imm & 63) as u8),
+ _ => {
+ let tmp1 =
+ self.location_to_reg(Size::S64, Location::Imm32(64), &mut temps, ImmType::None);
+ let tmp2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None);
+ self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp2);
+ tmp2
+ }
+ };
+ self.emit_relaxed_binop3(
+ Assembler::emit_ror,
+ Size::S64,
+ loc_a,
+ src2,
+ ret,
+ ImmType::Shift64No0,
+ );
+ for r in temps {
+ self.release_gpr(r);
+ }
}
- fn i64_ror(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) {
- unimplemented!();
+ fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ self.emit_relaxed_binop3(
+ Assembler::emit_ror,
+ Size::S64,
+ loc_a,
+ loc_b,
+ ret,
+ ImmType::Shift64No0,
+ );
}
fn i64_load(
&mut self,

From 7c2eac166a244992a9be6313522052c4c2c07a50 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Fri, 17 Dec 2021 16:24:02 +0100
Subject: [PATCH 15/34] improv(compiler) Added/Fixed Store/Load of i32/i64 (74 tests pass now)

---
 lib/compiler-singlepass/src/emitter_arm64.rs | 91 +++-
 lib/compiler-singlepass/src/machine_arm64.rs | 540 ++++++++++++++-----
 2 files changed, 478 insertions(+), 153 deletions(-)

diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 808fb30c470..9d79dc1e6c8 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -104,6 +104,8 @@ pub trait EmitterARM64 {
fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
+ fn emit_strb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
+ fn emit_strh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_mov(&mut self, sz: Size, src: Location, dst: Location);
@@ -309,7 +311,10 @@ impl EmitterARM64 for Assembler {
let addr = addr.into_index() as u32;
dynasm!(self ; stur D(reg), [X(addr), offset]);
}
- _ => unreachable!(),
+ _ => panic!(
+ "singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
+ sz, reg, addr, offset
+ ),
}
}
fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) {
@@ -329,7 +334,10 @@
let addr = addr.into_index() as u32;
dynasm!(self ; ldur D(reg), [X(addr), offset]);
}
- _ => unreachable!(),
+ _ => panic!(
+ "singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
+ sz, reg, addr, offset
+ ),
}
}

@@ -399,7 +407,10 @@
let addr = addr.into_index() as u32;
dynasm!(self ; ldrb W(reg), [X(addr), offset]);
}
- _ => unreachable!(),
+ _ => panic!(
+ "singlepass can't emit LDRB {:?}, {:?}, {:?}, {:?}",
+ sz, reg, addr, offset
+ ),
}
}
fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
@@ -414,7 +425,10 @@
let addr = addr.into_index() as u32;
dynasm!(self ; ldrh W(reg), [X(addr), offset]);
}
- _ => unreachable!(),
+ _ => panic!(
+ "singlepass can't emit LDRH {:?}, {:?}, {:?}, {:?}",
+ sz, reg, addr, offset
+ ),
}
}
fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
@@ -429,7 +443,10 @@
let addr = addr.into_index() as u32;
dynasm!(self ; ldrsb
W(reg), [X(addr), offset]); } - _ => unreachable!(), + _ => panic!( + "singlepass can't emit LDRSB {:?}, {:?}, {:?}, {:?}", + sz, reg, addr, offset + ), } } fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { @@ -444,7 +461,10 @@ impl EmitterARM64 for Assembler { let addr = addr.into_index() as u32; dynasm!(self ; ldrsh W(reg), [X(addr), offset]); } - _ => unreachable!(), + _ => panic!( + "singlepass can't emit LDRSH {:?}, {:?}, {:?}, {:?}", + sz, reg, addr, offset + ), } } fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { @@ -454,7 +474,46 @@ impl EmitterARM64 for Assembler { let addr = addr.into_index() as u32; dynasm!(self ; ldrsw X(reg), [X(addr), offset]); } - _ => unreachable!(), + _ => panic!( + "singlepass can't emit LDRSW {:?}, {:?}, {:?}, {:?}", + sz, reg, addr, offset + ), + } + } + fn emit_strb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; strb W(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; strb W(reg), [X(addr), offset]); + } + _ => panic!( + "singlepass can't emit STRB {:?}, {:?}, {:?}, {:?}", + sz, reg, addr, offset + ), + } + } + fn emit_strh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; strh W(reg), [X(addr), offset]); + } + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; strh W(reg), [X(addr), offset]); + } + _ => panic!( + "singlepass can't emit STRH {:?}, {:?}, {:?}, {:?}", + sz, reg, addr, offset + ), } } @@ -675,12 +734,6 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; adds X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { - let src1 = src1.into_index() as u32; - let src2 = src2 as u32; - let dst = dst.into_index() as u32; - dynasm!(self ; adds X(dst), X(src1), src2); - } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let src2 = src2.into_index() as u32; @@ -693,12 +746,24 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; adds X(dst), X(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds X(dst), X(src1), imm); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; adds W(dst), W(src1), imm as u32); } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; adds W(dst), W(src1), imm); + } _ => panic!( "singlepass can't emit ADD.S {:?} {:?} {:?} {:?}", sz, src1, src2, dst diff --git 
a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 19a7db80dcb..8c263545253 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -33,8 +33,10 @@ pub struct MachineARM64 { } #[allow(dead_code)] +#[derive(PartialEq)] enum ImmType { None, + NoneXzr, Bits8, Shift32, Shift32No0, @@ -44,11 +46,12 @@ enum ImmType { Logical64, UnscaledOffset, OffsetByte, + OffsetHWord, OffsetWord, OffsetDWord, - OffsetQuad, } +#[allow(dead_code)] impl MachineARM64 { pub fn new() -> Self { MachineARM64 { @@ -84,6 +87,7 @@ impl MachineARM64 { fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { match ty { ImmType::None => false, + ImmType::NoneXzr => false, ImmType::Bits8 => imm >= 0 && imm < 256, ImmType::Shift32 => imm >= 0 && imm < 32, ImmType::Shift32No0 => imm > 0 && imm < 32, @@ -93,9 +97,9 @@ impl MachineARM64 { ImmType::Logical64 => encode_logical_immediate_64bit(imm as u64).is_some(), ImmType::UnscaledOffset => imm > -256 && imm < 256, ImmType::OffsetByte => imm >= 0 && imm < 0x1000, - ImmType::OffsetWord => imm & 1 == 0 && imm >= 0 && imm < 0x2000, - ImmType::OffsetDWord => imm & 3 == 0 && imm >= 0 && imm < 0x4000, - ImmType::OffsetQuad => imm & 7 == 0 && imm >= 0 && imm < 0x8000, + ImmType::OffsetHWord => imm & 1 == 0 && imm >= 0 && imm < 0x2000, + ImmType::OffsetWord => imm & 3 == 0 && imm >= 0 && imm < 0x4000, + ImmType::OffsetDWord => imm & 7 == 0 && imm >= 0 && imm < 0x8000, } } @@ -109,33 +113,45 @@ impl MachineARM64 { match src { Location::GPR(_) => src, Location::Imm8(val) => { - if self.compatible_imm(val as i64, allow_imm) { - src + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - Location::GPR(tmp) + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } } } Location::Imm32(val) => { - if self.compatible_imm(val as i64, allow_imm) { - src + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - Location::GPR(tmp) + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } } } Location::Imm64(val) => { - if self.compatible_imm(val as i64, allow_imm) { - src + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - Location::GPR(tmp) + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } } } Location::Memory(reg, val) => { @@ -180,7 +196,7 @@ impl MachineARM64 { fn emit_relaxed_ldr64(&mut self, dst: Location, src: Location) { match src { Location::Memory(addr, offset) => { - if self.compatible_imm(offset as i64, ImmType::OffsetQuad) { + if self.compatible_imm(offset as i64, 
ImmType::OffsetDWord) {
self.assembler.emit_ldr(Size::S64, dst, src);
} else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) {
self.assembler.emit_ldur(Size::S64, dst, addr, offset);
@@ -199,6 +215,82 @@
_ => unreachable!(),
}
}
+ fn emit_relaxed_ldr32(&mut self, dst: Location, src: Location) {
+ match src {
+ Location::Memory(addr, offset) => {
+ if self.compatible_imm(offset as i64, ImmType::OffsetWord) {
+ self.assembler.emit_ldr(Size::S32, dst, src);
+ } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) {
+ self.assembler.emit_ldur(Size::S32, dst, addr, offset);
+ } else {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ self.assembler
+ .emit_mov_imm(Location::GPR(tmp), offset as u64);
+ self.assembler.emit_ldr(
+ Size::S32,
+ dst,
+ Location::Memory2(addr, tmp, Multiplier::One, 0),
+ );
+ self.release_gpr(tmp);
+ }
+ }
+ _ => unreachable!(),
+ }
+ }
+ fn emit_relaxed_str64(&mut self, dst: Location, src: Location) {
+ let mut temps = vec![];
+ let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr);
+ match src {
+ Location::Memory(addr, offset) => {
+ if self.compatible_imm(offset as i64, ImmType::OffsetDWord) {
+ self.assembler.emit_str(Size::S64, dst, src);
+ } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) {
+ self.assembler.emit_stur(Size::S64, dst, addr, offset);
+ } else {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ self.assembler
+ .emit_mov_imm(Location::GPR(tmp), offset as u64);
+ self.assembler.emit_str(
+ Size::S64,
+ dst,
+ Location::Memory2(addr, tmp, Multiplier::One, 0),
+ );
+ self.release_gpr(tmp);
+ }
+ }
+ _ => unreachable!(),
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
+ }
+ fn emit_relaxed_str32(&mut self, dst: Location, src: Location) {
+ let mut temps = vec![];
+ let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr);
+ match src {
+ Location::Memory(addr, offset) => {
+ if self.compatible_imm(offset as i64, ImmType::OffsetWord) {
+ self.assembler.emit_str(Size::S32, dst, src);
+ } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) {
+ self.assembler.emit_stur(Size::S32, dst, addr, offset);
+ } else {
+ let tmp = self.acquire_temp_gpr().unwrap();
+ self.assembler
+ .emit_mov_imm(Location::GPR(tmp), offset as u64);
+ self.assembler.emit_str(
+ Size::S32,
+ dst,
+ Location::Memory2(addr, tmp, Multiplier::One, 0),
+ );
+ self.release_gpr(tmp);
+ }
+ }
+ _ => unreachable!(),
+ }
+ for r in temps {
+ self.release_gpr(r);
+ }
+ }
/// I32 binary operation with both operands popped from the virtual stack.
/*fn emit_binop_i32(
&mut self,
@@ -355,7 +447,7 @@ impl MachineARM64 {
// Add offset to memory address.
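// Note (an assumption, inferred from the emit_add -> emit_adds change
// below): ADDS makes the 32-bit offset addition set the flags, so an
// effective address that wraps can be routed to the heap_access_oob
// trap instead of silently aliasing back into linear memory.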
if memarg.offset != 0 { - self.assembler.emit_add( + self.assembler.emit_adds( Size::S32, Location::Imm32(memarg.offset), Location::GPR(tmp_addr), @@ -824,10 +916,10 @@ impl Machine for MachineARM64 { fn push_location_for_native(&mut self, loc: Location) { match loc { Location::Imm64(_) => { - self.reserve_unused_temp_gpr(GPR::X4); - self.move_location(Size::S64, loc, Location::GPR(GPR::X4)); - self.emit_push(Size::S64, Location::GPR(GPR::X4)); - self.release_gpr(GPR::X4); + self.reserve_unused_temp_gpr(GPR::X8); + self.move_location(Size::S64, loc, Location::GPR(GPR::X8)); + self.emit_push(Size::S64, Location::GPR(GPR::X8)); + self.release_gpr(GPR::X8); } _ => self.emit_push(Size::S64, loc), } @@ -1697,39 +1789,75 @@ impl Machine for MachineARM64 { } fn i32_save( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + target_addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_str32(target_value, Location::Memory(addr, 0)); + }, + ); } fn i32_save_8( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + target_addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_strb(Size::S32, target_value, addr, 0); + }, + ); } fn i32_save_16( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + target_addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_strh(Size::S32, target_value, addr, 0); + }, + ); } fn i32_atomic_save( &mut self, @@ -2287,87 +2415,171 @@ impl Machine for MachineARM64 { } fn i64_load( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 8, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldur(Size::S64, ret, addr, 0); + }, + ); } fn i64_load_8u( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, 
) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrb(Size::S64, ret, addr, 0); + }, + ); } fn i64_load_8s( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrsb(Size::S64, ret, addr, 0); + }, + ); } fn i64_load_16u( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrh(Size::S64, ret, addr, 0); + }, + ); } fn i64_load_16s( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrsh(Size::S64, ret, addr, 0); + }, + ); } fn i64_load_32u( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldur(Size::S32, ret, addr, 0); + }, + ); } fn i64_load_32s( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler.emit_ldrsw(Size::S64, ret, addr, 0); + }, + ); } fn i64_atomic_load( &mut self, @@ -2419,51 +2631,99 @@ impl Machine for MachineARM64 { } fn i64_save( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + target_addr, + memarg, + false, + 8, + 
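// 8 = width in bytes of the access being bounds-checked (an i64 store).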
need_check,
+ imported_memories,
+ offset,
+ heap_access_oob,
+ |this, addr| {
+ this.emit_relaxed_str64(target_value, Location::Memory(addr, 0));
+ },
+ );
}
fn i64_save_8(
&mut self,
- _target_value: Location,
- _memarg: &MemoryImmediate,
- _target_addr: Location,
- _need_check: bool,
- _imported_memories: bool,
- _offset: i32,
- _heap_access_oob: Label,
+ target_value: Location,
+ memarg: &MemoryImmediate,
+ target_addr: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
) {
- unimplemented!();
+ self.memory_op(
+ target_addr,
+ memarg,
+ false,
+ 1,
+ need_check,
+ imported_memories,
+ offset,
+ heap_access_oob,
+ |this, addr| {
+ this.assembler.emit_strb(Size::S64, target_value, addr, 0);
+ },
+ );
}
fn i64_save_16(
&mut self,
- _target_value: Location,
- _memarg: &MemoryImmediate,
- _target_addr: Location,
- _need_check: bool,
- _imported_memories: bool,
- _offset: i32,
- _heap_access_oob: Label,
+ target_value: Location,
+ memarg: &MemoryImmediate,
+ target_addr: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
) {
- unimplemented!();
+ self.memory_op(
+ target_addr,
+ memarg,
+ false,
+ 2,
+ need_check,
+ imported_memories,
+ offset,
+ heap_access_oob,
+ |this, addr| {
+ this.assembler.emit_strh(Size::S64, target_value, addr, 0);
+ },
+ );
}
fn i64_save_32(
&mut self,
- _target_value: Location,
- _memarg: &MemoryImmediate,
- _target_addr: Location,
- _need_check: bool,
- _imported_memories: bool,
- _offset: i32,
- _heap_access_oob: Label,
+ target_value: Location,
+ memarg: &MemoryImmediate,
+ target_addr: Location,
+ need_check: bool,
+ imported_memories: bool,
+ offset: i32,
+ heap_access_oob: Label,
) {
- unimplemented!();
+ self.memory_op(
+ target_addr,
+ memarg,
+ false,
+ 4,
+ need_check,
+ imported_memories,
+ offset,
+ heap_access_oob,
+ |this, addr| {
+ this.emit_relaxed_str32(target_value, Location::Memory(addr, 0));
+ },
+ );
}
fn i64_atomic_save(
&mut self,

From 6b593edf7bf6239328cb6f996b65d0ba12e6d35f Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 20 Dec 2021 11:20:32 +0100
Subject: [PATCH 16/34] improv(compiler) More logic operations (91 tests pass now)

---
 lib/compiler-singlepass/Cargo.toml | 4 +-
 lib/compiler-singlepass/src/emitter_arm64.rs | 94 ++++--
 lib/compiler-singlepass/src/machine_arm64.rs | 330 ++++++++++++-------
 3 files changed, 297 insertions(+), 131 deletions(-)

diff --git a/lib/compiler-singlepass/Cargo.toml b/lib/compiler-singlepass/Cargo.toml
index aec29e2a27b..e7f1f62ce23 100644
--- a/lib/compiler-singlepass/Cargo.toml
+++ b/lib/compiler-singlepass/Cargo.toml
@@ -18,8 +18,8 @@ wasmer-types = { path = "../types", version = "2.1.1", default-features = false,
rayon = { version = "1.5", optional = true }
hashbrown = { version = "0.11", optional = true }
more-asserts = "0.2"
-dynasm = "1.2"
-dynasmrt = "1.2"
+dynasm = "1.2.1"
+dynasmrt = "1.2.1"
lazy_static = "1.4"
byteorder = "1.3"
smallvec = "1.6"
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 9d79dc1e6c8..5337ef18f1f 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -35,38 +35,39 @@ pub type Location = AbstractLocation<GPR, NEON>;

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)]
+#[repr(u8)]
pub enum Condition {
// meaning for cmp or sub
/// Equal
- Eq,
+ Eq = 0,
/// Not equal
- Ne,
+ Ne = 1,
/// Unsigned higher or same (or carry set)
- Cs,
+ Cs = 2,
/// Unsigned lower (or
carry clear) - Cc, + Cc = 3, /// Negative. The mnemonic stands for "minus" - Mi, + Mi = 4, /// Positive or zero. The mnemonic stands for "plus" - Pl, + Pl = 5, /// Signed overflow. The mnemonic stands for "V set" - Vs, + Vs = 6, /// No signed overflow. The mnemonic stands for "V clear" - Vc, + Vc = 7, /// Unsigned higher - Hi, + Hi = 8, /// Unsigned lower or same - Ls, + Ls = 9, /// Signed greater than or equal - Ge, + Ge = 10, /// Signed less than - Lt, + Lt = 11, /// Signed greater than - Gt, + Gt = 12, /// Signed less than or equal - Le, + Le = 13, /// Always executed - Uncond, + Al = 14, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -137,6 +138,8 @@ pub trait EmitterARM64 { fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition); + fn emit_label(&mut self, label: Label); fn emit_load_label(&mut self, reg: GPR, label: Label); fn emit_b_label(&mut self, label: Label); @@ -648,6 +651,13 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; add X(dst), X(src1), imm); } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + let imm = imm as u32; + dynasm!(self ; add X(dst), X(src1), imm); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; @@ -690,15 +700,20 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; sub W(dst), W(src1), imm as u32); } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sub W(dst), W(src1), imm); + } (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; sub X(dst), X(src1), imm); } - (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - dynasm!(self ; sub W(dst), W(src1), imm); + dynasm!(self ; sub X(dst), X(src1), imm as u32); } _ => panic!( "singlepass can't emit SUB {:?} {:?} {:?} {:?}", @@ -809,7 +824,7 @@ impl EmitterARM64 for Assembler { dynasm!(self ; add X(dst), X(src1), X(src2), LSL lsl); } _ => panic!( - "singlepass can't emit ADD {:?} {:?} {:?} {:?} LSL {:?}", + "singlepass can't emit LSL {:?} {:?} {:?} {:?} LSL {:?}", sz, src1, src2, dst, lsl ), } @@ -1201,6 +1216,47 @@ impl EmitterARM64 for Assembler { } } + fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition) { + let reg = reg.into_index() as u32; + match sz { + Size::S32 => match cond { + Condition::Eq => dynasm!(self ; cset W(reg), eq), + Condition::Ne => dynasm!(self ; cset W(reg), ne), + Condition::Cs => dynasm!(self ; cset W(reg), cs), + Condition::Cc => dynasm!(self ; cset W(reg), cc), + Condition::Mi => dynasm!(self ; cset W(reg), mi), + Condition::Pl => dynasm!(self ; cset W(reg), pl), + Condition::Vs => dynasm!(self ; cset W(reg), vs), + Condition::Vc => dynasm!(self ; cset W(reg), vc), + Condition::Hi => 
dynasm!(self ; cset W(reg), hi), + Condition::Ls => dynasm!(self ; cset W(reg), ls), + Condition::Ge => dynasm!(self ; cset W(reg), ge), + Condition::Lt => dynasm!(self ; cset W(reg), lt), + Condition::Gt => dynasm!(self ; cset W(reg), gt), + Condition::Le => dynasm!(self ; cset W(reg), le), + Condition::Al => dynasm!(self ; cset W(reg), al), + }, + Size::S64 => match cond { + Condition::Eq => dynasm!(self ; cset X(reg), eq), + Condition::Ne => dynasm!(self ; cset X(reg), ne), + Condition::Cs => dynasm!(self ; cset X(reg), cs), + Condition::Cc => dynasm!(self ; cset X(reg), cc), + Condition::Mi => dynasm!(self ; cset X(reg), mi), + Condition::Pl => dynasm!(self ; cset X(reg), pl), + Condition::Vs => dynasm!(self ; cset X(reg), vs), + Condition::Vc => dynasm!(self ; cset X(reg), vc), + Condition::Hi => dynasm!(self ; cset X(reg), hi), + Condition::Ls => dynasm!(self ; cset X(reg), ls), + Condition::Ge => dynasm!(self ; cset X(reg), ge), + Condition::Lt => dynasm!(self ; cset X(reg), lt), + Condition::Gt => dynasm!(self ; cset X(reg), gt), + Condition::Le => dynasm!(self ; cset X(reg), le), + Condition::Al => dynasm!(self ; cset X(reg), al), + }, + _ => unreachable!(), + } + } + fn emit_label(&mut self, label: Label) { dynasm!(self ; => label); } @@ -1227,7 +1283,7 @@ impl EmitterARM64 for Assembler { Condition::Lt => dynasm!(self ; b.lt => label), Condition::Gt => dynasm!(self ; b.gt => label), Condition::Le => dynasm!(self ; b.le => label), - Condition::Uncond => dynasm!(self ; b => label), + Condition::Al => dynasm!(self ; b => label), } } fn emit_b_register(&mut self, reg: GPR) { diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 8c263545253..df7c7b28a59 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -73,8 +73,8 @@ impl MachineARM64 { putback: bool, ) { let mut temps = vec![]; - let src = self.location_to_reg(sz, src, &mut temps, ImmType::None); - let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None); + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); op(&mut self.assembler, sz, src, dest); if dst != dest && putback { self.move_location(sz, dest, dst); @@ -109,6 +109,7 @@ impl MachineARM64 { src: Location, temps: &mut Vec, allow_imm: ImmType, + wanted: Option, ) -> Location { match src { Location::GPR(_) => src, @@ -119,8 +120,13 @@ impl MachineARM64 { if self.compatible_imm(val as i64, allow_imm) { src } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); Location::GPR(tmp) } @@ -133,8 +139,13 @@ impl MachineARM64 { if self.compatible_imm(val as i64, allow_imm) { src } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); Location::GPR(tmp) } @@ -147,19 +158,43 @@ impl MachineARM64 { if self.compatible_imm(val as i64, allow_imm) { src } else { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = 
self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); Location::GPR(tmp) } } } Location::Memory(reg, val) => { - let tmp = self.acquire_temp_gpr().unwrap(); - temps.push(tmp.clone()); - if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::GPR(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val); } else { + if reg == tmp { + unreachable!(); + } self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); self.assembler.emit_ldr( sz, @@ -182,9 +217,9 @@ impl MachineARM64 { allow_imm: ImmType, ) { let mut temps = vec![]; - let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None); - let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm); - let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None); + let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); op(&mut self.assembler, sz, src1, src2, dest); if dst != dest { self.move_location(sz, dest, dst); @@ -239,7 +274,7 @@ impl MachineARM64 { } fn emit_relaxed_str64(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetDWord) { @@ -266,7 +301,7 @@ impl MachineARM64 { } fn emit_relaxed_str32(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetWord) { @@ -328,45 +363,83 @@ impl MachineARM64 { } }*/ /// I64 comparison with. - /*fn emit_cmpop_i64_dynamic_b( - &mut self, - _c: Condition, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - ) { - unimplemented!(); - }*/ - /// I64 shift with both operands popped from the virtual stack. 
- /*fn emit_shift_i64( + fn emit_cmpop_i64_dynamic_b( &mut self, - _f: fn(&mut Assembler, Size, Location, Location), - _loc_a: Location, - _loc_b: Location, - _ret: Location, + c: Condition, + loc_a: Location, + loc_b: Location, + ret: Location, ) { - unimplemented!(); - }*/ + match ret { + Location::GPR(x) => { + self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, x, c); + self.assembler.emit_and( + Size::S32, + Location::GPR(x), + Location::Imm32(0xff), + Location::GPR(x), + ); + } + Location::Memory(_, _) => { + let tmp = self.acquire_temp_gpr().unwrap(); + self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, tmp, c); + self.assembler.emit_and( + Size::S32, + Location::GPR(tmp), + Location::Imm32(0xff), + Location::GPR(tmp), + ); + self.move_location(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); + } + _ => { + unreachable!(); + } + } + } /// I32 comparison with. - /*fn emit_cmpop_i32_dynamic_b( - &mut self, - _c: Condition, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - ) { - unimplemented!(); - }*/ - /// I32 shift with both operands popped from the virtual stack. - /*fn emit_shift_i32( + fn emit_cmpop_i32_dynamic_b( &mut self, - _f: fn(&mut Assembler, Size, Location, Location), - _loc_a: Location, - _loc_b: Location, - _ret: Location, + c: Condition, + loc_a: Location, + loc_b: Location, + ret: Location, ) { - unimplemented!(); - }*/ + // bug on dynasm + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(tmp), 0xff); + match ret { + Location::GPR(x) => { + self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, x, c); + self.assembler.emit_and( + Size::S32, + Location::GPR(x), + Location::GPR(tmp), /*Location::Imm32(0xff)*/ + Location::GPR(x), + ); + } + Location::Memory(_, _) => { + let tmp = self.acquire_temp_gpr().unwrap(); + self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, tmp, c); + self.assembler.emit_and( + Size::S32, + Location::GPR(tmp), + Location::GPR(tmp), /*Location::Imm32(0xff)*/ + Location::GPR(tmp), + ); + self.move_location(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); + } + _ => { + unreachable!(); + } + } + self.release_gpr(tmp); + } fn memory_op( &mut self, @@ -636,7 +709,7 @@ impl Machine for MachineARM64 { fn pick_gpr(&self) -> Option { use GPR::*; - static REGS: &[GPR] = &[X6, X7, X9, X10, X11, X12, X13, X14, X15, X22, X23, X24, X25]; + static REGS: &[GPR] = &[X6, X7, X9, X10, X11, X12, X13, X14, X15]; for r in REGS { if !self.used_gprs.contains(r) { return Some(*r); @@ -1007,8 +1080,8 @@ impl Machine for MachineARM64 { Location::Memory(addr, offs) => { if self.offset_is_ok(size, offs) { self.assembler.emit_str(size, source, dest); - } else if offs > -256 && offs < 256 { - self.assembler.emit_stur(size, dest, addr, offs); + } else if self.compatible_imm(offs as i64, ImmType::UnscaledOffset) { + self.assembler.emit_stur(size, source, addr, offs); } else { let tmp = self.pick_temp_gpr().unwrap(); if offs < 0 { @@ -1255,7 +1328,13 @@ impl Machine for MachineARM64 { fn emit_call_location(&mut self, location: Location) { let mut temps = vec![]; - let loc = self.location_to_reg(Size::S64, location, &mut temps, ImmType::None); + let loc = self.location_to_reg( + Size::S64, + location, + &mut temps, + ImmType::None, + Some(GPR::X26), + ); match loc { Location::GPR(reg) => self.assembler.emit_call_register(reg), _ => unreachable!(), @@ -1284,8 +1363,8 @@ impl 
Machine for MachineARM64 { // math fn location_add(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8); - let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, None); if flags { self.assembler.emit_adds(size, dst, src, dst); } else { @@ -1300,8 +1379,8 @@ impl Machine for MachineARM64 { } fn location_sub(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8); - let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, None); if flags { self.assembler.emit_subs(size, dst, src, dst); } else { @@ -1526,35 +1605,35 @@ impl Machine for MachineARM64 { ImmType::Logical32, ); } - fn i32_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ge, loc_a, loc_b, ret); } - fn i32_cmp_gt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Gt, loc_a, loc_b, ret); } - fn i32_cmp_le_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Le, loc_a, loc_b, ret); } - fn i32_cmp_lt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Lt, loc_a, loc_b, ret); } - fn i32_cmp_ge_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Cs, loc_a, loc_b, ret); } - fn i32_cmp_gt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Hi, loc_a, loc_b, ret); } - fn i32_cmp_le_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ls, loc_a, loc_b, ret); } - fn i32_cmp_lt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Cc, loc_a, loc_b, ret); } - fn i32_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ne, loc_a, loc_b, ret); } - fn i32_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i32_cmp_eq(&mut self, loc_a: 
Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Eq, loc_a, loc_b, ret); } fn i32_clz(&mut self, _loc: Location, _ret: Location) { unimplemented!(); @@ -1602,9 +1681,14 @@ impl Machine for MachineARM64 { Location::Imm32(imm) => Location::Imm8(32 - (imm & 31) as u8), Location::Imm64(imm) => Location::Imm8(32 - (imm & 31) as u8), _ => { - let tmp1 = - self.location_to_reg(Size::S32, Location::Imm32(32), &mut temps, ImmType::None); - let tmp2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None); + let tmp1 = self.location_to_reg( + Size::S32, + Location::Imm32(32), + &mut temps, + ImmType::None, + None, + ); + let tmp2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp2); tmp2 } @@ -2232,14 +2316,35 @@ impl Machine for MachineARM64 { self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48); } - fn emit_binop_add64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_add, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Bits8, + ); } - fn emit_binop_sub64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_sub, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Bits8, + ); } - fn emit_binop_mul64(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_mul, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } fn emit_binop_udiv64( &mut self, @@ -2307,35 +2412,35 @@ impl Machine for MachineARM64 { ImmType::Logical64, ); } - fn i64_cmp_ge_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Ge, loc_a, loc_b, ret); } - fn i64_cmp_gt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Gt, loc_a, loc_b, ret); } - fn i64_cmp_le_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Le, loc_a, loc_b, ret); } - fn i64_cmp_lt_s(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Lt, loc_a, loc_b, ret); } - fn i64_cmp_ge_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Cs, loc_a, loc_b, ret); } - fn i64_cmp_gt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn i64_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Hi, loc_a, loc_b, ret); } - fn i64_cmp_le_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - 
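// Note: the unsigned i64 comparisons below reuse CMP's carry-based
// conditions, mirroring the i32 mapping above: lt_u -> Cc, le_u -> Ls,
// gt_u -> Hi, ge_u -> Cs.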
unimplemented!();
+ fn i64_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ self.emit_cmpop_i64_dynamic_b(Condition::Ls, loc_a, loc_b, ret);
}
- fn i64_cmp_lt_u(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) {
- unimplemented!();
+ fn i64_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ self.emit_cmpop_i64_dynamic_b(Condition::Cc, loc_a, loc_b, ret);
}
- fn i64_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) {
- unimplemented!();
+ fn i64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ self.emit_cmpop_i64_dynamic_b(Condition::Ne, loc_a, loc_b, ret);
}
- fn i64_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) {
- unimplemented!();
+ fn i64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+ self.emit_cmpop_i64_dynamic_b(Condition::Eq, loc_a, loc_b, ret);
}
fn i64_clz(&mut self, _loc: Location, _ret: Location) {
unimplemented!();
@@ -2384,9 +2489,14 @@
Location::Imm32(imm) => Location::Imm8(64 - (imm & 63) as u8),
Location::Imm64(imm) => Location::Imm8(64 - (imm & 63) as u8),
_ => {
- let tmp1 =
- self.location_to_reg(Size::S64, Location::Imm32(64), &mut temps, ImmType::None);
- let tmp2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None);
+ let tmp1 = self.location_to_reg(
+ Size::S64,
+ Location::Imm32(64),
+ &mut temps,
+ ImmType::None,
+ None,
+ );
+ let tmp2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None);
self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp2);
tmp2
}

From f23a137564fb5ca4281c8656054d4555600deb19 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 20 Dec 2021 14:33:24 +0100
Subject: [PATCH 17/34] improv(compiler) More memory operations (95 tests pass now)

---
 lib/compiler-singlepass/src/emitter_arm64.rs | 254 +++++++++----
 lib/compiler-singlepass/src/machine_arm64.rs | 360 +++++++++++++++----
 tests/ignores.txt | 4 +
 3 files changed, 476 insertions(+), 142 deletions(-)

diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 5337ef18f1f..7992c90faa2 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -100,13 +100,13 @@ pub trait EmitterARM64 {
fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);
fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);
- fn emit_ldrb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_strb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
- fn emit_strh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
+ fn emit_ldrb(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_ldrh(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_ldrsb(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_ldrsh(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_ldrsw(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_strb(&mut self, sz: Size, reg: Location, dst: Location);
+ fn emit_strh(&mut self, sz: Size, reg: Location, dst: Location);
fn emit_mov(&mut self, sz: Size,
src: Location, dst: Location); @@ -234,7 +234,16 @@ impl EmitterARM64 for Assembler { } dynasm!(self ; str D(reg), [X(addr), disp]); } - _ => unreachable!(), + (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } + dynasm!(self ; str S(reg), [X(addr), disp]); + } + _ => panic!("singlepass can't emit STR {:?}, {:?}, {:?}", sz, reg, addr), } } fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location) { @@ -242,33 +251,36 @@ impl EmitterARM64 for Assembler { (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x7) != 0 || disp < 0 || disp >= 0x8000 { unreachable!(); } + let disp = disp as u32; dynasm!(self ; ldr X(reg), [X(addr), disp]); } (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - let disp = disp as u32; - if (disp & 0x3) != 0 { + if (disp & 0x3) != 0 || disp < 0 || disp >= 0x4000 { unreachable!(); } + let disp = disp as u32; dynasm!(self ; ldr W(reg), [X(addr), disp]); } (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - let disp = disp as u32; - if (disp & 0x1) != 0 { + if (disp & 0x1) != 0 || disp < 0 || disp >= 0x2000 { unreachable!(); } + let disp = disp as u32; dynasm!(self ; ldrh W(reg), [X(addr), disp]); } (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + if disp < 0 || disp >= 0x1000 { + unreachable!(); + } let disp = disp as u32; dynasm!(self ; ldrb W(reg), [X(addr), disp]); } @@ -294,7 +306,16 @@ impl EmitterARM64 for Assembler { } dynasm!(self ; ldr D(reg), [X(addr), disp]); } - _ => unreachable!(), + (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let disp = disp as u32; + if (disp & 0x7) != 0 { + unreachable!(); + } + dynasm!(self ; ldr S(reg), [X(addr), disp]); + } + _ => panic!("singlepass can't emit LDR {:?}, {:?}, {:?}", sz, reg, addr), } } fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) { @@ -398,125 +419,210 @@ impl EmitterARM64 for Assembler { } } - fn emit_ldrb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_ldrb(&mut self, _sz: Size, reg: Location, dst: Location) { + match (reg, dst) { + (Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrb W(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - dynasm!(self ; ldrb W(reg), [X(addr), offset]); + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrb W(reg), [X(addr), X(r2), LSL mult]); } - _ => panic!( - "singlepass can't emit LDRB {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + _ => panic!("singlepass can't emit LDRB {:?}, {:?}", reg, dst), } 
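+        // Note (added for clarity): `Location::Memory2(base, index, mult, offset)`
+        // models the AArch64 register-offset addressing mode
+        // `[X(base), X(index), LSL mult]`. The arms above only accept a zero
+        // constant offset and a non-zero multiplier, and hit `unreachable!()`
+        // otherwise.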
} - fn emit_ldrh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_ldrh(&mut self, _sz: Size, reg: Location, dst: Location) { + match (reg, dst) { + (Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrh W(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - dynasm!(self ; ldrh W(reg), [X(addr), offset]); + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrh W(reg), [X(addr), X(r2), LSL mult]); } - _ => panic!( - "singlepass can't emit LDRH {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + _ => panic!("singlepass can't emit LDRH {:?}, {:?}", reg, dst), } } - fn emit_ldrsb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_ldrsb(&mut self, sz: Size, reg: Location, dst: Location) { + match (sz, reg, dst) { + (Size::S64, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrsb X(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrsb W(reg), [X(addr), offset]); } - _ => panic!( - "singlepass can't emit LDRSB {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrsb X(reg), [X(addr), X(r2), LSL mult]); + } + (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrsb W(reg), [X(addr), X(r2), LSL mult]); + } + _ => panic!("singlepass can't emit LDRSB {:?}, {:?}, {:?}", sz, reg, dst), } } - fn emit_ldrsh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_ldrsh(&mut self, sz: Size, reg: Location, dst: Location) { + match (sz, reg, dst) { + (Size::S64, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrsh X(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrsh W(reg), [X(addr), offset]); } - _ => panic!( - "singlepass can't emit LDRSH {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { 
+ let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrsh X(reg), [X(addr), X(r2), LSL mult]); + } + (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrsh W(reg), [X(addr), X(r2), LSL mult]); + } + _ => panic!("singlepass can't emit LDRSH {:?}, {:?}, {:?}", sz, reg, dst), } } - fn emit_ldrsw(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_ldrsw(&mut self, sz: Size, reg: Location, dst: Location) { + match (sz, reg, dst) { + (Size::S64, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; ldrsw X(reg), [X(addr), offset]); } - _ => panic!( - "singlepass can't emit LDRSW {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; ldrsw X(reg), [X(addr), X(r2), LSL mult]); + } + _ => panic!("singlepass can't emit LDRSW {:?}, {:?}, {:?}", sz, reg, dst), } } - fn emit_strb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_strb(&mut self, _sz: Size, reg: Location, dst: Location) { + match (reg, dst) { + (Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; strb W(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - dynasm!(self ; strb W(reg), [X(addr), offset]); + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); + } + dynasm!(self ; strb W(reg), [X(addr), X(r2), LSL mult]); } - _ => panic!( - "singlepass can't emit STRB {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + _ => panic!("singlepass can't emit STRB {:?}, {:?}", reg, dst), } } - fn emit_strh(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { - match (sz, reg) { - (Size::S64, Location::GPR(reg)) => { + fn emit_strh(&mut self, _sz: Size, reg: Location, dst: Location) { + match (reg, dst) { + (Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; + let offset = offset as u32; dynasm!(self ; strh W(reg), [X(addr), offset]); } - (Size::S32, Location::GPR(reg)) => { + (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - dynasm!(self ; strh W(reg), [X(addr), offset]); + let r2 = r2.into_index() as u32; + if offs != 0 { + unreachable!(); + } + let mult = mult as u32; + if mult == 0 { + unreachable!(); 
+ } + dynasm!(self ; strh W(reg), [X(addr), X(r2), LSL mult]); } - _ => panic!( - "singlepass can't emit STRH {:?}, {:?}, {:?}, {:?}", - sz, reg, addr, offset - ), + _ => panic!("singlepass can't emit STRH {:?}, {:?}", reg, dst), } } diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index df7c7b28a59..95bd762c1a9 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -88,18 +88,18 @@ impl MachineARM64 { match ty { ImmType::None => false, ImmType::NoneXzr => false, - ImmType::Bits8 => imm >= 0 && imm < 256, - ImmType::Shift32 => imm >= 0 && imm < 32, - ImmType::Shift32No0 => imm > 0 && imm < 32, - ImmType::Shift64 => imm >= 0 && imm < 64, - ImmType::Shift64No0 => imm > 0 && imm < 64, + ImmType::Bits8 => (imm >= 0) && (imm < 256), + ImmType::Shift32 => (imm >= 0) && (imm < 32), + ImmType::Shift32No0 => (imm > 0) && (imm < 32), + ImmType::Shift64 => (imm >= 0) && (imm < 64), + ImmType::Shift64No0 => (imm > 0) && (imm < 64), ImmType::Logical32 => encode_logical_immediate_32bit(imm as u32).is_some(), ImmType::Logical64 => encode_logical_immediate_64bit(imm as u64).is_some(), - ImmType::UnscaledOffset => imm > -256 && imm < 256, - ImmType::OffsetByte => imm >= 0 && imm < 0x1000, - ImmType::OffsetHWord => imm & 1 == 0 && imm >= 0 && imm < 0x2000, - ImmType::OffsetWord => imm & 3 == 0 && imm >= 0 && imm < 0x4000, - ImmType::OffsetDWord => imm & 7 == 0 && imm >= 0 && imm < 0x8000, + ImmType::UnscaledOffset => (imm > -256) && (imm < 256), + ImmType::OffsetByte => (imm >= 0) && (imm < 0x1000), + ImmType::OffsetHWord => (imm & 1 == 0) && (imm >= 0) && (imm < 0x2000), + ImmType::OffsetWord => (imm & 3 == 0) && (imm >= 0) && (imm < 0x4000), + ImmType::OffsetDWord => (imm & 7 == 0) && (imm >= 0) && (imm < 0x8000), } } @@ -112,7 +112,7 @@ impl MachineARM64 { wanted: Option, ) -> Location { match src { - Location::GPR(_) => src, + Location::GPR(_) | Location::SIMD(_) => src, Location::Imm8(val) => { if allow_imm == ImmType::NoneXzr && val == 0 { Location::GPR(GPR::XzrSp) @@ -272,6 +272,106 @@ impl MachineARM64 { _ => unreachable!(), } } + fn emit_relaxed_ldr32s(&mut self, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetWord) { + self.assembler.emit_ldrsw(Size::S64, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldrsw( + Size::S64, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } + fn emit_relaxed_ldr16(&mut self, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_ldrh(Size::S32, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldrh( + Size::S32, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } + fn emit_relaxed_ldr16s(&mut self, sz: Size, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_ldrsh(sz, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + 
self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldrsh( + sz, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } + fn emit_relaxed_ldr8(&mut self, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler.emit_ldrb(Size::S32, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldrb( + Size::S32, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } + fn emit_relaxed_ldr8s(&mut self, sz: Size, dst: Location, src: Location) { + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler.emit_ldrsb(sz, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_ldrsb( + sz, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + } fn emit_relaxed_str64(&mut self, dst: Location, src: Location) { let mut temps = vec![]; let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); @@ -293,7 +393,7 @@ impl MachineARM64 { self.release_gpr(tmp); } } - _ => unreachable!(), + _ => panic!("singlepass can't emit str64 {:?} {:?}", dst, src), } for r in temps { self.release_gpr(r); @@ -326,6 +426,57 @@ impl MachineARM64 { self.release_gpr(r); } } + fn emit_relaxed_str16(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_strh(Size::S32, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_strh( + Size::S32, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_str8(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler + .emit_strb(Size::S32, dst, Location::Memory(addr, offset)); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_strb( + Size::S32, + Location::GPR(tmp), + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + self.release_gpr(tmp); + } + } + _ => unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } /// I32 binary operation with both operands popped from the virtual stack. /*fn emit_binop_i32( &mut self, @@ -520,12 +671,25 @@ impl MachineARM64 { // Add offset to memory address. 
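+        // The constant offset is folded into the address with an immediate `adds`
+        // when it fits the immediate field (ImmType::Bits8 below); larger offsets
+        // are first materialized into a scratch register with `emit_mov_imm`.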
if memarg.offset != 0 { - self.assembler.emit_adds( - Size::S32, - Location::Imm32(memarg.offset), - Location::GPR(tmp_addr), - Location::GPR(tmp_addr), - ); + if self.compatible_imm(memarg.offset as _, ImmType::Bits8) { + self.assembler.emit_adds( + Size::S32, + Location::Imm32(memarg.offset), + Location::GPR(tmp_addr), + Location::GPR(tmp_addr), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), memarg.offset as _); + self.assembler.emit_adds( + Size::S32, + Location::GPR(tmp_addr), + Location::GPR(tmp), + Location::GPR(tmp_addr), + ); + self.release_gpr(tmp); + } // Trap if offset calculation overflowed. self.assembler @@ -1735,7 +1899,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldur(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr32(ret, Location::Memory(addr, 0)); }, ); } @@ -1759,7 +1923,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrb(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr8(ret, Location::Memory(addr, 0)); }, ); } @@ -1783,7 +1947,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrsb(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr8s(Size::S32, ret, Location::Memory(addr, 0)); }, ); } @@ -1807,7 +1971,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrh(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr16(ret, Location::Memory(addr, 0)); }, ); } @@ -1831,7 +1995,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrsh(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr16s(Size::S32, ret, Location::Memory(addr, 0)); }, ); } @@ -1915,7 +2079,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_strb(Size::S32, target_value, addr, 0); + this.emit_relaxed_str8(target_value, Location::Memory(addr, 0)); }, ); } @@ -1939,7 +2103,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_strh(Size::S32, target_value, addr, 0); + this.emit_relaxed_str16(target_value, Location::Memory(addr, 0)); }, ); } @@ -2543,7 +2707,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldur(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr64(ret, Location::Memory(addr, 0)); }, ); } @@ -2567,7 +2731,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrb(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr8(ret, Location::Memory(addr, 0)); }, ); } @@ -2591,7 +2755,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrsb(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr8s(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -2615,7 +2779,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrh(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr16(ret, Location::Memory(addr, 0)); }, ); } @@ -2639,7 +2803,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrsh(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr16s(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -2663,7 +2827,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldur(Size::S32, ret, addr, 0); + this.emit_relaxed_ldr32(ret, Location::Memory(addr, 0)); }, 
); } @@ -2687,7 +2851,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_ldrsw(Size::S64, ret, addr, 0); + this.emit_relaxed_ldr32s(ret, Location::Memory(addr, 0)); }, ); } @@ -2783,7 +2947,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_strb(Size::S64, target_value, addr, 0); + this.emit_relaxed_str8(target_value, Location::Memory(addr, 0)); }, ); } @@ -2807,7 +2971,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.assembler.emit_strh(Size::S64, target_value, addr, 0); + this.emit_relaxed_str16(target_value, Location::Memory(addr, 0)); }, ); } @@ -3282,53 +3446,113 @@ impl Machine for MachineARM64 { fn f32_load( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler + .emit_ldr(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); } fn f32_save( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _canonicalize: bool, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + canonicalize: bool, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + let canonicalize = canonicalize && self.arch_supports_canonicalize_nan(); + self.memory_op( + target_addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + if !canonicalize { + this.emit_relaxed_str32(target_value, Location::Memory(addr, 0)); + } else { + this.canonicalize_nan(Size::S32, target_value, Location::Memory(addr, 0)); + } + }, + ); } fn f64_load( &mut self, - _addr: Location, - _memarg: &MemoryImmediate, - _ret: Location, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + self.memory_op( + addr, + memarg, + false, + 8, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.assembler + .emit_ldr(Size::S64, ret, Location::Memory(addr, 0)); + }, + ); } fn f64_save( &mut self, - _target_value: Location, - _memarg: &MemoryImmediate, - _target_addr: Location, - _canonicalize: bool, - _need_check: bool, - _imported_memories: bool, - _offset: i32, - _heap_access_oob: Label, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + canonicalize: bool, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, ) { - unimplemented!(); + let canonicalize = canonicalize && self.arch_supports_canonicalize_nan(); + self.memory_op( + target_addr, + memarg, + false, + 8, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + if !canonicalize { + this.emit_relaxed_str64(target_value, Location::Memory(addr, 0)); + } else { + this.canonicalize_nan(Size::S64, target_value, Location::Memory(addr, 0)); + 
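+                    // (assumption: canonicalize_nan rewrites any NaN payload to
+                    // the canonical quiet NaN before the value reaches memory,
+                    // for deterministic Wasm NaN semantics)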
+                }
+            },
+        );
     }
 
     fn convert_f64_i64(&mut self, _loc: Location, _signed: bool, _ret: Location) {
diff --git a/tests/ignores.txt b/tests/ignores.txt
index fd9bebcb7f1..f8044c28af8 100644
--- a/tests/ignores.txt
+++ b/tests/ignores.txt
@@ -72,6 +72,10 @@ cranelift spec::simd::simd_int_to_int_extend
 # Windows doesn't overcommit and fails to allocate 4GB of memory
 windows wasmer::max_size_of_memory
 
+# Some AARCH64 CPUs have an issue with segfaulting 64-bit writes on a page border, where the first 32 bits might still be written.
+aarch64 spec::align
+aarch64 spec::memory_trap
+
 # Frontends
 
 ## WASI

From ddc17374c39cd38475987c1ee05f90846abb99d0 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 20 Dec 2021 15:33:31 +0100
Subject: [PATCH 18/34] improv(compiler) Added CLZ, CTZ and a few float operations (99 tests pass now)

---
 lib/compiler-singlepass/src/emitter_arm64.rs |  51 +++++++
 lib/compiler-singlepass/src/machine_arm64.rs | 147 +++++++++++++++++--
 tests/ignores.txt                            |   4 +-
 3 files changed, 188 insertions(+), 14 deletions(-)

diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 7992c90faa2..d5e533335a8 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -139,6 +139,8 @@ pub trait EmitterARM64 {
     fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
 
     fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition);
+    fn emit_clz(&mut self, sz: Size, src: Location, dst: Location);
+    fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location);
 
     fn emit_label(&mut self, label: Label);
     fn emit_load_label(&mut self, reg: GPR, label: Label);
@@ -152,6 +154,8 @@ pub trait EmitterARM64 {
     fn emit_udf(&mut self);
     fn emit_dmb(&mut self);
 
+    fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location);
+
     fn arch_supports_canonicalize_nan(&self) -> bool {
         true
     }
@@ -1363,6 +1367,37 @@ impl EmitterARM64 for Assembler {
         }
     }
 
+    fn emit_clz(&mut self, sz: Size, src: Location, dst: Location) {
+        match (sz, src, dst) {
+            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; clz X(dst), X(src));
+            }
+            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; clz W(dst), W(src));
+            }
+            _ => panic!("singlepass can't emit CLZ {:?} {:?} {:?}", sz, src, dst),
+        }
+    }
+    fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location) {
+        match (sz, src, dst) {
+            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; rbit X(dst), X(src));
+            }
+            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; rbit W(dst), W(src));
+            }
+            _ => panic!("singlepass can't emit RBIT {:?} {:?} {:?}", sz, src, dst),
+        }
+    }
+
     fn emit_label(&mut self, label: Label) {
         dynasm!(self ; => label);
     }
@@ -1411,6 +1446,22 @@ impl EmitterARM64 for Assembler {
     fn emit_dmb(&mut self) {
         dynasm!(self ; dmb ish);
     }
+
+    fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location) {
+        match (sz, src, dst) {
+            (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; fneg S(dst), S(src));
+            }
+            (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => {
+                let src = src.into_index() as u32;
+ let dst = dst.into_index() as u32; + dynasm!(self ; fneg D(dst), D(src)); + } + _ => panic!("singlepass can't emit FNEG {:?} {:?} {:?}", sz, src, dst), + } + } } pub fn gen_std_trampoline_arm64( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 95bd762c1a9..f8b5273a2ae 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -83,6 +83,25 @@ impl MachineARM64 { self.release_gpr(r); } } + fn emit_relaxed_binop_neon( + &mut self, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + putback: bool, + ) { + let mut temps = vec![]; + let src = self.location_to_neon(sz, src, &mut temps, ImmType::None); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); + op(&mut self.assembler, sz, src, dest); + if dst != dest && putback { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_simd(r); + } + } fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { match ty { @@ -207,6 +226,90 @@ impl MachineARM64 { _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src), } } + fn location_to_neon( + &mut self, + sz: Size, + src: Location, + temps: &mut Vec, + allow_imm: ImmType, + ) -> Location { + match src { + Location::SIMD(_) => src, + Location::GPR(_) => { + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(sz, src, Location::SIMD(tmp)); + Location::SIMD(tmp) + } + Location::Imm8(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + Location::SIMD(tmp) + } + } + Location::Imm32(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + Location::SIMD(tmp) + } + } + Location::Imm64(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + Location::SIMD(tmp) + } + } + Location::Memory(reg, val) => { + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(sz, Location::SIMD(tmp), reg, val); + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory2(reg, gpr, Multiplier::One, 0), + ); + } + Location::SIMD(tmp) + } + _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src), + } + } fn emit_relaxed_binop3( &mut self, op: 
fn(&mut Assembler, Size, Location, Location, Location),
@@ -1799,11 +1902,21 @@ impl Machine for MachineARM64 {
     fn i32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
         self.emit_cmpop_i32_dynamic_b(Condition::Eq, loc_a, loc_b, ret);
     }
-    fn i32_clz(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn i32_clz(&mut self, src: Location, dst: Location) {
+        self.emit_relaxed_binop(Assembler::emit_clz, Size::S32, src, dst, false);
     }
-    fn i32_ctz(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn i32_ctz(&mut self, src: Location, dst: Location) {
+        let mut temps = vec![];
+        let src = self.location_to_reg(Size::S32, src, &mut temps, ImmType::None, None);
+        let dest = self.location_to_reg(Size::S32, dst, &mut temps, ImmType::None, None);
+        self.assembler.emit_rbit(Size::S32, src, dest);
+        self.assembler.emit_clz(Size::S32, dest, dest);
+        if dst != dest {
+            self.move_location(Size::S32, dest, dst);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
     }
     fn i32_popcnt(&mut self, _loc: Location, _ret: Location) {
         unimplemented!();
@@ -2606,11 +2719,21 @@ impl Machine for MachineARM64 {
     fn i64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
         self.emit_cmpop_i64_dynamic_b(Condition::Eq, loc_a, loc_b, ret);
     }
-    fn i64_clz(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn i64_clz(&mut self, src: Location, dst: Location) {
+        self.emit_relaxed_binop(Assembler::emit_clz, Size::S64, src, dst, false);
     }
-    fn i64_ctz(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn i64_ctz(&mut self, src: Location, dst: Location) {
+        let mut temps = vec![];
+        let src = self.location_to_reg(Size::S64, src, &mut temps, ImmType::None, None);
+        let dest = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::None, None);
+        self.assembler.emit_rbit(Size::S64, src, dest);
+        self.assembler.emit_clz(Size::S64, dest, dest);
+        if dst != dest {
+            self.move_location(Size::S64, dest, dst);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
     }
     fn i64_popcnt(&mut self, _loc: Location, _ret: Location) {
         unimplemented!();
@@ -3585,8 +3708,8 @@ impl Machine for MachineARM64 {
     fn convert_f32_f64(&mut self, _loc: Location, _ret: Location) {
         unimplemented!();
     }
-    fn f64_neg(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn f64_neg(&mut self, loc: Location, ret: Location) {
+        self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S64, loc, ret, true);
     }
     fn f64_abs(&mut self, _loc: Location, _ret: Location) {
         unimplemented!();
@@ -3645,8 +3768,8 @@ impl Machine for MachineARM64 {
     fn f64_div(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) {
         unimplemented!();
     }
-    fn f32_neg(&mut self, _loc: Location, _ret: Location) {
-        unimplemented!();
+    fn f32_neg(&mut self, loc: Location, ret: Location) {
+        self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S32, loc, ret, true);
     }
     fn f32_abs(&mut self, _loc: Location, _ret: Location) {
         unimplemented!();
diff --git a/tests/ignores.txt b/tests/ignores.txt
index f8044c28af8..5d6d35c98b6 100644
--- a/tests/ignores.txt
+++ b/tests/ignores.txt
@@ -73,8 +73,8 @@ cranelift spec::simd::simd_int_to_int_extend
 windows wasmer::max_size_of_memory
 
 # Some AARCH64 CPUs have an issue with segfaulting 64-bit writes on a page border, where the first 32 bits might still be written.
-aarch64 spec::align
-aarch64 spec::memory_trap
+aarch64+linux spec::align
+aarch64+linux spec::memory_trap
 
 # Frontends
 
 ## WASI

From 005351ba31b67a4e3bf8a3addd7931f7c35a1063 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 20 Dec 2021 16:51:26 +0100
Subject: [PATCH 19/34] improv(compiler) Added SDIV, UDIV and UREM operations (105 tests pass now)

---
 lib/compiler-singlepass/src/compiler.rs      |   2 +-
 lib/compiler-singlepass/src/emitter_arm64.rs | 116 ++++++++-
 lib/compiler-singlepass/src/machine_arm64.rs | 254 +++++++++++++------
 3 files changed, 295 insertions(+), 77 deletions(-)

diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs
index 960afe818d0..12b9ace3bc3 100644
--- a/lib/compiler-singlepass/src/compiler.rs
+++ b/lib/compiler-singlepass/src/compiler.rs
@@ -82,7 +82,7 @@ impl Compiler for SinglepassCompiler {
         let calling_convention = match target.triple().default_calling_convention() {
             Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
             Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
-            //Ok(CallingConvention::AppleAarch64) => AppleAarch64,
+            Ok(CallingConvention::AppleAarch64) => CallingConvention::AppleAarch64,
             _ => panic!("Unsupported Calling convention for Singlepass compiler"),
         };
 
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index d5e533335a8..73c7bb741f2 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -138,6 +138,11 @@ pub trait EmitterARM64 {
     fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
     fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
 
+    fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+    fn emit_sdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
+    /// msub : c - a*b -> dst
+    fn emit_msub(&mut self, sz: Size, a: Location, b: Location, c: Location, dst: Location);
+
     fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition);
     fn emit_clz(&mut self, sz: Size, src: Location, dst: Location);
     fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location);
@@ -145,6 +150,8 @@ pub trait EmitterARM64 {
     fn emit_label(&mut self, label: Label);
     fn emit_load_label(&mut self, reg: GPR, label: Label);
     fn emit_b_label(&mut self, label: Label);
+    fn emit_cbz_label(&mut self, sz: Size, reg: Location, label: Label);
+    fn emit_cbnz_label(&mut self, sz: Size, reg: Location, label: Label);
     fn emit_bcond_label(&mut self, condition: Condition, label: Label);
     fn emit_b_register(&mut self, reg: GPR);
     fn emit_call_label(&mut self, label: Label);
@@ -1326,6 +1333,83 @@ impl EmitterARM64 for Assembler {
         }
     }
 
+    fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
+        match (sz, src1, src2, dst) {
+            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; udiv W(dst), W(src1), W(src2));
+            }
+            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; udiv X(dst), X(src1), X(src2));
+            }
+            _ => panic!(
+                "singlepass can't emit UDIV {:?} {:?} {:?} {:?}",
+                sz, src1, src2, dst
+            ),
+        }
+    }
+    fn emit_sdiv(&mut self, sz: Size, src1: Location, src2: Location, dst:
Location) {
+        match (sz, src1, src2, dst) {
+            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; sdiv W(dst), W(src1), W(src2));
+            }
+            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
+                let src1 = src1.into_index() as u32;
+                let src2 = src2.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; sdiv X(dst), X(src1), X(src2));
+            }
+            _ => panic!(
+                "singlepass can't emit SDIV {:?} {:?} {:?} {:?}",
+                sz, src1, src2, dst
+            ),
+        }
+    }
+
+    /// msub : c - a*b -> dst
+    fn emit_msub(&mut self, sz: Size, a: Location, b: Location, c: Location, dst: Location) {
+        match (sz, a, b, c, dst) {
+            (
+                Size::S32,
+                Location::GPR(a),
+                Location::GPR(b),
+                Location::GPR(c),
+                Location::GPR(dst),
+            ) => {
+                let a = a.into_index() as u32;
+                let b = b.into_index() as u32;
+                let c = c.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; msub W(dst), W(a), W(b), W(c));
+            }
+            (
+                Size::S64,
+                Location::GPR(a),
+                Location::GPR(b),
+                Location::GPR(c),
+                Location::GPR(dst),
+            ) => {
+                let a = a.into_index() as u32;
+                let b = b.into_index() as u32;
+                let c = c.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                dynasm!(self ; msub X(dst), X(a), X(b), X(c));
+            }
+            _ => panic!(
+                "singlepass can't emit msub {:?} {:?} {:?} {:?} {:?}",
+                sz, a, b, c, dst
+            ),
+        }
+    }
+
     fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition) {
         let reg = reg.into_index() as u32;
         match sz {
@@ -1408,6 +1492,32 @@ impl EmitterARM64 for Assembler {
     fn emit_b_label(&mut self, label: Label) {
         dynasm!(self ; b =>label);
     }
+    fn emit_cbz_label(&mut self, sz: Size, reg: Location, label: Label) {
+        match (sz, reg) {
+            (Size::S32, Location::GPR(reg)) => {
+                let reg = reg.into_index() as u32;
+                dynasm!(self ; cbz W(reg), =>label);
+            }
+            (Size::S64, Location::GPR(reg)) => {
+                let reg = reg.into_index() as u32;
+                dynasm!(self ; cbz X(reg), =>label);
+            }
+            _ => panic!("singlepass can't emit CBZ {:?} {:?} {:?}", sz, reg, label),
+        }
+    }
+    fn emit_cbnz_label(&mut self, sz: Size, reg: Location, label: Label) {
+        match (sz, reg) {
+            (Size::S32, Location::GPR(reg)) => {
+                let reg = reg.into_index() as u32;
+                dynasm!(self ; cbnz W(reg), =>label);
+            }
+            (Size::S64, Location::GPR(reg)) => {
+                let reg = reg.into_index() as u32;
+                dynasm!(self ; cbnz X(reg), =>label);
+            }
+            _ => panic!("singlepass can't emit CBNZ {:?} {:?} {:?}", sz, reg, label),
+        }
+    }
     fn emit_bcond_label(&mut self, condition: Condition, label: Label) {
         match condition {
             Condition::Eq => dynasm!(self ; b.eq => label),
@@ -1499,7 +1609,11 @@ pub fn gen_std_trampoline_arm64(
         let sz = match *param {
             Type::I32 | Type::F32 => Size::S32,
             Type::I64 | Type::F64 => Size::S64,
-            _ => unimplemented!(),
+            Type::ExternRef => Size::S64,
+            _ => panic!(
+                "singlepass unsupported param type for trampoline {:?}",
+                *param
+            ),
         };
         match i {
             0..=6 => {
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index f8b5273a2ae..b06b68ba01f 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -64,45 +64,6 @@ impl MachineARM64 {
             pushed: false,
         }
     }
-    fn emit_relaxed_binop(
-        &mut self,
-        op: fn(&mut Assembler, Size, Location, Location),
-        sz: Size,
-        src: Location,
-        dst: Location,
-        putback: bool,
-    ) {
-        let mut temps = vec![];
-        let src = self.location_to_reg(sz, src, &mut
temps, ImmType::None, None); - let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); - op(&mut self.assembler, sz, src, dest); - if dst != dest && putback { - self.move_location(sz, dest, dst); - } - for r in temps { - self.release_gpr(r); - } - } - fn emit_relaxed_binop_neon( - &mut self, - op: fn(&mut Assembler, Size, Location, Location), - sz: Size, - src: Location, - dst: Location, - putback: bool, - ) { - let mut temps = vec![]; - let src = self.location_to_neon(sz, src, &mut temps, ImmType::None); - let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); - op(&mut self.assembler, sz, src, dest); - if dst != dest && putback { - self.move_location(sz, dest, dst); - } - for r in temps { - self.release_simd(r); - } - } - fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { match ty { ImmType::None => false, @@ -310,6 +271,45 @@ impl MachineARM64 { _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src), } } + + fn emit_relaxed_binop( + &mut self, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + putback: bool, + ) { + let mut temps = vec![]; + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); + op(&mut self.assembler, sz, src, dest); + if dst != dest && putback { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_binop_neon( + &mut self, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + putback: bool, + ) { + let mut temps = vec![]; + let src = self.location_to_neon(sz, src, &mut temps, ImmType::None); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); + op(&mut self.assembler, sz, src, dest); + if dst != dest && putback { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_simd(r); + } + } fn emit_relaxed_binop3( &mut self, op: fn(&mut Assembler, Size, Location, Location, Location), @@ -661,9 +661,6 @@ impl MachineARM64 { loc_b: Location, ret: Location, ) { - // bug on dynasm - let tmp = self.acquire_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(tmp), 0xff); match ret { Location::GPR(x) => { self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); @@ -671,7 +668,7 @@ impl MachineARM64 { self.assembler.emit_and( Size::S32, Location::GPR(x), - Location::GPR(tmp), /*Location::Imm32(0xff)*/ + Location::Imm32(0xff), Location::GPR(x), ); } @@ -682,7 +679,7 @@ impl MachineARM64 { self.assembler.emit_and( Size::S32, Location::GPR(tmp), - Location::GPR(tmp), /*Location::Imm32(0xff)*/ + Location::Imm32(0xff), Location::GPR(tmp), ); self.move_location(Size::S32, Location::GPR(tmp), ret); @@ -692,7 +689,6 @@ impl MachineARM64 { unreachable!(); } } - self.release_gpr(tmp); } fn memory_op( @@ -1808,30 +1804,84 @@ impl Machine for MachineARM64 { } fn emit_binop_udiv32( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + + self.assembler + .emit_cbz_label(Size::S32, src2, 
integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S32, src1, src2, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_sdiv32( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_sdiv(Size::S32, src1, src2, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_urem32( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = if dest == src1 || dest == src2 { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, dest, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + dest + }; + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S32, src1, src2, dest); + // unsigned remainder : src1 - (src1/src2)*src2 + self.assembler.emit_msub(Size::S32, dest, src2, src1, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_srem32( &mut self, @@ -2625,30 +2675,84 @@ impl Machine for MachineARM64 { } fn emit_binop_udiv64( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + + self.assembler + .emit_cbz_label(Size::S64, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S64, src1, src2, dest); + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_sdiv64( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: 
Label,
     ) -> usize {
-        unimplemented!();
+        let mut temps = vec![];
+        let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None);
+        let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None);
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None);
+
+        self.assembler
+            .emit_cbz_label(Size::S64, src2, integer_division_by_zero);
+        let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
+        self.assembler.emit_sdiv(Size::S64, src1, src2, dest);
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+        offset
     }
     fn emit_binop_urem64(
         &mut self,
-        _loc_a: Location,
-        _loc_b: Location,
-        _ret: Location,
-        _integer_division_by_zero: Label,
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+        integer_division_by_zero: Label,
     ) -> usize {
-        unimplemented!();
+        let mut temps = vec![];
+        let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None);
+        let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None);
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None);
+        let dest = if dest == src1 || dest == src2 {
+            let tmp = self.acquire_temp_gpr().unwrap();
+            temps.push(tmp.clone());
+            self.assembler.emit_mov(Size::S32, dest, Location::GPR(tmp));
+            Location::GPR(tmp)
+        } else {
+            dest
+        };
+        self.assembler
+            .emit_cbz_label(Size::S64, src2, integer_division_by_zero);
+        let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
+        self.assembler.emit_udiv(Size::S64, src1, src2, dest);
+        // unsigned remainder : src1 - (src1/src2)*src2
+        self.assembler.emit_msub(Size::S64, dest, src2, src1, dest);
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+        offset
     }
     fn emit_binop_srem64(
         &mut self,

From 7ce5475c031f204880b2b766067ed15f687b4b27 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 20 Dec 2021 19:27:42 +0100
Subject: [PATCH 20/34] improv(compiler) More work on SDIV, UDIV, UREM and SREM operations (109 tests pass now)

---
 lib/compiler-singlepass/src/arm64_decl.rs    |   2 +-
 lib/compiler-singlepass/src/codegen.rs       |  16 ++
 lib/compiler-singlepass/src/emitter_arm64.rs | 184 ++++++++++++-
 lib/compiler-singlepass/src/machine.rs       |   8 +
 lib/compiler-singlepass/src/machine_arm64.rs | 258 +++++++++++++++----
 lib/compiler-singlepass/src/machine_x64.rs   |   8 +
 6 files changed, 419 insertions(+), 57 deletions(-)

diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs
index 44eb3017f17..b8a1fe8e508 100644
--- a/lib/compiler-singlepass/src/arm64_decl.rs
+++ b/lib/compiler-singlepass/src/arm64_decl.rs
@@ -242,7 +242,7 @@ impl ArgumentRegisterAllocator {
         calling_convention: CallingConvention,
     ) -> Option {
         match calling_convention {
-            CallingConvention::SystemV => {
+            CallingConvention::SystemV | CallingConvention::AppleAarch64 => {
                 static GPR_SEQ: &'static [GPR] = &[
                     GPR::X0,
                     GPR::X1,
diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs
index 5403bf73b89..7c97473cde1 100644
--- a/lib/compiler-singlepass/src/codegen.rs
+++ b/lib/compiler-singlepass/src/codegen.rs
@@ -85,6 +85,7 @@ pub struct FuncGen<'a, M: Machine> {
 
 struct SpecialLabelSet {
     integer_division_by_zero: Label,
+    integer_overflow: Label,
     heap_access_oob: Label,
     table_access_oob: Label,
     indirect_call_null: Label,
@@ -1032,6 +1033,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
     let mut
machine = machine; let special_labels = SpecialLabelSet { integer_division_by_zero: machine.get_label(), + integer_overflow: machine.get_label(), heap_access_oob: machine.get_label(), table_access_oob: machine.get_label(), indirect_call_null: machine.get_label(), @@ -1303,6 +1305,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1313,6 +1316,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1323,6 +1327,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1333,6 +1338,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1469,6 +1475,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1480,6 +1487,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1491,6 +1499,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1502,6 +1511,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -5770,6 +5780,12 @@ impl<'a, M: Machine> FuncGen<'a, M> { .mark_address_with_trap_code(TrapCode::IntegerDivisionByZero); self.machine.emit_illegal_op(); + self.machine + .emit_label(self.special_labels.integer_overflow); + self.machine + .mark_address_with_trap_code(TrapCode::IntegerOverflow); + self.machine.emit_illegal_op(); + self.machine.emit_label(self.special_labels.heap_access_oob); self.machine .mark_address_with_trap_code(TrapCode::HeapAccessOutOfBounds); diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 73c7bb741f2..3dd53c6a11c 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -110,6 +110,7 @@ pub trait EmitterARM64 { fn emit_mov(&mut self, sz: Size, src: Location, dst: Location); + fn emit_movn(&mut self, sz: Size, reg: Location, val: u32); fn emit_movz(&mut self, reg: Location, val: u32); fn emit_movk(&mut self, reg: Location, val: u32, shift: u32); @@ -143,7 +144,14 @@ pub trait EmitterARM64 { /// msub : c - a*b -> dst fn emit_msub(&mut self, sz: Size, a: Location, b: Location, c: Location, dst: Location); + fn emit_sxtb(&mut self, sz: Size, src: Location, dst: Location); + fn emit_sxth(&mut self, sz: Size, src: Location, dst: Location); + fn emit_sxtw(&mut self, sz: Size, src: Location, dst: Location); + fn emit_uxtb(&mut self, sz: Size, src: Location, dst: Location); + fn emit_uxth(&mut self, sz: Size, src: Location, dst: Location); + fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition); + fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, cond: Condition); fn emit_clz(&mut self, sz: Size, src: Location, dst: 
Location); fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location); @@ -695,6 +703,19 @@ impl EmitterARM64 for Assembler { } } + fn emit_movn(&mut self, sz: Size, reg: Location, val: u32) { + match (sz, reg) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; movn W(reg), val); + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; movn X(reg), val); + } + _ => unreachable!(), + } + } fn emit_movz(&mut self, reg: Location, val: u32) { match reg { Location::GPR(reg) => { @@ -1019,7 +1040,23 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; cmp W(dst), W(src)); } - _ => unreachable!(), + (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp X(dst), imm as u32); + } + (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp X(dst), imm as u32); + } + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp W(dst), imm as u32); + } + (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp W(dst), imm as u32); + } + _ => panic!("singlepass can't emit CMP {:?} {:?} {:?}", sz, src, dst), } } @@ -1034,6 +1071,10 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; tst X(dst), src as u64); } + (Size::S64, Location::Imm64(src), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; tst X(dst), src as u64); + } (Size::S32, Location::GPR(src), Location::GPR(dst)) => { let src = src.into_index() as u32; let dst = dst.into_index() as u32; @@ -1073,12 +1114,24 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; lsl X(dst), X(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl X(dst), X(src1), imm as u32); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; lsl W(dst), W(src1), imm as u32); } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsl W(dst), W(src1), imm as u32); + } _ => panic!( "singlepass can't emit LSL {:?} {:?} {:?} {:?}", sz, src1, src2, dst @@ -1161,10 +1214,31 @@ impl EmitterARM64 for Assembler { } dynasm!(self ; lsr X(dst), X(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), imm as u32); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm == 0 { + 
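+                    // a zero shift amount is rejected here; callers are expected
+                    // to have filtered it out via ImmType::Shift32No0 /
+                    // Shift64No0 before emitting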
unreachable!(); + } + dynasm!(self ; lsr W(dst), W(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } dynasm!(self ; lsr W(dst), W(src1), imm as u32); } _ => panic!( @@ -1410,6 +1484,67 @@ impl EmitterARM64 for Assembler { } } + fn emit_sxtb(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtb W(dst), W(src)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtb X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTB {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_sxth(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxth W(dst), W(src)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxth X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTH {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_sxtw(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtw X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTW {:?} {:?}", src, dst), + } + } + fn emit_uxtb(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; uxtb W(dst), W(src)); + } + _ => panic!("singlepass can't emit UXTB {:?} {:?}", src, dst), + } + } + fn emit_uxth(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; uxth W(dst), W(src)); + } + _ => panic!("singlepass can't emit UXTH {:?} {:?}", src, dst), + } + } + fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition) { let reg = reg.into_index() as u32; match sz { @@ -1450,6 +1585,53 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } + fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, cond: Condition) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + match cond { + Condition::Eq => dynasm!(self ; cinc W(dst), W(src), eq), + Condition::Ne => dynasm!(self ; cinc W(dst), W(src), ne), + Condition::Cs => dynasm!(self ; cinc W(dst), W(src), cs), + Condition::Cc => dynasm!(self ; cinc W(dst), W(src), cc), + Condition::Mi => dynasm!(self ; cinc W(dst), W(src), mi), + Condition::Pl => dynasm!(self ; cinc W(dst), W(src), pl), + Condition::Vs => dynasm!(self ; cinc W(dst), W(src), vs), + Condition::Vc => dynasm!(self ; cinc W(dst), W(src), vc), + Condition::Hi => dynasm!(self ; cinc W(dst), W(src), hi), + Condition::Ls => dynasm!(self ; cinc W(dst), 
W(src), ls),
+                    Condition::Ge => dynasm!(self ; cinc W(dst), W(src), ge),
+                    Condition::Lt => dynasm!(self ; cinc W(dst), W(src), lt),
+                    Condition::Gt => dynasm!(self ; cinc W(dst), W(src), gt),
+                    Condition::Le => dynasm!(self ; cinc W(dst), W(src), le),
+                    Condition::Al => dynasm!(self ; cinc W(dst), W(src), al),
+                };
+            }
+            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
+                let src = src.into_index() as u32;
+                let dst = dst.into_index() as u32;
+                match cond {
+                    Condition::Eq => dynasm!(self ; cinc X(dst), X(src), eq),
+                    Condition::Ne => dynasm!(self ; cinc X(dst), X(src), ne),
+                    Condition::Cs => dynasm!(self ; cinc X(dst), X(src), cs),
+                    Condition::Cc => dynasm!(self ; cinc X(dst), X(src), cc),
+                    Condition::Mi => dynasm!(self ; cinc X(dst), X(src), mi),
+                    Condition::Pl => dynasm!(self ; cinc X(dst), X(src), pl),
+                    Condition::Vs => dynasm!(self ; cinc X(dst), X(src), vs),
+                    Condition::Vc => dynasm!(self ; cinc X(dst), X(src), vc),
+                    Condition::Hi => dynasm!(self ; cinc X(dst), X(src), hi),
+                    Condition::Ls => dynasm!(self ; cinc X(dst), X(src), ls),
+                    Condition::Ge => dynasm!(self ; cinc X(dst), X(src), ge),
+                    Condition::Lt => dynasm!(self ; cinc X(dst), X(src), lt),
+                    Condition::Gt => dynasm!(self ; cinc X(dst), X(src), gt),
+                    Condition::Le => dynasm!(self ; cinc X(dst), X(src), le),
+                    Condition::Al => dynasm!(self ; cinc X(dst), X(src), al),
+                };
+            }
+            _ => unreachable!(),
+        }
+    }
     fn emit_clz(&mut self, sz: Size, src: Location, dst: Location) {
         match (sz, src, dst) {
diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs
index 1113ebe878c..934181e7f46 100644
--- a/lib/compiler-singlepass/src/machine.rs
+++ b/lib/compiler-singlepass/src/machine.rs
@@ -433,6 +433,7 @@ pub trait Machine {
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Signed Division with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
     fn emit_binop_sdiv32(
@@ -441,6 +442,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Unsigned Remainder (of a division) with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
     fn emit_binop_urem32(
@@ -449,6 +451,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Signed Remainder (of a division) with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
     fn emit_binop_srem32(
@@ -457,6 +460,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// And with location directly from the stack
     fn emit_binop_and32(
@@ -1046,6 +1050,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Signed Division with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
     fn emit_binop_sdiv64(
@@ -1054,6 +1059,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Unsigned Remainder (of a division) with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
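    // NOTE: every division/remainder entry point now also takes an
    // `integer_overflow` label. On x86-64 the label goes unused (hardware
    // DIV/IDIV already faults on both divide-by-zero and INT_MIN / -1),
    // while AArch64 SDIV never traps, so the overflow case has to be
    // branched to explicitly (see machine_arm64.rs below).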
    fn emit_binop_urem64(
@@ -1062,6 +1068,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// Signed Remainder (of a division) with location directly from the stack. Returns the offset of the DIV opcode, to mark it as trappable.
     fn emit_binop_srem64(
@@ -1070,6 +1077,7 @@
         loc_b: Location,
         ret: Location,
         integer_division_by_zero: Label,
+        integer_overflow: Label,
     ) -> usize;
     /// And with location directly from the stack
     fn emit_binop_and64(
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index b06b68ba01f..11d727bea24 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -580,42 +580,6 @@ impl MachineARM64 {
             self.release_gpr(r);
         }
     }
-    /// I32 binary operation with both operands popped from the virtual stack.
-    /*fn emit_binop_i32(
-        &mut self,
-        f: fn(&mut Assembler, Size, Location, Location),
-        loc_a: Location,
-        loc_b: Location,
-        ret: Location,
-    ) {
-        if loc_a != ret {
-            let tmp = self.acquire_temp_gpr().unwrap();
-            self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp));
-            self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp), true);
-            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret);
-            self.release_gpr(tmp);
-        } else {
-            self.emit_relaxed_binop(f, Size::S32, loc_b, ret, true);
-        }
-    }*/
-    /// I64 binary operation with both operands popped from the virtual stack.
-    /*fn emit_binop_i64(
-        &mut self,
-        f: fn(&mut Assembler, Size, Location, Location),
-        loc_a: Location,
-        loc_b: Location,
-        ret: Location,
-    ) {
-        if loc_a != ret {
-            let tmp = self.acquire_temp_gpr().unwrap();
-            self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp));
-            self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp), true);
-            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret);
-            self.release_gpr(tmp);
-        } else {
-            self.emit_relaxed_binop(f, Size::S64, loc_b, ret, true);
-        }
-    }*/
     /// I64 comparison, with dynamic second operand.
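    // NOTE: these helpers compile a Wasm comparison as CMP followed by
    // CSET with the matching condition, leaving a clean 0/1 in the
    // destination; the AND with 0xff that used to follow CSET is dropped
    // in the hunks below, since CSET already writes exactly 0 or 1.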
fn emit_cmpop_i64_dynamic_b( &mut self, @@ -1764,12 +1728,36 @@ impl Machine for MachineARM64 { } fn emit_relaxed_sign_extension( &mut self, - _sz_src: Size, - _src: Location, - _sz_dst: Size, - _dst: Location, + sz_src: Size, + src: Location, + sz_dst: Size, + dst: Location, ) { - unimplemented!(); + match (src, dst) { + (Location::Memory(_, _), Location::GPR(_)) => match sz_src { + Size::S8 => self.emit_relaxed_ldr8s(sz_dst, dst, src), + Size::S16 => self.emit_relaxed_ldr16s(sz_dst, dst, src), + Size::S32 => self.emit_relaxed_ldr32s(dst, src), + _ => unreachable!(), + }, + _ => { + let mut temps = vec![]; + let src = self.location_to_reg(sz_dst, src, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(sz_dst, dst, &mut temps, ImmType::None, None); + match sz_src { + Size::S8 => self.assembler.emit_sxtb(sz_dst, src, dest), + Size::S16 => self.assembler.emit_sxth(sz_dst, src, dest), + Size::S32 => self.assembler.emit_sxtw(sz_dst, src, dest), + _ => unreachable!(), + }; + if dst != dest { + self.move_location(sz_dst, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + } } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -1808,6 +1796,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); @@ -1832,6 +1821,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); @@ -1840,7 +1830,23 @@ impl Machine for MachineARM64 { self.assembler .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let label_nooverflow = self.assembler.get_label(); + let tmp = self.location_to_reg( + Size::S32, + Location::Imm32(0x80000000), + &mut temps, + ImmType::None, + None, + ); + self.assembler.emit_cmp(Size::S32, tmp, src1); + self.assembler + .emit_bcond_label(Condition::Ne, label_nooverflow); + self.assembler.emit_movn(Size::S32, tmp, 0); + self.assembler.emit_cmp(Size::S32, tmp, src2); + self.assembler + .emit_bcond_label(Condition::Eq, integer_overflow); let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_label(label_nooverflow); self.assembler.emit_sdiv(Size::S32, src1, src2, dest); if ret != dest { self.move_location(Size::S32, dest, ret); @@ -1856,6 +1862,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); @@ -1885,12 +1892,37 @@ impl Machine for MachineARM64 { } fn emit_binop_srem32( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = if dest == src1 || dest == src2 { + let tmp = self.acquire_temp_gpr().unwrap(); + 
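            // `dest` must not alias src1 or src2: the MSUB below still
            // reads both sources after SDIV has written dest, so the
            // result is redirected through a fresh temporary first.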
temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, dest, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + dest + }; + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_sdiv(Size::S32, src1, src2, dest); + // unsigned remainder : src1 - (src1/src2)*src2 + self.assembler.emit_msub(Size::S32, dest, src2, src1, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_relaxed_binop3( @@ -1968,8 +2000,45 @@ impl Machine for MachineARM64 { self.release_gpr(r); } } - fn i32_popcnt(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn i32_popcnt(&mut self, loc: Location, ret: Location) { + // no opcode for that. + // 2 solutions: using NEON CNT, that count bits per Byte, or using clz with some shift and loop + let mut temps = vec![]; + let src = self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src = if src == loc { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, src, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + src + }; + let tmp = { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + Location::GPR(tmp) + }; + let label_loop = self.assembler.get_label(); + let label_exit = self.assembler.get_label(); + self.assembler + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest); // 0 => dest + self.assembler.emit_cbz_label(Size::S32, src, label_exit); // src==0, exit + self.assembler.emit_label(label_loop); // loop: + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest); // inc dest + self.assembler.emit_clz(Size::S32, src, tmp); // clz src => tmp + self.assembler + .emit_add(Size::S32, tmp, Location::Imm8(1), tmp); // inc tmp + self.assembler.emit_lsl(Size::S32, src, tmp, src); // src << tmp => src + self.assembler.emit_cbnz_label(Size::S32, src, label_loop); // if src!=0 goto loop + self.assembler.emit_label(label_exit); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_relaxed_binop3( @@ -2679,6 +2748,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); @@ -2703,6 +2773,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); @@ -2711,7 +2782,23 @@ impl Machine for MachineARM64 { self.assembler .emit_cbz_label(Size::S64, src2, integer_division_by_zero); + let label_nooverflow = self.assembler.get_label(); + let tmp = self.location_to_reg( + Size::S64, + Location::Imm64(0x8000000000000000), + &mut temps, + ImmType::None, + None, + ); + self.assembler.emit_cmp(Size::S64, tmp, src1); + self.assembler + .emit_bcond_label(Condition::Ne, label_nooverflow); + self.assembler.emit_movn(Size::S64, tmp, 0); + self.assembler.emit_cmp(Size::S64, 
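        // this sequence guards the single signed-division overflow case,
        // i64::MIN / -1 (the quotient 2^63 is unrepresentable): MOVN with 0
        // materializes -1, so the check is, as a plain-Rust sketch of the
        // emitted code:
        //   if src1 == i64::MIN && src2 == -1 { branch to integer_overflow }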
tmp, src2); + self.assembler + .emit_bcond_label(Condition::Eq, integer_overflow); let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_label(label_nooverflow); self.assembler.emit_sdiv(Size::S64, src1, src2, dest); if ret != dest { self.move_location(Size::S64, dest, ret); @@ -2727,6 +2814,7 @@ impl Machine for MachineARM64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { let mut temps = vec![]; let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); @@ -2756,12 +2844,37 @@ impl Machine for MachineARM64 { } fn emit_binop_srem64( &mut self, - _loc_a: Location, - _loc_b: Location, - _ret: Location, - _integer_division_by_zero: Label, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { - unimplemented!(); + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = if dest == src1 || dest == src2 { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S64, dest, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + dest + }; + self.assembler + .emit_cbz_label(Size::S64, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_sdiv(Size::S64, src1, src2, dest); + // unsigned remainder : src1 - (src1/src2)*src2 + self.assembler.emit_msub(Size::S64, dest, src2, src1, dest); + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset } fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_relaxed_binop3( @@ -2839,8 +2952,43 @@ impl Machine for MachineARM64 { self.release_gpr(r); } } - fn i64_popcnt(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn i64_popcnt(&mut self, loc: Location, ret: Location) { + let mut temps = vec![]; + let src = self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src = if src == loc { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S64, src, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + src + }; + let tmp = { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + Location::GPR(tmp) + }; + let label_loop = self.assembler.get_label(); + let label_exit = self.assembler.get_label(); + self.assembler + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest); + self.assembler.emit_cbz_label(Size::S64, src, label_exit); + self.assembler.emit_label(label_loop); + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest); + self.assembler.emit_clz(Size::S64, src, tmp); + self.assembler + .emit_add(Size::S32, tmp, Location::Imm8(1), tmp); + self.assembler.emit_lsl(Size::S64, src, tmp, src); + self.assembler.emit_cbnz_label(Size::S64, src, label_loop); + self.assembler.emit_label(label_exit); + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { 
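        // NOTE (an assumption about the ISA, not stated in the patch):
        // AArch64 LSL/LSR/ASR take the shift amount modulo the register
        // width, which matches Wasm's shift semantics, so loc_b needs no
        // explicit masking here.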
self.emit_relaxed_binop3( diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 59a28f9f4ff..37f31c9c320 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -2314,6 +2314,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2336,6 +2337,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2357,6 +2359,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2379,6 +2382,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. let normal_path = self.assembler.get_label(); @@ -3751,6 +3755,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3773,6 +3778,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3794,6 +3800,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3816,6 +3823,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. 
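        // NOTE: `_integer_overflow` is deliberately unused on x86-64:
        // IDIV itself raises #DE on both division by zero and INT_MIN / -1,
        // and the DIV site is already marked trappable, so no separate
        // branch is needed here.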
let normal_path = self.assembler.get_label(); From 7cbf2fed1d3b3029647a231adbc780a7f4b86c1b Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 21 Dec 2021 11:05:59 +0100 Subject: [PATCH 21/34] improv(compiler) More logic and float operations, but no canonicalization yet (120 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 308 +++++++++++-- lib/compiler-singlepass/src/machine_arm64.rs | 445 +++++++++++++++---- 2 files changed, 624 insertions(+), 129 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 3dd53c6a11c..2cc5a83dbfc 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -150,7 +150,8 @@ pub trait EmitterARM64 { fn emit_uxtb(&mut self, sz: Size, src: Location, dst: Location); fn emit_uxth(&mut self, sz: Size, src: Location, dst: Location); - fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition); + fn emit_cset(&mut self, sz: Size, dst: Location, cond: Condition); + fn emit_csetm(&mut self, sz: Size, dst: Location, cond: Condition); fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, cond: Condition); fn emit_clz(&mut self, sz: Size, src: Location, dst: Location); fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location); @@ -169,7 +170,17 @@ pub trait EmitterARM64 { fn emit_udf(&mut self); fn emit_dmb(&mut self); + fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location); fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location); + fn emit_fsqrt(&mut self, sz: Size, src: Location, dst: Location); + + fn emit_fadd(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_fsub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_fmul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_fdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + + fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn arch_supports_canonicalize_nan(&self) -> bool { true @@ -1170,12 +1181,27 @@ impl EmitterARM64 for Assembler { } dynasm!(self ; asr X(dst), X(src1), imm as u32); } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), imm as u32); + } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; asr W(dst), W(src1), imm as u32); } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; asr W(dst), W(src1), imm as u32); + } _ => panic!( "singlepass can't emit ASR {:?} {:?} {:?} {:?}", sz, src1, src2, dst @@ -1545,44 +1571,94 @@ impl EmitterARM64 for Assembler { } } - fn emit_cset(&mut self, sz: Size, reg: GPR, cond: Condition) { - let reg = reg.into_index() as u32; - match sz { - Size::S32 => match cond { - Condition::Eq => dynasm!(self ; cset W(reg), eq), - Condition::Ne => 
dynasm!(self ; cset W(reg), ne), - Condition::Cs => dynasm!(self ; cset W(reg), cs), - Condition::Cc => dynasm!(self ; cset W(reg), cc), - Condition::Mi => dynasm!(self ; cset W(reg), mi), - Condition::Pl => dynasm!(self ; cset W(reg), pl), - Condition::Vs => dynasm!(self ; cset W(reg), vs), - Condition::Vc => dynasm!(self ; cset W(reg), vc), - Condition::Hi => dynasm!(self ; cset W(reg), hi), - Condition::Ls => dynasm!(self ; cset W(reg), ls), - Condition::Ge => dynasm!(self ; cset W(reg), ge), - Condition::Lt => dynasm!(self ; cset W(reg), lt), - Condition::Gt => dynasm!(self ; cset W(reg), gt), - Condition::Le => dynasm!(self ; cset W(reg), le), - Condition::Al => dynasm!(self ; cset W(reg), al), - }, - Size::S64 => match cond { - Condition::Eq => dynasm!(self ; cset X(reg), eq), - Condition::Ne => dynasm!(self ; cset X(reg), ne), - Condition::Cs => dynasm!(self ; cset X(reg), cs), - Condition::Cc => dynasm!(self ; cset X(reg), cc), - Condition::Mi => dynasm!(self ; cset X(reg), mi), - Condition::Pl => dynasm!(self ; cset X(reg), pl), - Condition::Vs => dynasm!(self ; cset X(reg), vs), - Condition::Vc => dynasm!(self ; cset X(reg), vc), - Condition::Hi => dynasm!(self ; cset X(reg), hi), - Condition::Ls => dynasm!(self ; cset X(reg), ls), - Condition::Ge => dynasm!(self ; cset X(reg), ge), - Condition::Lt => dynasm!(self ; cset X(reg), lt), - Condition::Gt => dynasm!(self ; cset X(reg), gt), - Condition::Le => dynasm!(self ; cset X(reg), le), - Condition::Al => dynasm!(self ; cset X(reg), al), - }, - _ => unreachable!(), + fn emit_cset(&mut self, sz: Size, dst: Location, cond: Condition) { + match (sz, dst) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; cset W(reg), eq), + Condition::Ne => dynasm!(self ; cset W(reg), ne), + Condition::Cs => dynasm!(self ; cset W(reg), cs), + Condition::Cc => dynasm!(self ; cset W(reg), cc), + Condition::Mi => dynasm!(self ; cset W(reg), mi), + Condition::Pl => dynasm!(self ; cset W(reg), pl), + Condition::Vs => dynasm!(self ; cset W(reg), vs), + Condition::Vc => dynasm!(self ; cset W(reg), vc), + Condition::Hi => dynasm!(self ; cset W(reg), hi), + Condition::Ls => dynasm!(self ; cset W(reg), ls), + Condition::Ge => dynasm!(self ; cset W(reg), ge), + Condition::Lt => dynasm!(self ; cset W(reg), lt), + Condition::Gt => dynasm!(self ; cset W(reg), gt), + Condition::Le => dynasm!(self ; cset W(reg), le), + Condition::Al => dynasm!(self ; cset W(reg), al), + } + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; cset X(reg), eq), + Condition::Ne => dynasm!(self ; cset X(reg), ne), + Condition::Cs => dynasm!(self ; cset X(reg), cs), + Condition::Cc => dynasm!(self ; cset X(reg), cc), + Condition::Mi => dynasm!(self ; cset X(reg), mi), + Condition::Pl => dynasm!(self ; cset X(reg), pl), + Condition::Vs => dynasm!(self ; cset X(reg), vs), + Condition::Vc => dynasm!(self ; cset X(reg), vc), + Condition::Hi => dynasm!(self ; cset X(reg), hi), + Condition::Ls => dynasm!(self ; cset X(reg), ls), + Condition::Ge => dynasm!(self ; cset X(reg), ge), + Condition::Lt => dynasm!(self ; cset X(reg), lt), + Condition::Gt => dynasm!(self ; cset X(reg), gt), + Condition::Le => dynasm!(self ; cset X(reg), le), + Condition::Al => dynasm!(self ; cset X(reg), al), + } + } + _ => panic!("singlepass can't emit CSET {:?} {:?} {:?}", sz, dst, cond), + } + } + fn emit_csetm(&mut self, sz: Size, dst: Location, cond: Condition) { + match (sz, dst) { + 
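// NOTE: CSETM is the mask-producing variant of CSET:
// dst = if cond { !0 } else { 0 }, i.e. all ones instead of 1,
// presumably added for building compare masks later on.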
(Size::S32, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; csetm W(reg), eq), + Condition::Ne => dynasm!(self ; csetm W(reg), ne), + Condition::Cs => dynasm!(self ; csetm W(reg), cs), + Condition::Cc => dynasm!(self ; csetm W(reg), cc), + Condition::Mi => dynasm!(self ; csetm W(reg), mi), + Condition::Pl => dynasm!(self ; csetm W(reg), pl), + Condition::Vs => dynasm!(self ; csetm W(reg), vs), + Condition::Vc => dynasm!(self ; csetm W(reg), vc), + Condition::Hi => dynasm!(self ; csetm W(reg), hi), + Condition::Ls => dynasm!(self ; csetm W(reg), ls), + Condition::Ge => dynasm!(self ; csetm W(reg), ge), + Condition::Lt => dynasm!(self ; csetm W(reg), lt), + Condition::Gt => dynasm!(self ; csetm W(reg), gt), + Condition::Le => dynasm!(self ; csetm W(reg), le), + Condition::Al => dynasm!(self ; csetm W(reg), al), + } + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; csetm X(reg), eq), + Condition::Ne => dynasm!(self ; csetm X(reg), ne), + Condition::Cs => dynasm!(self ; csetm X(reg), cs), + Condition::Cc => dynasm!(self ; csetm X(reg), cc), + Condition::Mi => dynasm!(self ; csetm X(reg), mi), + Condition::Pl => dynasm!(self ; csetm X(reg), pl), + Condition::Vs => dynasm!(self ; csetm X(reg), vs), + Condition::Vc => dynasm!(self ; csetm X(reg), vc), + Condition::Hi => dynasm!(self ; csetm X(reg), hi), + Condition::Ls => dynasm!(self ; csetm X(reg), ls), + Condition::Ge => dynasm!(self ; csetm X(reg), ge), + Condition::Lt => dynasm!(self ; csetm X(reg), lt), + Condition::Gt => dynasm!(self ; csetm X(reg), gt), + Condition::Le => dynasm!(self ; csetm X(reg), le), + Condition::Al => dynasm!(self ; csetm X(reg), al), + } + } + _ => panic!("singlepass can't emit CSETM {:?} {:?} {:?}", sz, dst, cond), } } fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, cond: Condition) { @@ -1739,6 +1815,22 @@ impl EmitterARM64 for Assembler { dynasm!(self ; dmb ish); } + fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location) { + match (sz, src1, src2) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + dynasm!(self ; fcmp S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + dynasm!(self ; fcmp D(src1), D(src2)); + } + _ => panic!("singlepass can't emit FCMP {:?} {:?} {:?}", sz, src1, src2), + } + } + fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location) { match (sz, src, dst) { (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { @@ -1754,6 +1846,143 @@ impl EmitterARM64 for Assembler { _ => panic!("singlepass can't emit FNEG {:?} {:?} {:?}", sz, src, dst), } } + fn emit_fsqrt(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsqrt S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsqrt D(dst), D(src)); + } + _ => panic!("singlepass can't emit FSQRT {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_fadd(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), 
Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fadd S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fadd D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FADD {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fsub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsub S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsub D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FSUB {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fmul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmul S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmul D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMUL {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fdiv S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fdiv D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FDIV {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmin S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmin D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMIN {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + 
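        // NOTE (an assumption about FMIN/FMAX semantics, not spelled out
        // in the patch): scalar FMIN/FMAX propagate NaNs and order -0.0
        // below +0.0, which lines up with Wasm f32/f64 min/max except for
        // NaN payload canonicalization, which the commit message says is
        // not handled yet.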
match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmax S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmax D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMAX {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } } pub fn gen_std_trampoline_arm64( @@ -1792,6 +2021,7 @@ pub fn gen_std_trampoline_arm64( Type::I32 | Type::F32 => Size::S32, Type::I64 | Type::F64 => Size::S64, Type::ExternRef => Size::S64, + Type::FuncRef => Size::S64, _ => panic!( "singlepass unsupported param type for trampoline {:?}", *param diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 11d727bea24..87105231d5e 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -331,6 +331,27 @@ impl MachineARM64 { self.release_gpr(r); } } + fn emit_relaxed_binop3_neon( + &mut self, + op: fn(&mut Assembler, Size, Location, Location, Location), + sz: Size, + src1: Location, + src2: Location, + dst: Location, + allow_imm: ImmType, + ) { + let mut temps = vec![]; + let src1 = self.location_to_neon(sz, src1, &mut temps, ImmType::None); + let src2 = self.location_to_neon(sz, src2, &mut temps, allow_imm); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); + op(&mut self.assembler, sz, src1, src2, dest); + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_simd(r); + } + } fn emit_relaxed_ldr64(&mut self, dst: Location, src: Location) { match src { Location::Memory(addr, offset) => { @@ -589,26 +610,14 @@ impl MachineARM64 { ret: Location, ) { match ret { - Location::GPR(x) => { + Location::GPR(_) => { self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); - self.assembler.emit_cset(Size::S32, x, c); - self.assembler.emit_and( - Size::S32, - Location::GPR(x), - Location::Imm32(0xff), - Location::GPR(x), - ); + self.assembler.emit_cset(Size::S32, ret, c); } Location::Memory(_, _) => { let tmp = self.acquire_temp_gpr().unwrap(); self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); - self.assembler.emit_cset(Size::S32, tmp, c); - self.assembler.emit_and( - Size::S32, - Location::GPR(tmp), - Location::Imm32(0xff), - Location::GPR(tmp), - ); + self.assembler.emit_cset(Size::S32, Location::GPR(tmp), c); self.move_location(Size::S32, Location::GPR(tmp), ret); self.release_gpr(tmp); } @@ -626,26 +635,14 @@ impl MachineARM64 { ret: Location, ) { match ret { - Location::GPR(x) => { + Location::GPR(_) => { self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); - self.assembler.emit_cset(Size::S32, x, c); - self.assembler.emit_and( - Size::S32, - Location::GPR(x), - Location::Imm32(0xff), - Location::GPR(x), - ); + self.assembler.emit_cset(Size::S32, ret, c); } Location::Memory(_, _) => { let tmp = self.acquire_temp_gpr().unwrap(); self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); - self.assembler.emit_cset(Size::S32, tmp, c); - self.assembler.emit_and( - Size::S32, - Location::GPR(tmp), - Location::Imm32(0xff), - Location::GPR(tmp), - ); + self.assembler.emit_cset(Size::S32, Location::GPR(tmp), c); self.move_location(Size::S32, Location::GPR(tmp), ret); self.release_gpr(tmp); } @@ 
-852,6 +849,26 @@ impl MachineARM64 { self.assembler.emit_stur(Size::S64, src, GPR::XzrSp, offset); self.pushed = !self.pushed; } + (Size::S64, _) => { + let mut temps = vec![]; + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, None); + let offset = if self.pushed { + 0 + } else { + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(16), + Location::GPR(GPR::XzrSp), + ); + 8 + }; + self.assembler.emit_stur(Size::S64, src, GPR::XzrSp, offset); + self.pushed = !self.pushed; + for r in temps { + self.release_gpr(r); + } + } _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src), } } @@ -3963,14 +3980,44 @@ impl Machine for MachineARM64 { fn f64_neg(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S64, loc, ret, true); } - fn f64_abs(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_abs(&mut self, loc: Location, ret: Location) { + let tmp = self.acquire_temp_gpr().unwrap(); + + self.move_location(Size::S64, loc, Location::GPR(tmp)); + self.assembler.emit_and( + Size::S64, + Location::GPR(tmp), + Location::Imm64(0x7fffffffffffffffu64), + Location::GPR(tmp), + ); + self.move_location(Size::S64, Location::GPR(tmp), ret); + + self.release_gpr(tmp); } - fn emit_i64_copysign(&mut self, _tmp1: GPR, _tmp2: GPR) { - unimplemented!(); + fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + self.assembler.emit_and( + Size::S64, + Location::GPR(tmp1), + Location::Imm64(0x7fffffffffffffffu64), + Location::GPR(tmp1), + ); + + self.assembler.emit_and( + Size::S64, + Location::GPR(tmp2), + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp2), + ); + + self.assembler.emit_or( + Size::S64, + Location::GPR(tmp1), + Location::GPR(tmp2), + Location::GPR(tmp1), + ); } - fn f64_sqrt(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_sqrt(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S64, loc, ret, true); } fn f64_trunc(&mut self, _loc: Location, _ret: Location) { unimplemented!(); @@ -3984,53 +4031,175 @@ impl Machine for MachineARM64 { fn f64_nearest(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f64_cmp_ge(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ls); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_cmp_gt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Cc); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_cmp_le(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; 
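        // NOTE: the float compares issue FCMP and then CSET with an
        // *unsigned* condition (Ls for <=/>=, Cc for </>), swapping the
        // operands for ge/gt; after FCMP an unordered result (any NaN
        // input) sets C and clears Z, so Ls and Cc both evaluate false
        // and NaN comparisons yield 0, as Wasm requires.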
+ let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ls); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_cmp_lt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Cc); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ne); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Eq); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f64_min(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmin, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f64_max(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmax, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f64_add(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fadd, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f64_sub(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fsub, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f64_mul(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmul, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f64_div(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + 
fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fdiv, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); } fn f32_neg(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S32, loc, ret, true); } - fn f32_abs(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_abs(&mut self, loc: Location, ret: Location) { + let tmp = self.acquire_temp_gpr().unwrap(); + self.move_location(Size::S32, loc, Location::GPR(tmp)); + self.assembler.emit_and( + Size::S32, + Location::GPR(tmp), + Location::Imm32(0x7fffffffu32), + Location::GPR(tmp), + ); + self.move_location(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); } - fn emit_i32_copysign(&mut self, _tmp1: GPR, _tmp2: GPR) { - unimplemented!(); + fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) { + self.assembler.emit_and( + Size::S32, + Location::GPR(tmp1), + Location::Imm32(0x7fffffffu32), + Location::GPR(tmp1), + ); + self.assembler.emit_and( + Size::S32, + Location::GPR(tmp2), + Location::Imm32(0x80000000u32), + Location::GPR(tmp2), + ); + self.assembler.emit_or( + Size::S32, + Location::GPR(tmp1), + Location::GPR(tmp2), + Location::GPR(tmp1), + ); } - fn f32_sqrt(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_sqrt(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S32, loc, ret, true); } fn f32_trunc(&mut self, _loc: Location, _ret: Location) { unimplemented!(); @@ -4044,41 +4213,137 @@ impl Machine for MachineARM64 { fn f32_nearest(&mut self, _loc: Location, _ret: Location) { unimplemented!(); } - fn f32_cmp_ge(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ls); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_cmp_gt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Cc); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_cmp_le(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ls); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_cmp_lt(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps 
= vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Cc); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_cmp_ne(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Ne); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_cmp_eq(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); + self.assembler.emit_cset(Size::S32, dest, Condition::Eq); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } } - fn f32_min(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmin, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f32_max(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmax, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f32_add(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fadd, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f32_sub(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fsub, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f32_mul(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fmul, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } - fn f32_div(&mut self, _loc_a: Location, _loc_b: Location, _ret: Location) { - unimplemented!(); + fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3_neon( + Assembler::emit_fdiv, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); } fn gen_std_trampoline( From 8d066a13e8384e55abccdcc54abab97aeb90ba90 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 22 Dec 2021 00:38:37 +0100 Subject: [PATCH 22/34] improv(compiler) More float and native call work (133 tests passes now) --- lib/compiler-singlepass/src/codegen.rs | 22 +- lib/compiler-singlepass/src/emitter_arm64.rs | 156 
++++- lib/compiler-singlepass/src/machine.rs | 8 +- lib/compiler-singlepass/src/machine_arm64.rs | 607 +++++++++++++------ lib/compiler-singlepass/src/machine_x64.rs | 7 +- 5 files changed, 571 insertions(+), 229 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 7c97473cde1..3b4f1f0d70c 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -709,8 +709,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { let params: Vec<_> = params.collect(); - // Save used GPRs. - self.machine.push_used_gpr(); + // Save used GPRs. Preserve correct stack alignment + let mut used_stack = self.machine.push_used_gpr(); let used_gprs = self.machine.get_used_gprs(); for r in used_gprs.iter() { let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone(); @@ -725,7 +725,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Save used XMM registers. let used_simds = self.machine.get_used_simd(); if used_simds.len() > 0 { - self.machine.push_used_simd(); + used_stack += self.machine.push_used_simd(); for r in used_simds.iter().rev() { let content = @@ -757,15 +757,17 @@ impl<'a, M: Machine> FuncGen<'a, M> { } // Align stack to 16 bytes. - if self.machine.round_stack_adjust(8) == 8 { - if (self.get_stack_offset() + used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset) - % 16 - != 0 - { + if (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset) + % 16 + != 0 + { + if self.machine.round_stack_adjust(8) == 8 { self.machine.adjust_stack(8); - stack_offset += 8; - self.state.stack_values.push(MachineValue::Undefined); + } else { + self.machine.emit_push(Size::S64, Location::Imm32(0)); } + stack_offset += 8; + self.state.stack_values.push(MachineValue::Undefined); } let mut call_movs: Vec<(Location, M::GPR)> = vec![]; diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 2cc5a83dbfc..4c7b4ed98c0 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -96,6 +96,7 @@ pub trait EmitterARM64 { fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32); fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); + fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32); fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32); @@ -182,6 +183,10 @@ pub trait EmitterARM64 { fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); + fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); + fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location); + fn arch_supports_canonicalize_nan(&self) -> bool { true } @@ -322,10 +327,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); + match mult { + 0 => dynasm!(self ; ldr X(reg), [X(addr)]), + 1 => dynasm!(self ; ldr X(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]), } - dynasm!(self ; ldr 
X(reg), [X(addr), X(r2), LSL mult]); } (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; @@ -410,6 +416,21 @@ impl EmitterARM64 for Assembler { _ => unreachable!(), } } + fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + match (sz, reg) { + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; str X(reg), [X(addr)], (offset as i32)); + } + (Size::S64, Location::SIMD(reg)) => { + let reg = reg.into_index() as u32; + let addr = addr.into_index() as u32; + dynasm!(self ; str D(reg), [X(addr)], (offset as i32)); + } + _ => unreachable!(), + } + } fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { match (sz, reg) { (Size::S64, Location::GPR(reg)) => { @@ -1059,6 +1080,10 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; cmp X(dst), imm as u32); } + (Size::S64, Location::Imm64(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp X(dst), imm as u32); + } (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; dynasm!(self ; cmp W(dst), imm as u32); @@ -1809,7 +1834,7 @@ impl EmitterARM64 for Assembler { } fn emit_udf(&mut self) { - dynasm!(self ; udf 0); + dynasm!(self ; udf 0x1234); } fn emit_dmb(&mut self) { dynasm!(self ; dmb ish); @@ -1983,6 +2008,81 @@ impl EmitterARM64 for Assembler { ), } } + + fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf S(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf S(dst), X(src)); + } + (Size::S32, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf D(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf D(dst), X(src)); + } + _ => panic!( + "singlepass can't emit SCVTF {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf S(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf S(dst), X(src)); + } + (Size::S32, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf D(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf D(dst), X(src)); + } + _ => panic!( + "singlepass can't emit UCVTF {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location) { + match 
(sz_in, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvt D(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvt S(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVT {:?} {:?} {:?}", + sz_in, src, dst + ), + } + } } pub fn gen_std_trampoline_arm64( @@ -1992,7 +2092,7 @@ pub fn gen_std_trampoline_arm64( let mut a = Assembler::new(0); let fptr = GPR::X26; - let args = GPR::X8; + let args = GPR::X25; dynasm!(a ; .arch aarch64 @@ -2004,7 +2104,7 @@ pub fn gen_std_trampoline_arm64( ; mov X(args as u32), x2 ); - let stack_args = sig.params().len().saturating_sub(8); + let stack_args = sig.params().len().saturating_sub(7); // 1st arg is ctx, not an actual arg let mut stack_offset = stack_args as u32 * 8; if stack_args > 0 { if stack_offset % 16 != 0 { @@ -2016,7 +2116,7 @@ pub fn gen_std_trampoline_arm64( // Move arguments to their locations. // `callee_vmctx` is already in the first argument register, so no need to move. - for (i, param) in sig.params().iter().enumerate() { + for (i, param) in sig.params().iter().enumerate().rev() { let sz = match *param { Type::I32 | Type::F32 => Size::S32, Type::I64 | Type::F64 => Size::S64, @@ -2036,14 +2136,15 @@ pub fn gen_std_trampoline_arm64( ); } _ => { + // use X1 as the scratch register, because the loop over the args runs backward a.emit_ldr( sz, - Location::GPR(GPR::X18), + Location::GPR(GPR::X1), Location::Memory(args, (i * 16) as i32), ); a.emit_str( sz, - Location::GPR(GPR::X18), + Location::GPR(GPR::X1), Location::Memory(GPR::XzrSp, (i as i32 - 7) * 8), ) } @@ -2089,11 +2190,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64( 16, ); - if stack_offset < 256 + 16 { + if stack_offset < 0x1000 + 16 { a.emit_sub( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm8((stack_offset - 16) as _), + Location::Imm32((stack_offset - 16) as _), Location::GPR(GPR::XzrSp), ); } else { @@ -2177,7 +2278,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64( } // Release values array. - if stack_offset < 256 + 16 { + if stack_offset < 0x1000 + 16 { a.emit_add( Size::S64, Location::GPR(GPR::XzrSp), @@ -2232,8 +2333,8 @@ pub fn gen_import_call_trampoline_arm64( let mut param_locations: Vec<Location> = vec![]; // Allocate stack space for arguments. - let stack_offset: i32 = if sig.params().len() > 5 { - 5 * 8 + let stack_offset: i32 = if sig.params().len() > 7 { + 7 * 8 } else { (sig.params().len() as i32) * 8 }; @@ -2243,11 +2344,11 @@ pub fn gen_import_call_trampoline_arm64( stack_offset }; if stack_offset > 0 { - if stack_offset < 256 { + if stack_offset < 0x1000 { a.emit_sub( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm8(stack_offset as u8), + Location::Imm32(stack_offset as u32), Location::GPR(GPR::XzrSp), ); } else { @@ -2278,15 +2379,15 @@ pub fn gen_import_call_trampoline_arm64( a.emit_str(Size::S64, Location::GPR(PARAM_REGS[i]), loc); loc } - _ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 5) * 8) as i32), + _ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 7) * 8) as i32), }; param_locations.push(loc); } // Copy arguments.
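    // Walk the signature with the argument allocator: the first GPR slot is
    // consumed by the VMContext, parameters that fit in registers are repacked
    // from `param_locations` into their AAPCS64 registers, and each overflow
    // parameter is bounced through the X16 scratch register onto the outgoing
    // stack area, 8 bytes per slot.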
+ let mut caller_stack_offset: i32 = 0; let mut argalloc = ArgumentRegisterAllocator::default(); argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext - let mut caller_stack_offset: i32 = 0; for (i, ty) in sig.params().iter().enumerate() { let prev_loc = param_locations[i]; let targ = match argalloc.next(*ty, calling_convention) { @@ -2294,14 +2395,11 @@ pub fn gen_import_call_trampoline_arm64( Some(ARM64Register::NEON(neon)) => Location::SIMD(neon), None => { // No register can be allocated. Put this argument on the stack. - a.emit_ldr(Size::S64, Location::GPR(GPR::X20), prev_loc); + a.emit_ldr(Size::S64, Location::GPR(GPR::X16), prev_loc); a.emit_str( Size::S64, - Location::GPR(GPR::X20), - Location::Memory( - GPR::XzrSp, - stack_offset + 8 + caller_stack_offset, - ), + Location::GPR(GPR::X16), + Location::Memory(GPR::XzrSp, stack_offset + caller_stack_offset), ); caller_stack_offset += 8; continue; @@ -2312,11 +2410,11 @@ pub fn gen_import_call_trampoline_arm64( // Restore stack pointer. if stack_offset > 0 { - if stack_offset < 256 { + if stack_offset < 0x1000 { a.emit_add( Size::S64, Location::GPR(GPR::XzrSp), - Location::Imm8(stack_offset as u8), + Location::Imm32(stack_offset as u32), Location::GPR(GPR::XzrSp), ); } else { @@ -2339,7 +2437,9 @@ pub fn gen_import_call_trampoline_arm64( let offset = vmoffsets.vmctx_vmfunction_import(index); // for ldr, offset needs to be a multiple of 8, which often is not // so use ldur, but then offset is limited to -255 .. +255. It will be positive here - let offset = if offset > 255 { + let offset = if offset > 0 && offset < 0x1000 { + offset + } else { a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64); a.emit_add( Size::S64, Location::GPR(GPR::X0), Location::GPR(GPR::X16), Location::GPR(GPR::X0), ); 0 - } else { - offset }; match calling_convention { _ => { diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 934181e7f46..ac38693b039 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -82,8 +82,8 @@ pub trait Machine { fn reserve_unused_temp_gpr(&mut self, gpr: Self::GPR) -> Self::GPR; /// reserve a GPR fn reserve_gpr(&mut self, gpr: Self::GPR); - /// Push used gpr to the stack - fn push_used_gpr(&mut self); + /// Push used gpr to the stack. Return the bytes taken on the stack + fn push_used_gpr(&mut self) -> usize; /// Pop used gpr from the stack fn pop_used_gpr(&mut self); /// Picks an unused SIMD register. @@ -100,8 +100,8 @@ pub trait Machine { fn reserve_simd(&mut self, simd: Self::SIMD); /// Releases a temporary XMM register. fn release_simd(&mut self, simd: Self::SIMD); - /// Push used simd regs to the stack - fn push_used_simd(&mut self); + /// Push used simd regs to the stack. 
Return bytes taken on the stack + fn push_used_simd(&mut self) -> usize; /// Pop used simd regs from the stack fn pop_used_simd(&mut self); /// Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64 for example) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 87105231d5e..a51c18c751f 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -38,6 +38,7 @@ enum ImmType { None, NoneXzr, Bits8, + Bits12, Shift32, Shift32No0, Shift64, @@ -69,6 +70,7 @@ impl MachineARM64 { ImmType::None => false, ImmType::NoneXzr => false, ImmType::Bits8 => (imm >= 0) && (imm < 256), + ImmType::Bits12 => (imm >= 0) && (imm < 0x1000), ImmType::Shift32 => (imm >= 0) && (imm < 32), ImmType::Shift32No0 => (imm > 0) && (imm < 32), ImmType::Shift64 => (imm >= 0) && (imm < 64), @@ -89,6 +91,7 @@ impl MachineARM64 { src: Location, temps: &mut Vec<GPR>, allow_imm: ImmType, + read_val: bool, wanted: Option<GPR>, ) -> Location { match src { @@ -158,29 +161,31 @@ impl MachineARM64 { temps.push(tmp.clone()); tmp }; - let offsize = if sz == Size::S32 { - ImmType::OffsetWord - } else { - ImmType::OffsetDWord - }; - if self.compatible_imm(val as i64, offsize) { - self.assembler.emit_ldr( - sz, - Location::GPR(tmp), - Location::Memory(reg, val as _), - ); - } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { - self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val); - } else { - if reg == tmp { - unreachable!(); + if read_val { + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::GPR(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val); + } else { + if reg == tmp { + unreachable!(); + } + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + self.assembler.emit_ldr( + sz, + Location::GPR(tmp), + Location::Memory2(reg, tmp, Multiplier::One, 0), + ); } - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); - self.assembler.emit_ldr( - sz, - Location::GPR(tmp), - Location::Memory2(reg, tmp, Multiplier::One, 0), - ); } Location::GPR(tmp) } @@ -193,13 +198,16 @@ impl MachineARM64 { src: Location, temps: &mut Vec<NEON>, allow_imm: ImmType, + read_val: bool, ) -> Location { match src { Location::SIMD(_) => src, Location::GPR(_) => { let tmp = self.acquire_temp_simd().unwrap(); temps.push(tmp.clone()); - self.assembler.emit_mov(sz, src, Location::SIMD(tmp)); + if read_val { + self.assembler.emit_mov(sz, src, Location::SIMD(tmp)); + } Location::SIMD(tmp) } Location::Imm8(val) => { @@ -212,6 +220,7 @@ impl MachineARM64 { self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); self.assembler .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); Location::SIMD(tmp) } } @@ -225,6 +234,7 @@ impl MachineARM64 { self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); self.assembler .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); Location::SIMD(tmp) } } @@ -238,37 +248,41 @@ impl MachineARM64 { self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); self.assembler .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); Location::SIMD(tmp) } } Location::Memory(reg, val) => { let tmp = self.acquire_temp_simd().unwrap(); 
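// The scratch NEON register is recorded in `temps` so the caller can release
// it once the operation completes; when `read_val` is false the register is
// only needed as a destination, so the load from memory below is skipped.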
temps.push(tmp.clone()); - let offsize = if sz == Size::S32 { - ImmType::OffsetWord - } else { - ImmType::OffsetDWord - }; - if self.compatible_imm(val as i64, offsize) { - self.assembler.emit_ldr( - sz, - Location::SIMD(tmp), - Location::Memory(reg, val as _), - ); - } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { - self.assembler.emit_ldur(sz, Location::SIMD(tmp), reg, val); - } else { - let gpr = self.acquire_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); - self.assembler.emit_ldr( - sz, - Location::SIMD(tmp), - Location::Memory2(reg, gpr, Multiplier::One, 0), - ); + if read_val { + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(sz, Location::SIMD(tmp), reg, val); + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory2(reg, gpr, Multiplier::One, 0), + ); + self.release_gpr(gpr); + } } Location::SIMD(tmp) } - _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src), + _ => panic!("singlepass can't emit location_to_neon {:?} {:?}", sz, src), } } @@ -281,8 +295,13 @@ impl MachineARM64 { putback: bool, ) { let mut temps = vec![]; - let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); + let src_imm = if putback { + ImmType::None + } else { + ImmType::Bits12 + }; + let src = self.location_to_reg(sz, src, &mut temps, src_imm, true, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, !putback, None); op(&mut self.assembler, sz, src, dest); if dst != dest && putback { self.move_location(sz, dest, dst); @@ -300,8 +319,8 @@ impl MachineARM64 { putback: bool, ) { let mut temps = vec![]; - let src = self.location_to_neon(sz, src, &mut temps, ImmType::None); - let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); + let src = self.location_to_neon(sz, src, &mut temps, ImmType::None, true); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None, !putback); op(&mut self.assembler, sz, src, dest); if dst != dest && putback { self.move_location(sz, dest, dst); @@ -320,9 +339,9 @@ impl MachineARM64 { allow_imm: ImmType, ) { let mut temps = vec![]; - let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm, None); - let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm, true, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); op(&mut self.assembler, sz, src1, src2, dest); if dst != dest { self.move_location(sz, dest, dst); @@ -341,9 +360,9 @@ impl MachineARM64 { allow_imm: ImmType, ) { let mut temps = vec![]; - let src1 = self.location_to_neon(sz, src1, &mut temps, ImmType::None); - let src2 = self.location_to_neon(sz, src2, &mut temps, allow_imm); - let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None); + let src1 = self.location_to_neon(sz, src1, &mut 
temps, ImmType::None, true); + let src2 = self.location_to_neon(sz, src2, &mut temps, allow_imm, true); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None, false); op(&mut self.assembler, sz, src1, src2, dest); if dst != dest { self.move_location(sz, dest, dst); @@ -352,153 +371,209 @@ impl MachineARM64 { self.release_simd(r); } } - fn emit_relaxed_ldr64(&mut self, dst: Location, src: Location) { + fn emit_relaxed_ldr64(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetDWord) { - self.assembler.emit_ldr(Size::S64, dst, src); + self.assembler.emit_ldr(Size::S64, dest, src); } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { - self.assembler.emit_ldur(Size::S64, dst, addr, offset); + self.assembler.emit_ldur(Size::S64, dest, addr, offset); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldr( Size::S64, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } - fn emit_relaxed_ldr32(&mut self, dst: Location, src: Location) { + fn emit_relaxed_ldr32(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetWord) { - self.assembler.emit_ldr(Size::S32, dst, src); + self.assembler.emit_ldr(Size::S32, dest, src); } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { - self.assembler.emit_ldur(Size::S32, dst, addr, offset); + self.assembler.emit_ldur(Size::S32, dest, addr, offset); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldr( Size::S32, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } - fn emit_relaxed_ldr32s(&mut self, dst: Location, src: Location) { + fn emit_relaxed_ldr32s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetWord) { - self.assembler.emit_ldrsw(Size::S64, dst, src); + self.assembler.emit_ldrsw(Size::S64, dest, src); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldrsw( Size::S64, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } - fn emit_relaxed_ldr16(&mut self, dst: Location, src: Location) { + fn emit_relaxed_ldr16(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = 
self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { - self.assembler.emit_ldrh(Size::S32, dst, src); + self.assembler.emit_ldrh(Size::S32, dest, src); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldrh( Size::S32, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } fn emit_relaxed_ldr16s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { - self.assembler.emit_ldrsh(sz, dst, src); + self.assembler.emit_ldrsh(sz, dest, src); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldrsh( sz, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } - fn emit_relaxed_ldr8(&mut self, dst: Location, src: Location) { + fn emit_relaxed_ldr8(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetByte) { - self.assembler.emit_ldrb(Size::S32, dst, src); + self.assembler.emit_ldrb(Size::S32, dest, src); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldrb( Size::S32, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } fn emit_relaxed_ldr8s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetByte) { - self.assembler.emit_ldrsb(sz, dst, src); + self.assembler.emit_ldrsb(sz, dest, src); } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_ldrsb( sz, - Location::GPR(tmp), + dest, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } } fn emit_relaxed_str64(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetDWord) { @@ -525,7 +600,7 
@@ impl MachineARM64 { } fn emit_relaxed_str32(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetWord) { @@ -552,7 +627,7 @@ impl MachineARM64 { } fn emit_relaxed_str16(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { @@ -577,7 +652,7 @@ impl MachineARM64 { } fn emit_relaxed_str8(&mut self, dst: Location, src: Location) { let mut temps = vec![]; - let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, None); + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); match src { Location::Memory(addr, offset) => { if self.compatible_imm(offset as i64, ImmType::OffsetByte) { @@ -688,11 +763,11 @@ impl MachineARM64 { let tmp_bound = self.acquire_temp_gpr().unwrap(); // Load base into temporary register. - self.emit_relaxed_ldr64(Location::GPR(tmp_base), base_loc); + self.emit_relaxed_ldr64(Size::S64, Location::GPR(tmp_base), base_loc); // Load bound into temporary register, if needed. if need_check { - self.emit_relaxed_ldr64(Location::GPR(tmp_bound), bound_loc); + self.emit_relaxed_ldr64(Size::S64, Location::GPR(tmp_bound), bound_loc); // Wasm -> Effective. // Assuming we never underflow - should always be true on Linux/macOS and Windows >=8, @@ -726,12 +801,11 @@ impl MachineARM64 { // Load effective address. // `base_loc` and `bound_loc` becomes INVALID after this line, because `tmp_addr` // might be reused. - self.assembler - .emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); + self.move_location(Size::S32, addr, Location::GPR(tmp_addr)); // Add offset to memory address. if memarg.offset != 0 { - if self.compatible_imm(memarg.offset as _, ImmType::Bits8) { + if self.compatible_imm(memarg.offset as _, ImmType::Bits12) { self.assembler.emit_adds( Size::S32, Location::Imm32(memarg.offset), @@ -851,7 +925,7 @@ impl MachineARM64 { } (Size::S64, _) => { let mut temps = vec![]; - let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, None); + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, true, None); let offset = if self.pushed { 0 } else { @@ -953,7 +1027,7 @@ impl Machine for MachineARM64 { fn pick_gpr(&self) -> Option { use GPR::*; - static REGS: &[GPR] = &[X6, X7, X9, X10, X11, X12, X13, X14, X15]; + static REGS: &[GPR] = &[X9, X10, X11, X12, X13, X14, X15]; for r in REGS { if !self.used_gprs.contains(r) { return Some(*r); @@ -965,7 +1039,7 @@ impl Machine for MachineARM64 { // Picks an unused general purpose register for internal temporary use. 
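    // The temporary pool below (x1..x8) is deliberately disjoint from the
    // x9..x15 range that `pick_gpr` hands out for wasm values, so acquiring a
    // scratch register can never evict a live value register.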
fn pick_temp_gpr(&self) -> Option<GPR> { use GPR::*; - static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X8]; + static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X6, X7, X8]; for r in REGS { if !self.used_gprs.contains(r) { return Some(*r); @@ -996,17 +1070,24 @@ impl Machine for MachineARM64 { self.used_gprs.insert(gpr); } - fn push_used_gpr(&mut self) { + fn push_used_gpr(&mut self) -> usize { let used_gprs = self.get_used_gprs(); + if used_gprs.len() % 2 == 1 { + self.emit_push(Size::S64, Location::GPR(GPR::XzrSp)); + } for r in used_gprs.iter() { self.emit_push(Size::S64, Location::GPR(*r)); } + ((used_gprs.len() + 1) / 2) * 16 } fn pop_used_gpr(&mut self) { let used_gprs = self.get_used_gprs(); for r in used_gprs.iter().rev() { self.emit_pop(Size::S64, Location::GPR(*r)); } + if used_gprs.len() % 2 == 1 { + self.emit_pop(Size::S64, Location::GPR(GPR::XzrSp)); + } } // Picks an unused NEON register. @@ -1051,7 +1132,7 @@ impl Machine for MachineARM64 { assert_eq!(self.used_simd.remove(&simd), true); } - fn push_used_simd(&mut self) { + fn push_used_simd(&mut self) -> usize { let used_neons = self.get_used_simd(); let stack_adjust = if used_neons.len() & 1 == 1 { (used_neons.len() * 8) as u32 + 8 @@ -1067,6 +1148,7 @@ impl Machine for MachineARM64 { Location::Memory(GPR::XzrSp, (i * 8) as i32), ); } + stack_adjust as usize } fn pop_used_simd(&mut self) { let used_neons = self.get_used_simd(); @@ -1230,7 +1312,9 @@ impl Machine for MachineARM64 { ); } // push a value on the stack for a native call - fn push_location_for_native(&mut self, loc: Location) { + fn push_location_for_native(&mut self, _loc: Location) { + unimplemented!(); + /* match loc { Location::Imm64(_) => { self.reserve_unused_temp_gpr(GPR::X8); @@ -1240,13 +1324,14 @@ impl Machine for MachineARM64 { } _ => self.emit_push(Size::S64, loc), } + */ } // Zero a location that is 32 bits - fn zero_location(&mut self, _size: Size, location: Location) { + fn zero_location(&mut self, size: Size, location: Location) { match location { Location::GPR(_) => self.assembler.emit_mov_imm(location, 0u64), - _ => unreachable!(), + _ => self.move_location(size, Location::GPR(GPR::XzrSp), location), } } @@ -1312,7 +1397,7 @@ impl Machine for MachineARM64 { 5 => Location::GPR(GPR::X5), 6 => Location::GPR(GPR::X6), 7 => Location::GPR(GPR::X7), - _ => Location::Memory(GPR::X29, (16 + (idx - 8) * 8) as i32), + _ => Location::Memory(GPR::X29, (16 * 2 + (idx - 8) * 8) as i32), }, } } @@ -1346,7 +1431,8 @@ impl Machine for MachineARM64 { Location::GPR(tmp), ); } - self.assembler.emit_str(size, source, Location::GPR(tmp)); + self.assembler + .emit_str(size, source, Location::Memory(tmp, 0)); } } _ => panic!( @@ -1376,7 +1462,7 @@ impl Machine for MachineARM64 { ), }, Location::Memory(addr, offs) => match dest { - Location::GPR(_) => { + Location::GPR(_) | Location::SIMD(_) => { if self.offset_is_ok(size, offs) { self.assembler.emit_ldr(size, dest, source); } else if offs > -256 && offs < 256 { @@ -1404,10 +1490,15 @@ impl Machine for MachineARM64 { self.assembler.emit_ldr(size, source, Location::GPR(tmp)); } } - _ => panic!( - "singlepass can't emit move_location {:?} {:?} => {:?}", - size, source, dest - ), + _ => { + let mut temps = vec![]; + let src = + self.location_to_reg(size, source, &mut temps, ImmType::None, true, None); + self.move_location(size, src, dest); + for r in temps { + self.release_gpr(r); + } + } }, _ => panic!( "singlepass can't emit move_location {:?} {:?} => {:?}", @@ -1430,8 +1521,80 @@ impl Machine for MachineARM64 { unimplemented!(); } // 
Init the stack loc counter - fn init_stack_loc(&mut self, _init_stack_loc_cnt: u64, _last_stack_loc: Location) { - unimplemented!(); + fn init_stack_loc(&mut self, init_stack_loc_cnt: u64, last_stack_loc: Location) { + let label = self.assembler.get_label(); + let mut temps = vec![]; + let dest = self.acquire_temp_gpr().unwrap(); + temps.push(dest.clone()); + let cnt = self.location_to_reg( + Size::S64, + Location::Imm64(init_stack_loc_cnt), + &mut temps, + ImmType::None, + true, + None, + ); + let dest = match last_stack_loc { + Location::GPR(_) => unreachable!(), + Location::SIMD(_) => unreachable!(), + Location::Memory(reg, offset) => { + if offset < 0 { + let offset = (-offset) as u32; + if self.compatible_imm(offset as i64, ImmType::Bits12) { + self.assembler.emit_sub( + Size::S64, + Location::GPR(reg), + Location::Imm32(offset), + Location::GPR(dest), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(reg), + Location::GPR(tmp), + Location::GPR(dest), + ); + temps.push(tmp); + } + dest + } else { + let offset = offset as u32; + if self.compatible_imm(offset as i64, ImmType::Bits12) { + self.assembler.emit_add( + Size::S64, + Location::GPR(reg), + Location::Imm32(offset), + Location::GPR(dest), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), offset as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(reg), + Location::GPR(tmp), + Location::GPR(dest), + ); + temps.push(tmp); + } + dest + } + } + _ => panic!("singlepass can't emit init_stack_loc {:?}", last_stack_loc), + }; + self.assembler.emit_label(label); + self.assembler + .emit_stria(Size::S64, Location::GPR(GPR::XzrSp), dest, 8); + self.assembler + .emit_sub(Size::S64, cnt, Location::Imm8(1), cnt); + self.assembler.emit_cbnz_label(Size::S64, cnt, label); + for r in temps { + self.release_gpr(r); + } } // Restore save_area fn restore_saved_area(&mut self, saved_area_offset: i32) { @@ -1577,6 +1740,7 @@ impl Machine for MachineARM64 { location, &mut temps, ImmType::None, + true, Some(GPR::X26), ); match loc { @@ -1607,8 +1771,8 @@ impl Machine for MachineARM64 { // math fn location_add(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8, None); - let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, None); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits12, true, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, true, None); if flags { self.assembler.emit_adds(size, dst, src, dst); } else { @@ -1623,8 +1787,8 @@ impl Machine for MachineARM64 { } fn location_sub(&mut self, size: Size, source: Location, dest: Location, flags: bool) { let mut temps = vec![]; - let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits8, None); - let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, None); + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits12, true, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, true, None); if flags { self.assembler.emit_subs(size, dst, src, dst); } else { @@ -1754,13 +1918,14 @@ impl Machine for MachineARM64 { (Location::Memory(_, _), Location::GPR(_)) => match sz_src { Size::S8 => self.emit_relaxed_ldr8s(sz_dst, dst, src), Size::S16 => 
self.emit_relaxed_ldr16s(sz_dst, dst, src), - Size::S32 => self.emit_relaxed_ldr32s(dst, src), + Size::S32 => self.emit_relaxed_ldr32s(sz_dst, dst, src), _ => unreachable!(), }, _ => { let mut temps = vec![]; - let src = self.location_to_reg(sz_dst, src, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(sz_dst, dst, &mut temps, ImmType::None, None); + let src = self.location_to_reg(sz_dst, src, &mut temps, ImmType::None, true, None); + let dest = + self.location_to_reg(sz_dst, dst, &mut temps, ImmType::None, false, None); match sz_src { Size::S8 => self.assembler.emit_sxtb(sz_dst, src, dest), Size::S16 => self.assembler.emit_sxth(sz_dst, src, dest), @@ -1816,9 +1981,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.assembler .emit_cbz_label(Size::S32, src2, integer_division_by_zero); @@ -1841,9 +2006,9 @@ impl Machine for MachineARM64 { integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.assembler .emit_cbz_label(Size::S32, src2, integer_division_by_zero); @@ -1853,6 +2018,7 @@ impl Machine for MachineARM64 { Location::Imm32(0x80000000), &mut temps, ImmType::None, + true, None, ); self.assembler.emit_cmp(Size::S32, tmp, src1); @@ -1882,9 +2048,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); let dest = if dest == src1 || dest == src2 { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -1916,9 +2082,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = 
self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); let dest = if dest == src1 || dest == src2 { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -2002,12 +2168,12 @@ impl Machine for MachineARM64 { self.emit_cmpop_i32_dynamic_b(Condition::Eq, loc_a, loc_b, ret); } fn i32_clz(&mut self, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_clz, Size::S32, src, dst, false); + self.emit_relaxed_binop(Assembler::emit_clz, Size::S32, src, dst, true); } fn i32_ctz(&mut self, src: Location, dst: Location) { let mut temps = vec![]; - let src = self.location_to_reg(Size::S32, src, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, dst, &mut temps, ImmType::None, None); + let src = self.location_to_reg(Size::S32, src, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, dst, &mut temps, ImmType::None, false, None); self.assembler.emit_rbit(Size::S32, src, dest); self.assembler.emit_clz(Size::S32, dest, dest); if dst != dest { @@ -2021,8 +2187,8 @@ impl Machine for MachineARM64 { // no opcode for that. // 2 solutions: using NEON CNT, that count bits per Byte, or using clz with some shift and loop let mut temps = vec![]; - let src = self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let src = self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); let src = if src == loc { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -2099,9 +2265,11 @@ impl Machine for MachineARM64 { Location::Imm32(32), &mut temps, ImmType::None, + true, None, ); - let tmp2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, None); + let tmp2 = + self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp2); tmp2 } @@ -2148,7 +2316,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr32(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr32(Size::S32, ret, Location::Memory(addr, 0)); }, ); } @@ -2172,7 +2340,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr8(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr8(Size::S32, ret, Location::Memory(addr, 0)); }, ); } @@ -2220,7 +2388,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr16(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr16(Size::S32, ret, Location::Memory(addr, 0)); }, ); } @@ -2768,9 +2936,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.assembler .emit_cbz_label(Size::S64, src2, 
integer_division_by_zero); @@ -2793,9 +2961,9 @@ impl Machine for MachineARM64 { integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.assembler .emit_cbz_label(Size::S64, src2, integer_division_by_zero); @@ -2805,6 +2973,7 @@ impl Machine for MachineARM64 { Location::Imm64(0x8000000000000000), &mut temps, ImmType::None, + true, None, ); self.assembler.emit_cmp(Size::S64, tmp, src1); @@ -2834,9 +3003,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); let dest = if dest == src1 || dest == src2 { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -2868,9 +3037,9 @@ impl Machine for MachineARM64 { _integer_overflow: Label, ) -> usize { let mut temps = vec![]; - let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, None); - let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); let dest = if dest == src1 || dest == src2 { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -2954,12 +3123,12 @@ impl Machine for MachineARM64 { self.emit_cmpop_i64_dynamic_b(Condition::Eq, loc_a, loc_b, ret); } fn i64_clz(&mut self, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_clz, Size::S64, src, dst, false); + self.emit_relaxed_binop(Assembler::emit_clz, Size::S64, src, dst, true); } fn i64_ctz(&mut self, src: Location, dst: Location) { let mut temps = vec![]; - let src = self.location_to_reg(Size::S64, src, &mut temps, ImmType::None, None); - let dest = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::None, None); + let src = self.location_to_reg(Size::S64, src, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::None, false, None); self.assembler.emit_rbit(Size::S64, src, dest); self.assembler.emit_clz(Size::S64, dest, dest); if dst != dest { @@ -2971,8 +3140,8 @@ impl Machine for MachineARM64 { } fn i64_popcnt(&mut self, loc: Location, ret: Location) { let mut temps = vec![]; - let src = self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, None); - let dest 
= self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let src = self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); let src = if src == loc { let tmp = self.acquire_temp_gpr().unwrap(); temps.push(tmp.clone()); @@ -3050,9 +3219,11 @@ impl Machine for MachineARM64 { Location::Imm32(64), &mut temps, ImmType::None, + true, None, ); - let tmp2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, None); + let tmp2 = + self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp2); tmp2 } @@ -3099,7 +3270,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr64(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr64(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -3123,7 +3294,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr8(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr8(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -3171,7 +3342,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr16(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr16(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -3219,7 +3390,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr32(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr32(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -3243,7 +3414,7 @@ impl Machine for MachineARM64 { offset, heap_access_oob, |this, addr| { - this.emit_relaxed_ldr32s(ret, Location::Memory(addr, 0)); + this.emit_relaxed_ldr32s(Size::S64, ret, Location::Memory(addr, 0)); }, ); } @@ -3947,17 +4118,85 @@ impl Machine for MachineARM64 { ); } - fn convert_f64_i64(&mut self, _loc: Location, _signed: bool, _ret: Location) { - unimplemented!(); + fn convert_f64_i64(&mut self, loc: Location, signed: bool, ret: Location) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_reg(Size::S64, loc, &mut gprs, ImmType::NoneXzr, true, None); + let dest = self.location_to_neon(Size::S64, ret, &mut neons, ImmType::None, false); + if signed { + self.assembler.emit_scvtf(Size::S64, src, Size::S64, dest); + } else { + self.assembler.emit_ucvtf(Size::S64, src, Size::S64, dest); + } + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_f64_i32(&mut self, _loc: Location, _signed: bool, _ret: Location) { - unimplemented!(); + fn convert_f64_i32(&mut self, loc: Location, signed: bool, ret: Location) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_reg(Size::S32, loc, &mut gprs, ImmType::NoneXzr, true, None); + let dest = self.location_to_neon(Size::S64, ret, &mut neons, ImmType::None, false); + if signed { + self.assembler.emit_scvtf(Size::S32, src, Size::S64, dest); + } else { + self.assembler.emit_ucvtf(Size::S32, src, Size::S64, dest); + } + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_f32_i64(&mut self, _loc: Location, _signed: bool, _ret: Location) { - unimplemented!(); + fn convert_f32_i64(&mut self, loc: Location, signed: bool, ret: Location) { + let 
mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_reg(Size::S64, loc, &mut gprs, ImmType::NoneXzr, true, None); + let dest = self.location_to_neon(Size::S32, ret, &mut neons, ImmType::None, false); + if signed { + self.assembler.emit_scvtf(Size::S64, src, Size::S32, dest); + } else { + self.assembler.emit_ucvtf(Size::S64, src, Size::S32, dest); + } + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_f32_i32(&mut self, _loc: Location, _signed: bool, _ret: Location) { - unimplemented!(); + fn convert_f32_i32(&mut self, loc: Location, signed: bool, ret: Location) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_reg(Size::S32, loc, &mut gprs, ImmType::NoneXzr, true, None); + let dest = self.location_to_neon(Size::S32, ret, &mut neons, ImmType::None, false); + if signed { + self.assembler.emit_scvtf(Size::S32, src, Size::S32, dest); + } else { + self.assembler.emit_ucvtf(Size::S32, src, Size::S32, dest); + } + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } fn convert_i64_f64(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { unimplemented!(); @@ -3971,11 +4210,11 @@ impl Machine for MachineARM64 { fn convert_i32_f32(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { unimplemented!(); } - fn convert_f64_f32(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn convert_f64_f32(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_fcvt, Size::S32, loc, ret, true); } - fn convert_f32_f64(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn convert_f32_f64(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_fcvt, Size::S64, loc, ret, true); } fn f64_neg(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S64, loc, ret, true); @@ -4033,7 +4272,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false); self.assembler.emit_cset(Size::S32, dest, Condition::Ls); if ret != dest { @@ -4045,7 +4284,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false); self.assembler.emit_cset(Size::S32, dest, Condition::Cc); if ret != dest { @@ -4057,7 +4296,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); 
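// Ls ("unsigned lower or same") instead of Le: an unordered FCMP result sets
// C and clears Z, so cset with Ls yields 0 when either operand is NaN, which
// matches the Wasm semantics of f64.le.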
self.assembler.emit_cset(Size::S32, dest, Condition::Ls); if ret != dest { @@ -4069,7 +4308,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Cc); if ret != dest { @@ -4081,7 +4320,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Ne); if ret != dest { @@ -4093,7 +4332,7 @@ impl Machine for MachineARM64 { } fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Eq); if ret != dest { @@ -4215,7 +4454,7 @@ impl Machine for MachineARM64 { } fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false); self.assembler.emit_cset(Size::S32, dest, Condition::Ls); if ret != dest { @@ -4227,7 +4466,7 @@ impl Machine for MachineARM64 { } fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false); self.assembler.emit_cset(Size::S32, dest, Condition::Cc); if ret != dest { @@ -4239,7 +4478,7 @@ impl Machine for MachineARM64 { } fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Ls); if ret != dest { @@ -4251,7 +4490,7 @@ impl Machine for MachineARM64 { } fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Cc); if ret != dest { @@ -4263,7 +4502,7 @@ impl Machine for 
MachineARM64 { } fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Ne); if ret != dest { @@ -4275,7 +4514,7 @@ impl Machine for MachineARM64 { } fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false); self.assembler.emit_cset(Size::S32, dest, Condition::Eq); if ret != dest { diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 37f31c9c320..22ab07070a6 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1626,11 +1626,12 @@ impl Machine for MachineX86_64 { self.used_gprs.insert(gpr); } - fn push_used_gpr(&mut self) { + fn push_used_gpr(&mut self) -> usize { let used_gprs = self.get_used_gprs(); for r in used_gprs.iter() { self.assembler.emit_push(Size::S64, Location::GPR(*r)); } + used_gprs.len() * 8 } fn pop_used_gpr(&mut self) { let used_gprs = self.get_used_gprs(); @@ -1681,7 +1682,7 @@ impl Machine for MachineX86_64 { assert_eq!(self.used_simd.remove(&simd), true); } - fn push_used_simd(&mut self) { + fn push_used_simd(&mut self) -> usize { let used_xmms = self.get_used_simd(); self.adjust_stack((used_xmms.len() * 8) as u32); @@ -1692,6 +1693,8 @@ impl Machine for MachineX86_64 { Location::Memory(GPR::RSP, (i * 8) as i32), ); } + + used_xmms.len() * 8 } fn pop_used_simd(&mut self) { let used_xmms = self.get_used_simd(); From 0bbc81a5b8e038abb7d04154f5c7aaeceffa5164 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 22 Dec 2021 10:22:27 +0100 Subject: [PATCH 23/34] improv(compiler) Oops, that was a mystake --- lib/compiler-singlepass/src/emitter_x64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 6a8c8bce746..71f1b9128ba 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -281,7 +281,7 @@ pub trait EmitterX64 { } fn arch_supports_canonicalize_nan(&self) -> bool { - false // no need to canonicalize, the default form is the canonicalized one already + true } fn arch_requires_indirect_call_trampoline(&self) -> bool { From 4f938643d15d60ce4ccb5cdae197b9abe21e663c Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 3 Jan 2022 18:39:27 +0100 Subject: [PATCH 24/34] improv(compiler) More native call work and fixes (166 tests passes now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 153 ++++++++++--------- lib/compiler-singlepass/src/machine.rs | 8 +- lib/compiler-singlepass/src/machine_arm64.rs | 47 +++--- lib/compiler-singlepass/src/machine_x64.rs | 9 +- 4 files changed, 116 insertions(+), 101 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 4c7b4ed98c0..f172ea4826f 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -170,6 +170,7 
@@ pub trait EmitterARM64 { fn emit_udf(&mut self); fn emit_dmb(&mut self); + fn emit_brk(&mut self); fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location); fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location); @@ -1839,6 +1840,9 @@ impl EmitterARM64 for Assembler { fn emit_dmb(&mut self) { dynasm!(self ; dmb ish); } + fn emit_brk(&mut self) { + dynasm!(self ; brk 0); + } fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location) { match (sz, src1, src2) { @@ -2091,11 +2095,10 @@ pub fn gen_std_trampoline_arm64( ) -> FunctionBody { let mut a = Assembler::new(0); - let fptr = GPR::X26; - let args = GPR::X25; + let fptr = GPR::X27; + let args = GPR::X28; dynasm!(a - ; .arch aarch64 ; sub sp, sp, 32 ; stp x29, x30, [sp] ; stp X(fptr as u32), X(args as u32), [sp, 16] @@ -2111,7 +2114,7 @@ pub fn gen_std_trampoline_arm64( stack_offset += 8; assert!(stack_offset % 16 == 0); } - dynasm!(a ; .arch aarch64 ; sub sp, sp, stack_offset); + dynasm!(a ; sub sp, sp, stack_offset); } // Move arguments to their locations. @@ -2151,7 +2154,7 @@ pub fn gen_std_trampoline_arm64( } } - dynasm!(a ; .arch aarch64 ; blr X(fptr as u32)); + dynasm!(a ; blr X(fptr as u32)); // Write return value. if !sig.results().is_empty() { @@ -2160,7 +2163,6 @@ pub fn gen_std_trampoline_arm64( // Restore stack. dynasm!(a - ; .arch aarch64 ; ldp X(fptr as u32), X(args as u32), [x29, 16] ; ldp x29, x30, [x29] ; add sp, sp, 32 + stack_offset as u32 @@ -2180,7 +2182,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64( ) -> FunctionBody { let mut a = Assembler::new(0); // Allocate argument array. - let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 16; + let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()); // Save LR and X20, as scratch register a.emit_stpdb( Size::S64, @@ -2190,21 +2192,23 @@ pub fn gen_std_dynamic_import_trampoline_arm64( 16, ); - if stack_offset < 0x1000 + 16 { - a.emit_sub( - Size::S64, - Location::GPR(GPR::XzrSp), - Location::Imm32((stack_offset - 16) as _), - Location::GPR(GPR::XzrSp), - ); - } else { - a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64); - a.emit_sub( - Size::S64, - Location::GPR(GPR::XzrSp), - Location::GPR(GPR::X20), - Location::GPR(GPR::XzrSp), - ); + if stack_offset != 0 { + if stack_offset < 0x1000 { + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64); + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X20), + Location::GPR(GPR::XzrSp), + ); + } } // Copy arguments. @@ -2222,7 +2226,10 @@ pub fn gen_std_dynamic_import_trampoline_arm64( a.emit_ldr( Size::S64, Location::GPR(GPR::X20), - Location::Memory(GPR::XzrSp, (stack_offset + stack_param_count * 8) as _), + Location::Memory( + GPR::XzrSp, + (stack_offset + 16 + stack_param_count * 8) as _, + ), ); stack_param_count += 1; Location::GPR(GPR::X20) @@ -2246,14 +2253,8 @@ pub fn gen_std_dynamic_import_trampoline_arm64( match calling_convention { _ => { // Load target address. - a.emit_ldr( - Size::S64, - Location::GPR(GPR::X20), - Location::Memory( - GPR::X0, - vmoffsets.vmdynamicfunction_import_context_address() as i32, - ), - ); + let offset = vmoffsets.vmdynamicfunction_import_context_address(); + a.emit_ldur(Size::S64, Location::GPR(GPR::X20), GPR::X0, offset as i32); // Load values array. 
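// A minimal sketch of the LDR/LDUR trade-off used around here (illustrative
// only, not part of the patch): the LDR immediate form takes a 12-bit
// unsigned offset scaled by the access size, so an 8-byte load needs an
// 8-aligned offset below 0x8000, while LDUR takes an unscaled signed offset
// in -256..=255 and so suits small unaligned displacements like this one.
fn fits_ldr_scaled(offset: i32, access_bytes: i32) -> bool {
    offset >= 0 && offset % access_bytes == 0 && offset / access_bytes < 0x1000
}
fn fits_ldur_unscaled(offset: i32) -> bool {
    (-256..=255).contains(&offset)
}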
a.emit_add( Size::S64, @@ -2278,21 +2279,23 @@ pub fn gen_std_dynamic_import_trampoline_arm64( } // Release values array. - if stack_offset < 0x1000 + 16 { - a.emit_add( - Size::S64, - Location::GPR(GPR::XzrSp), - Location::Imm32((stack_offset - 16) as _), - Location::GPR(GPR::XzrSp), - ); - } else { - a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64); - a.emit_add( - Size::S64, - Location::GPR(GPR::XzrSp), - Location::GPR(GPR::X20), - Location::GPR(GPR::XzrSp), - ); + if stack_offset != 0 { + if stack_offset < 0x1000 { + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64); + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X20), + Location::GPR(GPR::XzrSp), + ); + } } a.emit_ldpia( Size::S64, Location::GPR(GPR::X30), Location::GPR(GPR::X20), GPR::XzrSp, 16, ); @@ -2437,32 +2440,46 @@ pub fn gen_import_call_trampoline_arm64( let offset = vmoffsets.vmctx_vmfunction_import(index); // for ldr, offset needs to be a multiple of 8, which often is not // so use ldur, but then offset is limited to -255 .. +255. It will be positive here - let offset = if offset > 0 && offset < 0x1000 { - offset - } else { - a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64); - a.emit_add( - Size::S64, - Location::GPR(GPR::X0), - Location::GPR(GPR::X16), - Location::GPR(GPR::X0), - ); - 0 - }; - match calling_convention { - _ => { - a.emit_ldur( + let offset = + if (offset > 0 && offset < 0xF8) || (offset > 0 && offset < 0x7FF8 && (offset & 7) == 0) { + offset + } else { + a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64); + a.emit_add( Size::S64, + Location::GPR(GPR::X0), Location::GPR(GPR::X16), - GPR::X0, - offset as i32, // function pointer - ); - a.emit_ldur( - Size::S64, Location::GPR(GPR::X0), - GPR::X0, - offset as i32 + 8, // target vmctx ); + 0 + }; + match calling_convention { + _ => { + if (offset & 7) == 0 { + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X16), + Location::Memory(GPR::X0, offset as i32), // function pointer + ); + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X0), + Location::Memory(GPR::X0, offset as i32 + 8), // target vmctx + ); + } else { + a.emit_ldur( + Size::S64, + Location::GPR(GPR::X16), + GPR::X0, + offset as i32, // function pointer + ); + a.emit_ldur( + Size::S64, + Location::GPR(GPR::X0), + GPR::X0, + offset as i32 + 8, // target vmctx + ); + } } } a.emit_b_register(GPR::X16); diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index ac38693b039..e61e03e4184 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -132,10 +132,6 @@ pub trait Machine { /// restore stack /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn restore_stack(&mut self, delta_stack_offset: u32); - /// push callee saved register to the stack - fn push_callee_saved(&mut self); - /// pop callee saved register from the stack - fn pop_callee_saved(&mut self); /// Pop stack of locals /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn pop_stack_locals(&mut self, delta_stack_offset: u32); @@ -262,6 +258,10 @@ pub trait Machine { fn get_gpr_for_ret(&self) -> Self::GPR; /// get the simd for the return of float/double values fn get_simd_for_ret(&self) -> Self::SIMD; + + /// Emit a debug breakpoint + fn emit_debug_breakpoint(&mut self); + /// load the address of a memory location
(will panic if src is not a memory) /// like LEA opcode on x86_64 fn location_address( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index a51c18c751f..f23f46e0d9a 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1039,7 +1039,7 @@ impl Machine for MachineARM64 { // Picks an unused general purpose register for internal temporary use. fn pick_temp_gpr(&self) -> Option { use GPR::*; - static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X5, X7, X8]; + static REGS: &[GPR] = &[X8, X7, X6, X5, X4, X3, X2, X1]; for r in REGS { if !self.used_gprs.contains(r) { return Some(*r); @@ -1288,15 +1288,13 @@ impl Machine for MachineARM64 { Location::GPR(GPR::XzrSp), ); } - fn push_callee_saved(&mut self) {} - fn pop_callee_saved(&mut self) {} fn pop_stack_locals(&mut self, delta_stack_offset: u32) { let real_delta = if delta_stack_offset & 15 != 0 { delta_stack_offset + 8 } else { delta_stack_offset }; - let delta = if real_delta < 256 { + let delta = if self.compatible_imm(real_delta as i64, ImmType::Bits12) { Location::Imm8(real_delta as u8) } else { let tmp = self.pick_temp_gpr().unwrap(); @@ -1312,9 +1310,7 @@ impl Machine for MachineARM64 { ); } // push a value on the stack for a native call - fn push_location_for_native(&mut self, _loc: Location) { - unimplemented!(); - /* + fn push_location_for_native(&mut self, loc: Location) { match loc { Location::Imm64(_) => { self.reserve_unused_temp_gpr(GPR::X8); @@ -1324,7 +1320,6 @@ impl Machine for MachineARM64 { } _ => self.emit_push(Size::S64, loc), } - */ } // Zero a location that is 32bits @@ -1349,14 +1344,14 @@ impl Machine for MachineARM64 { fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location { // Use callee-saved registers for the first locals. match idx { - 0 => Location::GPR(GPR::X18), - 1 => Location::GPR(GPR::X19), - 2 => Location::GPR(GPR::X20), - 3 => Location::GPR(GPR::X21), - 4 => Location::GPR(GPR::X22), - 5 => Location::GPR(GPR::X23), - 6 => Location::GPR(GPR::X24), - 7 => Location::GPR(GPR::X25), + 0 => Location::GPR(GPR::X19), + 1 => Location::GPR(GPR::X20), + 2 => Location::GPR(GPR::X21), + 3 => Location::GPR(GPR::X22), + 4 => Location::GPR(GPR::X23), + 5 => Location::GPR(GPR::X24), + 6 => Location::GPR(GPR::X25), + 7 => Location::GPR(GPR::X26), _ => Location::Memory(GPR::X29, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)), } } @@ -1649,7 +1644,7 @@ impl Machine for MachineARM64 { fn emit_function_prolog(&mut self) { self.emit_double_push(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); // save LR too - self.emit_double_push(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8)); + self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28)); // cannot use mov, because XSP is XZR there. 
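// (Encoding detail behind the comment above: register number 31 names XZR in
// the ORR-based MOV encoding used for plain register moves, but names SP in
// the ADD-immediate encoding, so an `add dst, src, #0` is the conventional
// way to move a value to or from SP.)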
Need to use ADD with #0 self.assembler.emit_add( Size::S64, @@ -1668,7 +1663,7 @@ impl Machine for MachineARM64 { Location::GPR(GPR::XzrSp), ); self.pushed = false; // SP is restored, concider it aligned - self.emit_double_pop(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8)); + self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28)); self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); } @@ -1709,7 +1704,7 @@ impl Machine for MachineARM64 { self.assembler.emit_label(label); } fn get_grp_for_call(&self) -> GPR { - GPR::X26 + GPR::X27 } fn emit_call_register(&mut self, reg: GPR) { self.assembler.emit_call_register(reg); @@ -1733,6 +1728,10 @@ impl Machine for MachineARM64 { .arch_emit_indirect_call_with_trampoline(location); } + fn emit_debug_breakpoint(&mut self) { + self.assembler.emit_brk(); + } + fn emit_call_location(&mut self, location: Location) { let mut temps = vec![]; let loc = self.location_to_reg( Size::S64, location, &mut temps, ImmType::None, true, - Some(GPR::X26), + Some(GPR::X27), ); match loc { Location::GPR(reg) => self.assembler.emit_call_register(reg), @@ -2870,7 +2869,7 @@ impl Machine for MachineARM64 { offset: reloc_at as u32, addend: 0, }); - self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 0); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 0); let reloc_at = self.assembler.get_offset().0; relocations.push(Relocation { kind: RelocationKind::Arm64Movw1, @@ -2878,7 +2877,7 @@ impl Machine for MachineARM64 { offset: reloc_at as u32, addend: 0, }); - self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 16); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 16); let reloc_at = self.assembler.get_offset().0; relocations.push(Relocation { kind: RelocationKind::Arm64Movw2, @@ -2886,7 +2885,7 @@ impl Machine for MachineARM64 { offset: reloc_at as u32, addend: 0, }); - self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 32); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 32); let reloc_at = self.assembler.get_offset().0; relocations.push(Relocation { kind: RelocationKind::Arm64Movw3, @@ -2894,7 +2893,7 @@ impl Machine for MachineARM64 { offset: reloc_at as u32, addend: 0, }); - self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 48); } fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 22ab07070a6..627fb1b670f 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1798,11 +1798,6 @@ impl Machine for MachineX86_64 { Location::GPR(GPR::RSP), ); } - fn push_callee_saved(&mut self) {} - fn pop_callee_saved(&mut self) { - self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R14)); - self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R15)); - } fn pop_stack_locals(&mut self, delta_stack_offset: u32) { self.assembler.emit_add( Size::S64, @@ -2148,6 +2143,10 @@ impl Machine for MachineX86_64 { .arch_emit_indirect_call_with_trampoline(location); } + fn emit_debug_breakpoint(&mut self) { + self.assembler.emit_bkpt(); + } + fn emit_call_location(&mut self, location: Location) { self.assembler.emit_call_location(location); } From 8f5a30a1a7ce524492b20c875e9f3cab937fc1ae Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 4 Jan 2022 17:51:08 +0100 Subject: [PATCH 25/34] improv(compiler) More fixes (176 tests pass
now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 365 +++++++++++++------ lib/compiler-singlepass/src/machine_arm64.rs | 103 +++--- 2 files changed, 302 insertions(+), 166 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index f172ea4826f..bb58d7be31b 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -218,11 +218,11 @@ impl EmitterARM64 for Assembler { dynasm!( self ; const_neg_one_32: - ; .dword -1 + ; .word -1 ; const_zero_32: - ; .dword 0 + ; .word 0 ; const_pos_one_32: - ; .dword 1 + ; .word 1 ); } @@ -232,7 +232,7 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x7) != 0 || (disp >= 0x8000) { unreachable!(); } dynasm!(self ; str X(reg), [X(addr), disp]); @@ -241,7 +241,7 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x3) != 0 { + if (disp & 0x3) != 0 || (disp >= 0x4000) { unreachable!(); } dynasm!(self ; str W(reg), [X(addr), disp]); @@ -250,7 +250,7 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x1) != 0 { + if (disp & 0x1) != 0 || (disp >= 0x2000) { unreachable!(); } dynasm!(self ; strh W(reg), [X(addr), disp]); @@ -259,13 +259,16 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; + if disp >= 0x1000 { + unreachable!(); + } dynasm!(self ; strb W(reg), [X(addr), disp]); } (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x7) != 0 || (disp >= 0x8000) { unreachable!(); } dynasm!(self ; str D(reg), [X(addr), disp]); @@ -274,7 +277,7 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x3) != 0 || (disp >= 0x4000) { unreachable!(); } dynasm!(self ; str S(reg), [X(addr), disp]); @@ -287,7 +290,7 @@ impl EmitterARM64 for Assembler { (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - if (disp & 0x7) != 0 || disp < 0 || disp >= 0x8000 { + if (disp & 0x7) != 0 || (disp >= 0x8000) { unreachable!(); } let disp = disp as u32; @@ -296,7 +299,7 @@ impl EmitterARM64 for Assembler { (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - if (disp & 0x3) != 0 || disp < 0 || disp >= 0x4000 { + if (disp & 0x3) != 0 || (disp >= 0x4000) { unreachable!(); } let disp = disp as u32; @@ -305,7 +308,7 @@ impl EmitterARM64 for Assembler { (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - if (disp & 0x1) != 0 || disp < 0 || disp >= 0x2000 { + if (disp & 0x1 != 0) || (disp >= 0x2000) { unreachable!(); } let disp = disp as u32; @@ -314,7 +317,7 @@ impl EmitterARM64 for Assembler { (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; - if disp < 0 || disp >= 0x1000 { + if 
disp >= 0x1000 { unreachable!(); } let disp = disp as u32; @@ -332,13 +335,13 @@ impl EmitterARM64 for Assembler { 0 => dynasm!(self ; ldr X(reg), [X(addr)]), 1 => dynasm!(self ; ldr X(reg), [X(addr), X(r2)]), _ => dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]), - } + }; } (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x7) != 0 || (disp >= 0x8000) { unreachable!(); } dynasm!(self ; ldr D(reg), [X(addr), disp]); @@ -347,7 +350,7 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let disp = disp as u32; - if (disp & 0x7) != 0 { + if (disp & 0x3) != 0 || (disp >= 0x4000) { unreachable!(); } dynasm!(self ; ldr S(reg), [X(addr), disp]); @@ -356,6 +359,9 @@ impl EmitterARM64 for Assembler { } } fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) { + if (offset < -255) || (offset > 255) { + unreachable!(); + } match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -379,6 +385,9 @@ impl EmitterARM64 for Assembler { } } fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) { + if (offset < -255) || (offset > 255) { + unreachable!(); + } match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -403,6 +412,9 @@ impl EmitterARM64 for Assembler { } fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + if offset > 255 { + unreachable!(); + } match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -418,6 +430,9 @@ impl EmitterARM64 for Assembler { } } fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + if offset > 255 { + unreachable!(); + } match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -433,6 +448,9 @@ impl EmitterARM64 for Assembler { } } fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) { + if offset > 255 { + unreachable!(); + } match (sz, reg) { (Size::S64, Location::GPR(reg)) => { let reg = reg.into_index() as u32; @@ -449,6 +467,9 @@ impl EmitterARM64 for Assembler { } fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + if offset > 255 { + unreachable!(); + } match (sz, reg1, reg2) { (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { let reg1 = reg1.into_index() as u32; @@ -460,6 +481,9 @@ impl EmitterARM64 for Assembler { } } fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) { + if offset > 255 { + unreachable!(); + } match (sz, reg1, reg2) { (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => { let reg1 = reg1.into_index() as u32; @@ -477,6 +501,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if offset >= 0x1000 { + unreachable!(); + } dynasm!(self ; ldrb W(reg), [X(addr), offset]); } (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -487,10 +514,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrb W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrb W(reg), [X(addr)]), + 1 => dynasm!(self ; ldrb W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrb W(reg), [X(addr), X(r2), LSL mult]), + }; 
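// All of the range guards added in these hunks enforce one rule: a plain
// LDR/STR-class immediate is a 12-bit unsigned value scaled by the access
// size. A sketch of the rule as a predicate (hypothetical helper, for
// illustration; the emitter open-codes each width above):
fn scaled_disp_fits(disp: u32, access_bytes: u32) -> bool {
    disp % access_bytes == 0 && disp / access_bytes < 0x1000
}
// e.g. 8-byte accesses allow 0..=0x7FF8, 4-byte 0..=0x3FFC, 1-byte 0..=0xFFF.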
} _ => panic!("singlepass can't emit LDRB {:?}, {:?}", reg, dst), } @@ -501,6 +529,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if (offset & 1 != 0) || (offset >= 0x2000) { + unreachable!(); + } dynasm!(self ; ldrh W(reg), [X(addr), offset]); } (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -511,10 +542,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrh W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrh W(reg), [X(addr)]), + 1 => dynasm!(self ; ldrh W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrh W(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit LDRH {:?}, {:?}", reg, dst), } @@ -525,12 +557,18 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if offset >= 0x1000 { + unreachable!(); + } dynasm!(self ; ldrsb X(reg), [X(addr), offset]); } (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if offset >= 0x1000 { + unreachable!(); + } dynasm!(self ; ldrsb W(reg), [X(addr), offset]); } (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -541,10 +579,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrsb X(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrsb X(reg), [X(addr)]), + 1 => dynasm!(self ; ldrsb X(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrsb X(reg), [X(addr), X(r2), LSL mult]), + }; } (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() as u32; @@ -554,10 +593,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrsb W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrsb W(reg), [X(addr)]), + 1 => dynasm!(self ; ldrsb W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrsb W(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit LDRSB {:?}, {:?}, {:?}", sz, reg, dst), } @@ -568,12 +608,18 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if (offset & 1 != 0) || (offset >= 0x2000) { + unreachable!(); + } dynasm!(self ; ldrsh X(reg), [X(addr), offset]); } (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if (offset & 1 != 0) || (offset >= 0x2000) { + unreachable!(); + } dynasm!(self ; ldrsh W(reg), [X(addr), offset]); } (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -584,10 +630,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrsh X(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrsh X(reg), [X(addr)]), + 1 => dynasm!(self ; ldrsh X(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrsh X(reg), [X(addr), X(r2), LSL mult]), + }; } (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { let reg = reg.into_index() 
as u32; @@ -597,10 +644,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrsh W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrsh W(reg), [X(addr)]), + 1 => dynasm!(self ; ldrsh W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrsh W(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit LDRSH {:?}, {:?}, {:?}", sz, reg, dst), } @@ -611,6 +659,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if (offset & 3 != 0) || (offset >= 0x4000) { + unreachable!(); + } dynasm!(self ; ldrsw X(reg), [X(addr), offset]); } (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -621,10 +672,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; ldrsw X(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; ldrsw X(reg), [X(addr)]), + 1 => dynasm!(self ; ldrsw X(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; ldrsw X(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit LDRSW {:?}, {:?}, {:?}", sz, reg, dst), } @@ -635,6 +687,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if offset >= 0x1000 { + unreachable!(); + } dynasm!(self ; strb W(reg), [X(addr), offset]); } (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -645,10 +700,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; strb W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; strb W(reg), [X(addr)]), + 1 => dynasm!(self ; strb W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; strb W(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit STRB {:?}, {:?}", reg, dst), } @@ -659,6 +715,9 @@ impl EmitterARM64 for Assembler { let reg = reg.into_index() as u32; let addr = addr.into_index() as u32; let offset = offset as u32; + if (offset & 1 != 0) || (offset >= 0x2000) { + unreachable!(); + } dynasm!(self ; strh W(reg), [X(addr), offset]); } (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => { @@ -669,10 +728,11 @@ impl EmitterARM64 for Assembler { unreachable!(); } let mult = mult as u32; - if mult == 0 { - unreachable!(); - } - dynasm!(self ; strh W(reg), [X(addr), X(r2), LSL mult]); + match mult { + 0 => dynasm!(self ; strh W(reg), [X(addr)]), + 1 => dynasm!(self ; strh W(reg), [X(addr), X(r2)]), + _ => dynasm!(self ; strh W(reg), [X(addr), X(r2), LSL mult]), + }; } _ => panic!("singlepass can't emit STRH {:?}, {:?}", reg, dst), } @@ -722,15 +782,33 @@ impl EmitterARM64 for Assembler { } (Size::S32, Location::Imm32(val), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; mov W(dst), val as u64); + if val < 0x1000 { + dynasm!(self ; mov W(dst), val as u64); + } else if encode_logical_immediate_32bit(val as _).is_some() { + dynasm!(self ; orr W(dst), wzr, val); + } else { + unreachable!(); + } } (Size::S64, Location::Imm32(val), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; mov W(dst), val as u64); + if val < 0x1000 { + dynasm!(self ; mov W(dst), val as u64); + } else if encode_logical_immediate_64bit(val as _).is_some() { + dynasm!(self ; orr X(dst), xzr, val as u64); + } 
else { + unreachable!(); + } } (Size::S64, Location::Imm64(val), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; mov X(dst), val); + if val < 0x1000 { + dynasm!(self ; mov W(dst), val as u64); + } else if encode_logical_immediate_64bit(val as _).is_some() { + dynasm!(self ; orr X(dst), xzr, val as u64); + } else { + unreachable!(); + } } _ => panic!("singlepass can't emit MOV {:?}, {:?}, {:?}", sz, src, dst), } @@ -820,12 +898,18 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; add X(dst), X(src1), imm); } (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } let imm = imm as u32; dynasm!(self ; add X(dst), X(src1), imm); } @@ -839,6 +923,9 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; add W(dst), W(src1), imm); } _ => panic!( @@ -874,16 +961,25 @@ impl EmitterARM64 for Assembler { (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; sub W(dst), W(src1), imm); } (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; sub X(dst), X(src1), imm); } (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; sub X(dst), X(src1), imm as u32); } _ => panic!( @@ -936,6 +1032,9 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; adds X(dst), X(src1), imm); } (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) @@ -948,6 +1047,9 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; adds W(dst), W(src1), imm); } _ => panic!( @@ -1027,6 +1129,9 @@ impl EmitterARM64 for Assembler { } (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; add W(dst), W(dst), imm); } _ => panic!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst), @@ -1039,10 +1144,13 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; sub X(dst), X(dst), X(src)); } - (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { - let src = src as u32; + (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { + let imm = imm as u32; let dst = dst.into_index() as u32; - dynasm!(self ; sub X(dst), X(dst), 
src); + if imm >= 0x1000 { + unreachable!(); + } + dynasm!(self ; sub X(dst), X(dst), imm); } (Size::S32, Location::GPR(src), Location::GPR(dst)) => { let src = src.into_index() as u32; @@ -1079,10 +1187,16 @@ impl EmitterARM64 for Assembler { } (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; cmp X(dst), imm as u32); } (Size::S64, Location::Imm64(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; cmp X(dst), imm as u32); } (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { @@ -1091,6 +1205,9 @@ impl EmitterARM64 for Assembler { } (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } dynasm!(self ; cmp W(dst), imm as u32); } _ => panic!("singlepass can't emit CMP {:?} {:?} {:?}", sz, src, dst), @@ -1104,22 +1221,31 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; tst X(dst), X(src)); } - (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { + (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; tst X(dst), src as u64); + if !encode_logical_immediate_64bit(imm as u64).is_some() { + unreachable!(); + } + dynasm!(self ; tst X(dst), imm as u64); } - (Size::S64, Location::Imm64(src), Location::GPR(dst)) => { + (Size::S64, Location::Imm64(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; tst X(dst), src as u64); + if !encode_logical_immediate_64bit(imm as u64).is_some() { + unreachable!(); + } + dynasm!(self ; tst X(dst), imm as u64); } (Size::S32, Location::GPR(src), Location::GPR(dst)) => { let src = src.into_index() as u32; let dst = dst.into_index() as u32; dynasm!(self ; tst W(dst), W(src)); } - (Size::S32, Location::Imm32(src), Location::GPR(dst)) => { + (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { let dst = dst.into_index() as u32; - dynasm!(self ; tst W(dst), src); + if !encode_logical_immediate_64bit(imm as u64).is_some() { + unreachable!(); + } + dynasm!(self ; tst W(dst), imm); } _ => unreachable!(), } @@ -1133,11 +1259,14 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; lsl X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; - let src2 = src2 as u32; + if imm > 63 { + unreachable!(); + } + let imm = imm as u32; let dst = dst.into_index() as u32; - dynasm!(self ; lsl X(dst), X(src1), src2); + dynasm!(self ; lsl X(dst), X(src1), imm); } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; @@ -1149,24 +1278,36 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm > 63 { + unreachable!(); + } dynasm!(self ; lsl X(dst), X(src1), imm as u32); } (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm > 63 { + unreachable!(); + } dynasm!(self ; lsl X(dst), X(src1), imm as u32); } 
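// (The guards in these hunks split immediates into two classes: shift
// amounts, which must fit the operand width — at most 63 for X registers and
// 31 for W — and TST/logical operands, which must encode as an AArch64
// "logical immediate", i.e. a replicated, rotated run of set bits. As
// illustrative examples not taken from the patch: 0x5555_5555 and
// 0xFF00_FF00 encode, while 0x1234 does not and has to be materialized
// through a scratch register with emit_mov_imm first.)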
(Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm > 31 { + unreachable!(); + } dynasm!(self ; lsl W(dst), W(src1), imm as u32); } (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm > 31 { + unreachable!(); + } dynasm!(self ; lsl W(dst), W(src1), imm as u32); } _ => panic!( @@ -1183,14 +1324,14 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; asr X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; - let src2 = src2 as u32; + let imm = imm as u32; let dst = dst.into_index() as u32; - if src2 == 0 { + if imm == 0 || imm > 63 { unreachable!(); } - dynasm!(self ; asr X(dst), X(src1), src2); + dynasm!(self ; asr X(dst), X(src1), imm); } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; @@ -1202,7 +1343,7 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 63 { unreachable!(); } dynasm!(self ; asr X(dst), X(src1), imm as u32); @@ -1211,7 +1352,7 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 63 { unreachable!(); } dynasm!(self ; asr X(dst), X(src1), imm as u32); @@ -1220,12 +1361,18 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } dynasm!(self ; asr W(dst), W(src1), imm as u32); } (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } dynasm!(self ; asr W(dst), W(src1), imm as u32); } _ => panic!( @@ -1242,14 +1389,14 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; lsr X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; - let src2 = src2 as u32; + let imm = imm as u32; let dst = dst.into_index() as u32; - if src2 == 0 { + if imm == 0 || imm > 63 { unreachable!(); } - dynasm!(self ; lsr X(dst), X(src1), src2); + dynasm!(self ; lsr X(dst), X(src1), imm); } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; @@ -1261,7 +1408,7 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if 
imm == 0 { + if imm == 0 || imm > 63 { unreachable!(); } dynasm!(self ; lsr X(dst), X(src1), imm as u32); @@ -1270,7 +1417,7 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 63 { unreachable!(); } dynasm!(self ; lsr X(dst), X(src1), imm as u32); @@ -1279,7 +1426,7 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 31 { unreachable!(); } dynasm!(self ; lsr W(dst), W(src1), imm as u32); @@ -1288,7 +1435,7 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 31 { unreachable!(); } dynasm!(self ; lsr W(dst), W(src1), imm as u32); @@ -1307,14 +1454,14 @@ impl EmitterARM64 for Assembler { let dst = dst.into_index() as u32; dynasm!(self ; ror X(dst), X(src1), X(src2)); } - (Size::S64, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; - let src2 = src2 as u32; + let imm = imm as u32; let dst = dst.into_index() as u32; - if src2 == 0 { + if imm == 0 || imm > 63 { unreachable!(); } - dynasm!(self ; ror X(dst), X(src1), src2); + dynasm!(self ; ror X(dst), X(src1), imm); } (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; @@ -1326,7 +1473,7 @@ impl EmitterARM64 for Assembler { | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; - if imm == 0 { + if imm == 0 || imm > 63 { unreachable!(); } dynasm!(self ; ror X(dst), X(src1), imm as u32); @@ -1335,6 +1482,9 @@ impl EmitterARM64 for Assembler { | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { let src1 = src1.into_index() as u32; let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } dynasm!(self ; ror W(dst), W(src1), imm as u32); } _ => panic!( @@ -2119,7 +2269,8 @@ pub fn gen_std_trampoline_arm64( // Move arguments to their locations. // `callee_vmctx` is already in the first argument register, so no need to move. - for (i, param) in sig.params().iter().enumerate().rev() { + let mut caller_stack_offset: i32 = 0; + for (i, param) in sig.params().iter().enumerate() { let sz = match *param { Type::I32 | Type::F32 => Size::S32, Type::I64 | Type::F64 => Size::S64, @@ -2139,17 +2290,18 @@ pub fn gen_std_trampoline_arm64( ); } _ => { - // using X1 as scratch reg, because the for args is going backward + // using X16 as scratch reg a.emit_ldr( sz, - Location::GPR(GPR::X1), + Location::GPR(GPR::X16), Location::Memory(args, (i * 16) as i32), ); a.emit_str( sz, - Location::GPR(GPR::X1), - Location::Memory(GPR::XzrSp, (i as i32 - 7) * 8), - ) + Location::GPR(GPR::X16), + Location::Memory(GPR::XzrSp, caller_stack_offset), + ); + caller_stack_offset += 8; } } } @@ -2158,7 +2310,7 @@ pub fn gen_std_trampoline_arm64( // Write return value. 
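// A sketch of the argument layout this loop implements, inferred from the
// code above (hypothetical helper, not part of the trampoline): the `args`
// array holds one value per 16-byte slot, x0 keeps the callee vmctx, params
// 0..=6 travel in x1..=x7, and the remainder get ascending 8-byte stack
// slots tracked by caller_stack_offset.
fn trampoline_param_location(i: usize) -> String {
    if i < 7 {
        format!("x{}", i + 1) // register argument
    } else {
        format!("[sp, #{}]", (i - 7) * 8) // stack slot, written via X16
    }
}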
if !sig.results().is_empty() { - a.emit_stur(Size::S64, Location::GPR(GPR::X0), args, 0); + a.emit_str(Size::S64, Location::GPR(GPR::X0), Location::Memory(args, 0)); } // Restore stack. @@ -2183,11 +2335,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64( let mut a = Assembler::new(0); // Allocate argument array. let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()); - // Save LR and X20, as scratch register + // Save LR and X26, as scratch register a.emit_stpdb( Size::S64, Location::GPR(GPR::X30), - Location::GPR(GPR::X20), + Location::GPR(GPR::X26), GPR::XzrSp, 16, ); @@ -2201,11 +2353,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64( Location::GPR(GPR::XzrSp), ); } else { - a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64); + a.emit_mov_imm(Location::GPR(GPR::X26), stack_offset as u64); a.emit_sub( Size::S64, Location::GPR(GPR::XzrSp), - Location::GPR(GPR::X20), + Location::GPR(GPR::X26), Location::GPR(GPR::XzrSp), ); } @@ -2225,14 +2377,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64( None => { a.emit_ldr( Size::S64, - Location::GPR(GPR::X20), - Location::Memory( - GPR::XzrSp, - (stack_offset + 16 + stack_param_count * 8) as _, - ), + Location::GPR(GPR::X26), + Location::Memory(GPR::XzrSp, (stack_offset + 16 + stack_param_count) as _), ); - stack_param_count += 1; - Location::GPR(GPR::X20) + stack_param_count += 8; + Location::GPR(GPR::X26) } }; a.emit_str( @@ -2254,7 +2403,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64( _ => { // Load target address. let offset = vmoffsets.vmdynamicfunction_import_context_address(); - a.emit_ldur(Size::S64, Location::GPR(GPR::X20), GPR::X0, offset as i32); + a.emit_ldur(Size::S64, Location::GPR(GPR::X26), GPR::X0, offset as i32); // Load values array. a.emit_add( Size::S64, @@ -2266,7 +2415,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64( }; // Call target. - a.emit_call_register(GPR::X20); + a.emit_call_register(GPR::X26); // Fetch return value. 
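// A sketch of the frame gen_std_dynamic_import_trampoline_arm64 builds,
// inferred from this function (hypothetical helper, for illustration): a
// 16-byte LR/X26 save area pushed by stpdb sits on top of a values array of
// one 16-byte slot per value, keeping SP 16-byte aligned; incoming stack
// params are therefore read at sp + stack_offset + 16.
fn dynamic_trampoline_frame_bytes(n_params: usize, n_results: usize) -> usize {
    let values_area = 16 * n_params.max(n_results); // one 128-bit slot each
    16 + values_area // plus the LR/X26 save area
}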
if !sig.results().is_empty() { @@ -2288,11 +2437,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64( Location::GPR(GPR::XzrSp), ); } else { - a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64); + a.emit_mov_imm(Location::GPR(GPR::X26), stack_offset as u64); a.emit_add( Size::S64, Location::GPR(GPR::XzrSp), - Location::GPR(GPR::X20), + Location::GPR(GPR::X26), Location::GPR(GPR::XzrSp), ); } @@ -2300,7 +2449,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64( a.emit_ldpia( Size::S64, Location::GPR(GPR::X30), - Location::GPR(GPR::X20), + Location::GPR(GPR::X26), GPR::XzrSp, 16, ); diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index f23f46e0d9a..55aeac8c593 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -586,10 +586,10 @@ impl MachineARM64 { .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_str( Size::S64, - Location::GPR(tmp), + dst, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => panic!("singlepass can't emit str64 {:?} {:?}", dst, src), @@ -613,10 +613,10 @@ impl MachineARM64 { .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_str( Size::S32, - Location::GPR(tmp), + dst, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), @@ -638,10 +638,10 @@ impl MachineARM64 { .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_strh( Size::S32, - Location::GPR(tmp), + dst, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), @@ -664,10 +664,10 @@ impl MachineARM64 { .emit_mov_imm(Location::GPR(tmp), offset as u64); self.assembler.emit_strb( Size::S32, - Location::GPR(tmp), + dst, Location::Memory2(addr, tmp, Multiplier::One, 0), ); - self.release_gpr(tmp); + temps.push(tmp); } } _ => unreachable!(), @@ -778,23 +778,24 @@ impl MachineARM64 { Location::GPR(tmp_base), Location::GPR(tmp_bound), ); - if value_size < 256 { + if self.compatible_imm(value_size as _, ImmType::Bits12) { self.assembler.emit_sub( Size::S64, Location::GPR(tmp_bound), Location::GPR(tmp_bound), - Location::Imm8(value_size as u8), + Location::Imm32(value_size as _), ); } else { - // reusing tmp_base + let tmp2 = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp_base), value_size as u64); + .emit_mov_imm(Location::GPR(tmp2), value_size as u64); self.assembler.emit_sub( Size::S64, Location::GPR(tmp_bound), - Location::GPR(tmp_base), + Location::GPR(tmp2), Location::GPR(tmp_bound), ); + self.release_gpr(tmp2); } } @@ -900,7 +901,7 @@ impl MachineARM64 { if offset >= 0x1000 << shift { return false; } - if (offset >> shift) << shift != offset { + if (offset & ((1 << shift) - 1)) != 0 { return false; } return true; @@ -1164,18 +1165,10 @@ impl Machine for MachineARM64 { } else { (used_neons.len() * 8) as u32 }; - let delta = if stack_adjust < 256 { - Location::Imm8(stack_adjust as u8) - } else { - let tmp = self.pick_temp_gpr().unwrap(); - self.assembler - .emit_mov_imm(Location::GPR(tmp), stack_adjust as u64); - Location::GPR(tmp) - }; self.assembler.emit_add( Size::S64, Location::GPR(GPR::XzrSp), - delta, + Location::Imm32(stack_adjust as _), Location::GPR(GPR::XzrSp), ); } @@ -1256,10 +1249,10 @@ impl Machine for MachineARM64 { // Adjust stack for locals fn adjust_stack(&mut self, delta_stack_offset: u32) { - let delta = 
if delta_stack_offset < 256 { - Location::Imm8(delta_stack_offset as u8) + let delta = if self.compatible_imm(delta_stack_offset as _, ImmType::Bits12) { + Location::Imm32(delta_stack_offset as _) } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; self.assembler .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); Location::GPR(tmp) @@ -1273,10 +1266,10 @@ impl Machine for MachineARM64 { } // restore stack fn restore_stack(&mut self, delta_stack_offset: u32) { - let delta = if delta_stack_offset < 256 { - Location::Imm8(delta_stack_offset as u8) + let delta = if self.compatible_imm(delta_stack_offset as _, ImmType::Bits12) { + Location::Imm32(delta_stack_offset as _) } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; self.assembler .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); Location::GPR(tmp) @@ -1295,9 +1288,9 @@ impl Machine for MachineARM64 { delta_stack_offset }; let delta = if self.compatible_imm(real_delta as i64, ImmType::Bits12) { - Location::Imm8(real_delta as u8) + Location::Imm32(real_delta as _) } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; self.assembler .emit_mov_imm(Location::GPR(tmp), real_delta as u64); Location::GPR(tmp) @@ -1313,10 +1306,8 @@ impl Machine for MachineARM64 { fn push_location_for_native(&mut self, loc: Location) { match loc { Location::Imm64(_) => { - self.reserve_unused_temp_gpr(GPR::X8); - self.move_location(Size::S64, loc, Location::GPR(GPR::X8)); - self.emit_push(Size::S64, Location::GPR(GPR::X8)); - self.release_gpr(GPR::X8); + self.move_location(Size::S64, loc, Location::GPR(GPR::X17)); + self.emit_push(Size::S64, Location::GPR(GPR::X17)); } _ => self.emit_push(Size::S64, loc), } @@ -1324,10 +1315,7 @@ impl Machine for MachineARM64 { // Zero a location that is 32bits fn zero_location(&mut self, size: Size, location: Location) { - match location { - Location::GPR(_) => self.assembler.emit_mov_imm(location, 0u64), - _ => self.move_location(size, Location::GPR(GPR::XzrSp), location), - } + self.move_location(size, Location::GPR(GPR::XzrSp), location); } // GPR Reg used for local pointer on the stack @@ -1361,7 +1349,7 @@ impl Machine for MachineARM64 { self.assembler .emit_stur(Size::S64, location, GPR::X29, -stack_offset); } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; self.assembler .emit_mov_imm(Location::GPR(tmp), stack_offset as u64); self.assembler.emit_sub( @@ -1407,7 +1395,7 @@ impl Machine for MachineARM64 { } else if self.compatible_imm(offs as i64, ImmType::UnscaledOffset) { self.assembler.emit_stur(size, source, addr, offs); } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; if offs < 0 { self.assembler .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); @@ -1463,7 +1451,7 @@ impl Machine for MachineARM64 { } else if offs > -256 && offs < 256 { self.assembler.emit_ldur(size, dest, addr, offs); } else { - let tmp = self.pick_temp_gpr().unwrap(); + let tmp = GPR::X17; if offs < 0 { self.assembler .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); @@ -1482,7 +1470,8 @@ impl Machine for MachineARM64 { Location::GPR(tmp), ); } - self.assembler.emit_ldr(size, source, Location::GPR(tmp)); + self.assembler + .emit_ldr(size, dest, Location::Memory(tmp, 0)); } } _ => { @@ -1600,11 +1589,11 @@ impl Machine for MachineARM64 { self.pushed = false; saved_area_offset }; - if real_delta < 256 { + if self.compatible_imm(real_delta as _, ImmType::Bits12) { self.assembler.emit_sub( Size::S64, 
Location::GPR(GPR::X29), - Location::Imm8(real_delta as u8), + Location::Imm32(real_delta as _), Location::GPR(GPR::XzrSp), ); } else { @@ -1662,7 +1651,7 @@ impl Machine for MachineARM64 { Location::Imm8(0), Location::GPR(GPR::XzrSp), ); - self.pushed = false; // SP is restored, concider it aligned + self.pushed = false; // SP is restored, consider it aligned self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28)); self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); } @@ -1827,10 +1816,8 @@ impl Machine for MachineARM64 { // jmp table fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) { - let tmp1 = self.pick_temp_gpr().unwrap(); - self.reserve_gpr(tmp1); - let tmp2 = self.pick_temp_gpr().unwrap(); - self.reserve_gpr(tmp2); + let tmp1 = self.acquire_temp_gpr().unwrap(); + let tmp2 = self.acquire_temp_gpr().unwrap(); self.assembler.emit_load_label(tmp1, label); self.move_location(Size::S32, cond, Location::GPR(tmp2)); @@ -1948,7 +1935,7 @@ impl Machine for MachineARM64 { loc_a, loc_b, ret, - ImmType::Bits8, + ImmType::Bits12, ); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -1958,7 +1945,7 @@ impl Machine for MachineARM64 { loc_a, loc_b, ret, - ImmType::Bits8, + ImmType::Bits12, ); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -2269,8 +2256,8 @@ impl Machine for MachineARM64 { ); let tmp2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); - self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp2); - tmp2 + self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp1); + tmp1 } }; self.emit_relaxed_binop3( @@ -2903,7 +2890,7 @@ impl Machine for MachineARM64 { loc_a, loc_b, ret, - ImmType::Bits8, + ImmType::Bits12, ); } fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -2913,7 +2900,7 @@ impl Machine for MachineARM64 { loc_a, loc_b, ret, - ImmType::Bits8, + ImmType::Bits12, ); } fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { @@ -3223,8 +3210,8 @@ impl Machine for MachineARM64 { ); let tmp2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); - self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp2); - tmp2 + self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp1); + tmp1 } }; self.emit_relaxed_binop3( From 9d188cb84faf3ec95abc0d04f15420e8b1de5ff9 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 5 Jan 2022 15:10:45 +0100 Subject: [PATCH 26/34] improv(compiler) Added canonicalization and rounding (182 tests pass now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 103 ++++++++++++++ lib/compiler-singlepass/src/machine_arm64.rs | 116 ++++++++++++++++--- 2 files changed, 201 insertions(+), 18 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index bb58d7be31b..d6038b4d7e5 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -140,6 +140,9 @@ pub trait EmitterARM64 { fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_bfc(&mut self, se: Size, lsb: u32, width: u32, dst: Location); + fn emit_bfi(&mut self, se: Size, src: Location, lsb: u32, width: u32, dst: Location); + fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_sdiv(&mut self, sz: Size, src1:
Location, src2: Location, dst: Location); /// msub : c - a*b -> dst @@ -184,10 +187,18 @@ pub trait EmitterARM64 { fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location); + fn emit_frintz(&mut self, sz: Size, src: Location, dst: Location); + fn emit_frintn(&mut self, sz: Size, src: Location, dst: Location); + fn emit_frintm(&mut self, sz: Size, src: Location, dst: Location); + fn emit_frintp(&mut self, sz: Size, src: Location, dst: Location); + fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location); + fn emit_read_fpcr(&mut self, reg: GPR); + fn emit_write_fpcr(&mut self, reg: GPR); + fn arch_supports_canonicalize_nan(&self) -> bool { true } @@ -1609,6 +1620,29 @@ impl EmitterARM64 for Assembler { } } + fn emit_bfc(&mut self, sz: Size, lsb: u32, width: u32, dst: Location) { + match (sz, dst) { + (Size::S32, Location::GPR(dst)) => { + dynasm!(self ; bfc W(dst as u32), lsb, width); + } + (Size::S64, Location::GPR(dst)) => { + dynasm!(self ; bfc X(dst as u32), lsb, width); + } + _ => unimplemented!(), + } + } + fn emit_bfi(&mut self, sz: Size, src: Location, lsb: u32, width: u32, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; bfi W(dst as u32), W(src as u32), lsb, width); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; bfi X(dst as u32), X(src as u32), lsb, width); + } + _ => unimplemented!(), + } + } + fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { match (sz, src1, src2, dst) { (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { @@ -2163,6 +2197,67 @@ impl EmitterARM64 for Assembler { } } + fn emit_frintz(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintz S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintz D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTZ {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_frintn(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintn S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintn D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTN {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_frintm(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintm S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintm D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTM {:?} {:?} 
{:?}", sz, src, dst), + } + } + fn emit_frintp(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintp S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintp D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTP {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { match (sz_in, src, sz_out, dst) { (Size::S32, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { @@ -2237,6 +2332,14 @@ impl EmitterARM64 for Assembler { ), } } + // mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0 + // 1 011 0100 0100 000 => fpcr + fn emit_read_fpcr(&mut self, reg: GPR) { + dynasm!(self ; mrs X(reg as u32), 0b1_011_0100_0100_000); + } + fn emit_write_fpcr(&mut self, reg: GPR) { + dynasm!(self ; msr 0b1_011_0100_0100_000, X(reg as u32)); + } } pub fn gen_std_trampoline_arm64( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 55aeac8c593..23d421f13da 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -999,6 +999,34 @@ impl MachineARM64 { self.emit_pop(sz, dst1); } } + + fn set_default_nan(&mut self, temps: &mut Vec) -> GPR { + // temporarly set FPCR to DefaultNan + let old_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(old_fpcr.clone()); + self.assembler.emit_read_fpcr(old_fpcr); + let new_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(new_fpcr.clone()); + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler + .emit_mov(Size::S32, Location::Imm32(1), Location::GPR(tmp)); + self.assembler + .emit_mov(Size::S64, Location::GPR(old_fpcr), Location::GPR(new_fpcr)); + // DN is bit 25 of FPCR + self.assembler.emit_bfi( + Size::S64, + Location::GPR(tmp), + 25, + 1, + Location::GPR(new_fpcr), + ); + self.assembler.emit_write_fpcr(new_fpcr); + old_fpcr + } + fn restore_fpcr(&mut self, old_fpcr: GPR) { + self.assembler.emit_write_fpcr(old_fpcr); + } } impl Machine for MachineARM64 { @@ -1679,8 +1707,36 @@ impl Machine for MachineARM64 { fn arch_supports_canonicalize_nan(&self) -> bool { self.assembler.arch_supports_canonicalize_nan() } - fn canonicalize_nan(&mut self, _sz: Size, _input: Location, _output: Location) { - unimplemented!(); + fn canonicalize_nan(&mut self, sz: Size, input: Location, output: Location) { + let mut tempn = vec![]; + let mut temps = vec![]; + let old_fpcr = self.set_default_nan(&mut temps); + // use FMAX (input, intput) => output to automaticaly normalize the NaN + match (sz, input, output) { + (Size::S32, Location::SIMD(_), Location::SIMD(_)) => { + self.assembler.emit_fmax(sz, input, input, output); + } + (Size::S64, Location::SIMD(_), Location::SIMD(_)) => { + self.assembler.emit_fmax(sz, input, input, output); + } + (Size::S32, Location::SIMD(_), _) | (Size::S64, Location::SIMD(_), _) => { + let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false); + self.assembler.emit_fmax(sz, input, input, tmp); + self.move_location(sz, tmp, output); + } + _ => panic!( + "singlepass can't emit canonicalize_nan {:?} {:?} {:?}", + sz, input, output + ), + } + + 
self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } + for r in tempn { + self.release_simd(r); + } } fn emit_illegal_op(&mut self) { @@ -4244,17 +4300,17 @@ impl Machine for MachineARM64 { fn f64_sqrt(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S64, loc, ret, true); } - fn f64_trunc(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_trunc(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintz, Size::S64, loc, ret, true); } - fn f64_ceil(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_ceil(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintp, Size::S64, loc, ret, true); } - fn f64_floor(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_floor(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintm, Size::S64, loc, ret, true); } - fn f64_nearest(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f64_nearest(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintn, Size::S64, loc, ret, true); } fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; @@ -4329,6 +4385,8 @@ impl Machine for MachineARM64 { } } fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let old_fpcr = self.set_default_nan(&mut temps); self.emit_relaxed_binop3_neon( Assembler::emit_fmin, Size::S64, @@ -4337,8 +4395,14 @@ impl Machine for MachineARM64 { ret, ImmType::None, ); + self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } } fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let old_fpcr = self.set_default_nan(&mut temps); self.emit_relaxed_binop3_neon( Assembler::emit_fmax, Size::S64, @@ -4347,6 +4411,10 @@ impl Machine for MachineARM64 { ret, ImmType::None, ); + self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } } fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_relaxed_binop3_neon( @@ -4426,17 +4494,17 @@ impl Machine for MachineARM64 { fn f32_sqrt(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S32, loc, ret, true); } - fn f32_trunc(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_trunc(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintz, Size::S32, loc, ret, true); } - fn f32_ceil(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_ceil(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintp, Size::S32, loc, ret, true); } - fn f32_floor(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_floor(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintm, Size::S32, loc, ret, true); } - fn f32_nearest(&mut self, _loc: Location, _ret: Location) { - unimplemented!(); + fn f32_nearest(&mut self, loc: Location, ret: Location) { + self.emit_relaxed_binop_neon(Assembler::emit_frintn, Size::S32, loc, ret, true); } fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { let mut temps = vec![]; @@ -4511,6 +4579,8 @@ impl Machine for MachineARM64 { } } fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: 
Location) { + let mut temps = vec![]; + let old_fpcr = self.set_default_nan(&mut temps); self.emit_relaxed_binop3_neon( Assembler::emit_fmin, Size::S32, loc_a, loc_b, ret, ImmType::None, ); + self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } } fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let old_fpcr = self.set_default_nan(&mut temps); self.emit_relaxed_binop3_neon( Assembler::emit_fmax, Size::S32, loc_a, loc_b, ret, ImmType::None, ); + self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } } fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_relaxed_binop3_neon( From 77e65cdaf75429e749e71b88bbcee9cd36ecec30 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 6 Jan 2022 17:36:40 +0100 Subject: [PATCH 27/34] improv(compiler) Added float to int conversion, with trap (187 tests pass now) --- lib/compiler-singlepass/src/emitter_arm64.rs | 103 +++++++++- lib/compiler-singlepass/src/machine_arm64.rs | 192 ++++++++++++++++++- 2 files changed, 286 insertions(+), 9 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index d6038b4d7e5..5ab330f0d22 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -165,6 +165,8 @@ pub trait EmitterARM64 { fn emit_b_label(&mut self, label: Label); fn emit_cbz_label(&mut self, sz: Size, reg: Location, label: Label); fn emit_cbnz_label(&mut self, sz: Size, reg: Location, label: Label); + fn emit_tbz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label); + fn emit_tbnz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label); fn emit_bcond_label(&mut self, condition: Condition, label: Label); fn emit_b_register(&mut self, reg: GPR); fn emit_call_label(&mut self, label: Label); @@ -195,9 +197,13 @@ pub trait EmitterARM64 { fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location); + fn emit_fcvtzs(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); + fn emit_fcvtzu(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location); fn emit_read_fpcr(&mut self, reg: GPR); fn emit_write_fpcr(&mut self, reg: GPR); + fn emit_read_fpsr(&mut self, reg: GPR); + fn emit_write_fpsr(&mut self, reg: GPR); fn arch_supports_canonicalize_nan(&self) -> bool { true } @@ -1986,6 +1992,38 @@ impl EmitterARM64 for Assembler { _ => panic!("singlepass can't emit CBNZ {:?} {:?} {:?}", sz, reg, label), } } + fn emit_tbz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label) { + match (sz, reg) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; tbz W(reg), n, =>label); + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; tbz X(reg), n, =>label); + } + _ => panic!( + "singlepass can't emit TBZ {:?} {:?} {:?} {:?}", + sz, reg, n, label + ), + } + } + fn emit_tbnz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label) { + match (sz, reg) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; tbnz W(reg), n, =>label); + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg.into_index() as u32; + dynasm!(self ; 
tbnz X(reg), n, =>label); + } + _ => panic!( + "singlepass can't emit TBNZ {:?} {:?} {:?} {:?}", + sz, reg, n, label + ), + } + } fn emit_bcond_label(&mut self, condition: Condition, label: Label) { match condition { Condition::Eq => dynasm!(self ; b.eq => label), @@ -2332,7 +2370,63 @@ impl EmitterARM64 for Assembler { ), } } - // mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0 + fn emit_fcvtzs(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs W(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs W(dst), D(src)); + } + (Size::S32, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs X(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs X(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVTZS {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + fn emit_fcvtzu(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu W(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu W(dst), D(src)); + } + (Size::S32, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu X(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu X(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVTZU {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + // 1 011 0100 0100 000 => fpcr fn emit_read_fpcr(&mut self, reg: GPR) { dynasm!(self ; mrs X(reg as u32), 0b1_011_0100_0100_000); @@ -2340,6 +2434,13 @@ impl EmitterARM64 for Assembler { fn emit_write_fpcr(&mut self, reg: GPR) { dynasm!(self ; msr 0b1_011_0100_0100_000, X(reg as u32)); } + // 1 011 0100 0100 001 => fpsr + fn emit_read_fpsr(&mut self, reg: GPR) { + dynasm!(self ; mrs X(reg as u32), 0b1_011_0100_0100_001); + } + fn emit_write_fpsr(&mut self, reg: GPR) { + dynasm!(self ; msr 0b1_011_0100_0100_001, X(reg as u32)); + } } pub fn gen_std_trampoline_arm64( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 23d421f13da..bada8b5a05a 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1024,9 +1024,81 @@ impl MachineARM64 { self.assembler.emit_write_fpcr(new_fpcr); old_fpcr } + fn set_trap_enabled(&mut self, temps: &mut Vec<GPR>) -> GPR { + // temporarily adjust FPCR so invalid conversions can be detected afterwards + let old_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(old_fpcr.clone()); + 
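// Note: the BFC below leaves FPCR.IOE clear, so an invalid float-to-int conversion does not raise a synchronous exception; it only sets the cumulative IOC flag (bit 0) of FPSR, which trap_float_convertion_errors() inspects after the FCVTZS/FCVTZU (assuming the standard AArch64 FPCR/FPSR bit layout). + 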
self.assembler.emit_read_fpcr(old_fpcr); + let new_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(new_fpcr.clone()); + self.assembler + .emit_mov(Size::S64, Location::GPR(old_fpcr), Location::GPR(new_fpcr)); + // IOE is bit 8 of FPCR + self.assembler + .emit_bfc(Size::S64, 8, 1, Location::GPR(new_fpcr)); + self.assembler.emit_write_fpcr(new_fpcr); + old_fpcr + } fn restore_fpcr(&mut self, old_fpcr: GPR) { self.assembler.emit_write_fpcr(old_fpcr); } + + fn reset_exception_fpsr(&mut self) { + // reset the cumulative exception flag in FPSR + let fpsr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_read_fpsr(fpsr); + // IOC is bit 0 + self.assembler + .emit_bfc(Size::S64, 0, 1, Location::GPR(fpsr)); + self.assembler.emit_write_fpsr(fpsr); + self.release_gpr(fpsr); + } + fn read_fpsr(&mut self) -> GPR { + let fpsr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_read_fpsr(fpsr); + fpsr + } + + fn trap_float_convertion_errors( + &mut self, + old_fpcr: GPR, + sz: Size, + f: Location, + temps: &mut Vec<GPR>, + ) { + let trap_badconv = self.assembler.get_label(); + let end = self.assembler.get_label(); + + let fpsr = self.read_fpsr(); + temps.push(fpsr.clone()); + // no trap, then all good + self.assembler + .emit_tbz_label(Size::S32, Location::GPR(fpsr), 0, end); + // now need to check if it's overflow or NaN + self.assembler + .emit_bfc(Size::S64, 0, 4, Location::GPR(fpsr)); + self.restore_fpcr(old_fpcr); + self.assembler.emit_fcmp(sz, f, f); + self.assembler.emit_bcond_label(Condition::Vs, trap_badconv); + // fallthru: trap_overflow + let offset = self.assembler.get_offset().0; + self.trap_table + .offset_to_code + .insert(offset, TrapCode::IntegerOverflow); + self.emit_illegal_op(); + self.mark_instruction_address_end(offset); + + self.emit_label(trap_badconv); + let offset = self.assembler.get_offset().0; + self.trap_table + .offset_to_code + .insert(offset, TrapCode::BadConversionToInteger); + self.emit_illegal_op(); + self.mark_instruction_address_end(offset); + + self.emit_label(end); + self.restore_fpcr(old_fpcr); + } } impl Machine for MachineARM64 { @@ -4240,17 +4312,121 @@ impl Machine for MachineARM64 { self.release_simd(r); } } - fn convert_i64_f64(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { - unimplemented!(); + fn convert_i64_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_neon(Size::S64, loc, &mut neons, ImmType::None, true); + let dest = self.location_to_reg(Size::S64, ret, &mut gprs, ImmType::None, false, None); + let old_fpcr = if !sat { + self.reset_exception_fpsr(); + self.set_trap_enabled(&mut gprs) + } else { + GPR::XzrSp + }; + if signed { + self.assembler.emit_fcvtzs(Size::S64, src, Size::S64, dest); + } else { + self.assembler.emit_fcvtzu(Size::S64, src, Size::S64, dest); + } + if !sat { + self.trap_float_convertion_errors(old_fpcr, Size::S64, src, &mut gprs); + } + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_i32_f64(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { - unimplemented!(); + fn convert_i32_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_neon(Size::S64, loc, &mut neons, ImmType::None, true); + let dest = self.location_to_reg(Size::S32, ret, &mut gprs, ImmType::None, false, 
None); + let old_fpcr = if !sat { + self.reset_exception_fpsr(); + self.set_trap_enabled(&mut gprs) + } else { + GPR::XzrSp + }; + if signed { + self.assembler.emit_fcvtzs(Size::S64, src, Size::S32, dest); + } else { + self.assembler.emit_fcvtzu(Size::S64, src, Size::S32, dest); + } + if !sat { + self.trap_float_convertion_errors(old_fpcr, Size::S64, src, &mut gprs); + } + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_i64_f32(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { - unimplemented!(); + fn convert_i64_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_neon(Size::S32, loc, &mut neons, ImmType::None, true); + let dest = self.location_to_reg(Size::S64, ret, &mut gprs, ImmType::None, false, None); + let old_fpcr = if !sat { + self.reset_exception_fpsr(); + self.set_trap_enabled(&mut gprs) + } else { + GPR::XzrSp + }; + if signed { + self.assembler.emit_fcvtzs(Size::S32, src, Size::S64, dest); + } else { + self.assembler.emit_fcvtzu(Size::S32, src, Size::S64, dest); + } + if !sat { + self.trap_float_convertion_errors(old_fpcr, Size::S32, src, &mut gprs); + } + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } - fn convert_i32_f32(&mut self, _loc: Location, _ret: Location, _signed: bool, _sat: bool) { - unimplemented!(); + fn convert_i32_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { + let mut gprs = vec![]; + let mut neons = vec![]; + let src = self.location_to_neon(Size::S32, loc, &mut neons, ImmType::None, true); + let dest = self.location_to_reg(Size::S32, ret, &mut gprs, ImmType::None, false, None); + let old_fpcr = if !sat { + self.reset_exception_fpsr(); + self.set_trap_enabled(&mut gprs) + } else { + GPR::XzrSp + }; + if signed { + self.assembler.emit_fcvtzs(Size::S32, src, Size::S32, dest); + } else { + self.assembler.emit_fcvtzu(Size::S32, src, Size::S32, dest); + } + if !sat { + self.trap_float_convertion_errors(old_fpcr, Size::S32, src, &mut gprs); + } + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in gprs { + self.release_gpr(r); + } + for r in neons { + self.release_simd(r); + } } fn convert_f64_f32(&mut self, loc: Location, ret: Location) { self.emit_relaxed_binop_neon(Assembler::emit_fcvt, Size::S32, loc, ret, true); From da1d96b66edfb94e9c604cf52dc7fec0ddaf5edd Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 7 Jan 2022 18:20:44 +0100 Subject: [PATCH 28/34] improv(compiler) Fixed some mistakes (207 tests pass now, 0 failed) --- lib/compiler-singlepass/src/emitter_arm64.rs | 2 +- lib/compiler-singlepass/src/machine_arm64.rs | 47 +++++++++++--------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 5ab330f0d22..0667c29e934 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -2797,7 +2797,7 @@ pub fn gen_import_call_trampoline_arm64( if (offset > 0 && offset < 0xF8) || (offset > 0 && offset < 0x7FF8 && (offset & 7) == 0) { offset } else { - a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64); + a.emit_mov_imm(Location::GPR(GPR::X16), (offset as i64) as u64); a.emit_add( Size::S64, 
Location::GPR(GPR::X0), diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index bada8b5a05a..032c9da7333 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -129,7 +129,8 @@ impl MachineARM64 { temps.push(tmp.clone()); tmp }; - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); Location::GPR(tmp) } } @@ -179,7 +180,8 @@ impl MachineARM64 { if reg == tmp { unreachable!(); } - self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); self.assembler.emit_ldr( sz, Location::GPR(tmp), @@ -231,7 +233,8 @@ impl MachineARM64 { let gpr = self.acquire_temp_gpr().unwrap(); let tmp = self.acquire_temp_simd().unwrap(); temps.push(tmp.clone()); - self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov_imm(Location::GPR(gpr), (val as i64) as u64); self.assembler .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); self.release_gpr(gpr); @@ -271,7 +274,8 @@ impl MachineARM64 { self.assembler.emit_ldur(sz, Location::SIMD(tmp), reg, val); } else { let gpr = self.acquire_temp_gpr().unwrap(); - self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov_imm(Location::GPR(gpr), (val as i64) as u64); self.assembler.emit_ldr( sz, Location::SIMD(tmp), @@ -383,7 +387,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldr( Size::S64, dest, @@ -413,7 +417,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldr( Size::S32, dest, @@ -441,7 +445,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldrsw( Size::S64, dest, @@ -469,7 +473,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldrh( Size::S32, dest, @@ -497,7 +501,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldrsh( sz, dest, @@ -525,7 +529,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldrb( Size::S32, dest, @@ -553,7 +557,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_ldrsb( sz, dest, @@ -583,7 +587,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); 
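+ // Cast note: the intermediate i64 makes the sign-extension of negative 32-bit offsets explicit before they are materialized as a 64-bit immediate, e.g.: + //   let offset: i32 = -8; + //   assert_eq!((offset as i64) as u64, 0xFFFF_FFFF_FFFF_FFF8); + // (a zero-extending path would yield 0x0000_0000_FFFF_FFF8 instead)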
self.assembler.emit_str( Size::S64, dst, @@ -610,7 +614,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_str( Size::S32, dst, @@ -635,7 +639,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_strh( Size::S32, dst, @@ -661,7 +665,7 @@ impl MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_strb( Size::S32, dst, @@ -1440,7 +1444,7 @@ impl Machine for MachineARM64 { 5 => Location::GPR(GPR::X24), 6 => Location::GPR(GPR::X25), 7 => Location::GPR(GPR::X26), - _ => Location::Memory(GPR::X29, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)), + _ => Location::Memory(GPR::X29, -(((idx - 7) * 8 + callee_saved_regs_size) as i32)), } } // Move a local to the stack @@ -1451,7 +1455,7 @@ impl Machine for MachineARM64 { } else { let tmp = GPR::X17; self.assembler - .emit_mov_imm(Location::GPR(tmp), stack_offset as u64); + .emit_mov_imm(Location::GPR(tmp), (stack_offset as i64) as u64); self.assembler.emit_sub( Size::S64, Location::GPR(GPR::X29), @@ -1634,7 +1638,7 @@ impl Machine for MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_sub( Size::S64, Location::GPR(reg), @@ -1656,7 +1660,7 @@ impl Machine for MachineARM64 { } else { let tmp = self.acquire_temp_gpr().unwrap(); self.assembler - .emit_mov_imm(Location::GPR(tmp), offset as u64); + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); self.assembler.emit_add( Size::S64, Location::GPR(reg), @@ -1773,7 +1777,8 @@ impl Machine for MachineARM64 { } fn emit_function_return_float(&mut self) { - self.move_location(Size::S64, Location::GPR(GPR::X0), Location::SIMD(NEON::V0)); + self.assembler + .emit_mov(Size::S64, Location::GPR(GPR::X0), Location::SIMD(NEON::V0)); } fn arch_supports_canonicalize_nan(&self) -> bool { @@ -2037,7 +2042,7 @@ impl Machine for MachineARM64 { }, _ => { let mut temps = vec![]; - let src = self.location_to_reg(sz_dst, src, &mut temps, ImmType::None, true, None); + let src = self.location_to_reg(sz_src, src, &mut temps, ImmType::None, true, None); let dest = self.location_to_reg(sz_dst, dst, &mut temps, ImmType::None, false, None); match sz_src { From 0e2d8097d01e5b4bec26a6fa4faf2d6478904663 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 13 Jan 2022 12:30:25 +0100 Subject: [PATCH 29/34] imp(compiler) Add support for AppleAarch64 ABI to SinglePass --- lib/compiler-singlepass/src/codegen.rs | 158 +++++++++++----- lib/compiler-singlepass/src/emitter_arm64.rs | 59 +++++- lib/compiler-singlepass/src/machine.rs | 18 +- lib/compiler-singlepass/src/machine_arm64.rs | 180 ++++++++++++++++++- lib/compiler-singlepass/src/machine_x64.rs | 80 ++++++++- 5 files changed, 435 insertions(+), 60 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 3b4f1f0d70c..1064daa4060 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -470,7 +470,7 @@ impl<'a, M: Machine> 
FuncGen<'a, M> { fn init_locals( &mut self, n: usize, - n_params: usize, + sig: FunctionType, calling_convention: CallingConvention, ) -> Vec<Location<M::GPR, M::SIMD>> { // How many machine stack slots will all the locals use? @@ -560,15 +560,29 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Load in-register parameters into the allocated locations. // Locals are allocated on the stack from higher address to lower address, // so we won't skip the stack guard page here. - for i in 0..n_params { - let loc = self.machine.get_param_location(i + 1, calling_convention); - self.machine.move_location(Size::S64, loc, locations[i]); + let mut stack_offset: usize = 0; + for (i, param) in sig.params().iter().enumerate() { + let sz = match *param { + Type::I32 | Type::F32 => Size::S32, + Type::I64 | Type::F64 => Size::S64, + Type::ExternRef | Type::FuncRef => Size::S64, + _ => unimplemented!(), + }; + let loc = self.machine.get_call_param_location( + i + 1, + sz, + &mut stack_offset, + calling_convention, + ); + self.machine + .move_location_extend(sz, false, loc, Size::S64, locations[i]); } // Load vmctx into R15. self.machine.move_location( Size::S64, - self.machine.get_param_location(0, calling_convention), + self.machine + .get_simple_param_location(0, calling_convention), Location::GPR(self.machine.get_vmctx_reg()), ); @@ -576,14 +590,17 @@ impl<'a, M: Machine> FuncGen<'a, M> { // // `rep stosq` writes data from low address to high address and may skip the stack guard page. // so here we probe it explicitly when needed. - for i in (n_params..n).step_by(NATIVE_PAGE_SIZE / 8).skip(1) { + for i in (sig.params().len()..n) + .step_by(NATIVE_PAGE_SIZE / 8) + .skip(1) + { self.machine.zero_location(Size::S64, locations[i]); } // Initialize all normal locals to zero. let mut init_stack_loc_cnt = 0; let mut last_stack_loc = Location::Memory(self.machine.local_pointer(), i32::MAX); - for i in n_params..n { + for i in sig.params().len()..n { match locations[i] { Location::Memory(_, _) => { init_stack_loc_cnt += 1; @@ -699,15 +716,27 @@ impl<'a, M: Machine> FuncGen<'a, M> { /// /// The caller MUST NOT hold any temporary registers allocated by `acquire_temp_gpr` when calling /// this function. - fn emit_call_native<I: Iterator<Item = Location<M::GPR, M::SIMD>>, F: FnOnce(&mut Self)>( + fn emit_call_native< + I: Iterator<Item = Location<M::GPR, M::SIMD>>, + J: Iterator<Item = WpType>, + F: FnOnce(&mut Self), + >( &mut self, cb: F, params: I, + params_type: J, ) -> Result<(), CodegenError> { // Values pushed in this function are above the shadow region. self.state.stack_values.push(MachineValue::ExplicitShadow); let params: Vec<_> = params.collect(); + let params_size: Vec<_> = params_type + .map(|x| match x { + WpType::F32 | WpType::I32 => Size::S32, + WpType::V128 => unimplemented!(), + _ => Size::S64, + }) + .collect(); // Save used GPRs. Preserve correct stack alignment let mut used_stack = self.machine.push_used_gpr(); @@ -746,39 +775,37 @@ impl<'a, M: Machine> FuncGen<'a, M> { }; let mut stack_offset: usize = 0; - + let mut args: Vec<Location<M::GPR, M::SIMD>> = vec![]; + let mut pushed_args: usize = 0; // Calculate stack offset. for (i, _param) in params.iter().enumerate() { - if let Location::Memory(_, _) = - self.machine.get_param_location(1 + i, calling_convention) - { - stack_offset += 8; - } + args.push(self.machine.get_param_location( + 1 + i, + params_size[i], + &mut stack_offset, + calling_convention, + )); } // Align stack to 16 bytes. 
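+ // (AAPCS64 and SysV both require SP % 16 == 0 at the call site; the rewrite below computes the shortfall once and folds it into stack_offset, roughly: + //   if total % 16 != 0 { stack_offset += 16 - total % 16; } + // followed by a single adjust_stack(stack_offset), instead of pushing a lone 8-byte padding slot.)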
- if (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset) - % 16 - != 0 - { - if self.machine.round_stack_adjust(8) == 8 { - self.machine.adjust_stack(8); - } else { - self.machine.emit_push(Size::S64, Location::Imm32(0)); - } - stack_offset += 8; - self.state.stack_values.push(MachineValue::Undefined); + let stack_unaligned = + (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset) + % 16; + if stack_unaligned != 0 { + stack_offset += 16 - stack_unaligned; } + self.machine.adjust_stack(stack_offset as u32); let mut call_movs: Vec<(Location<M::GPR, M::SIMD>, M::GPR)> = vec![]; // Prepare register & stack parameters. for (i, param) in params.iter().enumerate().rev() { - let loc = self.machine.get_param_location(1 + i, calling_convention); + let loc = args[i]; match loc { Location::GPR(x) => { call_movs.push((*param, x)); } Location::Memory(_, _) => { + pushed_args += 1; match *param { Location::GPR(x) => { let content = self.state.register_values [x.to_index().0].clone(); @@ -813,7 +840,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(MachineValue::Undefined); } } - self.machine.push_location_for_native(*param); + self.machine.move_location(params_size[i], *param, loc); } _ => { return Err(CodegenError { @@ -838,17 +865,10 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.machine.move_location( Size::S64, Location::GPR(self.machine.get_vmctx_reg()), - self.machine.get_param_location(0, calling_convention), + self.machine + .get_simple_param_location(0, calling_convention), ); // vmctx - if self.machine.round_stack_adjust(8) == 8 { - if (self.state.stack_values.len() % 2) != 1 { - return Err(CodegenError { - message: "emit_call_native: explicit shadow takes one slot".to_string(), - }); - } - } - if stack_padding > 0 { self.machine.adjust_stack(stack_padding as u32); } @@ -884,7 +904,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { message: "emit_call_native: Bad restoring stack alignement".to_string(), }); } - for _ in 0..stack_offset / 8 { + for _ in 0..pushed_args { self.state.stack_values.pop().unwrap(); } } @@ -912,12 +932,20 @@ impl<'a, M: Machine> FuncGen<'a, M> { } /// Emits a System V call sequence, specialized for labels as the call target. - fn _emit_call_native_label<I: Iterator<Item = Location<M::GPR, M::SIMD>>>( + fn _emit_call_native_label< + I: Iterator<Item = Location<M::GPR, M::SIMD>>, + J: Iterator<Item = WpType>, + >( &mut self, label: Label, params: I, + params_type: J, ) -> Result<(), CodegenError> { - self.emit_call_native(|this| this.machine.emit_call_label(label), params)?; + self.emit_call_native( + |this| this.machine.emit_call_label(label), + params, + params_type, + )?; Ok(()) } @@ -964,7 +992,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Initialize locals. 
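+ // init_locals now receives the whole FunctionType instead of a bare parameter count, so each incoming argument can be read with its real size and zero-extended into its 64-bit local slot; this matters for the Apple AArch64 ABI, which packs small stack arguments instead of using fixed 8-byte slots.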
self.locals = self.init_locals( self.local_types.len(), - self.signature.params().len(), + self.signature.clone(), self.calling_convention, ); @@ -2583,6 +2611,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { this.machine.mark_instruction_address_end(offset); }, params.iter().copied(), + param_types.iter().copied(), )?; self.release_locations_only_stack(¶ms); @@ -2794,7 +2823,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { gpr_for_call, vmcaller_checked_anyfunc_vmctx as i32, ), - this.machine.get_param_location(0, calling_convention), + this.machine + .get_simple_param_location(0, calling_convention), ); this.machine.emit_call_location(Location::Memory( @@ -2805,6 +2835,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } }, params.iter().copied(), + param_types.iter().copied(), )?; self.release_locations_only_stack(¶ms); @@ -3038,6 +3069,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, memory_index] iter::once(Location::Imm32(memory_index.index() as u32)), + iter::once(WpType::I64), )?; let ret = self.acquire_locations( &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], @@ -3085,6 +3117,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I64, + WpType::I64, + WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst, src, len]); } @@ -3107,6 +3148,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, segment_index] iter::once(Location::Imm32(segment)), + iter::once(WpType::I64), )?; } Operator::MemoryCopy { src, dst } => { @@ -3157,6 +3199,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst_pos, src_pos, len]); } @@ -3201,6 +3246,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(memory_index.index() as u32), dst, val, len] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst, val, len]); } @@ -3235,6 +3283,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // [vmctx, val, memory_index] iter::once(param_pages) .chain(iter::once(Location::Imm32(memory_index.index() as u32))), + [WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[param_pages]); @@ -5432,6 +5481,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, func_index] -> funcref iter::once(Location::Imm32(function_index as u32)), + iter::once(WpType::I64), )?; let ret = self.acquire_locations( @@ -5490,6 +5540,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(table_index.index() as u32), index, value] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[index, value]); @@ -5524,6 +5575,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(table_index.index() as u32), index] .iter() .cloned(), + [WpType::I32, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[index]); @@ -5567,6 +5619,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, table_index] -> i32 iter::once(Location::Imm32(table_index.index() as u32)), + iter::once(WpType::I32), )?; let ret = self.acquire_locations( @@ -5616,6 +5669,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [WpType::I64, WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[init_value, delta]); @@ -5668,6 +5722,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I32, + WpType::I32, + 
WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, src, len]); @@ -5699,6 +5762,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, table_index, start_idx, item, len] [Location::Imm32(table), dest, val, len].iter().cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, val, len]); @@ -5737,6 +5803,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I32, + WpType::I32, + WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, src, len]); @@ -5762,6 +5837,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, elem_index] [Location::Imm32(segment)].iter().cloned(), + [WpType::I32].iter().cloned(), )?; } _ => { diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index 0667c29e934..7f1b5b1818b 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -2445,7 +2445,7 @@ impl EmitterARM64 for Assembler { pub fn gen_std_trampoline_arm64( sig: &FunctionType, - _calling_convention: CallingConvention, + calling_convention: CallingConvention, ) -> FunctionBody { let mut a = Assembler::new(0); @@ -2494,6 +2494,29 @@ pub fn gen_std_trampoline_arm64( ); } _ => { + match calling_convention { + CallingConvention::AppleAarch64 => { + match sz { + Size::S8 => (), + Size::S16 => { + if caller_stack_offset & 1 != 0 { + caller_stack_offset = (caller_stack_offset + 1) & !1; + } + } + Size::S32 => { + if caller_stack_offset & 3 != 0 { + caller_stack_offset = (caller_stack_offset + 3) & !3; + } + } + Size::S64 => { + if caller_stack_offset & 7 != 0 { + caller_stack_offset = (caller_stack_offset + 7) & !7; + } + } + }; + } + _ => (), + }; // using X16 as scratch reg a.emit_ldr( sz, @@ -2505,7 +2528,19 @@ pub fn gen_std_trampoline_arm64( Location::GPR(GPR::X16), Location::Memory(GPR::XzrSp, caller_stack_offset), ); - caller_stack_offset += 8; + match calling_convention { + CallingConvention::AppleAarch64 => { + caller_stack_offset += match sz { + Size::S8 => 1, + Size::S16 => 2, + Size::S32 => 4, + Size::S64 => 8, + }; + } + _ => { + caller_stack_offset += 8; + } + } } } } @@ -2579,12 +2614,28 @@ pub fn gen_std_dynamic_import_trampoline_arm64( Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr), Some(ARM64Register::NEON(neon)) => Location::SIMD(neon), None => { + let sz = match calling_convention { + CallingConvention::AppleAarch64 => match *ty { + Type::I32 | Type::F32 => Size::S32, + _ => { + if stack_param_count & 7 != 0 { + stack_param_count = (stack_param_count + 7) & !7; + }; + Size::S64 + } + }, + _ => Size::S64, + }; a.emit_ldr( - Size::S64, + sz, Location::GPR(GPR::X26), Location::Memory(GPR::XzrSp, (stack_offset + 16 + stack_param_count) as _), ); - stack_param_count += 8; + stack_param_count += match sz { + Size::S32 => 4, + Size::S64 => 8, + _ => unreachable!(), + }; Location::GPR(GPR::X26) } }; diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index e61e03e4184..a68daa91ec5 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -157,8 +157,24 @@ pub trait Machine { &self, calling_convention: CallingConvention, ) -> Vec>; - /// Get param location + /// Get param location (to build a call, using SP for stack args) fn get_param_location( + &self, + idx: usize, + sz: Size, + 
stack_offset: &mut usize, + calling_convention: CallingConvention, + ) -> Location; + /// Get call param location (from a call, using FP for stack args) + fn get_call_param_location( + &self, + idx: usize, + sz: Size, + stack_offset: &mut usize, + calling_convention: CallingConvention, + ) -> Location; + /// Get simple param location + fn get_simple_param_location( &self, idx: usize, calling_convention: CallingConvention, diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 032c9da7333..96fcf0e2a48 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -1472,8 +1472,118 @@ impl Machine for MachineARM64 { vec![] } - // Get param location - fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location { + // Get param location, MUST be called in order! + fn get_param_location( + &self, + idx: usize, + sz: Size, + stack_args: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::AppleAarch64 => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let sz = match sz { + Size::S8 => 0, + Size::S16 => 1, + Size::S32 => 2, + Size::S64 => 3, + }; + // align first + if sz > 1 { + if *stack_args & !((1 << sz) - 1) != 0 { + *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1); + } + } + let loc = Location::Memory(GPR::XzrSp, *stack_args as i32); + *stack_args += 1 << sz; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let loc = Location::Memory(GPR::XzrSp, *stack_args as i32); + *stack_args += 8; + loc + } + }, + } + } + // Get call param location, MUST be called in order! 
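+ // (Incoming stack arguments are addressed off the frame pointer: they sit above the two register pairs saved in the prologue, X29/X30 and X27/X28, hence the 16 * 2 base offset below; get_param_location, by contrast, addresses outgoing arguments off SP while the call frame is being built.)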
+ fn get_call_param_location( + &self, + idx: usize, + sz: Size, + stack_args: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::AppleAarch64 => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let sz = match sz { + Size::S8 => 0, + Size::S16 => 1, + Size::S32 => 2, + Size::S64 => 3, + }; + // align first + if sz > 1 { + if *stack_args & !((1 << sz) - 1) != 0 { + *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1); + } + } + let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32); + *stack_args += 1 << sz; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32); + *stack_args += 8; + loc + } + }, + } + } + // Get simple param location, Will not be accurate for Apple calling convention on "stack" arguments + fn get_simple_param_location( + &self, + idx: usize, + calling_convention: CallingConvention, + ) -> Location { match calling_convention { _ => match idx { 0 => Location::GPR(GPR::X0), @@ -1529,6 +1639,12 @@ impl Machine for MachineARM64 { }, Location::Imm8(_) => match dest { Location::GPR(_) => self.assembler.emit_mov(size, source, dest), + Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, _ => panic!( "singlepass can't emit move_location {:?} {:?} => {:?}", size, source, dest @@ -1536,6 +1652,12 @@ impl Machine for MachineARM64 { }, Location::Imm32(val) => match dest { Location::GPR(_) => self.assembler.emit_mov_imm(dest, val as u64), + Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, _ => panic!( "singlepass can't emit move_location {:?} {:?} => {:?}", size, source, dest @@ -1543,6 +1665,12 @@ impl Machine for MachineARM64 { }, Location::Imm64(val) => match dest { Location::GPR(_) => self.assembler.emit_mov_imm(dest, val), + Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, _ => panic!( "singlepass can't emit move_location {:?} {:?} => {:?}", size, source, dest @@ -1597,13 +1725,49 @@ impl Machine for MachineARM64 { // move a location to another fn move_location_extend( &mut self, - _size_val: Size, - _signed: bool, - _source: Location, - _size_op: Size, - _dest: Location, + size_val: Size, + signed: bool, + source: Location, + size_op: Size, + dest: Location, ) { - unimplemented!(); + if size_op != Size::S64 { + unreachable!(); + } + let mut temps = vec![]; + let dst = self.location_to_reg(size_op, dest, &mut temps, ImmType::None, false, None); + let src = match 
(size_val, signed, source) { + (Size::S64, _, _) => source, + (Size::S32, false, Location::GPR(_)) => { + self.assembler.emit_mov(size_val, source, dst); + dst + } + (Size::S32, true, Location::GPR(_)) => { + self.assembler.emit_sxtw(size_val, source, dst); + dst + } + (Size::S32, false, Location::Memory(_, _)) => { + self.emit_relaxed_ldr32(size_op, dst, source); + dst + } + (Size::S32, true, Location::Memory(_, _)) => { + self.emit_relaxed_ldr32s(size_op, dst, source); + dst + } + _ => panic!( + "singlepass can't emit move_location_extend {:?} {:?} {:?} => {:?} {:?}", + size_val, signed, source, size_op, dest + ), + }; + if src != dst { + self.move_location(size_op, src, dst); + } + if dst != dest { + self.move_location(size_op, dst, dest); + } + for r in temps { + self.release_gpr(r); + } } fn load_address(&mut self, _size: Size, _reg: Location, _mem: Location) { unimplemented!(); diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 627fb1b670f..8678d72d272 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1877,14 +1877,80 @@ impl Machine for MachineX86_64 { } // Get param location - fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location { + fn get_param_location( + &self, + idx: usize, + _sz: Size, + stack_location: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::WindowsFastcall => match idx { + 0 => Location::GPR(GPR::RCX), + 1 => Location::GPR(GPR::RDX), + 2 => Location::GPR(GPR::R8), + 3 => Location::GPR(GPR::R9), + _ => { + let loc = Location::Memory(GPR::RSP, *stack_location as i32); + *stack_location += 8; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::RDI), + 1 => Location::GPR(GPR::RSI), + 2 => Location::GPR(GPR::RDX), + 3 => Location::GPR(GPR::RCX), + 4 => Location::GPR(GPR::R8), + 5 => Location::GPR(GPR::R9), + _ => { + let loc = Location::Memory(GPR::RSP, *stack_location as i32); + *stack_location += 8; + loc + } + }, + } + } + // Get call param location + fn get_call_param_location( + &self, + idx: usize, + _sz: Size, + _stack_location: &mut usize, + calling_convention: CallingConvention, + ) -> Location { match calling_convention { CallingConvention::WindowsFastcall => match idx { 0 => Location::GPR(GPR::RCX), 1 => Location::GPR(GPR::RDX), 2 => Location::GPR(GPR::R8), 3 => Location::GPR(GPR::R9), - _ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32), + _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32), + }, + _ => match idx { + 0 => Location::GPR(GPR::RDI), + 1 => Location::GPR(GPR::RSI), + 2 => Location::GPR(GPR::RDX), + 3 => Location::GPR(GPR::RCX), + 4 => Location::GPR(GPR::R8), + 5 => Location::GPR(GPR::R9), + _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32), + }, + } + } + // Get simple param location + fn get_simple_param_location( + &self, + idx: usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::WindowsFastcall => match idx { + 0 => Location::GPR(GPR::RCX), + 1 => Location::GPR(GPR::RDX), + 2 => Location::GPR(GPR::R8), + 3 => Location::GPR(GPR::R9), + _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32), }, _ => match idx { 0 => Location::GPR(GPR::RDI), @@ -6542,7 +6608,9 @@ impl Machine for MachineX86_64 { // Calculate stack offset. 
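+ // The trampoline only needs a register-vs-stack split here, and on both SysV and WindowsFastcall every stack argument occupies a full 8-byte slot, so the size-agnostic get_simple_param_location is sufficient; the size-aware get_param_location exists for Apple AArch64, which packs stack arguments by size.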
let mut stack_offset: u32 = 0; for (i, _param) in sig.params().iter().enumerate() { - if let Location::Memory(_, _) = self.get_param_location(1 + i, calling_convention) { + if let Location::Memory(_, _) = + self.get_simple_param_location(1 + i, calling_convention) + { stack_offset += 8; } } @@ -6570,12 +6638,12 @@ impl Machine for MachineX86_64 { // Arguments a.emit_mov( Size::S64, - self.get_param_location(1, calling_convention), + self.get_simple_param_location(1, calling_convention), Location::GPR(GPR::R15), ); // func_ptr a.emit_mov( Size::S64, - self.get_param_location(2, calling_convention), + self.get_simple_param_location(2, calling_convention), Location::GPR(GPR::R14), ); // args_rets @@ -6585,7 +6653,7 @@ impl Machine for MachineX86_64 { let mut n_stack_args: usize = 0; for (i, _param) in sig.params().iter().enumerate() { let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i] - let dst_loc = self.get_param_location(1 + i, calling_convention); + let dst_loc = self.get_simple_param_location(1 + i, calling_convention); match dst_loc { Location::GPR(_) => { From c8ae8c8967549fda916a46c4b15c832918b2efe5 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 13 Jan 2022 13:51:30 +0100 Subject: [PATCH 30/34] imp(feature) Fixed x64 backend of Singlepass --- lib/compiler-singlepass/src/codegen.rs | 7 +++- lib/compiler-singlepass/src/machine_x64.rs | 38 +++++++++++++++------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 1064daa4060..7b57acac255 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -767,6 +767,10 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(content); } } + // mark the GPR used for Call as used + self.machine + .reserve_unused_temp_gpr(self.machine.get_grp_for_call()); + let calling_convention = self.calling_convention; let stack_padding: usize = match calling_convention { @@ -872,7 +876,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { if stack_padding > 0 { self.machine.adjust_stack(stack_padding as u32); } - + // release the GPR used for call + self.machine.release_gpr(self.machine.get_grp_for_call()); cb(self); // Offset needs to be after the 'call' instruction. 
diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 8678d72d272..fd0fd152523 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1974,9 +1974,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), dest); } _ => unreachable!(), }, @@ -1985,9 +1985,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), dest); } _ => unreachable!(), }, @@ -1996,9 +1996,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), dest); } _ => unreachable!(), }, @@ -2017,21 +2017,35 @@ impl Machine for MachineX86_64 { size_op: Size, dest: Location, ) { + let dst = match dest { + Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { + Location::GPR(self.acquire_temp_gpr().unwrap()) + } + Location::GPR(_) | Location::SIMD(_) => dest, + _ => unreachable!(), + }; match source { Location::GPR(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { match size_val { - Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dest), + Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dst), Size::S16 | Size::S8 => { if signed { - self.assembler.emit_movsx(size_val, source, size_op, dest) + self.assembler.emit_movsx(size_val, source, size_op, dst) } else { - self.assembler.emit_movzx(size_val, source, size_op, dest) + self.assembler.emit_movzx(size_val, source, size_op, dst) } } } } _ => unreachable!(), } + if dst != dest { + self.assembler.emit_mov(size_op, dst, dest); + match dst { + Location::GPR(x) => self.release_gpr(x), + _ => unreachable!(), + }; + } } fn load_address(&mut self, size: Size, reg: Location, mem: Location) { match reg { From ab3e978d1941fc2b84b6567a2af9667eb09b2cd3 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 13 Jan 2022 14:41:51 +0100 Subject: [PATCH 31/34] feat(compiler) Added entry in ChangeLog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 589142e77d2..71d9a0c2511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Looking for changes that affect our C API? See the [C API Changelog](lib/c-api/CHANGELOG.md). ## **[Unreleased]** +- [#2750](https://github.com/wasmerio/wasmer/pull/2750) Added Aarch64 support to Singlepass (both Linux and macOS). 
## 2.1.1 - 2021/12/20 From 7f4fd02774fd6546155fa1b0b87b8d3470916e2c Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 13 Jan 2022 14:47:31 +0100 Subject: [PATCH 32/34] feat(compiler) Adapted/Fixed/Removed some comments in SinglePass codegen --- lib/compiler-singlepass/src/codegen.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 7b57acac255..c9f98b30b64 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -491,7 +491,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } } - // Callee-saved R15 for vmctx. + // Callee-saved vmctx. static_area_size += 8; // Some ABI (like Windows) needs extrat reg save @@ -578,7 +578,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { .move_location_extend(sz, false, loc, Size::S64, locations[i]); } - // Load vmctx into R15. + // Load vmctx into its GPR. self.machine.move_location( Size::S64, self.machine @@ -710,9 +710,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { ); } - /// Emits a System V / Windows call sequence. - /// - /// This function will not use RAX before `cb` is called. + /// Emits a Native ABI call sequence. /// /// The caller MUST NOT hold any temporary registers allocated by `acquire_temp_gpr` when calling /// this function. @@ -751,7 +749,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(content); } - // Save used XMM registers. + // Save used SIMD registers. let used_simds = self.machine.get_used_simd(); if used_simds.len() > 0 { used_stack += self.machine.push_used_simd(); @@ -914,7 +912,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } } - // Restore XMMs. + // Restore SIMDs. if !used_simds.is_empty() { self.machine.pop_used_simd(); for _ in 0..used_simds.len() { @@ -936,7 +934,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { Ok(()) } - /// Emits a System V call sequence, specialized for labels as the call target. + /// Emits a Native ABI call sequence, specialized for labels as the call target. fn _emit_call_native_label< I: Iterator<Item = Location<M::GPR, M::SIMD>>, J: Iterator<Item = WpType>, @@ -1503,7 +1501,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.machine.emit_binop_mul64(loc_a, loc_b, ret); } Operator::I64DivU => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_udiv64( loc_a, @@ -1515,7 +1512,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.mark_offset_trappable(offset); } Operator::I64DivS => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_sdiv64( loc_a, @@ -1527,7 +1523,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.mark_offset_trappable(offset); } Operator::I64RemU => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_urem64( loc_a, @@ -1539,7 +1534,6 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.mark_offset_trappable(offset); } Operator::I64RemS => { - // We assume that RAX and RDX are temporary registers here. 
From 80f9c8fed90869b567ee124d7947f283165bd7e3 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 17 Jan 2022 09:14:26 +0100
Subject: [PATCH 33/34] feat(compiler) Small adaptation after comments

---
 lib/compiler-singlepass/src/arm64_decl.rs    |   7 +-
 lib/compiler-singlepass/src/emitter_arm64.rs | 223 ++++---------------
 lib/compiler-singlepass/src/machine_arm64.rs |   2 +-
 3 files changed, 42 insertions(+), 190 deletions(-)

diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs
index b8a1fe8e508..400f705daaf 100644
--- a/lib/compiler-singlepass/src/arm64_decl.rs
+++ b/lib/compiler-singlepass/src/arm64_decl.rs
@@ -132,10 +132,7 @@ impl AbstractReg for GPR {
             GPR::X30,
             GPR::XzrSp,
         ];
-        match n {
-            0..=31 => Ok(REGS[n]),
-            _ => Err(()),
-        }
+        REGS.get(n).cloned().ok_or(())
     }
 }
 
@@ -185,7 +182,7 @@ impl AbstractReg for NEON {
             NEON::V31,
         ];
         match n {
-            0..=15 => Ok(REGS[n]),
+            0..=31 => Ok(REGS[n]),
             _ => Err(()),
         }
     }
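Two genuine fixes in the hunks above: GPR::from_index now derives its bounds check from the table itself via the slice API, and NEON::from_index previously rejected indices 16..=31 even though all thirty-two V registers exist. A stand-alone illustration of the get(n).cloned().ok_or(()) idiom on a shortened register table; the four-entry enum is mine, for brevity:

// The `match n { 0..=31 => Ok(REGS[n]), _ => Err(()) }` pattern duplicates
// the array length by hand; `get` derives it from the array itself, so the
// bound can never drift out of sync the way the NEON 0..=15 bound did.
#[derive(Copy, Clone, Debug, PartialEq)]
enum Reg { R0, R1, R2, R3 } // stand-in for the 32-entry GPR/NEON tables

fn from_index(n: usize) -> Result<Reg, ()> {
    const REGS: [Reg; 4] = [Reg::R0, Reg::R1, Reg::R2, Reg::R3];
    // Out-of-range indices become Err(()) without a hand-written bound.
    REGS.get(n).cloned().ok_or(())
}

fn main() {
    assert_eq!(from_index(2), Ok(Reg::R2));
    assert_eq!(from_index(4), Err(()));
}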
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 7f1b5b1818b..faeb224fb62 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -17,7 +17,7 @@ use wasmer_vm::VMOffsets;
 
 type Assembler = VecAssembler<Aarch64Relocation>;
 
-/// Force `dynasm!` to use the correct arch (x64) when cross-compiling.
+/// Force `dynasm!` to use the correct arch (aarch64) when cross-compiling.
 /// `dynasm!` proc-macro tries to auto-detect it by default by looking at the
 /// `target_arch`, but it sees the `target_arch` of the proc-macro itself, which
 /// is always equal to host, even when cross-compiling.
@@ -125,9 +125,6 @@ pub trait EmitterARM64 {
 
     fn emit_add_lsl(&mut self, sz: Size, src1: Location, src2: Location, lsl: u32, dst: Location);
 
-    fn emit_add2(&mut self, sz: Size, src: Location, dst: Location);
-    fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location);
-
     fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location);
     fn emit_tst(&mut self, sz: Size, src: Location, dst: Location);
 
@@ -249,54 +246,42 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x7) != 0 || (disp >= 0x8000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                 dynasm!(self ; str X(reg), [X(addr), disp]);
             }
             (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x3) != 0 || (disp >= 0x4000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                 dynasm!(self ; str W(reg), [X(addr), disp]);
             }
             (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x1) != 0 || (disp >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x1) == 0 && (disp < 0x2000));
                 dynasm!(self ; strh W(reg), [X(addr), disp]);
             }
             (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if disp >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(disp < 0x1000);
                 dynasm!(self ; strb W(reg), [X(addr), disp]);
             }
             (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x7) != 0 || (disp >= 0x8000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                 dynasm!(self ; str D(reg), [X(addr), disp]);
             }
             (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x3) != 0 || (disp >= 0x4000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                 dynasm!(self ; str S(reg), [X(addr), disp]);
             }
             _ => panic!("singlepass can't emit STR {:?}, {:?}, {:?}", sz, reg, addr),
@@ -307,36 +292,28 @@ impl EmitterARM64 for Assembler {
             (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
-                if (disp & 0x7) != 0 || (disp >= 0x8000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                 let disp = disp as u32;
                 dynasm!(self ; ldr X(reg), [X(addr), disp]);
             }
             (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
-                if (disp & 0x3) != 0 || (disp >= 0x4000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                 let disp = disp as u32;
                 dynasm!(self ; ldr W(reg), [X(addr), disp]);
             }
             (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
-                if (disp & 0x1 != 0) || (disp >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x1 == 0) && (disp < 0x2000));
                 let disp = disp as u32;
                 dynasm!(self ; ldrh W(reg), [X(addr), disp]);
             }
             (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
-                if disp >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(disp < 0x1000);
                 let disp = disp as u32;
                 dynasm!(self ; ldrb W(reg), [X(addr), disp]);
             }
@@ -344,9 +321,7 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldr X(reg), [X(addr)]),
@@ -358,27 +333,21 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x7) != 0 || (disp >= 0x8000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                 dynasm!(self ; ldr D(reg), [X(addr), disp]);
             }
             (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let disp = disp as u32;
-                if (disp & 0x3) != 0 || (disp >= 0x4000) {
-                    unreachable!();
-                }
+                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                 dynasm!(self ; ldr S(reg), [X(addr), disp]);
             }
             _ => panic!("singlepass can't emit LDR {:?}, {:?}, {:?}", sz, reg, addr),
         }
     }
     fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) {
-        if (offset < -255) || (offset > 255) {
-            unreachable!();
-        }
+        assert!((offset >= -255) && (offset <= 255));
         match (sz, reg) {
             (Size::S64, Location::GPR(reg)) => {
                 let reg = reg.into_index() as u32;
@@ -402,9 +371,7 @@ impl EmitterARM64 for Assembler {
         }
     }
     fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) {
-        if (offset < -255) || (offset > 255) {
-            unreachable!();
-        }
+        assert!((offset >= -255) && (offset <= 255));
         match (sz, reg) {
             (Size::S64, Location::GPR(reg)) => {
                 let reg = reg.into_index() as u32;
@@ -429,9 +396,7 @@ impl EmitterARM64 for Assembler {
     }
     fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
-        if offset > 255 {
-            unreachable!();
-        }
+        assert!(offset <= 255);
         match (sz, reg) {
             (Size::S64, Location::GPR(reg)) => {
                 let reg = reg.into_index() as u32;
@@ -447,9 +412,7 @@ impl EmitterARM64 for Assembler {
         }
     }
     fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
-        if offset > 255 {
-            unreachable!();
-        }
+        assert!(offset <= 255);
         match (sz, reg) {
             (Size::S64, Location::GPR(reg)) => {
                 let reg = reg.into_index() as u32;
@@ -465,9 +428,7 @@ impl EmitterARM64 for Assembler {
         }
     }
     fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
-        if offset > 255 {
-            unreachable!();
-        }
+        assert!(offset <= 255);
         match (sz, reg) {
             (Size::S64, Location::GPR(reg)) => {
                 let reg = reg.into_index() as u32;
@@ -484,9 +445,7 @@ impl EmitterARM64 for Assembler {
     }
 
     fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) {
-        if offset > 255 {
-            unreachable!();
-        }
+        assert!(offset <= 255);
         match (sz, reg1, reg2) {
             (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => {
                 let reg1 = reg1.into_index() as u32;
@@ -498,9 +457,7 @@ impl EmitterARM64 for Assembler {
         }
     }
     fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) {
-        if offset > 255 {
-            unreachable!();
-        }
+        assert!(offset <= 255);
         match (sz, reg1, reg2) {
             (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => {
                 let reg1 = reg1.into_index() as u32;
@@ -518,18 +475,14 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if offset >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(offset < 0x1000);
                 dynasm!(self ; ldrb W(reg), [X(addr), offset]);
             }
             (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrb W(reg), [X(addr)]),
@@ -546,18 +499,14 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if (offset & 1 != 0) || (offset >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((offset & 1 == 0) && (offset < 0x2000));
                 dynasm!(self ; ldrh W(reg), [X(addr), offset]);
             }
             (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrh W(reg), [X(addr)]),
@@ -574,27 +523,21 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if offset >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(offset < 0x1000);
                 dynasm!(self ; ldrsb X(reg), [X(addr), offset]);
             }
             (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if offset >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(offset < 0x1000);
                 dynasm!(self ; ldrsb W(reg), [X(addr), offset]);
             }
             (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrsb X(reg), [X(addr)]),
@@ -606,9 +549,7 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrsb W(reg), [X(addr)]),
@@ -625,27 +566,21 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if (offset & 1 != 0) || (offset >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((offset & 1 == 0) && (offset < 0x2000));
                 dynasm!(self ; ldrsh X(reg), [X(addr), offset]);
             }
             (Size::S32, Location::GPR(reg), Location::Memory(addr, offset)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if (offset & 1 != 0) || (offset >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((offset & 1 == 0) && (offset < 0x2000));
                 dynasm!(self ; ldrsh W(reg), [X(addr), offset]);
             }
             (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrsh X(reg), [X(addr)]),
@@ -657,9 +592,7 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrsh W(reg), [X(addr)]),
@@ -676,18 +609,14 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if (offset & 3 != 0) || (offset >= 0x4000) {
-                    unreachable!();
-                }
+                assert!((offset & 3 == 0) && (offset < 0x4000));
                 dynasm!(self ; ldrsw X(reg), [X(addr), offset]);
             }
             (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; ldrsw X(reg), [X(addr)]),
@@ -704,18 +633,14 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if offset >= 0x1000 {
-                    unreachable!();
-                }
+                assert!(offset < 0x1000);
                 dynasm!(self ; strb W(reg), [X(addr), offset]);
             }
             (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; strb W(reg), [X(addr)]),
@@ -732,18 +657,14 @@ impl EmitterARM64 for Assembler {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let offset = offset as u32;
-                if (offset & 1 != 0) || (offset >= 0x2000) {
-                    unreachable!();
-                }
+                assert!((offset & 1 == 0) && (offset < 0x2000));
                 dynasm!(self ; strh W(reg), [X(addr), offset]);
             }
             (Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;
                 let r2 = r2.into_index() as u32;
-                if offs != 0 {
-                    unreachable!();
-                }
+                assert!(offs == 0);
                 let mult = mult as u32;
                 match mult {
                     0 => dynasm!(self ; strh W(reg), [X(addr)]),
@@ -1119,72 +1040,6 @@ impl EmitterARM64 for Assembler {
             ),
         }
     }
-    fn emit_add2(&mut self, sz: Size, src: Location, dst: Location) {
-        match (sz, src, dst) {
-            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
-                let src = src.into_index() as u32;
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; add X(dst), X(dst), X(src));
-            }
-            (Size::S64, Location::Imm32(src), Location::GPR(dst)) => {
-                let src = src as u32;
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; add X(dst), X(dst), src);
-            }
-            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
-                let src = src.into_index() as u32;
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; add W(dst), W(dst), W(src));
-            }
-            (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => {
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; add X(dst), X(dst), imm as u32);
-            }
-            (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => {
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; add W(dst), W(dst), imm as u32);
-            }
-            (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => {
-                let dst = dst.into_index() as u32;
-                if imm >= 0x1000 {
-                    unreachable!();
-                }
-                dynasm!(self ; add W(dst), W(dst), imm);
-            }
-            _ => panic!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst),
-        }
-    }
-    fn emit_sub2(&mut self, sz: Size, src: Location, dst: Location) {
-        match (sz, src, dst) {
-            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
-                let src = src.into_index() as u32;
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; sub X(dst), X(dst), X(src));
-            }
-            (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => {
-                let imm = imm as u32;
-                let dst = dst.into_index() as u32;
-                if imm >= 0x1000 {
-                    unreachable!();
-                }
-                dynasm!(self ; sub X(dst), X(dst), imm);
-            }
-            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
-                let src = src.into_index() as u32;
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; sub W(dst), W(dst), W(src));
-            }
-            (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => {
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; sub X(dst), X(dst), imm as u32);
-            }
-            (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => {
-                let dst = dst.into_index() as u32;
-                dynasm!(self ; sub W(dst), W(dst), imm as u32);
-            }
-            _ => panic!("singlepass can't emit SUB {:?} {:?} {:?}", sz, src, dst),
-        }
-    }
     fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location) {
         match (sz, src, dst) {
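Every if/unreachable! block this patch rewrites as an assert! enforces the same encoding rule: the unsigned immediate offset of an AArch64 LDR/STR is a 12-bit field implicitly scaled by the access size, so an N-byte access can only encode N-aligned displacements below N * 4096 — hence the 0x1000/0x2000/0x4000/0x8000 bounds and the alignment masks above. Restated as a stand-alone predicate (the helper name is mine, not Wasmer's):

// An N-byte AArch64 load/store with unsigned immediate addressing accepts
// only N-aligned offsets in 0..N*4096 (the 12-bit field is scaled by N).
fn fits_unsigned_offset(disp: u32, access_bytes: u32) -> bool {
    disp % access_bytes == 0 && disp < access_bytes * 4096
}

fn main() {
    assert!(fits_unsigned_offset(0x7ff8, 8));  // max 64-bit offset
    assert!(!fits_unsigned_offset(0x8000, 8)); // out of range for 64-bit
    assert!(!fits_unsigned_offset(4, 8));      // misaligned for 64-bit
    assert!(fits_unsigned_offset(0x3ffc, 4));  // max 32-bit offset
}

Turning these checks into assert!s also documents intent better: an out-of-range displacement is a compiler bug, not an unreachable code path.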
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index 96fcf0e2a48..eca8e1f5777 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -3153,7 +3153,7 @@ impl Machine for MachineARM64 {
             offset: reloc_at as u32,
             addend: 0,
         });
-        self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 0);
+        self.assembler.emit_movz(Location::GPR(GPR::X27), 0);
         let reloc_at = self.assembler.get_offset().0;
         relocations.push(Relocation {
             kind: RelocationKind::Arm64Movw1,
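The one-line machine_arm64.rs fix above matters because AArch64 builds wide constants from 16-bit halfwords: the first halfword of a relocatable 64-bit constant must be emitted with MOVZ, which zeroes the rest of the register, while the following halfwords use MOVK to keep the bits already placed. Starting with MOVK, as the code did before, would inherit whatever X27 previously held. A sketch of the shape of that sequence — the assembly strings are illustrative; the real emitter goes through dynasm and attaches the Arm64Movw relocations seen above:

// Materialize a 64-bit constant 16 bits at a time: MOVZ for halfword 0
// (clears the whole register), MOVK for halfwords 1..=3 (keep other bits).
fn mov_const64(reg: &str, value: u64) -> Vec<String> {
    (0..4)
        .map(|half| {
            let imm16 = (value >> (16 * half)) & 0xffff;
            let op = if half == 0 { "movz" } else { "movk" };
            format!("{} {}, #{:#06x}, lsl #{}", op, reg, imm16, 16 * half)
        })
        .collect()
}

fn main() {
    for line in mov_const64("x27", 0x1234_5678_9abc_def0) {
        println!("{}", line);
    }
}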
From 61609c9da31d45094933e7b5da7966eccda17ceb Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Wed, 19 Jan 2022 11:25:46 +0100
Subject: [PATCH 34/34] improv(compiler) Added a few more arm64 emitters, just in case

---
 lib/compiler-singlepass/src/emitter_arm64.rs | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index faeb224fb62..51722a051a1 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -284,6 +284,30 @@ impl EmitterARM64 for Assembler {
                 assert!((disp & 0x3) == 0 && (disp < 0x4000));
                 dynasm!(self ; str S(reg), [X(addr), disp]);
             }
+            (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
+                let reg = reg.into_index() as u32;
+                let addr = addr.into_index() as u32;
+                let r2 = r2.into_index() as u32;
+                assert!(offs == 0);
+                let mult = mult as u32;
+                match mult {
+                    0 => dynasm!(self ; str X(reg), [X(addr)]),
+                    1 => dynasm!(self ; str X(reg), [X(addr), X(r2)]),
+                    _ => dynasm!(self ; str X(reg), [X(addr), X(r2), LSL mult]),
+                };
+            }
+            (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
+                let reg = reg.into_index() as u32;
+                let addr = addr.into_index() as u32;
+                let r2 = r2.into_index() as u32;
+                assert!(offs == 0);
+                let mult = mult as u32;
+                match mult {
+                    0 => dynasm!(self ; str W(reg), [X(addr)]),
+                    1 => dynasm!(self ; str W(reg), [X(addr), X(r2)]),
+                    _ => dynasm!(self ; str W(reg), [X(addr), X(r2), LSL mult]),
+                };
+            }
             _ => panic!("singlepass can't emit STR {:?}, {:?}, {:?}", sz, reg, addr),
         }
     }
@@ -329,6 +353,18 @@ impl EmitterARM64 for Assembler {
                     _ => dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]),
                 };
             }
+            (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
+                let reg = reg.into_index() as u32;
+                let addr = addr.into_index() as u32;
+                let r2 = r2.into_index() as u32;
+                assert!(offs == 0);
+                let mult = mult as u32;
+                match mult {
+                    0 => dynasm!(self ; ldr W(reg), [X(addr)]),
+                    1 => dynasm!(self ; ldr W(reg), [X(addr), X(r2)]),
+                    _ => dynasm!(self ; ldr W(reg), [X(addr), X(r2), LSL mult]),
+                };
+            }
             (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                 let reg = reg.into_index() as u32;
                 let addr = addr.into_index() as u32;