diff --git a/lib/compiler-singlepass/Cargo.toml b/lib/compiler-singlepass/Cargo.toml index d512297620c..ac2d36e7813 100644 --- a/lib/compiler-singlepass/Cargo.toml +++ b/lib/compiler-singlepass/Cargo.toml @@ -32,6 +32,8 @@ target-lexicon = { version = "0.12.2", default-features = false } maintenance = { status = "actively-developed" } [features] -default = ["std", "rayon"] +default = ["std", "rayon", "avx"] std = ["wasmer-compiler/std", "wasmer-types/std"] core = ["hashbrown", "wasmer-types/core"] +sse = [] +avx = [] diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 0e0b08f8cb4..ed53ef90292 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -69,13 +69,16 @@ impl Compiler for SinglepassCompiler { )) } } - if target.triple().architecture == Architecture::X86_64 - && !target.cpu_features().contains(CpuFeature::AVX) - { + + let simd_arch = if target.cpu_features().contains(CpuFeature::AVX) { + CpuFeature::AVX + } else if target.cpu_features().contains(CpuFeature::SSE42) { + CpuFeature::SSE42 + } else { return Err(CompileError::UnsupportedTarget( - "x86_64 without AVX".to_string(), + "x86_64 without AVX or SSE 4.2".to_string(), )); - } + }; if compile_info.features.multi_value { return Err(CompileError::UnsupportedFeature("multivalue".to_string())); } @@ -131,7 +134,7 @@ impl Compiler for SinglepassCompiler { match target.triple().architecture { Architecture::X86_64 => { - let machine = MachineX86_64::new(); + let machine = MachineX86_64::new(Some(simd_arch)); let mut generator = FuncGen::new( module, &self.config, diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 71f1b9128ba..7c773007753 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -2,13 +2,11 @@ use crate::common_decl::Size; use crate::location::Location as AbstractLocation; pub use crate::location::Multiplier; pub use crate::machine::{Label, Offset}; +use crate::machine_x64::AssemblerX64; pub use crate::x64_decl::{GPR, XMM}; use dynasm::dynasm; -use dynasmrt::{ - x64::X64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, VecAssembler, -}; - -type Assembler = VecAssembler; +use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; +use wasmer_compiler::CpuFeature; /// Force `dynasm!` to use the correct arch (x64) when cross-compiling. /// `dynasm!` proc-macro tries to auto-detect it by default by looking at the @@ -17,7 +15,7 @@ type Assembler = VecAssembler; macro_rules! dynasm { ($a:expr ; $($tt:tt)*) => { dynasm::dynasm!( - $a + $a.inner ; .arch x64 ; $($tt)* ) @@ -57,7 +55,13 @@ pub enum GPROrMemory { Memory(GPR, i32), } +pub enum Precision { + Single, + Double, +} + pub trait EmitterX64 { + fn get_simd_arch(&self) -> Option<&CpuFeature>; fn get_label(&mut self) -> Label; fn get_offset(&self) -> Offset; fn get_jmp_instr_size(&self) -> u8; @@ -471,154 +475,289 @@ macro_rules! jmp_op { } } +/// Move a single or double precision XMM value to another if src and destination +/// are not the same. +/// +/// TODO: Can we assume data is aligned and packed? 
If so, this function isn't necessary +/// TODO: as we can use [`EmitterX64::emit_vmovaps`] and [`EmitterX64::emit_vmovadp`] +/// TODO: instead +fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, dst: XMM) { + if src == dst { + return; + } + match precision { + Precision::Single => match src { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15), + }, + Precision::Double => match src { + XMM::XMM0 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm0), + XMM::XMM1 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm1), + XMM::XMM2 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm2), + XMM::XMM3 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm3), + XMM::XMM4 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm4), + XMM::XMM5 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm5), + XMM::XMM6 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm6), + XMM::XMM7 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm7), + XMM::XMM8 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm8), + XMM::XMM9 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm9), + XMM::XMM10 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm10), + XMM::XMM11 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm11), + XMM::XMM12 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm12), + XMM::XMM13 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm13), + XMM::XMM14 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm14), + XMM::XMM15 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm15), + }, + } +} + macro_rules! avx_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { // Dynasm bug: AVX instructions are not encoded correctly. 
match src2 { XMMOrMemory::XMM(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rx((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rx((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rx((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rx((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rx((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rx((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rx((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rx((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rx((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rx((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rx((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rx((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rx((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rx((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rx((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rx((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rx((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rx((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rx((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rx((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rx((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rx((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rx((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rx((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rx((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rx((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rx((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rx((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rx((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rx((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rx((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rx((x as u8))), }, XMMOrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, 
[Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, [Rq((base as u8)) + disp]), }, } } } } +macro_rules! sse_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => { + assert_ne!(x, dst); + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8))) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]) + } + } + } + }; + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8)), $mode) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode) + } + } + } + }; +} + macro_rules! 
avx_i2f_64_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rq((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rq((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rq((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rq((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rq((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rq((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rq((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rq((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rq((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rq((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rq((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rq((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rq((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rq((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rq((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rq((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rq((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rq((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rq((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rq((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rq((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rq((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rq((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rq((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rq((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rq((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rq((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rq((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rq((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rq((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rq((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rq((x as u8))), }, GPROrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, QWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, QWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, QWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, QWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, QWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, QWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, QWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, QWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, QWORD 
[Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, QWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, QWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, QWORD [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, QWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, QWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, QWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, QWORD [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, QWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, QWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, QWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, QWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, QWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, QWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, QWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, QWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, QWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, QWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, QWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, QWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, QWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, QWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, QWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, QWORD [Rq((base as u8)) + disp]), }, } } } } +macro_rules! sse_i2f_64_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: GPROrMemory, dst: XMM| { + match src2 { + GPROrMemory::GPR(x) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rq((x as u8))) + }, + GPROrMemory::Memory(base, disp) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]) + } + } + } + } +} + macro_rules! 
avx_i2f_32_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rd((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rd((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rd((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rd((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rd((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rd((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rd((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rd((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rd((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rd((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rd((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rd((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rd((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rd((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rd((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rd((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rd((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rd((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rd((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rd((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rd((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rd((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rd((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rd((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rd((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rd((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rd((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rd((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rd((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rd((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rd((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rd((x as u8))), }, GPROrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, DWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, DWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, DWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, DWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, DWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, DWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, DWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, DWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, DWORD 
[Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, DWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, DWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, DWORD [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, DWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, DWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, DWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, DWORD [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, DWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, DWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, DWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, DWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, DWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, DWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, DWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, DWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, DWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, DWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, DWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, DWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, DWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, DWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, DWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, DWORD [Rq((base as u8)) + disp]), + }, + } + } + } +} + +macro_rules! sse_i2f_32_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: GPROrMemory, dst: XMM| { + match src2 { + GPROrMemory::GPR(x) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter; $ins Rx((src1 as u8)), Rd((x as u8))) }, + GPROrMemory::Memory(base, disp) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]) + } } } } } macro_rules! avx_round_fn { - ($ins:ident, $name:ident, $mode:expr) => { - fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { - XMMOrMemory::XMM(x) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode), - XMMOrMemory::Memory(base, disp) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode), + XMMOrMemory::XMM(x) => dynasm!(emitter ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode), + XMMOrMemory::Memory(base, disp) => dynasm!(emitter ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode), } } } } -impl EmitterX64 for Assembler { +macro_rules! 
sse_round_fn { + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => { + assert_eq!(src1, x); + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((dst as u8)), $mode) + } + XMMOrMemory::Memory(..) => unreachable!(), + } + } + } +} + +impl EmitterX64 for AssemblerX64 { + fn get_simd_arch(&self) -> Option<&CpuFeature> { + self.simd_arch.as_ref() + } + fn get_label(&mut self) -> DynamicLabel { self.new_dynamic_label() } @@ -904,7 +1043,7 @@ impl EmitterX64 for Assembler { } } fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) { - // Constant elimination for comparision between consts. + // Constant elimination for comparison between consts. // // Only needed for `emit_cmp`, since other binary operators actually write to `right` and `right` must // be a writable location for them. @@ -1254,91 +1393,402 @@ impl EmitterX64 for Assembler { _ => panic!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst), }; } - - avx_fn!(vxorps, emit_vxorps); - avx_fn!(vxorpd, emit_vxorpd); - - avx_fn!(vaddss, emit_vaddss); - avx_fn!(vaddsd, emit_vaddsd); - - avx_fn!(vsubss, emit_vsubss); - avx_fn!(vsubsd, emit_vsubsd); - - avx_fn!(vmulss, emit_vmulss); - avx_fn!(vmulsd, emit_vmulsd); - - avx_fn!(vdivss, emit_vdivss); - avx_fn!(vdivsd, emit_vdivsd); - - avx_fn!(vmaxss, emit_vmaxss); - avx_fn!(vmaxsd, emit_vmaxsd); - - avx_fn!(vminss, emit_vminss); - avx_fn!(vminsd, emit_vminsd); - - avx_fn!(vcmpeqss, emit_vcmpeqss); - avx_fn!(vcmpeqsd, emit_vcmpeqsd); - - avx_fn!(vcmpneqss, emit_vcmpneqss); - avx_fn!(vcmpneqsd, emit_vcmpneqsd); - - avx_fn!(vcmpltss, emit_vcmpltss); - avx_fn!(vcmpltsd, emit_vcmpltsd); - - avx_fn!(vcmpless, emit_vcmpless); - avx_fn!(vcmplesd, emit_vcmplesd); - - avx_fn!(vcmpgtss, emit_vcmpgtss); - avx_fn!(vcmpgtsd, emit_vcmpgtsd); - - avx_fn!(vcmpgess, emit_vcmpgess); - avx_fn!(vcmpgesd, emit_vcmpgesd); - - avx_fn!(vcmpunordss, emit_vcmpunordss); - avx_fn!(vcmpunordsd, emit_vcmpunordsd); - - avx_fn!(vcmpordss, emit_vcmpordss); - avx_fn!(vcmpordsd, emit_vcmpordsd); - - avx_fn!(vsqrtss, emit_vsqrtss); - avx_fn!(vsqrtsd, emit_vsqrtsd); - - avx_fn!(vcvtss2sd, emit_vcvtss2sd); - avx_fn!(vcvtsd2ss, emit_vcvtsd2ss); - - avx_round_fn!(vroundss, emit_vroundss_nearest, 0); - avx_round_fn!(vroundss, emit_vroundss_floor, 1); - avx_round_fn!(vroundss, emit_vroundss_ceil, 2); - avx_round_fn!(vroundss, emit_vroundss_trunc, 3); - avx_round_fn!(vroundsd, emit_vroundsd_nearest, 0); - avx_round_fn!(vroundsd, emit_vroundsd_floor, 1); - avx_round_fn!(vroundsd, emit_vroundsd_ceil, 2); - avx_round_fn!(vroundsd, emit_vroundsd_trunc, 3); - - avx_i2f_32_fn!(vcvtsi2ss, emit_vcvtsi2ss_32); - avx_i2f_32_fn!(vcvtsi2sd, emit_vcvtsi2sd_32); - avx_i2f_64_fn!(vcvtsi2ss, emit_vcvtsi2ss_64); - avx_i2f_64_fn!(vcvtsi2sd, emit_vcvtsi2sd_64); - - fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { - match src2 { - XMMOrMemory::XMM(src2) => { - dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vxorps)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorps)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => 
avx_fn!(vxorpd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorpd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vaddss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vaddsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsubss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsubsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmulss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmulsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vdivss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vdivsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmaxss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmaxsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vminss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(minss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vminsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(minsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => 
avx_fn!(vcmpeqss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpeqsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpneqss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpneqsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpltss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpltsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpless)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmplesd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgtss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgtsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgess)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgesd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpunordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpunordss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpunordsd(&mut self, 
src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpunordsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpordss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpordsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsqrtss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtss)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsqrtsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtsd)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcvtss2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd)(self, Precision::Single, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcvtsd2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss)(self, Precision::Double, src1, src2, dst), + _ => {} + } + } + fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 0)(self, Precision::Single, src1, src2, dst) } - XMMOrMemory::Memory(base, disp) => { - dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + _ => {} + } + } + fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 0)(self, Precision::Double, src1, src2, dst) } + _ => {} } } - - fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { - match src2 { - XMMOrMemory::XMM(src2) => { - dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 1)(self, Precision::Single, src1, src2, dst) } - XMMOrMemory::Memory(base, disp) => { - dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + _ => {} + } + } + fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1)(self, src1, src2, dst), + 
Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 1)(self, Precision::Double, src1, src2, dst) + } + _ => {} + } + } + fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 2)(self, Precision::Single, src1, src2, dst) + } + _ => {} + } + } + fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 2)(self, Precision::Double, src1, src2, dst) + } + _ => {} + } + } + fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 3)(self, Precision::Single, src1, src2, dst) + } + _ => {} + } + } + fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 3)(self, Precision::Double, src1, src2, dst) + } + _ => {} + } + } + fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_32_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst) + } + _ => {} + } + } + fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_32_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst) + } + _ => {} + } + } + fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_64_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst) } + _ => {} + } + } + fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_64_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst) + } + _ => {} + } + } + + fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { + // this implementation works only for sse 4.1 and greater + match self.get_simd_arch() { + Some(CpuFeature::AVX) => match src2 { + XMMOrMemory::XMM(src2) => { + // TODO: this argument order does not match the documentation?? 
+ dynasm!( self; vblendvps Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!( self; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } + }, + Some(CpuFeature::SSE42) => match src2 { + XMMOrMemory::XMM(src2) => { + move_src_to_dst(self, Precision::Single, src1, dst); + dynasm!( self; blendvps Rx(dst as u8), Rx(src2 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(self, Precision::Single, src1, dst); + dynasm!( self; blendvps Rx(dst as u8), [Rq(base as u8) + disp]) + } + }, + _ => {} + } + } + + fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { + // this implementation works only for sse 4.1 and greater + match self.get_simd_arch() { + Some(CpuFeature::AVX) => match src2 { + XMMOrMemory::XMM(src2) => { + // TODO: this argument order does not match the documentation?? + dynasm!( self; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!( self; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } + }, + Some(CpuFeature::SSE42) => match src2 { + XMMOrMemory::XMM(src2) => { + move_src_to_dst(self, Precision::Double, src1, dst); + dynasm!( self; blendvpd Rx(dst as u8), Rx(src2 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(self, Precision::Double, src1, dst); + dynasm!( self; blendvpd Rx(dst as u8), [Rq(base as u8) + disp]) + } + }, + _ => {} } } diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 1ad5d04254e..d7359342e9f 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -8,8 +8,8 @@ use std::fmt::Debug; pub use wasmer_compiler::wasmparser::MemoryImmediate; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ - Architecture, CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, - Relocation, RelocationTarget, Target, TrapInformation, + Architecture, CallingConvention, CpuFeature, CustomSection, FunctionBody, + InstructionAddressMap, Relocation, RelocationTarget, Target, TrapInformation, }; use wasmer_types::{FunctionIndex, FunctionType}; use wasmer_vm::{TrapCode, VMOffsets}; @@ -2200,7 +2200,13 @@ pub fn gen_std_trampoline( ) -> FunctionBody { match target.triple().architecture { Architecture::X86_64 => { - let machine = MachineX86_64::new(); + let machine = if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + }; machine.gen_std_trampoline(sig, calling_convention) } Architecture::Aarch64(_) => { @@ -2210,6 +2216,7 @@ pub fn gen_std_trampoline( _ => unimplemented!(), } } + /// Generates dynamic import function call trampoline for a function type. 
pub fn gen_std_dynamic_import_trampoline( vmoffsets: &VMOffsets, @@ -2219,7 +2226,13 @@ pub fn gen_std_dynamic_import_trampoline( ) -> FunctionBody { match target.triple().architecture { Architecture::X86_64 => { - let machine = MachineX86_64::new(); + let machine = if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + }; machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) } Architecture::Aarch64(_) => { @@ -2239,7 +2252,13 @@ pub fn gen_import_call_trampoline( ) -> CustomSection { match target.triple().architecture { Architecture::X86_64 => { - let machine = MachineX86_64::new(); + let machine = if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + }; machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) } Architecture::Aarch64(_) => { diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index c5595b989b8..a463a010f32 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -5,20 +5,58 @@ use crate::location::Reg; use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; -use dynasmrt::{x64::X64Relocation, VecAssembler}; +use dynasmrt::{x64::X64Relocation, DynasmError, VecAssembler}; +use std::ops::{Deref, DerefMut}; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ - CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, InstructionAddressMap, - Relocation, RelocationKind, RelocationTarget, SectionBody, SourceLoc, TrapInformation, + CallingConvention, CpuFeature, CustomSection, CustomSectionProtection, FunctionBody, + InstructionAddressMap, Relocation, RelocationKind, RelocationTarget, SectionBody, SourceLoc, + TrapInformation, }; use wasmer_types::{FunctionIndex, FunctionType, Type}; use wasmer_vm::{TrapCode, VMOffsets}; type Assembler = VecAssembler; + +pub struct AssemblerX64 { + /// the actual inner + pub inner: Assembler, + /// the simd instructions set on the target. 
+ /// Currently only supports SSE 4.2 and AVX + pub simd_arch: Option, +} + +impl AssemblerX64 { + fn new(baseaddr: usize, simd_arch: Option) -> Self { + Self { + inner: Assembler::new(baseaddr), + simd_arch, + } + } + + fn finalize(self) -> Result, DynasmError> { + self.inner.finalize() + } +} + +impl Deref for AssemblerX64 { + type Target = Assembler; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for AssemblerX64 { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + type Location = AbstractLocation; pub struct MachineX86_64 { - assembler: Assembler, + assembler: AssemblerX64, used_gprs: u32, used_simd: u32, trap_table: TrapTable, @@ -31,9 +69,9 @@ pub struct MachineX86_64 { } impl MachineX86_64 { - pub fn new() -> Self { + pub fn new(simd_arch: Option) -> Self { MachineX86_64 { - assembler: Assembler::new(0), + assembler: AssemblerX64::new(0, simd_arch), used_gprs: 0, used_simd: 0, trap_table: TrapTable::default(), @@ -43,7 +81,7 @@ impl MachineX86_64 { } pub fn emit_relaxed_binop( &mut self, - op: fn(&mut Assembler, Size, Location, Location), + op: fn(&mut AssemblerX64, Size, Location, Location), sz: Size, src: Location, dst: Location, @@ -56,11 +94,11 @@ impl MachineX86_64 { } let mode = match (src, dst) { (Location::GPR(_), Location::GPR(_)) - if (op as *const u8 == Assembler::emit_imul as *const u8) => + if (op as *const u8 == AssemblerX64::emit_imul as *const u8) => { RelaxMode::Direct } - _ if (op as *const u8 == Assembler::emit_imul as *const u8) => RelaxMode::BothToGPR, + _ if (op as *const u8 == AssemblerX64::emit_imul as *const u8) => RelaxMode::BothToGPR, (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR, (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => { @@ -69,7 +107,7 @@ impl MachineX86_64 { (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR, (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR, (Location::Imm64(_), Location::GPR(_)) - if (op as *const u8 != Assembler::emit_mov as *const u8) => + if (op as *const u8 != AssemblerX64::emit_mov as *const u8) => { RelaxMode::SrcToGPR } @@ -117,7 +155,7 @@ impl MachineX86_64 { } pub fn emit_relaxed_zx_sx( &mut self, - op: fn(&mut Assembler, Size, Location, Size, Location), + op: fn(&mut AssemblerX64, Size, Location, Size, Location), sz_src: Size, src: Location, sz_dst: Size, @@ -187,7 +225,7 @@ impl MachineX86_64 { /// I32 binary operation with both operands popped from the virtual stack. fn emit_binop_i32( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -205,7 +243,7 @@ impl MachineX86_64 { /// I64 binary operation with both operands popped from the virtual stack. fn emit_binop_i64( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -252,7 +290,7 @@ impl MachineX86_64 { /// I64 shift with both operands popped from the virtual stack. fn emit_shift_i64( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -269,7 +307,7 @@ impl MachineX86_64 { /// Moves `loc` to a valid location for `div`/`idiv`. 
fn emit_relaxed_xdiv( &mut self, - op: fn(&mut Assembler, Size, Location), + op: fn(&mut AssemblerX64, Size, Location), sz: Size, loc: Location, integer_division_by_zero: Label, @@ -326,7 +364,7 @@ impl MachineX86_64 { /// I32 shift with both operands popped from the virtual stack. fn emit_shift_i32( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -359,7 +397,7 @@ impl MachineX86_64 { let (base_loc, bound_loc) = if imported_memories { // Imported memories require one level of indirection. self.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(self.get_vmctx_reg(), offset), Location::GPR(tmp_addr), @@ -806,7 +844,7 @@ impl MachineX86_64 { /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions. fn emit_relaxed_avx( &mut self, - op: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM), src1: Location, src2: Location, dst: Location, @@ -1552,7 +1590,7 @@ impl MachineX86_64 { } fn emit_relaxed_atomic_xchg(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_xchg, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_xchg, sz, src, dst); } fn used_gprs_contains(&self, r: &GPR) -> bool { @@ -2385,10 +2423,10 @@ impl Machine for MachineX86_64 { // relaxed binop based... fn emit_relaxed_mov(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_mov, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_mov, sz, src, dst); } fn emit_relaxed_cmp(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_cmp, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_cmp, sz, src, dst); } fn emit_relaxed_zero_extension( &mut self, @@ -2398,9 +2436,9 @@ impl Machine for MachineX86_64 { dst: Location, ) { if (sz_src == Size::S32 || sz_src == Size::S64) && sz_dst == Size::S64 { - self.emit_relaxed_binop(Assembler::emit_mov, sz_src, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_mov, sz_src, src, dst); } else { - self.emit_relaxed_zx_sx(Assembler::emit_movzx, sz_src, src, sz_dst, dst); + self.emit_relaxed_zx_sx(AssemblerX64::emit_movzx, sz_src, src, sz_dst, dst); } } fn emit_relaxed_sign_extension( @@ -2410,17 +2448,17 @@ impl Machine for MachineX86_64 { sz_dst: Size, dst: Location, ) { - self.emit_relaxed_zx_sx(Assembler::emit_movsx, sz_src, src, sz_dst, dst); + self.emit_relaxed_zx_sx(AssemblerX64::emit_movsx, sz_src, src, sz_dst, dst); } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_add, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_add, loc_a, loc_b, ret); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_sub, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_sub, loc_a, loc_b, ret); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_imul, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_imul, loc_a, loc_b, ret); } fn emit_binop_udiv32( &mut self, @@ -2436,7 +2474,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + 
AssemblerX64::emit_div, Size::S32, loc_b, integer_division_by_zero, @@ -2458,7 +2496,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cdq(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S32, loc_b, integer_division_by_zero, @@ -2481,7 +2519,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S32, loc_b, integer_division_by_zero, @@ -2514,7 +2552,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cdq(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S32, loc_b, integer_division_by_zero, @@ -2526,13 +2564,13 @@ impl Machine for MachineX86_64 { offset } fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_and, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_and, loc_a, loc_b, ret); } fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_or, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_or, loc_a, loc_b, ret); } fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_xor, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_xor, loc_a, loc_b, ret); } fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_cmpop_i32_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret); @@ -2698,19 +2736,19 @@ impl Machine for MachineX86_64 { } } fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_shl, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_shl, loc_a, loc_b, ret); } fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_shr, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_shr, loc_a, loc_b, ret); } fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_sar, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_sar, loc_a, loc_b, ret); } fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_rol, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_rol, loc_a, loc_b, ret); } fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_ror, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_ror, loc_a, loc_b, ret); } fn i32_load( &mut self, @@ -2733,7 +2771,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -2762,7 +2800,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S8, Location::Memory(addr, 0), Size::S32, @@ -2792,7 +2830,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S8, Location::Memory(addr, 0), Size::S32, @@ -2822,7 +2860,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { 
this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S16, Location::Memory(addr, 0), Size::S32, @@ -2852,7 +2890,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S16, Location::Memory(addr, 0), Size::S32, @@ -2964,7 +3002,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -2993,7 +3031,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S8, target_value, Location::Memory(addr, 0), @@ -3022,7 +3060,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S16, target_value, Location::Memory(addr, 0), @@ -3855,13 +3893,13 @@ impl Machine for MachineX86_64 { } fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_add, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_add, loc_a, loc_b, ret); } fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_sub, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_sub, loc_a, loc_b, ret); } fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_imul, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_imul, loc_a, loc_b, ret); } fn emit_binop_udiv64( &mut self, @@ -3877,7 +3915,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S64, loc_b, integer_division_by_zero, @@ -3899,7 +3937,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cqo(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S64, loc_b, integer_division_by_zero, @@ -3922,7 +3960,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S64, loc_b, integer_division_by_zero, @@ -3955,7 +3993,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cqo(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S64, loc_b, integer_division_by_zero, @@ -3967,13 +4005,13 @@ impl Machine for MachineX86_64 { offset } fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_and, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_and, loc_a, loc_b, ret); } fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_or, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_or, loc_a, loc_b, ret); } fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_xor, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_xor, loc_a, loc_b, ret); } fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, 
ret: Location) { self.emit_cmpop_i64_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret); @@ -4139,19 +4177,19 @@ impl Machine for MachineX86_64 { } } fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_shl, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_shl, loc_a, loc_b, ret); } fn i64_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_shr, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_shr, loc_a, loc_b, ret); } fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_sar, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_sar, loc_a, loc_b, ret); } fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_rol, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_rol, loc_a, loc_b, ret); } fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_ror, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_ror, loc_a, loc_b, ret); } fn i64_load( &mut self, @@ -4174,7 +4212,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(addr, 0), ret, @@ -4203,7 +4241,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S8, Location::Memory(addr, 0), Size::S64, @@ -4233,7 +4271,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S8, Location::Memory(addr, 0), Size::S64, @@ -4263,7 +4301,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S16, Location::Memory(addr, 0), Size::S64, @@ -4293,7 +4331,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S16, Location::Memory(addr, 0), Size::S64, @@ -4336,7 +4374,7 @@ impl Machine for MachineX86_64 { } } this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -4365,7 +4403,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S32, Location::Memory(addr, 0), Size::S64, @@ -4519,7 +4557,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, target_value, Location::Memory(addr, 0), @@ -4548,7 +4586,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S8, target_value, Location::Memory(addr, 0), @@ -4577,7 +4615,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S16, target_value, Location::Memory(addr, 0), @@ -4606,7 +4644,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -5696,7 +5734,7 @@ impl Machine for MachineX86_64 { 
heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -5728,7 +5766,7 @@ impl Machine for MachineX86_64 { |this, addr| { if !canonicalize { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -5760,7 +5798,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(addr, 0), ret, @@ -5792,7 +5830,7 @@ impl Machine for MachineX86_64 { |this, addr| { if !canonicalize { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, target_value, Location::Memory(addr, 0), @@ -6014,10 +6052,10 @@ impl Machine for MachineX86_64 { } } fn convert_f64_f32(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcvtss2sd, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcvtss2sd, loc, loc, ret); } fn convert_f32_f64(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcvtsd2ss, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcvtsd2ss, loc, loc, ret); } fn f64_neg(&mut self, loc: Location, ret: Location) { if self.assembler.arch_has_fneg() { @@ -6076,47 +6114,47 @@ impl Machine for MachineX86_64 { self.release_gpr(c); } fn f64_sqrt(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsqrtsd, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsqrtsd, loc, loc, ret); } fn f64_trunc(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_trunc, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_trunc, loc, loc, ret); } fn f64_ceil(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_ceil, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_ceil, loc, loc, ret); } fn f64_floor(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_floor, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_floor, loc, loc, ret); } fn f64_nearest(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_nearest, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_nearest, loc, loc, ret); } fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgesd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgesd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgtsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmplesd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmplesd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpltsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); 
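// Illustrative sketch (not part of the diff): every f64/f32 comparison lowering in
// these hunks follows the same two-step shape, a relaxed vcmp*sd / vcmp*ss issued
// through an `AssemblerX64` function pointer, then `and ret, 1` to reduce the
// all-ones/all-zeros compare mask to the 0/1 i32 value Wasm expects. The helper
// name `emit_f64_cmp` and its free-function form are hypothetical; the real
// methods inline this pattern per operator, and the types are the crate's
// internal `MachineX86_64`, `Location`, `XMM`, `XMMOrMemory` and `Size`.
fn emit_f64_cmp(
    machine: &mut MachineX86_64,
    op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM),
    loc_a: Location,
    loc_b: Location,
    ret: Location,
) {
    // The relaxed-AVX helper takes care of operand placement and of adding an
    // indirection for `ret` when it is not an XMM register.
    machine.emit_relaxed_avx(op, loc_a, loc_b, ret);
    // Keep only bit 0 of the comparison mask.
    machine.assembler.emit_and(Size::S32, Location::Imm32(1), ret);
}
// e.g. emit_f64_cmp(machine, AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret);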
} fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpneqsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpeqsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vminsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vminsd, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6225,7 +6263,7 @@ impl Machine for MachineX86_64 { } fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vmaxsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmaxsd, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6328,16 +6366,16 @@ impl Machine for MachineX86_64 { } } fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vaddsd, loc_a, loc_b, ret); } fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsubsd, loc_a, loc_b, ret); } fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmulsd, loc_a, loc_b, ret); } fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vdivsd, loc_a, loc_b, ret); } fn f32_neg(&mut self, loc: Location, ret: Location) { if self.assembler.arch_has_fneg() { @@ -6380,47 +6418,47 @@ impl Machine for MachineX86_64 { .emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1)); } fn f32_sqrt(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsqrtss, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsqrtss, loc, loc, ret); } fn f32_trunc(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_trunc, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_trunc, loc, loc, ret); } fn f32_ceil(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_ceil, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_ceil, loc, loc, ret); } fn f32_floor(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_floor, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_floor, loc, loc, ret); } fn f32_nearest(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_nearest, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_nearest, loc, loc, ret); } fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) 
{ - self.emit_relaxed_avx(Assembler::emit_vcmpgess, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgess, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgtss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpless, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpless, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpltss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpltss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpneqss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpeqss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vminss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vminss, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6529,7 +6567,7 @@ impl Machine for MachineX86_64 { } fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vmaxss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmaxss, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6632,16 +6670,16 @@ impl Machine for MachineX86_64 { } } fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vaddss, loc_a, loc_b, ret); } fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsubss, loc_a, loc_b, ret); } fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmulss, loc_a, loc_b, ret); } fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vdivss, loc_a, loc_b, ret); } fn gen_std_trampoline( @@ -6649,7 +6687,8 @@ impl Machine for MachineX86_64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { - let mut a = Assembler::new(0); + // the cpu feature here is irrelevant + let mut a = 
AssemblerX64::new(0, None);
 
         // Calculate stack offset.
         let mut stack_offset: u32 = 0;
@@ -6761,7 +6800,8 @@ impl Machine for MachineX86_64 {
         sig: &FunctionType,
         calling_convention: CallingConvention,
     ) -> FunctionBody {
-        let mut a = Assembler::new(0);
+        // the cpu feature here is irrelevant
+        let mut a = AssemblerX64::new(0, None);
 
         // Allocate argument array.
         let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding
@@ -6883,7 +6923,8 @@ impl Machine for MachineX86_64 {
         sig: &FunctionType,
         calling_convention: CallingConvention,
     ) -> CustomSection {
-        let mut a = Assembler::new(0);
+        // the cpu feature here is irrelevant
+        let mut a = AssemblerX64::new(0, None);
 
         // TODO: ARM entry trampoline is not emitted.
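// Illustrative sketch (not part of the diff): the trampoline builders above pass
// `None` as the SIMD arch because a trampoline only shuffles integer and stack
// arguments and never emits float/SIMD instructions, so no AVX-vs-SSE dispatch
// is needed. The constructor shape assumed here is the one used above,
// `AssemblerX64::new(base_offset, Option<CpuFeature>)`; the function names are
// hypothetical.
use wasmer_compiler::CpuFeature;

fn trampoline_assembler() -> AssemblerX64 {
    // No SIMD instructions are emitted here, so the feature can be omitted.
    AssemblerX64::new(0, None)
}

fn body_assembler(simd_arch: CpuFeature) -> AssemblerX64 {
    // Function bodies do need the selected feature (e.g. AVX or SSE 4.2) so the
    // emitter knows which encoding of each SIMD instruction to produce.
    AssemblerX64::new(0, Some(simd_arch))
}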
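// Illustrative sketch (not part of the diff): the i32 unsigned-division hunks
// earlier in this file show the pattern now used throughout, the concrete
// `AssemblerX64::emit_*` method is passed as a plain `fn` pointer into a
// "relaxed" helper that fixes up operand locations and, for division, records
// the divide-by-zero trap site. The free-function form and the name
// `lower_i32_div_u` are hypothetical; the real code lives in `emit_binop_udiv32`,
// whose full body is not shown in the hunks above.
fn lower_i32_div_u(
    machine: &mut MachineX86_64,
    loc_a: Location,
    loc_b: Location,
    ret: Location,
    integer_division_by_zero: Label,
) -> usize {
    // Dividend goes in EAX; unsigned `div` divides EDX:EAX, so EDX must be zero.
    machine.assembler.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX));
    machine
        .assembler
        .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX));
    // `emit_relaxed_xdiv` returns the code offset of the trapping instruction.
    let offset = machine.emit_relaxed_xdiv(
        AssemblerX64::emit_div,
        Size::S32,
        loc_b,
        integer_division_by_zero,
    );
    // The quotient is left in EAX; move it to the requested return location.
    machine.emit_relaxed_mov(Size::S32, Location::GPR(GPR::RAX), ret);
    offset
}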