From b32b5cfa53d01d24881a258879c0abc9103fcf1f Mon Sep 17 00:00:00 2001 From: R2D2 Date: Sun, 30 Jan 2022 10:55:26 +0100 Subject: [PATCH 1/9] [feat]: Add the plumbing needed for the singlepass compiler to choose between different SIMD instruction sets. Partial work on supporting SSE 4.2 --- lib/compiler-singlepass/Cargo.toml | 10 +- lib/compiler-singlepass/src/codegen.rs | 2 +- lib/compiler-singlepass/src/compiler.rs | 12 +- lib/compiler-singlepass/src/emitter_x64.rs | 899 +++++++++++++++++---- lib/compiler-singlepass/src/machine.rs | 45 +- lib/compiler-singlepass/src/machine_x64.rs | 275 ++++--- 6 files changed, 928 insertions(+), 315 deletions(-) diff --git a/lib/compiler-singlepass/Cargo.toml b/lib/compiler-singlepass/Cargo.toml index 18c8a3bdd29..b949aa513b6 100644 --- a/lib/compiler-singlepass/Cargo.toml +++ b/lib/compiler-singlepass/Cargo.toml @@ -12,9 +12,9 @@ readme = "README.md" edition = "2018" [dependencies] -wasmer-compiler = { path = "../compiler", version = "=2.1.1", features = ["translator"], default-features = false } -wasmer-vm = { path = "../vm", version = "=2.1.1" } -wasmer-types = { path = "../types", version = "=2.1.1", default-features = false, features = ["std"] } +wasmer-compiler = { path = "../compiler", version = "2.1.1", features = ["translator"], default-features = false } +wasmer-vm = { path = "../vm", version = "2.1.1" } +wasmer-types = { path = "../types", version = "2.1.1", default-features = false, features = ["std"] } rayon = { version = "1.5", optional = true } hashbrown = { version = "0.11", optional = true } more-asserts = "0.2" @@ -32,6 +32,8 @@ target-lexicon = { version = "0.12.2", default-features = false } maintenance = { status = "actively-developed" } [features] -default = ["std", "rayon"] +default = ["std", "rayon", "avx"] std = ["wasmer-compiler/std", "wasmer-types/std"] core = ["hashbrown", "wasmer-types/core"] +sse = [] +avx = [] diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 6bdbc4375b0..a792afe8a36 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -5784,7 +5784,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { .mark_address_with_trap_code(TrapCode::BadSignature); self.machine.emit_illegal_op(); - // Notify the assembler backend to generate necessary code at end of function. + // Notify the inner backend to generate necessary code at end of function.
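[Reviewer note: the target-feature cascade this patch adds to compiler.rs (the next hunk below) is the heart of the change, so here it is as a stand-alone sketch. The helper name `pick_simd_arch` is invented for illustration; `Target`, `CpuFeature`, and `CompileError` are the real wasmer-compiler types the patch already uses.

use wasmer_compiler::{CompileError, CpuFeature, Target};

// Prefer AVX, fall back to SSE 4.2, otherwise refuse the target;
// this is the priority order SinglepassCompiler now applies.
fn pick_simd_arch(target: &Target) -> Result<CpuFeature, CompileError> {
    if target.cpu_features().contains(CpuFeature::AVX) {
        Ok(CpuFeature::AVX)
    } else if target.cpu_features().contains(CpuFeature::SSE42) {
        Ok(CpuFeature::SSE42)
    } else {
        Err(CompileError::UnsupportedTarget(
            "x86_64 without AVX or SSE 4.2".to_string(),
        ))
    }
}

The same cascade is repeated verbatim in the three trampoline generators in machine.rs further down (where the fallback is `unimplemented!()` rather than an error); a follow-up could hoist it into a shared helper like this one.]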
self.machine.finalize_function(); let body_len = self.machine.assembler_get_offset().0; diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 5599f9f3c65..9b8ad20a33b 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -68,11 +68,15 @@ impl Compiler for SinglepassCompiler { target.triple().architecture.to_string(), )); } - if !target.cpu_features().contains(CpuFeature::AVX) { + let simd_arch = if target.cpu_features().contains(CpuFeature::AVX) { + CpuFeature::AVX + } else if target.cpu_features().contains(CpuFeature::SSE42) { + CpuFeature::SSE42 + } else { return Err(CompileError::UnsupportedTarget( - "x86_64 without AVX".to_string(), + "x86_64 without AVX or SSE 4.2".to_string(), )); - } + }; if compile_info.features.multi_value { return Err(CompileError::UnsupportedFeature("multivalue".to_string())); } @@ -127,7 +131,7 @@ impl Compiler for SinglepassCompiler { } let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + Architecture::X86_64 => MachineX86_64::new(Some(simd_arch)), _ => unimplemented!(), }; let mut generator = FuncGen::new( diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 71f1b9128ba..9b34b6c1f84 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -2,13 +2,13 @@ use crate::common_decl::Size; use crate::location::Location as AbstractLocation; pub use crate::location::Multiplier; pub use crate::machine::{Label, Offset}; +use crate::machine_x64::AssemblerX64; pub use crate::x64_decl::{GPR, XMM}; use dynasm::dynasm; use dynasmrt::{ - x64::X64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, VecAssembler, + AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, }; - -type Assembler = VecAssembler; +use wasmer_compiler::CpuFeature; /// Force `dynasm!` to use the correct arch (x64) when cross-compiling. /// `dynasm!` proc-macro tries to auto-detect it by default by looking at the @@ -17,7 +17,7 @@ type Assembler = VecAssembler; macro_rules! dynasm { ($a:expr ; $($tt:tt)*) => { dynasm::dynasm!( - $a + $a.inner ; .arch x64 ; $($tt)* ) @@ -58,6 +58,7 @@ pub enum GPROrMemory { } pub trait EmitterX64 { + fn get_simd_arch(&self) -> Option<&CpuFeature>; fn get_label(&mut self) -> Label; fn get_offset(&self) -> Offset; fn get_jmp_instr_size(&self) -> u8; @@ -71,7 +72,7 @@ pub trait EmitterX64 { fn emit_nop(&mut self); - /// A high-level assembler method. Emits an instruction sequence of length `n` that is functionally + /// A high-level inner method. Emits an instruction sequence of length `n` that is functionally /// equivalent to a `nop` instruction, without guarantee about the underlying implementation. fn emit_nop_n(&mut self, n: usize); @@ -123,6 +124,7 @@ pub trait EmitterX64 { fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); @@ -472,153 +474,439 @@ macro_rules! jmp_op { } macro_rules! 
avx_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { // Dynasm bug: AVX instructions are not encoded correctly. match src2 { XMMOrMemory::XMM(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rx((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rx((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rx((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rx((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rx((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rx((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rx((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rx((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rx((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rx((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rx((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rx((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rx((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rx((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rx((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rx((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rx((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rx((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rx((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rx((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rx((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rx((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rx((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rx((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rx((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rx((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rx((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rx((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rx((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rx((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rx((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rx((x as u8))), }, XMMOrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, [Rq((base 
as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, [Rq((base as u8)) + disp]), }, } } } } +macro_rules! 
sse_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8))), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8))), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8))) + } + } + XMMOrMemory::Memory(base, disp) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; 
movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp]) + } + } + } + } + }; + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8)), $mode) + } + } + XMMOrMemory::Memory(base, disp) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], 
$mode), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) + } + } + } + } + }; +} + macro_rules! avx_i2f_64_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rq((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rq((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rq((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rq((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rq((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rq((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rq((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rq((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rq((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rq((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rq((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rq((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rq((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rq((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rq((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rq((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rq((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rq((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rq((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rq((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rq((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rq((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rq((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rq((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rq((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rq((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rq((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rq((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rq((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rq((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rq((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rq((x as 
u8))), }, GPROrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, QWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, QWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, QWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, QWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, QWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, QWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, QWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, QWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, QWORD [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, QWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, QWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, QWORD [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, QWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, QWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, QWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, QWORD [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, QWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, QWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, QWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, QWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, QWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, QWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, QWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, QWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, QWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, QWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, QWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, QWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, QWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, QWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, QWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, QWORD [Rq((base as u8)) + disp]), }, } } } } +macro_rules! 
sse_i2f_64_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { + match src2 { + GPROrMemory::GPR(x) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rq((x as u8))), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rq((x as u8))), + } + } else { + dynasm!(emitter ; $ins Rx((dst as u8)), Rq((x as u8))) + } + }, + GPROrMemory::Memory(base, disp) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; 
$ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), QWORD [Rq((base as u8)) + disp]) + } + } + } + } + } +} + macro_rules! avx_i2f_32_fn { - ($ins:ident, $name:ident) => { - fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rd((x as u8))), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rd((x as u8))), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rd((x as u8))), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rd((x as u8))), - XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rd((x as u8))), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rd((x as u8))), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rd((x as u8))), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rd((x as u8))), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rd((x as u8))), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rd((x as u8))), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rd((x as u8))), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rd((x as u8))), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rd((x as u8))), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rd((x as u8))), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rd((x as u8))), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rd((x as u8))), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, Rd((x as u8))), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, Rd((x as u8))), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, Rd((x as u8))), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, Rd((x as u8))), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, Rd((x as u8))), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, Rd((x as u8))), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, Rd((x as u8))), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, Rd((x as u8))), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, Rd((x as u8))), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, Rd((x as u8))), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, Rd((x as u8))), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, Rd((x as u8))), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, Rd((x as u8))), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, Rd((x as u8))), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, Rd((x as u8))), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, Rd((x as u8))), }, GPROrMemory::Memory(base, disp) => match src1 { - XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, DWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, DWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, DWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, DWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => 
dynasm!(self ; $ins Rx((dst as u8)), xmm4, DWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, DWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, DWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, DWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, DWORD [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, DWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, DWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, DWORD [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, DWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, DWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, DWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, DWORD [Rq((base as u8)) + disp]), + XMM::XMM0 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm0, DWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm1, DWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm2, DWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm3, DWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm4, DWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm5, DWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm6, DWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm7, DWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm8, DWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm9, DWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm10, DWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm11, DWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm12, DWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm13, DWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm14, DWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; $ins Rx((dst as u8)), xmm15, DWORD [Rq((base as u8)) + disp]), + }, + } + } + } +} + +macro_rules! 
sse_i2f_32_fn { + ($ins:ident) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { + match src2 { + GPROrMemory::GPR(x) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rd((x as u8))), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rd((x as u8))), + } + } else { + dynasm!(emitter; $ins Rx((src1 as u8)), Rd((x as u8))) + } }, + GPROrMemory::Memory(base, disp) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; 
$ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), DWORD [Rq((base as u8)) + disp]) + } + } } } } } macro_rules! avx_round_fn { - ($ins:ident, $name:ident, $mode:expr) => { - fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + match src2 { + XMMOrMemory::XMM(x) => dynasm!(emitter ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode), + XMMOrMemory::Memory(base, disp) => dynasm!(emitter ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode), + } + } + } +} + +macro_rules! sse_round_fn { + ($ins:ident, $mode:expr) => { + |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { - XMMOrMemory::XMM(x) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode), - XMMOrMemory::Memory(base, disp) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode), + XMMOrMemory::XMM(x) =>{ + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8)), $mode), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8)), $mode) + } + } + XMMOrMemory::Memory(base, disp) => { + if src1 != dst { + match src1 { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as 
u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), + } + } else { + dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) + } + } } } } } -impl EmitterX64 for Assembler { +impl EmitterX64 for AssemblerX64 { + fn get_simd_arch(&self) -> Option<&CpuFeature> { + self.simd_arch.as_ref() + } + fn get_label(&mut self) -> DynamicLabel { self.new_dynamic_label() } @@ -1254,71 +1542,328 @@ impl EmitterX64 for Assembler { _ => panic!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst), }; } - - avx_fn!(vxorps, emit_vxorps); - avx_fn!(vxorpd, emit_vxorpd); - - avx_fn!(vaddss, emit_vaddss); - avx_fn!(vaddsd, emit_vaddsd); - - avx_fn!(vsubss, emit_vsubss); - avx_fn!(vsubsd, emit_vsubsd); - - avx_fn!(vmulss, emit_vmulss); - avx_fn!(vmulsd, emit_vmulsd); - - avx_fn!(vdivss, emit_vdivss); - avx_fn!(vdivsd, emit_vdivsd); - - avx_fn!(vmaxss, emit_vmaxss); - avx_fn!(vmaxsd, emit_vmaxsd); - - avx_fn!(vminss, emit_vminss); - avx_fn!(vminsd, emit_vminsd); - - avx_fn!(vcmpeqss, emit_vcmpeqss); - avx_fn!(vcmpeqsd, emit_vcmpeqsd); - - avx_fn!(vcmpneqss, emit_vcmpneqss); - avx_fn!(vcmpneqsd, emit_vcmpneqsd); - - avx_fn!(vcmpltss, emit_vcmpltss); - avx_fn!(vcmpltsd, emit_vcmpltsd); - - avx_fn!(vcmpless, emit_vcmpless); - avx_fn!(vcmplesd, emit_vcmplesd); - - avx_fn!(vcmpgtss, emit_vcmpgtss); - avx_fn!(vcmpgtsd, emit_vcmpgtsd); - - avx_fn!(vcmpgess, emit_vcmpgess); - avx_fn!(vcmpgesd, emit_vcmpgesd); - - avx_fn!(vcmpunordss, emit_vcmpunordss); - avx_fn!(vcmpunordsd, emit_vcmpunordsd); - - avx_fn!(vcmpordss, emit_vcmpordss); - avx_fn!(vcmpordsd, emit_vcmpordsd); - - avx_fn!(vsqrtss, emit_vsqrtss); - avx_fn!(vsqrtsd, emit_vsqrtsd); - - avx_fn!(vcvtss2sd, emit_vcvtss2sd); - avx_fn!(vcvtsd2ss, emit_vcvtsd2ss); - - avx_round_fn!(vroundss, emit_vroundss_nearest, 0); - avx_round_fn!(vroundss, emit_vroundss_floor, 1); - avx_round_fn!(vroundss, emit_vroundss_ceil, 2); - avx_round_fn!(vroundss, emit_vroundss_trunc, 3); - avx_round_fn!(vroundsd, emit_vroundsd_nearest, 0); - 
avx_round_fn!(vroundsd, emit_vroundsd_floor, 1); - avx_round_fn!(vroundsd, emit_vroundsd_ceil, 2); - avx_round_fn!(vroundsd, emit_vroundsd_trunc, 3); - - avx_i2f_32_fn!(vcvtsi2ss, emit_vcvtsi2ss_32); - avx_i2f_32_fn!(vcvtsi2sd, emit_vcvtsi2sd_32); - avx_i2f_64_fn!(vcvtsi2ss, emit_vcvtsi2ss_64); - avx_i2f_64_fn!(vcvtsi2sd, emit_vcvtsi2sd_64); + fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vxorps)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorps)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vxorpd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorpd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vaddss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vaddsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsubss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsubsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmulss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmulsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vdivss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vdivsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmaxss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vmaxsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vminss)(self, src1, src2, 
dst), + Some(CpuFeature::SSE42) => sse_fn!(minss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vminsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(minsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpeqss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpeqsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpneqss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpneqsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpltss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpltsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpless)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmplesd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgtss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgtsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgess)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpgesd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpunordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) 
{ + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpunordss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpunordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpunordsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpordss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcmpordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcmpordsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsqrtss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vsqrtsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtsd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcvtss2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_fn!(vcvtsd2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 0)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 0)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 1)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 1)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 2)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match 
self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 2)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 3)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 3)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2ss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2sd)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2ss)(self, src1, src2, dst), + _ => {} + } + } + fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match self.get_simd_arch() { + Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2sd)(self, src1, src2, dst), + _ => {} + } + } fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { match src2 { diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 67b940749c9..f8004ff6cf9 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -6,10 +6,7 @@ use std::collections::BTreeMap; use std::fmt::Debug; pub use wasmer_compiler::wasmparser::MemoryImmediate; use wasmer_compiler::wasmparser::Type as WpType; -use wasmer_compiler::{ - Architecture, CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, - Relocation, RelocationTarget, Target, TrapInformation, -}; +use wasmer_compiler::{Architecture, CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, Relocation, RelocationTarget, Target, TrapInformation, CpuFeature}; use wasmer_types::{FunctionIndex, FunctionType}; use wasmer_vm::{TrapCode, VMOffsets}; @@ -53,7 +50,7 @@ pub struct MachineStackOffset(pub usize); pub trait Machine { type GPR: Copy + Eq + Debug + Reg; type SIMD: Copy + Eq + Debug + Reg; - /// Get current assembler offset + /// Get current inner offset fn assembler_get_offset(&self) -> Offset; /// Convert from a GPR register to index register fn index_from_gpr(&self, x: Self::GPR) -> RegisterIndex; @@ -124,17 +121,17 @@ pub trait Machine { /// Like Location::Memory(GPR::RBP, -(self.stack_offset.0 as i32)) for x86_64 fn local_on_stack(&mut self, stack_offset: i32) -> Location; /// Adjust stack for locals - /// Like assembler.emit_sub(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like inner.emit_sub(Size::S64, 
Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn adjust_stack(&mut self, delta_stack_offset: u32); /// restore stack - /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like inner.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn restore_stack(&mut self, delta_stack_offset: u32); /// push callee saved register to the stack fn push_callee_saved(&mut self); /// pop callee saved register from the stack fn pop_callee_saved(&mut self); /// Pop stack of locals - /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like inner.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn pop_stack_locals(&mut self, delta_stack_offset: u32); /// Zero a location that is 32 bits fn zero_location(&mut self, size: Size, location: Location); @@ -201,7 +198,7 @@ pub trait Machine { /// Create a new `MachineState` with default values. fn new_machine_state(&self) -> MachineState; - /// Finalize the assembler + /// Finalize the inner assembler fn assembler_finalize(self) -> Vec<u8>; /// get_offset of Assembler @@ -2167,7 +2164,15 @@ pub fn gen_std_trampoline( calling_convention: CallingConvention, ) -> FunctionBody { let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + Architecture::X86_64 => { + if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + } + }, _ => unimplemented!(), }; machine.gen_std_trampoline(sig, calling_convention) @@ -2180,7 +2185,15 @@ pub fn gen_std_dynamic_import_trampoline( vmoffsets: &VMOffsets, sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + Architecture::X86_64 => { + if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + } + }, _ => unimplemented!(), }; machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) @@ -2194,7 +2207,15 @@ pub fn gen_import_call_trampoline( vmoffsets: &VMOffsets, index: FunctionIndex, sig: &FunctionType, calling_convention: CallingConvention, ) -> CustomSection { let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + Architecture::X86_64 => { + if target.cpu_features().contains(CpuFeature::AVX) { + MachineX86_64::new(Some(CpuFeature::AVX)) + } else if target.cpu_features().contains(CpuFeature::SSE42) { + MachineX86_64::new(Some(CpuFeature::SSE42)) + } else { + unimplemented!() + } + } _ => unimplemented!(), }; machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index e2e13a4c7d3..877662c7296 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -5,21 +5,59 @@ use crate::machine::Machine; use crate::machine::{MemoryImmediate, TrapTable}; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; -use dynasmrt::{x64::X64Relocation, VecAssembler}; +use dynasmrt::{x64::X64Relocation, VecAssembler, DynasmError}; use
std::collections::HashSet; +use std::ops::{Deref, DerefMut}; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ - CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, InstructionAddressMap, - Relocation, RelocationKind, RelocationTarget, SectionBody, SourceLoc, TrapInformation, + CallingConvention, CustomSection, CustomSectionProtection, + FunctionBody, InstructionAddressMap, Relocation, RelocationKind, + RelocationTarget, SectionBody, SourceLoc, TrapInformation, CpuFeature }; use wasmer_types::{FunctionIndex, FunctionType, Type}; use wasmer_vm::{TrapCode, VMOffsets}; type Assembler = VecAssembler<X64Relocation>; + +pub struct AssemblerX64 { + /// The actual inner assembler. + pub inner: Assembler, + /// The SIMD instruction set used on the target. + /// Currently only SSE 4.2 and AVX are supported. + pub simd_arch: Option<CpuFeature>, +} + +impl AssemblerX64 { + fn new(baseaddr: usize, simd_arch: Option<CpuFeature>) -> Self { + Self { + inner: Assembler::new(baseaddr), + simd_arch, + } + } + + fn finalize(self) -> Result<Vec<u8>, DynasmError> { + self.inner.finalize() + } +} + +impl Deref for AssemblerX64 { + type Target = Assembler; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for AssemblerX64 { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + type Location = AbstractLocation<GPR, XMM>; pub struct MachineX86_64 { - assembler: Assembler, + assembler: AssemblerX64, used_gprs: HashSet<GPR>, used_simd: HashSet<XMM>, trap_table: TrapTable, @@ -32,9 +70,9 @@ pub struct MachineX86_64 { } impl MachineX86_64 { - pub fn new() -> Self { + pub fn new(simd_arch: Option<CpuFeature>) -> Self { MachineX86_64 { - assembler: Assembler::new(0), + assembler: AssemblerX64::new(0, simd_arch), used_gprs: HashSet::new(), used_simd: HashSet::new(), trap_table: TrapTable::default(), @@ -44,7 +82,7 @@ impl MachineX86_64 { } pub fn emit_relaxed_binop( &mut self, - op: fn(&mut Assembler, Size, Location, Location), + op: fn(&mut AssemblerX64, Size, Location, Location), sz: Size, src: Location, dst: Location, @@ -57,11 +95,11 @@ impl MachineX86_64 { } let mode = match (src, dst) { (Location::GPR(_), Location::GPR(_)) - if (op as *const u8 == Assembler::emit_imul as *const u8) => + if (op as *const u8 == AssemblerX64::emit_imul as *const u8) => { RelaxMode::Direct } - _ if (op as *const u8 == Assembler::emit_imul as *const u8) => RelaxMode::BothToGPR, + _ if (op as *const u8 == AssemblerX64::emit_imul as *const u8) => RelaxMode::BothToGPR, (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR, (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => { @@ -70,7 +108,7 @@ impl MachineX86_64 { (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR, (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR, (Location::Imm64(_), Location::GPR(_)) - if (op as *const u8 != Assembler::emit_mov as *const u8) => + if (op as *const u8 != AssemblerX64::emit_mov as *const u8) => { RelaxMode::SrcToGPR } @@ -118,7 +156,7 @@ impl MachineX86_64 { } pub fn emit_relaxed_zx_sx( &mut self, - op: fn(&mut Assembler, Size, Location, Size, Location), + op: fn(&mut AssemblerX64, Size, Location, Size, Location), sz_src: Size, src: Location, sz_dst: Size, @@ -188,7 +226,7 @@ impl MachineX86_64 { /// I32 binary operation with both operands popped from the virtual stack.
fn emit_binop_i32( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -206,7 +244,7 @@ impl MachineX86_64 { /// I64 binary operation with both operands popped from the virtual stack. fn emit_binop_i64( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -253,7 +291,7 @@ impl MachineX86_64 { /// I64 shift with both operands popped from the virtual stack. fn emit_shift_i64( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -270,7 +308,7 @@ impl MachineX86_64 { /// Moves `loc` to a valid location for `div`/`idiv`. fn emit_relaxed_xdiv( &mut self, - op: fn(&mut Assembler, Size, Location), + op: fn(&mut AssemblerX64, Size, Location), sz: Size, loc: Location, integer_division_by_zero: Label, @@ -327,7 +365,7 @@ impl MachineX86_64 { /// I32 shift with both operands popped from the virtual stack. fn emit_shift_i32( &mut self, - f: fn(&mut Assembler, Size, Location, Location), + f: fn(&mut AssemblerX64, Size, Location, Location), loc_a: Location, loc_b: Location, ret: Location, @@ -360,7 +398,7 @@ impl MachineX86_64 { let (base_loc, bound_loc) = if imported_memories { // Imported memories require one level of indirection. self.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(self.get_vmctx_reg(), offset), Location::GPR(tmp_addr), @@ -807,7 +845,7 @@ impl MachineX86_64 { /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions. fn emit_relaxed_avx( &mut self, - op: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM), src1: Location, src2: Location, dst: Location, @@ -1553,7 +1591,7 @@ impl MachineX86_64 { } fn emit_relaxed_atomic_xchg(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_xchg, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_xchg, sz, src, dst); } } @@ -2019,7 +2057,7 @@ impl Machine for MachineX86_64 { new_machine_state() } - // assembler finalize + // inner finalize fn assembler_finalize(self) -> Vec { self.assembler.finalize().unwrap() } @@ -2273,10 +2311,10 @@ impl Machine for MachineX86_64 { // relaxed binop based... 
fn emit_relaxed_mov(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_mov, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_mov, sz, src, dst); } fn emit_relaxed_cmp(&mut self, sz: Size, src: Location, dst: Location) { - self.emit_relaxed_binop(Assembler::emit_cmp, sz, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_cmp, sz, src, dst); } fn emit_relaxed_zero_extension( &mut self, @@ -2286,9 +2324,9 @@ impl Machine for MachineX86_64 { dst: Location, ) { if (sz_src == Size::S32 || sz_src == Size::S64) && sz_dst == Size::S64 { - self.emit_relaxed_binop(Assembler::emit_mov, sz_src, src, dst); + self.emit_relaxed_binop(AssemblerX64::emit_mov, sz_src, src, dst); } else { - self.emit_relaxed_zx_sx(Assembler::emit_movzx, sz_src, src, sz_dst, dst); + self.emit_relaxed_zx_sx(AssemblerX64::emit_movzx, sz_src, src, sz_dst, dst); } } fn emit_relaxed_sign_extension( @@ -2298,17 +2336,17 @@ impl Machine for MachineX86_64 { sz_dst: Size, dst: Location, ) { - self.emit_relaxed_zx_sx(Assembler::emit_movsx, sz_src, src, sz_dst, dst); + self.emit_relaxed_zx_sx(AssemblerX64::emit_movsx, sz_src, src, sz_dst, dst); } fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_add, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_add, loc_a, loc_b, ret); } fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_sub, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_sub, loc_a, loc_b, ret); } fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_imul, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_imul, loc_a, loc_b, ret); } fn emit_binop_udiv32( &mut self, @@ -2323,7 +2361,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S32, loc_b, integer_division_by_zero, @@ -2344,7 +2382,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cdq(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S32, loc_b, integer_division_by_zero, @@ -2366,7 +2404,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S32, loc_b, integer_division_by_zero, @@ -2398,7 +2436,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cdq(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S32, loc_b, integer_division_by_zero, @@ -2410,13 +2448,13 @@ impl Machine for MachineX86_64 { offset } fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_and, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_and, loc_a, loc_b, ret); } fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_or, loc_a, loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_or, loc_a, loc_b, ret); } fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i32(Assembler::emit_xor, loc_a, 
loc_b, ret); + self.emit_binop_i32(AssemblerX64::emit_xor, loc_a, loc_b, ret); } fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_cmpop_i32_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret); @@ -2582,19 +2620,19 @@ impl Machine for MachineX86_64 { } } fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_shl, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_shl, loc_a, loc_b, ret); } fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_shr, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_shr, loc_a, loc_b, ret); } fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_sar, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_sar, loc_a, loc_b, ret); } fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_rol, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_rol, loc_a, loc_b, ret); } fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i32(Assembler::emit_ror, loc_a, loc_b, ret); + self.emit_shift_i32(AssemblerX64::emit_ror, loc_a, loc_b, ret); } fn i32_load( &mut self, @@ -2617,7 +2655,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -2646,7 +2684,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S8, Location::Memory(addr, 0), Size::S32, @@ -2676,7 +2714,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S8, Location::Memory(addr, 0), Size::S32, @@ -2706,7 +2744,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S16, Location::Memory(addr, 0), Size::S32, @@ -2736,7 +2774,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S16, Location::Memory(addr, 0), Size::S32, @@ -2848,7 +2886,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -2877,7 +2915,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S8, target_value, Location::Memory(addr, 0), @@ -2906,7 +2944,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S16, target_value, Location::Memory(addr, 0), @@ -3739,13 +3777,13 @@ impl Machine for MachineX86_64 { } fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_add, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_add, loc_a, loc_b, ret); } fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_sub, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_sub, loc_a, loc_b, ret); } fn emit_binop_mul64(&mut self, 
loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_imul, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_imul, loc_a, loc_b, ret); } fn emit_binop_udiv64( &mut self, @@ -3760,7 +3798,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S64, loc_b, integer_division_by_zero, @@ -3781,7 +3819,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cqo(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S64, loc_b, integer_division_by_zero, @@ -3803,7 +3841,7 @@ impl Machine for MachineX86_64 { self.assembler .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); let offset = self.emit_relaxed_xdiv( - Assembler::emit_div, + AssemblerX64::emit_div, Size::S64, loc_b, integer_division_by_zero, @@ -3835,7 +3873,7 @@ impl Machine for MachineX86_64 { .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); self.assembler.emit_cqo(); let offset = self.emit_relaxed_xdiv( - Assembler::emit_idiv, + AssemblerX64::emit_idiv, Size::S64, loc_b, integer_division_by_zero, @@ -3847,13 +3885,13 @@ impl Machine for MachineX86_64 { offset } fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_and, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_and, loc_a, loc_b, ret); } fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_or, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_or, loc_a, loc_b, ret); } fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_binop_i64(Assembler::emit_xor, loc_a, loc_b, ret); + self.emit_binop_i64(AssemblerX64::emit_xor, loc_a, loc_b, ret); } fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { self.emit_cmpop_i64_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret); @@ -4019,19 +4057,19 @@ impl Machine for MachineX86_64 { } } fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_shl, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_shl, loc_a, loc_b, ret); } fn i64_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_shr, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_shr, loc_a, loc_b, ret); } fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_sar, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_sar, loc_a, loc_b, ret); } fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_rol, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_rol, loc_a, loc_b, ret); } fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_shift_i64(Assembler::emit_ror, loc_a, loc_b, ret); + self.emit_shift_i64(AssemblerX64::emit_ror, loc_a, loc_b, ret); } fn i64_load( &mut self, @@ -4054,7 +4092,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(addr, 0), ret, @@ -4083,7 +4121,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { 
this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S8, Location::Memory(addr, 0), Size::S64, @@ -4113,7 +4151,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S8, Location::Memory(addr, 0), Size::S64, @@ -4143,7 +4181,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movzx, + AssemblerX64::emit_movzx, Size::S16, Location::Memory(addr, 0), Size::S64, @@ -4173,7 +4211,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S16, Location::Memory(addr, 0), Size::S64, @@ -4216,7 +4254,7 @@ impl Machine for MachineX86_64 { } } this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -4245,7 +4283,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_zx_sx( - Assembler::emit_movsx, + AssemblerX64::emit_movsx, Size::S32, Location::Memory(addr, 0), Size::S64, @@ -4399,7 +4437,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, target_value, Location::Memory(addr, 0), @@ -4428,7 +4466,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S8, target_value, Location::Memory(addr, 0), @@ -4457,7 +4495,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S16, target_value, Location::Memory(addr, 0), @@ -4486,7 +4524,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -5576,7 +5614,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, Location::Memory(addr, 0), ret, @@ -5608,7 +5646,7 @@ impl Machine for MachineX86_64 { |this, addr| { if !canonicalize { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S32, target_value, Location::Memory(addr, 0), @@ -5640,7 +5678,7 @@ impl Machine for MachineX86_64 { heap_access_oob, |this, addr| { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, Location::Memory(addr, 0), ret, @@ -5672,7 +5710,7 @@ impl Machine for MachineX86_64 { |this, addr| { if !canonicalize { this.emit_relaxed_binop( - Assembler::emit_mov, + AssemblerX64::emit_mov, Size::S64, target_value, Location::Memory(addr, 0), @@ -5894,10 +5932,10 @@ impl Machine for MachineX86_64 { } } fn convert_f64_f32(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcvtss2sd, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcvtss2sd, loc, loc, ret); } fn convert_f32_f64(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcvtsd2ss, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcvtsd2ss, loc, loc, ret); } fn f64_neg(&mut self, loc: Location, ret: Location) { if self.assembler.arch_has_fneg() { @@ -5956,47 +5994,47 @@ impl Machine for MachineX86_64 { self.release_gpr(c); } fn f64_sqrt(&mut self, loc: Location, ret: Location) { - 
self.emit_relaxed_avx(Assembler::emit_vsqrtsd, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsqrtsd, loc, loc, ret); } fn f64_trunc(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_trunc, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_trunc, loc, loc, ret); } fn f64_ceil(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_ceil, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_ceil, loc, loc, ret); } fn f64_floor(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_floor, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_floor, loc, loc, ret); } fn f64_nearest(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundsd_nearest, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_nearest, loc, loc, ret); } fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgesd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgesd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgtsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmplesd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmplesd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpltsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpneqsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpeqsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqsd, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vminsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vminsd, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6105,7 +6143,7 @@ impl Machine for MachineX86_64 { } fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vmaxsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmaxsd, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6208,16 +6246,16 @@ impl Machine for MachineX86_64 { } } fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { 
- self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vaddsd, loc_a, loc_b, ret); } fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsubsd, loc_a, loc_b, ret); } fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmulsd, loc_a, loc_b, ret); } fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vdivsd, loc_a, loc_b, ret); } fn f32_neg(&mut self, loc: Location, ret: Location) { if self.assembler.arch_has_fneg() { @@ -6260,47 +6298,47 @@ impl Machine for MachineX86_64 { .emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1)); } fn f32_sqrt(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsqrtss, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsqrtss, loc, loc, ret); } fn f32_trunc(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_trunc, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_trunc, loc, loc, ret); } fn f32_ceil(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_ceil, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_ceil, loc, loc, ret); } fn f32_floor(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_floor, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_floor, loc, loc, ret); } fn f32_nearest(&mut self, loc: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vroundss_nearest, loc, loc, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vroundss_nearest, loc, loc, ret); } fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgess, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgess, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpgtss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpless, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpless, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpltss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpltss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpneqss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vcmpeqss, loc_a, 
loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqss, loc_a, loc_b, ret); self.assembler.emit_and(Size::S32, Location::Imm32(1), ret); } fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vminss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vminss, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6409,7 +6447,7 @@ impl Machine for MachineX86_64 { } fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { if !self.arch_supports_canonicalize_nan() { - self.emit_relaxed_avx(Assembler::emit_vmaxss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmaxss, loc_a, loc_b, ret); } else { let tmp1 = self.acquire_temp_simd().unwrap(); let tmp2 = self.acquire_temp_simd().unwrap(); @@ -6512,16 +6550,16 @@ impl Machine for MachineX86_64 { } } fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vaddss, loc_a, loc_b, ret); } fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vsubss, loc_a, loc_b, ret); } fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vmulss, loc_a, loc_b, ret); } fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { - self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret); + self.emit_relaxed_avx(AssemblerX64::emit_vdivss, loc_a, loc_b, ret); } fn gen_std_trampoline( @@ -6529,7 +6567,8 @@ impl Machine for MachineX86_64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { - let mut a = Assembler::new(0); + // the cpu feature here is irrelevant + let mut a = AssemblerX64::new(0, None); // Calculate stack offset. let mut stack_offset: u32 = 0; @@ -6639,7 +6678,8 @@ impl Machine for MachineX86_64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> FunctionBody { - let mut a = Assembler::new(0); + // the cpu feature here is irrelevant + let mut a = AssemblerX64::new(0, None); // Allocate argument array. let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding @@ -6761,7 +6801,8 @@ impl Machine for MachineX86_64 { sig: &FunctionType, calling_convention: CallingConvention, ) -> CustomSection { - let mut a = Assembler::new(0); + // the cpu feature here is irrelevant + let mut a = AssemblerX64::new(0, None); // TODO: ARM entry trampoline is not emitted. From abcbc13af9d16e09941dde9b8670944c64065705 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Mon, 31 Jan 2022 11:25:18 +0100 Subject: [PATCH 2/9] [feat]: Refactored how macros emit additional movss/movsd instructions. Added >= SSE 4.1 implementation for emit_vblendvpd. 
Added some TODOs. --- lib/compiler-singlepass/src/emitter_x64.rs | 451 ++++++++------------- 1 file changed, 168 insertions(+), 283 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 9b34b6c1f84..78b84cae9ba 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -57,6 +57,11 @@ pub enum GPROrMemory { Memory(GPR, i32), } +pub enum Precision { + Single, + Double, +} + pub trait EmitterX64 { fn get_simd_arch(&self) -> Option<&CpuFeature>; fn get_label(&mut self) -> Label; fn get_offset(&self) -> Offset; @@ -473,6 +478,56 @@ macro_rules! jmp_op { } } +/// Move a single- or double-precision XMM value to another register if the source +/// and destination are not the same. +/// +/// TODO: Can we assume data is aligned and packed? If so, this function isn't necessary, +/// TODO: as we can use [`EmitterX64::emit_vmovaps`] and [`EmitterX64::emit_vmovapd`] +/// TODO: instead +fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, dst: XMM) { + if src == dst { + return; + } + match precision { + Precision::Single => match src { + XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0), + XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1), + XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2), + XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3), + XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4), + XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5), + XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6), + XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7), + XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8), + XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9), + XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10), + XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11), + XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12), + XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13), + XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14), + XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15), + } + Precision::Double => match src { + XMM::XMM0 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm0), + XMM::XMM1 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm1), + XMM::XMM2 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm2), + XMM::XMM3 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm3), + XMM::XMM4 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm4), + XMM::XMM5 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm5), + XMM::XMM6 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm6), + XMM::XMM7 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm7), + XMM::XMM8 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm8), + XMM::XMM9 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm9), + XMM::XMM10 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm10), + XMM::XMM11 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm11), + XMM::XMM12 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm12), + XMM::XMM13 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm13), + XMM::XMM14 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm14), + XMM::XMM15 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm15), + } + } +} + macro_rules! avx_fn { ($ins:ident) => { |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { @@ -521,109 +576,29 @@ macro_rules!
sse_fn { ($ins:ident) => { - |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8))), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8))), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8))) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8))) } XMMOrMemory::Memory(base, disp) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins 
Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp]) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), [Rq((base as u8)) + disp]) } } } }; ($ins:ident, $mode:expr) => { - |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8)), $mode) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8)), $mode) } XMMOrMemory::Memory(base, disp) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), 
[Rq((base as u8)) + disp], $mode), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode) } } } @@ -677,55 +652,15 @@ macro_rules! avx_i2f_64_fn { macro_rules! sse_i2f_64_fn { ($ins:ident) => { - |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rq((x as u8))), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rq((x as u8))), - } - } else { - dynasm!(emitter ; $ins Rx((dst as u8)), Rq((x as u8))) - } + move_src_to_dst(emitter, precision, src1, dst); +
dynasm!(emitter ; $ins Rx((dst as u8)), Rq((x as u8))) }, GPROrMemory::Memory(base, disp) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), QWORD [Rq((base as u8)) + disp]) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]) } } } @@ -779,55 +714,15 @@ macro_rules!
sse_i2f_32_fn { ($ins:ident) => { - |emitter: &mut AssemblerX64, src1: XMM, src2: GPROrMemory, dst: XMM| { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: GPROrMemory, dst: XMM| { match src2 { GPROrMemory::GPR(x) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rd((x as u8))), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rd((x as u8))), - } - } else { - dynasm!(emitter; $ins Rx((src1 as u8)), Rd((x as u8))) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter; $ins Rx((dst as u8)), Rd((x as u8))) }, GPROrMemory::Memory(base, disp) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) +
disp]), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), DWORD [Rq((base as u8)) + disp]) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]) } } } @@ -847,55 +742,15 @@ macro_rules! avx_round_fn { macro_rules! sse_round_fn { ($ins:ident, $mode:expr) => { - |emitter: &mut AssemblerX64, src1: XMM, src2: XMMOrMemory, dst: XMM| { + |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { - XMMOrMemory::XMM(x) =>{ - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), Rx((x as u8)), $mode), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8)), $mode) - } + XMMOrMemory::XMM(x) => { + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8)), $mode) } XMMOrMemory::Memory(base, disp) => { - if src1 != dst { - match src1 { - XMM::XMM0 => dynasm!(emitter ; movss Rx((dst as u8)), xmm0; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM1 => dynasm!(emitter ; movss Rx((dst as u8)), xmm1; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM2 => dynasm!(emitter ; movss Rx((dst as u8)), xmm2; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM3 => dynasm!(emitter ; movss Rx((dst as u8)), xmm3; $ins Rx((dst as u8)), [Rq((base as u8)) + disp],
$mode), - XMM::XMM4 => dynasm!(emitter ; movss Rx((dst as u8)), xmm4; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM5 => dynasm!(emitter ; movss Rx((dst as u8)), xmm5; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM6 => dynasm!(emitter ; movss Rx((dst as u8)), xmm6; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM7 => dynasm!(emitter ; movss Rx((dst as u8)), xmm7; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM8 => dynasm!(emitter ; movss Rx((dst as u8)), xmm8; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM9 => dynasm!(emitter ; movss Rx((dst as u8)), xmm9; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM10 => dynasm!(emitter ; movss Rx((dst as u8)), xmm10; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM11 => dynasm!(emitter ; movss Rx((dst as u8)), xmm11; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM12 => dynasm!(emitter ; movss Rx((dst as u8)), xmm12; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode), - } - } else { - dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) - } + move_src_to_dst(emitter, precision, src1, dst); + dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) } } } @@ -1192,7 +1047,7 @@ impl EmitterX64 for AssemblerX64 { } } fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) { - // Constant elimination for comparision between consts. + // Constant elimination for comparison between consts. // // Only needed for `emit_cmp`, since other binary operators actually write to `right` and `right` must // be a writable location for them. 
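The constant-elimination comment above is worth unpacking: when both operands of a comparison are compile-time immediates, the result of the compare is already known, so no `cmp` needs to be emitted at all. Below is a minimal, self-contained Rust sketch of that idea; `Loc` and `try_fold_cmp_eq` are illustrative stand-ins, not the crate's actual `Location` type or `emit_cmp` signature.

#[derive(Clone, Copy)]
enum Loc {
    Imm32(u32), // operand known at compile time
    Gpr(u8),    // operand only known at run time
}

/// Fold an equality comparison when both sides are immediates; return None
/// when a real `cmp` instruction has to be emitted instead.
fn try_fold_cmp_eq(left: Loc, right: Loc) -> Option<bool> {
    match (left, right) {
        (Loc::Imm32(l), Loc::Imm32(r)) => Some(l == r),
        _ => None,
    }
}

fn main() {
    assert_eq!(try_fold_cmp_eq(Loc::Imm32(7), Loc::Imm32(7)), Some(true));
    assert_eq!(try_fold_cmp_eq(Loc::Imm32(7), Loc::Gpr(0)), None); // must emit cmp
}

The real `emit_cmp` has to cover every operand width and condition, but the shape is the same: fold when both sides are constant, emit the instruction otherwise.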
@@ -1545,345 +1400,375 @@ impl EmitterX64 for AssemblerX64 { fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vxorps)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(xorps)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorps)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vxorpd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(xorpd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(xorpd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vaddss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(addss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vaddsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(addsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(addsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vsubss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(subss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vsubsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(subsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(subsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vmulss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(mulss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vmulsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(mulsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(mulsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vdivss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(divss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vdivsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(divsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(divsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vmaxss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => 
sse_fn!(maxss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vmaxsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(maxsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(maxsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vminss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(minss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(minss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vminsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(minsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(minsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpeqss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpeqsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpneqss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpneqsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpltss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpltsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpless)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmplesd(&mut self, src1: XMM, src2: 
XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmplesd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpgtss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpgtsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpgess)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpgesd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpunordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpunordss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpunordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpunordsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcmpordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpordss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcmpordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcmpordsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vsqrtss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(sqrtss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vsqrtsd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => 
sse_fn!(sqrtsd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(sqrtsd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcvtss2sd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vcvtsd2ss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 0)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 0)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 0)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 1)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 1)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 1)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 2)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 2)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 2)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 3)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => 
avx_round_fn!(vroundsd, 3)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 3)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 3)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2ss)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst), _ => {} } } fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2sd)(self, src1, src2, dst), + Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst), _ => {} } } fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { - match src2 { - XMMOrMemory::XMM(src2) => { - dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + // this implementation works only for sse 4.1 and greater + match self.get_simd_arch() { + Some(CpuFeature::AVX) => match src2 { + XMMOrMemory::XMM(src2) => { + // TODO: this argument order does not match the documentation?? + dynasm!( self; vblendvps Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!( self; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } } - XMMOrMemory::Memory(base, disp) => { - dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + Some(CpuFeature::SSE42) => match src2 { + XMMOrMemory::XMM(src2) => { + move_src_to_dst(self, Precision::Single, src1, dst); + dynasm!( self; blendvps Rx(dst as u8), Rx(src2 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(self, Precision::Single, src1, dst); + dynasm!( self; blendvps Rx(dst as u8), [Rq(base as u8) + disp]) + } } + _ => {} } } fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { - match src2 { - XMMOrMemory::XMM(src2) => { - dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + // this implementation works only for sse 4.1 and greater + match self.get_simd_arch() { + Some(CpuFeature::AVX) => match src2 { + XMMOrMemory::XMM(src2) => { + // TODO: this argument order does not match the documentation?? 
+ dynasm!( self; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!( self; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } } - XMMOrMemory::Memory(base, disp) => { - dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + Some(CpuFeature::SSE42) => match src2 { + XMMOrMemory::XMM(src2) => { + move_src_to_dst(self, Precision::Double, src1, dst); + dynasm!( self; blendvpd Rx(dst as u8), Rx(src2 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + move_src_to_dst(self, Precision::Double, src1, dst); + dynasm!( self; blendvpd Rx(dst as u8), [Rq(base as u8) + disp]) + } } + _ => {} } } From d42ec572fd90c155fa5f41278ff425406f8437a2 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Mon, 31 Jan 2022 11:52:06 +0100 Subject: [PATCH 3/9] [fix]: Cleanups after resolving conflicts with master --- lib/compiler-singlepass/src/machine.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index cb693585e35..ff24fbf3777 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -51,7 +51,7 @@ pub struct MachineStackOffset(pub usize); pub trait Machine { type GPR: Copy + Eq + Debug + Reg; type SIMD: Copy + Eq + Debug + Reg; - /// Get current inner offset + /// Get current assembler offset fn assembler_get_offset(&self) -> Offset; /// Convert from a GPR register to index register fn index_from_gpr(&self, x: Self::GPR) -> RegisterIndex; @@ -124,13 +124,13 @@ pub trait Machine { /// Like Location::Memory(GPR::RBP, -(self.stack_offset.0 as i32)) for x86_64 fn local_on_stack(&mut self, stack_offset: i32) -> Location; /// Adjust stack for locals - /// Like inner.emit_sub(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like assembler.emit_sub(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn adjust_stack(&mut self, delta_stack_offset: u32); /// restore stack - /// Like inner.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn restore_stack(&mut self, delta_stack_offset: u32); /// Pop stack of locals - /// Like inner.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) + /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn pop_stack_locals(&mut self, delta_stack_offset: u32); /// Zero a location that is 32 bits fn zero_location(&mut self, size: Size, location: Location); @@ -213,7 +213,7 @@ pub trait Machine { /// Create a new `MachineState` with default values.
fn new_machine_state(&self) -> MachineState; - /// Finalize the inner + /// Finalize the assembler fn assembler_finalize(self) -> Vec<u8>; /// get_offset of Assembler @@ -2190,7 +2190,7 @@ pub fn gen_std_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { + match target.triple().architecture { Architecture::X86_64 => { let machine = if target.cpu_features().contains(CpuFeature::AVX) { MachineX86_64::new(Some(CpuFeature::AVX)) @@ -2216,10 +2216,10 @@ pub fn gen_std_dynamic_import_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { + match target.triple().architecture { Architecture::X86_64 => { let machine = if target.cpu_features().contains(CpuFeature::AVX) { - MachineX86_64::new(Some(CpuFeature::AVX)); + MachineX86_64::new(Some(CpuFeature::AVX)) } else if target.cpu_features().contains(CpuFeature::SSE42) { MachineX86_64::new(Some(CpuFeature::SSE42)) } else { @@ -2242,7 +2242,7 @@ pub fn gen_import_call_trampoline( target: &Target, calling_convention: CallingConvention, ) -> CustomSection { - let machine = match target.triple().architecture { + match target.triple().architecture { Architecture::X86_64 => { let machine = if target.cpu_features().contains(CpuFeature::AVX) { MachineX86_64::new(Some(CpuFeature::AVX)) From 17d410531cf5211e60487fcd75a8b1bd62bac435 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Mon, 31 Jan 2022 11:55:05 +0100 Subject: [PATCH 4/9] [chore]: Formatting --- lib/compiler-singlepass/src/emitter_x64.rs | 67 ++++++++++++++-------- lib/compiler-singlepass/src/machine.rs | 6 +- lib/compiler-singlepass/src/machine_x64.rs | 8 +-- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 78b84cae9ba..efcdd409202 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -5,9 +5,7 @@ pub use crate::machine::{Label, Offset}; use crate::machine_x64::AssemblerX64; pub use crate::x64_decl::{GPR, XMM}; use dynasm::dynasm; -use dynasmrt::{ - AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, -}; +use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; use wasmer_compiler::CpuFeature; /// Force `dynasm!` to use the correct arch (x64) when cross-compiling.
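The three trampoline generators reworked in PATCH 3 above (`gen_std_trampoline`, `gen_std_dynamic_import_trampoline`, `gen_import_call_trampoline`) all repeat the same selection cascade: prefer AVX when the target advertises it, fall back to SSE 4.2, and otherwise give up. A minimal sketch of that policy in isolation follows; `Feature` and `best_simd_arch` are illustrative stand-ins, not the `wasmer_compiler` API.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Feature {
    Avx,   // three-operand, non-destructive VEX encodings
    Sse42, // two-operand, destructive legacy encodings
}

/// Pick the best SIMD instruction set the target supports, or None when
/// singlepass cannot target this CPU at all.
fn best_simd_arch(features: &[Feature]) -> Option<Feature> {
    if features.contains(&Feature::Avx) {
        Some(Feature::Avx)
    } else if features.contains(&Feature::Sse42) {
        Some(Feature::Sse42)
    } else {
        None
    }
}

fn main() {
    assert_eq!(best_simd_arch(&[Feature::Sse42, Feature::Avx]), Some(Feature::Avx));
    assert_eq!(best_simd_arch(&[Feature::Sse42]), Some(Feature::Sse42));
    assert_eq!(best_simd_arch(&[]), None);
}

In the patches themselves the cascade stays inlined at each call site, mirroring the `CpuFeature` check the compiler performs once up front.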
@@ -129,7 +127,6 @@ pub trait EmitterX64 { fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); - fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); @@ -506,7 +503,7 @@ fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, d XMM::XMM13 => dynasm!(emitter ; movss Rx((dst as u8)), xmm13), XMM::XMM14 => dynasm!(emitter ; movss Rx((dst as u8)), xmm14), XMM::XMM15 => dynasm!(emitter ; movss Rx((dst as u8)), xmm15), - } + }, Precision::Double => match src { XMM::XMM0 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm0), XMM::XMM1 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm1), @@ -524,7 +521,7 @@ fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, d XMM::XMM13 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm13), XMM::XMM14 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm14), XMM::XMM15 => dynasm!(emitter ; movsd Rx((dst as u8)), xmm15), - } + }, } } @@ -1397,7 +1394,7 @@ impl EmitterX64 for AssemblerX64 { _ => panic!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst), }; } - fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_fn!(vxorps)(self, src1, src2, dst), Some(CpuFeature::SSE42) => sse_fn!(xorps)(self, Precision::Single, src1, src2, dst), @@ -1638,84 +1635,108 @@ impl EmitterX64 for AssemblerX64 { fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 0)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 0)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 0)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 0)(self, Precision::Double, src1, src2, dst) + } _ => {} } } fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 1)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 1)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 1)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 1)(self, Precision::Double, src1, src2, dst) + } _ => {} } } fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 2)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + 
sse_round_fn!(roundss, 2)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 2)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 2)(self, Precision::Double, src1, src2, dst) + } _ => {} } } fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundss, 3)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundss, 3)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 3)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_round_fn!(roundsd, 3)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_round_fn!(roundsd, 3)(self, Precision::Double, src1, src2, dst) + } _ => {} } } fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_32_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_32_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_32_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst) + } _ => {} } } fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_64_fn!(cvtsi2ss)(self, Precision::Single, src1, src2, dst) + } _ => {} } } fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { match self.get_simd_arch() { Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd)(self, src1, src2, dst), - Some(CpuFeature::SSE42) => sse_i2f_64_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst), + Some(CpuFeature::SSE42) => { + sse_i2f_64_fn!(cvtsi2sd)(self, Precision::Double, src1, src2, dst) + } _ => {} } } @@ -1731,7 +1752,7 @@ impl EmitterX64 for AssemblerX64 { XMMOrMemory::Memory(base, disp) => { dynasm!( self; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) } - } + }, Some(CpuFeature::SSE42) => match src2 { XMMOrMemory::XMM(src2) => { move_src_to_dst(self, Precision::Single, src1, dst); @@ -1741,7 +1762,7 @@ impl EmitterX64 for AssemblerX64 { move_src_to_dst(self, Precision::Single, src1, dst); dynasm!( self; blendvps Rx(dst as u8), [Rq(base as u8) + disp]) } - } + }, _ => {} } } @@ -1757,7 +1778,7 @@ impl EmitterX64 for AssemblerX64 { XMMOrMemory::Memory(base, disp) => { dynasm!( self; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) } - } + 
}, Some(CpuFeature::SSE42) => match src2 { XMMOrMemory::XMM(src2) => { move_src_to_dst(self, Precision::Double, src1, dst); @@ -1767,7 +1788,7 @@ impl EmitterX64 for AssemblerX64 { move_src_to_dst(self, Precision::Double, src1, dst); dynasm!( self; blendvpd Rx(dst as u8), [Rq(base as u8) + disp]) } - } + }, _ => {} } } diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index ff24fbf3777..a4f326a59a5 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -7,7 +7,10 @@ use std::collections::BTreeMap; use std::fmt::Debug; pub use wasmer_compiler::wasmparser::MemoryImmediate; use wasmer_compiler::wasmparser::Type as WpType; -use wasmer_compiler::{Architecture, CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, Relocation, RelocationTarget, Target, TrapInformation, CpuFeature}; +use wasmer_compiler::{ + Architecture, CallingConvention, CpuFeature, CustomSection, FunctionBody, + InstructionAddressMap, Relocation, RelocationTarget, Target, TrapInformation, +}; use wasmer_types::{FunctionIndex, FunctionType}; use wasmer_vm::{TrapCode, VMOffsets}; @@ -2256,7 +2259,6 @@ pub fn gen_import_call_trampoline( Architecture::Aarch64(_) => { let machine = MachineARM64::new(); machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) - } _ => unimplemented!(), } diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index e4adca70ef5..6b25ed5defe 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -4,14 +4,14 @@ use crate::location::Location as AbstractLocation; use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; -use dynasmrt::{x64::X64Relocation, VecAssembler, DynasmError}; +use dynasmrt::{x64::X64Relocation, DynasmError, VecAssembler}; use std::collections::HashSet; use std::ops::{Deref, DerefMut}; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ - CallingConvention, CustomSection, CustomSectionProtection, - FunctionBody, InstructionAddressMap, Relocation, RelocationKind, - RelocationTarget, SectionBody, SourceLoc, TrapInformation, CpuFeature + CallingConvention, CpuFeature, CustomSection, CustomSectionProtection, FunctionBody, + InstructionAddressMap, Relocation, RelocationKind, RelocationTarget, SectionBody, SourceLoc, + TrapInformation, }; use wasmer_types::{FunctionIndex, FunctionType, Type}; use wasmer_vm::{TrapCode, VMOffsets}; From 4ee83b34980484497dbe0a7f38b20ad6823efa70 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Mon, 31 Jan 2022 11:59:51 +0100 Subject: [PATCH 5/9] [chore]: Cleaning up doc strings from an over-zealous CLion refactoring --- lib/compiler-singlepass/src/codegen.rs | 2 +- lib/compiler-singlepass/src/emitter_x64.rs | 2 +- lib/compiler-singlepass/src/machine_x64.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 4a25c05e53b..c9f98b30b64 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -5885,7 +5885,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { .mark_address_with_trap_code(TrapCode::BadSignature); self.machine.emit_illegal_op(); - // Notify the inner backend to generate necessary code at end of function. + // Notify the assembler backend to generate necessary code at end of function.
self.machine.finalize_function(); let body_len = self.machine.assembler_get_offset().0; diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index efcdd409202..5dea0d99f4d 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -75,7 +75,7 @@ pub trait EmitterX64 { fn emit_nop(&mut self); - /// A high-level inner method. Emits an instruction sequence of length `n` that is functionally + /// A high-level assembler method. Emits an instruction sequence of length `n` that is functionally /// equivalent to a `nop` instruction, without guarantee about the underlying implementation. fn emit_nop_n(&mut self, n: usize); diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 6b25ed5defe..5944cbc6d75 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -2139,7 +2139,7 @@ impl Machine for MachineX86_64 { new_machine_state() } - // inner finalize + // assembler finalize fn assembler_finalize(self) -> Vec<u8> { self.assembler.finalize().unwrap() } From 98b70d9ad1126f64b0d36d8b52ddaf232d6593e3 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Sun, 6 Feb 2022 21:17:25 +0100 Subject: [PATCH 6/9] [fix]: Fixed some errors in the register being used in SSE instructions. Changed the roundss/sd function --- lib/compiler-singlepass/src/emitter_x64.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 5dea0d99f4d..54e8268f368 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -576,7 +576,7 @@ macro_rules! sse_fn { |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { - move_src_to_dst(emitter, precision, src1, dst); + move_src_to_dst(emitter, precision, src1, dst) dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8))) } XMMOrMemory::Memory(base, disp) => { @@ -595,7 +595,7 @@ macro_rules! sse_fn { } XMMOrMemory::Memory(base, disp) => { move_src_to_dst(emitter, precision, src1, dst); - dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) + dynasm!(emitter ; $ins Rx((dst as u8)), [Rq((base as u8)) + disp], $mode) } } } @@ -657,7 +657,7 @@ macro_rules! sse_i2f_64_fn { }, GPROrMemory::Memory(base, disp) => { move_src_to_dst(emitter, precision, src1, dst); - dynasm!(emitter ; $ins Rx((src1 as u8)), QWORD [Rq((base as u8)) + disp]) + dynasm!(emitter ; $ins Rx((dst as u8)), QWORD [Rq((base as u8)) + disp]) } } } @@ -719,7 +719,7 @@ macro_rules! sse_i2f_32_fn { }, GPROrMemory::Memory(base, disp) => { move_src_to_dst(emitter, precision, src1, dst); - dynasm!(emitter ; $ins Rx((src1 as u8)), DWORD [Rq((base as u8)) + disp]) + dynasm!(emitter ; $ins Rx((dst as u8)), DWORD [Rq((base as u8)) + disp]) } } } @@ -742,13 +742,11 @@ macro_rules!
sse_round_fn { |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { + assert_eq!(src1, x); move_src_to_dst(emitter, precision, src1, dst); - dynasm!(emitter ; $ins Rx((src1 as u8)), Rx((x as u8)), $mode) - } - XMMOrMemory::Memory(base, disp) => { - move_src_to_dst(emitter, precision, src1, dst); - dynasm!(emitter ; $ins Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode) + dynasm!(emitter ; $ins Rx((dst as u8)), Rx((dst as u8)), $mode) } + XMMOrMemory::Memory(base, disp) => unreachable!(), } } } From 8283a635bd83248f880703282a62cc29911ea9c5 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Thu, 10 Feb 2022 10:48:11 +0100 Subject: [PATCH 7/9] [fix]: Fixed a few stray mistakes in the sse_fn macro --- lib/compiler-singlepass/src/emitter_x64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 54e8268f368..88104409944 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -576,7 +576,7 @@ macro_rules! sse_fn { |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { - move_src_to_dst(emitter, precision, src1, dst) + move_src_to_dst(emitter, precision, src1, dst); dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8))) } XMMOrMemory::Memory(base, disp) => { @@ -746,7 +746,7 @@ macro_rules! sse_round_fn { move_src_to_dst(emitter, precision, src1, dst); dynasm!(emitter ; $ins Rx((dst as u8)), Rx((dst as u8)), $mode) } - XMMOrMemory::Memory(base, disp) => unreachable!(), + XMMOrMemory::Memory(..) => unreachable!(), } } } From c0239987c8228108db17bf77097a7be9a41549ca Mon Sep 17 00:00:00 2001 From: R2D2 Date: Fri, 11 Feb 2022 13:32:45 +0100 Subject: [PATCH 8/9] [fix]: Added assert guard in SSE support against the case that src2 == dst --- lib/compiler-singlepass/src/emitter_x64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index 88104409944..7c773007753 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -576,6 +576,7 @@ macro_rules! 
sse_fn { |emitter: &mut AssemblerX64, precision: Precision, src1: XMM, src2: XMMOrMemory, dst: XMM| { match src2 { XMMOrMemory::XMM(x) => { + assert_ne!(x, dst); move_src_to_dst(emitter, precision, src1, dst); dynasm!(emitter ; $ins Rx((dst as u8)), Rx((x as u8))) } From 3f2fa63995b7296ad3a8e2444bb050df35e1aaf9 Mon Sep 17 00:00:00 2001 From: R2D2 Date: Thu, 17 Feb 2022 15:09:07 +0100 Subject: [PATCH 9/9] [chore]: Linting / formatting --- lib/compiler-singlepass/src/machine_x64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index 4194f81762a..a463a010f32 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -5,8 +5,8 @@ use crate::location::Reg; use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; +use dynasmrt::{x64::X64Relocation, DynasmError, VecAssembler}; use std::ops::{Deref, DerefMut}; -use dynasmrt::{x64::X64Relocation, VecAssembler, DynasmError}; use wasmer_compiler::wasmparser::Type as WpType; use wasmer_compiler::{ CallingConvention, CpuFeature, CustomSection, CustomSectionProtection, FunctionBody,
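The `assert_ne!(x, dst)` guard from PATCH 8 encodes the central hazard of the two-operand SSE forms used throughout this series: the virtual three-operand operation `dst = op(src1, src2)` is lowered as `mov dst, src1` followed by `op dst, src2`, and when `src2 == dst` the initial move clobbers `src2` before it is read. A self-contained sketch of that soundness rule follows, with `Xmm` as a stand-in for the crate's XMM register enum.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Xmm {
    Xmm0,
    Xmm1,
}

/// The lowering `mov dst, src1; op dst, src2` is only correct when the
/// initial move cannot destroy the second source operand.
fn sse_lowering_is_sound(src2: Xmm, dst: Xmm) -> bool {
    src2 != dst
}

fn main() {
    assert!(sse_lowering_is_sound(Xmm::Xmm1, Xmm::Xmm0)); // distinct registers: safe
    assert!(!sse_lowering_is_sound(Xmm::Xmm0, Xmm::Xmm0)); // mov would clobber src2 first
}

AVX's non-destructive `vop dst, src1, src2` encodings have no such hazard, which is why the AVX paths carry no equivalent guard.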