diff --git a/.cargo/config.toml b/.cargo/config.toml deleted file mode 100644 index f9b89e8f713..00000000000 --- a/.cargo/config.toml +++ /dev/null @@ -1,5 +0,0 @@ -[target.'cfg(all(target_os = "linux", target_env = "gnu"))'] -rustflags = [ - # Put the VM functions in the dynamic symbol table. - "-C", "link-arg=-Wl,-E", -] diff --git a/.github/workflows/test-sys.yaml b/.github/workflows/test-sys.yaml index 98c3699e605..462f978df95 100644 --- a/.github/workflows/test-sys.yaml +++ b/.github/workflows/test-sys.yaml @@ -166,6 +166,7 @@ jobs: shell: bash - name: Setup Rust target run: | + mkdir -p .cargo cat << EOF > .cargo/config.toml [build] target = "${{ matrix.target }}" diff --git a/Cargo.lock b/Cargo.lock index bc40b7bc02f..b011196bd7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,7 +121,7 @@ dependencies = [ "cfg-if 1.0.0", "libc", "miniz_oxide", - "object", + "object 0.27.1", "rustc-demangle", ] @@ -758,6 +758,26 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "enum-iterator" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eeac5c5edb79e4e39fe8439ef35207780a11f69c52cbe424ce3dfad4cb78de6" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "enumset" version = "1.0.8" @@ -1389,8 +1409,18 @@ name = "object" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" +dependencies = [ + "memchr", +] + +[[package]] +name = "object" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456" dependencies = [ "crc32fast", + 
"hashbrown 0.11.2", "indexmap", "memchr", ] @@ -2895,7 +2925,7 @@ dependencies = [ "lazy_static", "libc", "loupe", - "object", + "object 0.28.3", "rayon", "regex", "rustc_version 0.4.0", @@ -2991,10 +3021,12 @@ name = "wasmer-engine-dylib" version = "2.1.1" dependencies = [ "cfg-if 1.0.0", + "enum-iterator", "enumset", "leb128", "libloading", "loupe", + "object 0.28.3", "rkyv", "serde", "tempfile", @@ -3032,6 +3064,7 @@ name = "wasmer-engine-universal" version = "2.1.1" dependencies = [ "cfg-if 1.0.0", + "enum-iterator", "enumset", "leb128", "loupe", @@ -3070,7 +3103,7 @@ dependencies = [ name = "wasmer-object" version = "2.1.1" dependencies = [ - "object", + "object 0.28.3", "thiserror", "wasmer-compiler", "wasmer-types", @@ -3106,6 +3139,7 @@ dependencies = [ "backtrace", "cc", "cfg-if 1.0.0", + "enum-iterator", "indexmap", "libc", "loupe", diff --git a/Makefile b/Makefile index 2df94e8e675..4d49156fba4 100644 --- a/Makefile +++ b/Makefile @@ -211,14 +211,13 @@ compilers_engines := ifeq ($(ENABLE_CRANELIFT), 1) compilers_engines += cranelift-universal - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) ifeq ($(IS_AMD64), 1) ifneq ($(LIBC), musl) compilers_engines += cranelift-dylib endif else ifeq ($(IS_AARCH64), 1) - # The object crate doesn't support yet Darwin + Aarch64 relocations - ifneq ($(IS_DARWIN), 1) + ifneq ($(LIBC), musl) compilers_engines += cranelift-dylib endif endif @@ -230,7 +229,7 @@ endif ## ifeq ($(ENABLE_LLVM), 1) - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) ifeq ($(IS_AMD64), 1) compilers_engines += llvm-universal compilers_engines += llvm-dylib @@ -246,7 +245,7 @@ endif ## ifeq ($(ENABLE_SINGLEPASS), 1) - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) ifeq ($(IS_AMD64), 1) compilers_engines += singlepass-universal endif diff --git a/deny.toml 
b/deny.toml index e7b699965e4..397f8d2902c 100644 --- a/deny.toml +++ b/deny.toml @@ -179,6 +179,7 @@ skip = [ { name = "semver-parser", version = "=0.7.0" }, { name = "rustc_version", version = "=0.2.3" }, { name = "itoa", version = "=0.4.8" }, + { name = "object", version = "=0.27.1" }, ] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. Unlike skip, it also includes the entire tree of transitive diff --git a/lib/compiler-cranelift/src/compiler.rs b/lib/compiler-cranelift/src/compiler.rs index b5cf55482c3..a08ab57668f 100644 --- a/lib/compiler-cranelift/src/compiler.rs +++ b/lib/compiler-cranelift/src/compiler.rs @@ -295,7 +295,6 @@ impl Compiler for CraneliftCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, - None, )) } } diff --git a/lib/compiler-cranelift/src/config.rs b/lib/compiler-cranelift/src/config.rs index 397b405931d..86ad9ac059f 100644 --- a/lib/compiler-cranelift/src/config.rs +++ b/lib/compiler-cranelift/src/config.rs @@ -135,6 +135,12 @@ impl Cranelift { flags.enable("is_pic").expect("should be a valid flag"); } + // We set up libcall trampolines in engine-dylib and engine-universal. + // These trampolines are always reachable through short jumps. + flags + .enable("use_colocated_libcalls") + .expect("should be a valid flag"); + // Invert cranelift's default-on verification to instead default off. let enable_verifier = if self.enable_verifier { "true" diff --git a/lib/compiler-cranelift/src/func_environ.rs b/lib/compiler-cranelift/src/func_environ.rs index 2cdab38e1f9..95a39b13391 100644 --- a/lib/compiler-cranelift/src/func_environ.rs +++ b/lib/compiler-cranelift/src/func_environ.rs @@ -1132,9 +1132,7 @@ impl<'module_environment> BaseFuncEnvironment for FuncEnvironment<'module_enviro Ok(func.import_function(ir::ExtFuncData { name, signature, - // We currently allocate all code segments independently, so nothing - // is colocated. 
- colocated: false, + colocated: true, })) } diff --git a/lib/compiler-cranelift/src/translator/translation_utils.rs b/lib/compiler-cranelift/src/translator/translation_utils.rs index 81e940ac18c..7adbb97e95b 100644 --- a/lib/compiler-cranelift/src/translator/translation_utils.rs +++ b/lib/compiler-cranelift/src/translator/translation_utils.rs @@ -90,6 +90,7 @@ pub fn irreloc_to_relocationkind(reloc: Reloc) -> RelocationKind { Reloc::X86CallPCRel4 => RelocationKind::X86CallPCRel4, Reloc::X86CallPLTRel4 => RelocationKind::X86CallPLTRel4, Reloc::X86GOTPCRel4 => RelocationKind::X86GOTPCRel4, + Reloc::Arm64Call => RelocationKind::Arm64Call, _ => panic!("The relocation {} is not yet supported.", reloc), } } diff --git a/lib/compiler-llvm/Cargo.toml b/lib/compiler-llvm/Cargo.toml index 36e9fec1050..1669e405bae 100644 --- a/lib/compiler-llvm/Cargo.toml +++ b/lib/compiler-llvm/Cargo.toml @@ -12,12 +12,14 @@ readme = "README.md" edition = "2018" [dependencies] -wasmer-compiler = { path = "../compiler", version = "2.1.1", features = ["translator"] } +wasmer-compiler = { path = "../compiler", version = "2.1.1", features = [ + "translator", +] } wasmer-vm = { path = "../vm", version = "2.1.1" } wasmer-types = { path = "../types", version = "2.1.1" } target-lexicon = { version = "0.12.2", default-features = false } smallvec = "1.6" -object = { version = "0.27", default-features = false, features = ["read"] } +object = { version = "0.28.3", default-features = false, features = ["read"] } libc = { version = "^0.2", default-features = false } byteorder = "1" itertools = "0.10" diff --git a/lib/compiler-llvm/src/compiler.rs b/lib/compiler-llvm/src/compiler.rs index ce106898480..02df585301e 100644 --- a/lib/compiler-llvm/src/compiler.rs +++ b/lib/compiler-llvm/src/compiler.rs @@ -12,10 +12,9 @@ use rayon::iter::ParallelBridge; use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use std::sync::Arc; use wasmer_compiler::{ - Architecture, Compilation, 
CompileError, CompileModuleInfo, Compiler, CustomSection, - CustomSectionProtection, Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, - RelocationTarget, SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, - TrampolinesSection, + Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, CustomSectionProtection, + Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, RelocationTarget, + SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, }; use wasmer_types::entity::{EntityRef, PrimaryMap}; use wasmer_types::{FunctionIndex, LocalFunctionIndex, SignatureIndex}; @@ -305,37 +304,6 @@ impl Compiler for LLVMCompiler { }) .collect::>(); - let trampolines = match target.triple().architecture { - Architecture::Aarch64(_) => { - let nj = 16; - // We create a jump to an absolute 64bits address - // using x17 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register - // but Apple ask to just not use x16 - // LDR x17, #8 51 00 00 58 - // BR x17 20 02 1f d6 - // JMPADDR 00 00 00 00 00 00 00 00 - let onejump = [ - 0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0, - ]; - let trampolines = Some(TrampolinesSection::new( - SectionIndex::from_u32(module_custom_sections.len() as u32), - nj, - onejump.len(), - )); - let mut alljmps = vec![]; - for _ in 0..nj { - alljmps.extend(onejump.iter().copied()); - } - module_custom_sections.push(CustomSection { - protection: CustomSectionProtection::ReadExecute, - bytes: SectionBody::new_with_vec(alljmps), - relocations: vec![], - }); - trampolines - } - _ => None, - }; - let dwarf = if !frame_section_bytes.is_empty() { let dwarf = Some(Dwarf::new(SectionIndex::from_u32( module_custom_sections.len() as u32, @@ -400,7 +368,6 @@ impl Compiler for LLVMCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, - trampolines, )) } } diff --git a/lib/compiler-singlepass/src/compiler.rs 
b/lib/compiler-singlepass/src/compiler.rs index df9e9a8d63b..5599f9f3c65 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -187,7 +187,6 @@ impl Compiler for SinglepassCompiler { function_call_trampolines, dynamic_function_trampolines, None, - None, )) } } diff --git a/lib/compiler/src/function.rs b/lib/compiler/src/function.rs index 8eeafb1e1b6..c649ae0e3dd 100644 --- a/lib/compiler/src/function.rs +++ b/lib/compiler/src/function.rs @@ -109,33 +109,6 @@ impl Dwarf { } } -/// Trampolines section used by ARM short jump (26bits) -#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))] -#[cfg_attr( - feature = "enable-rkyv", - derive(RkyvSerialize, RkyvDeserialize, Archive) -)] -#[derive(Debug, PartialEq, Eq, Clone, MemoryUsage)] -pub struct TrampolinesSection { - /// SectionIndex for the actual Trampolines code - pub section_index: SectionIndex, - /// Number of jump slots in the section - pub slots: usize, - /// Slot size - pub size: usize, -} - -impl TrampolinesSection { - /// Creates a `Trampolines` struct with the indice for its section, and number of slots and size of slot - pub fn new(section_index: SectionIndex, slots: usize, size: usize) -> Self { - Self { - section_index, - slots, - size, - } - } -} - /// The result of compiling a WebAssembly module's functions. 
#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))] #[derive(Debug, PartialEq, Eq)] @@ -182,9 +155,6 @@ pub struct Compilation { /// Section ids corresponding to the Dwarf debug info debug: Option, - - /// Trampolines for the arch that needs it - trampolines: Option, } impl Compilation { @@ -195,7 +165,6 @@ impl Compilation { function_call_trampolines: PrimaryMap, dynamic_function_trampolines: PrimaryMap, debug: Option, - trampolines: Option, ) -> Self { Self { functions, @@ -203,7 +172,6 @@ impl Compilation { function_call_trampolines, dynamic_function_trampolines, debug, - trampolines, } } @@ -281,11 +249,6 @@ impl Compilation { pub fn get_debug(&self) -> Option { self.debug.clone() } - - /// Returns the Trampilines info. - pub fn get_trampolines(&self) -> Option { - self.trampolines.clone() - } } impl<'a> IntoIterator for &'a Compilation { diff --git a/lib/compiler/src/lib.rs b/lib/compiler/src/lib.rs index 27669f27c24..04aadd09be4 100644 --- a/lib/compiler/src/lib.rs +++ b/lib/compiler/src/lib.rs @@ -74,7 +74,7 @@ pub use crate::error::{ }; pub use crate::function::{ Compilation, CompiledFunction, CompiledFunctionFrameInfo, CustomSections, Dwarf, FunctionBody, - Functions, TrampolinesSection, + Functions, }; pub use crate::jump_table::{JumpTable, JumpTableOffsets}; pub use crate::module::CompileModuleInfo; diff --git a/lib/engine-dylib/Cargo.toml b/lib/engine-dylib/Cargo.toml index 89e0ac7bf7d..6238c077e65 100644 --- a/lib/engine-dylib/Cargo.toml +++ b/lib/engine-dylib/Cargo.toml @@ -26,6 +26,8 @@ which = "4.0" rkyv = "0.7.20" loupe = "0.1" enumset = "1.0" +enum-iterator = "0.7.0" +object = { version = "0.28.3", default-features = false, features = ["write"] } [features] # Enable the `compiler` feature if you want the engine to compile diff --git a/lib/engine-dylib/src/artifact.rs b/lib/engine-dylib/src/artifact.rs index 248a2d04cd3..081cf636b4f 100644 --- a/lib/engine-dylib/src/artifact.rs +++ b/lib/engine-dylib/src/artifact.rs @@ -3,9 +3,11 
@@ use crate::engine::{DylibEngine, DylibEngineInner}; use crate::serialize::ModuleMetadata; +use crate::trampoline::{emit_trampolines, fill_trampoline_table, WASMER_TRAMPOLINES_SYMBOL}; use enumset::EnumSet; use libloading::{Library, Symbol as LibrarySymbol}; use loupe::MemoryUsage; +use object::{write::CoffExportStyle, BinaryFormat}; use std::error::Error; use std::fs::{self, File}; use std::io::{Read, Write}; @@ -232,8 +234,31 @@ impl DylibArtifact { &metadata_binary, ); + let mut extra_filepath = None; let filepath = match maybe_obj_bytes { Some(obj_bytes) => { + extra_filepath = { + // Create a separate object file with the trampolines. + let mut obj = + get_object_for_target(&target_triple).map_err(to_compile_error)?; + emit_trampolines(&mut obj, engine.target()); + if obj.format() == BinaryFormat::Coff { + obj.add_coff_exports(CoffExportStyle::Gnu); + } + let file = tempfile::Builder::new() + .prefix("wasmer_dylib_") + .suffix(".o") + .tempfile() + .map_err(to_compile_error)?; + + // Re-open it. + let (mut file, filepath) = file.keep().map_err(to_compile_error)?; + let obj_bytes = obj.write().map_err(to_compile_error)?; + file.write_all(&obj_bytes).map_err(to_compile_error)?; + Some(filepath) + }; + + // Write the object file generated by the compiler. 
let obj_bytes = obj_bytes?; let file = tempfile::Builder::new() .prefix("wasmer_dylib_") @@ -254,6 +279,7 @@ impl DylibArtifact { function_body_inputs, )?; let mut obj = get_object_for_target(&target_triple).map_err(to_compile_error)?; + emit_trampolines(&mut obj, engine.target()); emit_data( &mut obj, WASMER_METADATA_SYMBOL, @@ -263,6 +289,9 @@ impl DylibArtifact { .map_err(to_compile_error)?; emit_compilation(&mut obj, compilation, &symbol_registry, &target_triple) .map_err(to_compile_error)?; + if obj.format() == BinaryFormat::Coff { + obj.add_coff_exports(CoffExportStyle::Gnu); + } let file = tempfile::Builder::new() .prefix("wasmer_dylib_") .suffix(".o") @@ -371,6 +400,7 @@ impl DylibArtifact { let linker = engine_inner.linker().executable(); let output = Command::new(linker) .arg(&filepath) + .args(&extra_filepath) .arg("-o") .arg(&output_filepath) .args(&target_args) @@ -385,6 +415,11 @@ impl DylibArtifact { if fs::metadata(&filepath).is_ok() { fs::remove_file(filepath).map_err(to_compile_error)?; } + if let Some(filepath) = extra_filepath { + if fs::metadata(&filepath).is_ok() { + fs::remove_file(filepath).map_err(to_compile_error)?; + } + } let output = output?; @@ -453,6 +488,13 @@ impl DylibArtifact { dylib_path: PathBuf, lib: Library, ) -> Result { + unsafe { + let trampolines_symbol: LibrarySymbol = lib + .get(WASMER_TRAMPOLINES_SYMBOL) + .expect("missing WASMER_TRAMPOLINES symbol"); + fill_trampoline_table(trampolines_symbol.into_raw().into_raw() as *mut usize); + } + let mut finished_functions: PrimaryMap = PrimaryMap::new(); for (function_local_index, _function_len) in metadata.function_body_lengths.iter() { @@ -621,7 +663,7 @@ impl DylibArtifact { DeserializeError::CorruptedBinary(format!("Library loading failed: {}", e)) })?; let shared_path: PathBuf = PathBuf::from(path); - let symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> = + let metadata_symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> = lib.get(WASMER_METADATA_SYMBOL).map_err(|e| 
{ DeserializeError::CorruptedBinary(format!( "The provided object file doesn't seem to be generated by Wasmer: {}", @@ -630,7 +672,7 @@ impl DylibArtifact { })?; use std::slice; - let metadata = &**symbol; + let metadata = &**metadata_symbol; let metadata_len = MetadataHeader::parse(metadata)?; let metadata_slice: &'static [u8] = slice::from_raw_parts(metadata.as_ptr().add(MetadataHeader::LEN), metadata_len); diff --git a/lib/engine-dylib/src/lib.rs b/lib/engine-dylib/src/lib.rs index 7aad4a628a7..b9366d99182 100644 --- a/lib/engine-dylib/src/lib.rs +++ b/lib/engine-dylib/src/lib.rs @@ -27,6 +27,7 @@ mod artifact; mod builder; mod engine; mod serialize; +mod trampoline; pub use crate::artifact::DylibArtifact; pub use crate::builder::Dylib; diff --git a/lib/engine-dylib/src/trampoline.rs b/lib/engine-dylib/src/trampoline.rs new file mode 100644 index 00000000000..12d60e2b6c3 --- /dev/null +++ b/lib/engine-dylib/src/trampoline.rs @@ -0,0 +1,166 @@ +//! Trampolines for libcalls. +//! +//! This is needed because the target of libcall relocations are not reachable +//! through normal branch instructions. +//! +//! There is an additional complexity for dynamic libraries: we can't just +//! import the symbol from the host executable because executables don't export +//! dynamic symbols (it's possible but requires special linker options). +//! +//! Instead, we export a table of function pointers in the data section which is +//! manually filled in by the runtime after the dylib is loaded. + +use enum_iterator::IntoEnumIterator; +use object::{ + elf, macho, + write::{Object, Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection}, + BinaryFormat, RelocationEncoding, RelocationKind, SymbolFlags, SymbolKind, SymbolScope, +}; +use wasmer_compiler::{Architecture, Target}; +use wasmer_vm::libcalls::LibCall; + +/// Symbol exported from the dynamic library which points to the trampoline table. 
+pub const WASMER_TRAMPOLINES_SYMBOL: &[u8] = b"WASMER_TRAMPOLINES"; + +// SystemV says that both x16 and x17 are available as intra-procedural scratch +// registers but Apple's ABI restricts us to use x17. +// ADRP x17, #... 11 00 00 90 +// LDR x17, [x17, #...] 31 02 40 f9 +// BR x17 20 02 1f d6 +const AARCH64_TRAMPOLINE: [u8; 12] = [ + 0x11, 0x00, 0x00, 0x90, 0x31, 0x02, 0x40, 0xf9, 0x20, 0x02, 0x1f, 0xd6, +]; + +// JMP [RIP + ...] FF 25 00 00 00 00 +const X86_64_TRAMPOLINE: [u8; 6] = [0xff, 0x25, 0x00, 0x00, 0x00, 0x00]; + +fn emit_trampoline( + obj: &mut Object, + text: SectionId, + trampoline_table_symbols: &[SymbolId], + libcall: LibCall, + target: &Target, +) { + let function_name = libcall.to_function_name(); + let libcall_symbol = obj.add_symbol(Symbol { + name: function_name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Linkage, + weak: false, + section: SymbolSection::Section(text), + flags: SymbolFlags::None, + }); + + match target.triple().architecture { + Architecture::Aarch64(_) => { + let (reloc1, reloc2) = match obj.format() { + BinaryFormat::Elf => ( + RelocationKind::Elf(elf::R_AARCH64_ADR_PREL_PG_HI21), + RelocationKind::Elf(elf::R_AARCH64_LDST64_ABS_LO12_NC), + ), + BinaryFormat::MachO => ( + RelocationKind::MachO { + value: macho::ARM64_RELOC_PAGE21, + relative: true, + }, + RelocationKind::MachO { + value: macho::ARM64_RELOC_PAGEOFF12, + relative: false, + }, + ), + _ => panic!("Unsupported binary format on AArch64"), + }; + let offset = obj.add_symbol_data(libcall_symbol, text, &AARCH64_TRAMPOLINE, 4); + obj.add_relocation( + text, + Relocation { + offset, + size: 32, + kind: reloc1, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as usize], + addend: 0, + }, + ) + .unwrap(); + obj.add_relocation( + text, + Relocation { + offset: offset + 4, + size: 32, + kind: reloc2, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as 
usize], + addend: 0, + }, + ) + .unwrap(); + } + Architecture::X86_64 => { + let offset = obj.add_symbol_data(libcall_symbol, text, &X86_64_TRAMPOLINE, 1); + obj.add_relocation( + text, + Relocation { + offset: offset + 2, + size: 32, + kind: RelocationKind::Relative, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as usize], + // -4 because RIP-relative addressing starts from the end of + // the instruction. + addend: -4, + }, + ) + .unwrap(); + } + arch => panic!("Unsupported architecture: {}", arch), + }; +} + +/// Emits the libcall trampolines and table to the object file. +pub fn emit_trampolines(obj: &mut Object, target: &Target) { + let text = obj.section_id(StandardSection::Text); + let bss = obj.section_id(StandardSection::UninitializedData); + + let trampoline_table = obj.add_symbol(Symbol { + name: WASMER_TRAMPOLINES_SYMBOL.to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Data, + scope: SymbolScope::Dynamic, + weak: false, + section: SymbolSection::Section(bss), + flags: SymbolFlags::None, + }); + let table_offset = + obj.add_symbol_bss(trampoline_table, bss, LibCall::VARIANT_COUNT as u64 * 8, 8); + + // Create a symbol for each entry in the table. We could avoid this and use + // an addend, but this isn't supported in all object formats. + let mut trampoline_table_symbols = vec![]; + for libcall in LibCall::into_enum_iter() { + trampoline_table_symbols.push(obj.add_symbol(Symbol { + name: format!("__WASMER_TRAMPOLINE{}", libcall as usize).into_bytes(), + value: table_offset + libcall as u64 * 8, + size: 0, + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(bss), + flags: SymbolFlags::None, + })); + } + + for libcall in LibCall::into_enum_iter() { + emit_trampoline(obj, text, &trampoline_table_symbols, libcall, target); + } +} + +/// Fills in the libcall trampoline table at the given address. 
+pub unsafe fn fill_trampoline_table(table: *mut usize) { + for libcall in LibCall::into_enum_iter() { + *table.add(libcall as usize) = libcall.function_pointer(); + } +} diff --git a/lib/engine-universal/Cargo.toml b/lib/engine-universal/Cargo.toml index 6fc93218e88..fc5bd4decdc 100644 --- a/lib/engine-universal/Cargo.toml +++ b/lib/engine-universal/Cargo.toml @@ -27,6 +27,7 @@ leb128 = "0.2" rkyv = "0.7.20" loupe = "0.1" enumset = "1.0" +enum-iterator = "0.7.0" [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["winnt", "impl-default"] } diff --git a/lib/engine-universal/src/artifact.rs b/lib/engine-universal/src/artifact.rs index 8163b8718d0..10c19ebaed4 100644 --- a/lib/engine-universal/src/artifact.rs +++ b/lib/engine-universal/src/artifact.rs @@ -6,6 +6,7 @@ use crate::link::link_module; #[cfg(feature = "compiler")] use crate::serialize::SerializableCompilation; use crate::serialize::SerializableModule; +use crate::trampoline::{libcall_trampoline_len, make_libcall_trampolines}; use enumset::EnumSet; use loupe::MemoryUsage; use std::mem; @@ -111,6 +112,14 @@ impl UniversalArtifact { let frame_infos = compilation.get_frame_info(); + // Synthesize a custom section to hold the libcall trampolines. 
+ let mut custom_sections = compilation.get_custom_sections(); + let mut custom_section_relocations = compilation.get_custom_section_relocations(); + let libcall_trampolines_section = make_libcall_trampolines(engine.target()); + custom_section_relocations.push(libcall_trampolines_section.relocations.clone()); + let libcall_trampolines = custom_sections.push(libcall_trampolines_section); + let libcall_trampoline_len = libcall_trampoline_len(engine.target()) as u32; + let serializable_compilation = SerializableCompilation { function_bodies: compilation.get_function_bodies(), function_relocations: compilation.get_relocations(), @@ -118,10 +127,11 @@ impl UniversalArtifact { function_frame_info: frame_infos, function_call_trampolines, dynamic_function_trampolines, - custom_sections: compilation.get_custom_sections(), - custom_section_relocations: compilation.get_custom_section_relocations(), + custom_sections, + custom_section_relocations, debug: compilation.get_debug(), - trampolines: compilation.get_trampolines(), + libcall_trampolines, + libcall_trampoline_len, }; let serializable = SerializableModule { compilation: serializable_compilation, @@ -187,7 +197,8 @@ impl UniversalArtifact { serializable.compilation.function_relocations.clone(), &custom_sections, &serializable.compilation.custom_section_relocations, - &serializable.compilation.trampolines, + serializable.compilation.libcall_trampolines, + serializable.compilation.libcall_trampoline_len as usize, ); // Compute indices into the shared signature table. 
diff --git a/lib/engine-universal/src/lib.rs b/lib/engine-universal/src/lib.rs index 2bbafadd4db..8685b846180 100644 --- a/lib/engine-universal/src/lib.rs +++ b/lib/engine-universal/src/lib.rs @@ -30,6 +30,7 @@ mod code_memory; mod engine; mod link; mod serialize; +mod trampoline; mod unwind; pub use crate::artifact::UniversalArtifact; diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs index cb7b7245194..4f2dd083399 100644 --- a/lib/engine-universal/src/link.rs +++ b/lib/engine-universal/src/link.rs @@ -1,86 +1,40 @@ //! Linking for Universal-compiled code. -use std::collections::HashMap; +use crate::trampoline::get_libcall_trampoline; use std::ptr::{read_unaligned, write_unaligned}; use wasmer_compiler::{ JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations, - SectionIndex, TrampolinesSection, + SectionIndex, }; use wasmer_engine::FunctionExtent; use wasmer_types::entity::{EntityRef, PrimaryMap}; use wasmer_types::{LocalFunctionIndex, ModuleInfo}; use wasmer_vm::SectionBodyPtr; -/// Add a new trampoline address, given the base adress of the Section. 
Return the address of the jump -/// The trampoline itself still have to be writen -fn trampolines_add( - map: &mut HashMap, - trampoline: &TrampolinesSection, - address: usize, - baseaddress: usize, -) -> usize { - if let Some(target) = map.get(&address) { - return *target; - } - let ret = map.len(); - if ret == trampoline.slots { - panic!("No more slot in Trampolines"); - } - map.insert(address, baseaddress + ret * trampoline.size); - baseaddress + ret * trampoline.size -} - -fn use_trampoline( - address: usize, - allocated_sections: &PrimaryMap, - trampolines: &Option, - map: &mut HashMap, -) -> Option { - match trampolines { - Some(trampolines) => Some(trampolines_add( - map, - trampolines, - address, - *allocated_sections[trampolines.section_index] as usize, - )), - _ => None, - } -} - -fn fill_trampolin_map( - allocated_sections: &PrimaryMap, - trampolines: &Option, -) -> HashMap { - let mut map: HashMap = HashMap::new(); - match trampolines { - Some(trampolines) => { - let baseaddress = *allocated_sections[trampolines.section_index] as usize; - for i in 0..trampolines.size { - let jmpslot: usize = unsafe { - read_unaligned((baseaddress + i * trampolines.size + 8) as *mut usize) - }; - if jmpslot != 0 { - map.insert(jmpslot, baseaddress + i * trampolines.size); - } - } - } - _ => {} - }; - map -} - fn apply_relocation( body: usize, r: &Relocation, allocated_functions: &PrimaryMap, jt_offsets: &PrimaryMap, allocated_sections: &PrimaryMap, - trampolines: &Option, - trampolines_map: &mut HashMap, + libcall_trampolines: SectionIndex, + libcall_trampoline_len: usize, ) { let target_func_address: usize = match r.reloc_target { RelocationTarget::LocalFunc(index) => *allocated_functions[index].ptr as usize, - RelocationTarget::LibCall(libcall) => libcall.function_pointer(), + RelocationTarget::LibCall(libcall) => { + // Use the direct target of the libcall if the relocation supports + // a full 64-bit address. Otherwise use a trampoline. 
+ if r.kind == RelocationKind::Abs8 || r.kind == RelocationKind::X86PCRel8 { + libcall.function_pointer() + } else { + get_libcall_trampoline( + libcall, + allocated_sections[libcall_trampolines].0 as usize, + libcall_trampoline_len, + ) + } + } RelocationTarget::CustomSection(custom_section) => { *allocated_sections[custom_section] as usize } @@ -115,26 +69,15 @@ fn apply_relocation( }, RelocationKind::X86PCRelRodata4 => {} RelocationKind::Arm64Call => unsafe { - let (reloc_address, mut reloc_delta) = r.for_address(body, target_func_address as u64); + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); if (reloc_delta as i64).abs() >= 0x1000_0000 { - let new_address = match use_trampoline( - target_func_address, - allocated_sections, - trampolines, - trampolines_map, - ) { - Some(new_address) => new_address, - _ => panic!( - "Relocation to big for {:?} for {:?} with {:x}, current val {:x}", - r.kind, - r.reloc_target, - reloc_delta, - read_unaligned(reloc_address as *mut u32) - ), - }; - write_unaligned((new_address + 8) as *mut u64, target_func_address as u64); // write the jump address - let (_, new_delta) = r.for_address(body, new_address as u64); - reloc_delta = new_delta; + panic!( + "Relocation to big for {:?} for {:?} with {:x}, current val {:x}", + r.kind, + r.reloc_target, + reloc_delta, + read_unaligned(reloc_address as *mut u32) + ) } let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff) | read_unaligned(reloc_address as *mut u32); @@ -180,9 +123,9 @@ pub fn link_module( function_relocations: Relocations, allocated_sections: &PrimaryMap, section_relocations: &PrimaryMap>, - trampolines: &Option, + libcall_trampolines: SectionIndex, + trampoline_len: usize, ) { - let mut trampolines_map = fill_trampolin_map(allocated_sections, trampolines); for (i, section_relocs) in section_relocations.iter() { let body = *allocated_sections[i] as usize; for r in section_relocs { @@ -192,8 +135,8 @@ pub fn link_module( 
allocated_functions, jt_offsets, allocated_sections, - trampolines, - &mut trampolines_map, + libcall_trampolines, + trampoline_len, ); } } @@ -206,8 +149,8 @@ pub fn link_module( allocated_functions, jt_offsets, allocated_sections, - trampolines, - &mut trampolines_map, + libcall_trampolines, + trampoline_len, ); } } diff --git a/lib/engine-universal/src/serialize.rs b/lib/engine-universal/src/serialize.rs index a15f68effa0..9bb450c1bdc 100644 --- a/lib/engine-universal/src/serialize.rs +++ b/lib/engine-universal/src/serialize.rs @@ -6,7 +6,7 @@ use rkyv::{ }; use wasmer_compiler::{ CompileModuleInfo, CompiledFunctionFrameInfo, CustomSection, Dwarf, FunctionBody, - JumpTableOffsets, Relocation, SectionIndex, TrampolinesSection, + JumpTableOffsets, Relocation, SectionIndex, }; use wasmer_engine::{DeserializeError, SerializeError}; use wasmer_types::entity::PrimaryMap; @@ -25,8 +25,10 @@ pub struct SerializableCompilation { pub custom_section_relocations: PrimaryMap>, // The section indices corresponding to the Dwarf debug info pub debug: Option, - // the Trampoline for Arm arch - pub trampolines: Option, + // Custom section containing libcall trampolines. + pub libcall_trampolines: SectionIndex, + // Length of each libcall trampoline. + pub libcall_trampoline_len: u32, } /// Serializable struct that is able to serialize from and to diff --git a/lib/engine-universal/src/trampoline.rs b/lib/engine-universal/src/trampoline.rs new file mode 100644 index 00000000000..e90a7c92f98 --- /dev/null +++ b/lib/engine-universal/src/trampoline.rs @@ -0,0 +1,88 @@ +//! Trampolines for libcalls. +//! +//! This is needed because the target of libcall relocations are not reachable +//! through normal branch instructions. 
+
+use enum_iterator::IntoEnumIterator;
+use wasmer_compiler::{
+    Architecture, CustomSection, CustomSectionProtection, Relocation, RelocationKind,
+    RelocationTarget, SectionBody, Target,
+};
+use wasmer_vm::libcalls::LibCall;
+
+// SystemV says that both x16 and x17 are available as intra-procedural scratch
+// registers but Apple's ABI restricts us to use x17.
+// LDR x17, [PC, #8]  51 00 00 58
+// BR x17             20 02 1f d6
+// JMPADDR            00 00 00 00 00 00 00 00
+const AARCH64_TRAMPOLINE: [u8; 16] = [
+    0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+// 2 padding bytes follow the 6-byte JMP so the address sits at offset 8; the disp32 must be 2.
+// JMP [RIP + 2]      FF 25 02 00 00 00 [00 00]
+// 64-bit ADDR        00 00 00 00 00 00 00 00
+const X86_64_TRAMPOLINE: [u8; 16] = [
+    0xff, 0x25, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+];
+
+fn make_trampoline(
+    target: &Target,
+    libcall: LibCall,
+    code: &mut Vec<u8>,
+    relocations: &mut Vec<Relocation>,
+) {
+    match target.triple().architecture {
+        Architecture::Aarch64(_) => {
+            code.extend(&AARCH64_TRAMPOLINE);
+            relocations.push(Relocation {
+                kind: RelocationKind::Abs8,
+                reloc_target: RelocationTarget::LibCall(libcall),
+                offset: code.len() as u32 - 8,
+                addend: 0,
+            });
+        }
+        Architecture::X86_64 => {
+            code.extend(&X86_64_TRAMPOLINE);
+            relocations.push(Relocation {
+                kind: RelocationKind::Abs8,
+                reloc_target: RelocationTarget::LibCall(libcall),
+                offset: code.len() as u32 - 8,
+                addend: 0,
+            });
+        }
+        arch => panic!("Unsupported architecture: {}", arch),
+    };
+}
+
+/// Returns the length in bytes of one libcall trampoline; panics on unsupported architectures.
+pub fn libcall_trampoline_len(target: &Target) -> usize {
+    match target.triple().architecture {
+        Architecture::Aarch64(_) => AARCH64_TRAMPOLINE.len(),
+        Architecture::X86_64 => X86_64_TRAMPOLINE.len(),
+        arch => panic!("Unsupported architecture: {}", arch),
+    }
+}
+
+/// Creates a custom section containing the libcall trampolines.
+pub fn make_libcall_trampolines(target: &Target) -> CustomSection { + let mut code = vec![]; + let mut relocations = vec![]; + for libcall in LibCall::into_enum_iter() { + make_trampoline(target, libcall, &mut code, &mut relocations); + } + CustomSection { + protection: CustomSectionProtection::ReadExecute, + bytes: SectionBody::new_with_vec(code), + relocations, + } +} + +/// Returns the address of a trampoline in the libcall trampolines section. +pub fn get_libcall_trampoline( + libcall: LibCall, + libcall_trampolines: usize, + libcall_trampoline_len: usize, +) -> usize { + libcall_trampolines + libcall as usize * libcall_trampoline_len +} diff --git a/lib/object/Cargo.toml b/lib/object/Cargo.toml index 706257d31be..892e80da87c 100644 --- a/lib/object/Cargo.toml +++ b/lib/object/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" wasmer-types = { path = "../types", version = "2.1.1" } wasmer-compiler = { path = "../compiler", version = "2.1.1", default-features = false, features = [ "std", - "translator" + "translator", ] } -object = { version = "0.27", default-features = false, features = ["write"] } +object = { version = "0.28.3", default-features = false, features = ["write"] } thiserror = "1.0" diff --git a/lib/object/src/module.rs b/lib/object/src/module.rs index 4094ca08752..65fd8b490a2 100644 --- a/lib/object/src/module.rs +++ b/lib/object/src/module.rs @@ -3,7 +3,8 @@ use object::write::{ Object, Relocation, StandardSection, StandardSegment, Symbol as ObjSymbol, SymbolSection, }; use object::{ - elf, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope, + elf, macho, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, + SymbolScope, }; use wasmer_compiler::{ Architecture, BinaryFormat, Compilation, CustomSectionProtection, Endianness, @@ -292,7 +293,14 @@ pub fn emit_compilation( // Reloc::X86PCRelRodata4 => { // } Reloc::Arm64Call => ( - RelocationKind::Elf(elf::R_AARCH64_CALL26), + match obj.format() { + 
object::BinaryFormat::Elf => RelocationKind::Elf(elf::R_AARCH64_CALL26), + object::BinaryFormat::MachO => RelocationKind::MachO { + value: macho::ARM64_RELOC_BRANCH26, + relative: true, + }, + fmt => panic!("unsupported binary format {:?}", fmt), + }, RelocationEncoding::Generic, 32, ), diff --git a/lib/vm/Cargo.toml b/lib/vm/Cargo.toml index 0ec6da7b93a..38f24bd9df5 100644 --- a/lib/vm/Cargo.toml +++ b/lib/vm/Cargo.toml @@ -23,6 +23,7 @@ backtrace = "0.3" serde = { version = "1.0", features = ["derive", "rc"] } rkyv = { version = "0.7.20", optional = true } loupe = { version = "0.1", features = ["enable-indexmap"] } +enum-iterator = "0.7.0" [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["winbase", "memoryapi", "errhandlingapi"] } diff --git a/lib/vm/src/libcalls.rs b/lib/vm/src/libcalls.rs index 301568e4b2e..89e7ffd5e25 100644 --- a/lib/vm/src/libcalls.rs +++ b/lib/vm/src/libcalls.rs @@ -43,6 +43,7 @@ use crate::table::{RawTableElement, TableElement}; use crate::trap::{raise_lib_trap, Trap, TrapCode}; use crate::vmcontext::VMContext; use crate::VMExternRef; +use enum_iterator::IntoEnumIterator; use loupe::MemoryUsage; #[cfg(feature = "enable-rkyv")] use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize}; @@ -688,7 +689,9 @@ pub static wasmer_vm_probestack: unsafe extern "C" fn() = PROBESTACK; feature = "enable-rkyv", derive(RkyvSerialize, RkyvDeserialize, Archive) )] -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage, IntoEnumIterator, +)] pub enum LibCall { /// ceil.f32 CeilF32, diff --git a/tests/ignores.txt b/tests/ignores.txt index 7f72118a661..5e1dd68bfee 100644 --- a/tests/ignores.txt +++ b/tests/ignores.txt @@ -33,14 +33,9 @@ llvm traps::start_trap_pretty dylib traps::start_trap_pretty aarch64 traps::start_trap_pretty -cranelift multi_value_imports::dylib # Needs 
investigation singlepass multi_value_imports::dylib # Singlepass doesn't support multivalue singlepass multi_value_imports::dynamic # Singlepass doesn't support multivalue -# LLVM doesn't fully work in macOS M1 -llvm+universal+macos+aarch64 * # We are using the object crate, it was not fully supporting aarch64 relocations emitted by LLVM. Needs reassesment -llvm+dylib+macos+aarch64 * # Tests seem to be randomly failing - # TODO: We need to fix this in ARM. The issue is caused by libunwind overflowing # the stack while creating the stacktrace. # https://github.com/rust-lang/backtrace-rs/issues/356