From ffb9cd33b9b76aa968632c8d693a41437ca426cd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 11 Jan 2022 19:34:35 +0000 Subject: [PATCH] Use trampolines for all libcalls in engine-universal and engine-dylib In both of these engines, the compiled code may be loaded in memory far from the Wasmer runtime which means that libcalls may not be reachable through the normal relocation types. Instead a trampoline is needed to allow reaching any address in the 64-bit address space. In the case of engine-dylib, this is even worse since the symbols are not exported by the executable without some special linker flags. The solution here is to manually patch in the addresses at load time into a data table of function pointers. --- .cargo/config.toml | 5 - .github/workflows/test-sys.yaml | 1 + Cargo.lock | 40 ++++- Makefile | 9 +- deny.toml | 1 + lib/compiler-cranelift/src/compiler.rs | 1 - lib/compiler-cranelift/src/config.rs | 6 + lib/compiler-cranelift/src/func_environ.rs | 4 +- .../src/translator/translation_utils.rs | 1 + lib/compiler-llvm/Cargo.toml | 6 +- lib/compiler-llvm/src/compiler.rs | 39 +--- lib/compiler-singlepass/src/compiler.rs | 1 - lib/compiler/src/function.rs | 37 ---- lib/compiler/src/lib.rs | 2 +- lib/engine-dylib/Cargo.toml | 2 + lib/engine-dylib/src/artifact.rs | 46 ++++- lib/engine-dylib/src/lib.rs | 1 + lib/engine-dylib/src/trampoline.rs | 166 ++++++++++++++++++ lib/engine-universal/Cargo.toml | 1 + lib/engine-universal/src/artifact.rs | 19 +- lib/engine-universal/src/lib.rs | 1 + lib/engine-universal/src/link.rs | 119 ++++--------- lib/engine-universal/src/serialize.rs | 8 +- lib/engine-universal/src/trampoline.rs | 88 ++++++++++ lib/object/Cargo.toml | 4 +- lib/object/src/module.rs | 12 +- lib/vm/Cargo.toml | 1 + lib/vm/src/libcalls.rs | 5 +- tests/ignores.txt | 5 - 29 files changed, 430 insertions(+), 201 deletions(-) delete mode 100644 .cargo/config.toml create mode 100644 lib/engine-dylib/src/trampoline.rs create mode 100644 
lib/engine-universal/src/trampoline.rs diff --git a/.cargo/config.toml b/.cargo/config.toml deleted file mode 100644 index f9b89e8f713..00000000000 --- a/.cargo/config.toml +++ /dev/null @@ -1,5 +0,0 @@ -[target.'cfg(all(target_os = "linux", target_env = "gnu"))'] -rustflags = [ - # Put the VM functions in the dynamic symbol table. - "-C", "link-arg=-Wl,-E", -] diff --git a/.github/workflows/test-sys.yaml b/.github/workflows/test-sys.yaml index 98c3699e605..462f978df95 100644 --- a/.github/workflows/test-sys.yaml +++ b/.github/workflows/test-sys.yaml @@ -166,6 +166,7 @@ jobs: shell: bash - name: Setup Rust target run: | + mkdir -p .cargo cat << EOF > .cargo/config.toml [build] target = "${{ matrix.target }}" diff --git a/Cargo.lock b/Cargo.lock index 1b980b95923..aa9208220b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,7 +121,7 @@ dependencies = [ "cfg-if 1.0.0", "libc", "miniz_oxide", - "object", + "object 0.27.1", "rustc-demangle", ] @@ -779,6 +779,26 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "enum-iterator" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eeac5c5edb79e4e39fe8439ef35207780a11f69c52cbe424ce3dfad4cb78de6" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "enumset" version = "1.0.8" @@ -1419,8 +1439,18 @@ name = "object" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" +dependencies = [ + "memchr", +] + +[[package]] +name = "object" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456" dependencies = [ "crc32fast", + "hashbrown 0.11.2", "indexmap", "memchr", ] @@ -2940,7 +2970,7 @@ dependencies = [ "lazy_static", "libc", "loupe", - "object", + "object 0.28.3", "rayon", "regex", "rustc_version 0.4.0", @@ -3036,10 +3066,12 @@ name = "wasmer-engine-dylib" version = "2.1.1" dependencies = [ "cfg-if 1.0.0", + "enum-iterator", "enumset", "leb128", "libloading", "loupe", + "object 0.28.3", "rkyv", "serde", "tempfile", @@ -3077,6 +3109,7 @@ name = "wasmer-engine-universal" version = "2.1.1" dependencies = [ "cfg-if 1.0.0", + "enum-iterator", "enumset", "leb128", "loupe", @@ -3115,7 +3148,7 @@ dependencies = [ name = "wasmer-object" version = "2.1.1" dependencies = [ - "object", + "object 0.28.3", "thiserror", "wasmer-compiler", "wasmer-types", @@ -3151,6 +3184,7 @@ dependencies = [ "backtrace", "cc", "cfg-if 1.0.0", + "enum-iterator", "indexmap", "libc", "loupe", diff --git a/Makefile b/Makefile index 2df94e8e675..4d49156fba4 100644 --- a/Makefile +++ b/Makefile @@ -211,14 +211,13 @@ compilers_engines := ifeq ($(ENABLE_CRANELIFT), 1) compilers_engines += cranelift-universal - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) ifeq ($(IS_AMD64), 1) ifneq ($(LIBC), musl) compilers_engines += cranelift-dylib endif else ifeq ($(IS_AARCH64), 1) - # The object crate doesn't support yet Darwin + Aarch64 relocations - ifneq ($(IS_DARWIN), 1) + ifneq ($(LIBC), musl) compilers_engines += cranelift-dylib endif endif @@ -230,7 +229,7 @@ endif ## ifeq ($(ENABLE_LLVM), 1) - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) ifeq ($(IS_AMD64), 1) compilers_engines += llvm-universal compilers_engines += llvm-dylib @@ -246,7 +245,7 @@ endif ## ifeq ($(ENABLE_SINGLEPASS), 1) - ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX))) + ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX))) 
ifeq ($(IS_AMD64), 1) compilers_engines += singlepass-universal endif diff --git a/deny.toml b/deny.toml index 396aa0be376..066eddbfb2f 100644 --- a/deny.toml +++ b/deny.toml @@ -179,6 +179,7 @@ skip = [ { name = "semver-parser", version = "=0.7.0" }, { name = "rustc_version", version = "=0.2.3" }, { name = "itoa", version = "=0.4.8" }, + { name = "object", version = "=0.27.1" }, ] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. Unlike skip, it also includes the entire tree of transitive diff --git a/lib/compiler-cranelift/src/compiler.rs b/lib/compiler-cranelift/src/compiler.rs index b5cf55482c3..a08ab57668f 100644 --- a/lib/compiler-cranelift/src/compiler.rs +++ b/lib/compiler-cranelift/src/compiler.rs @@ -295,7 +295,6 @@ impl Compiler for CraneliftCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, - None, )) } } diff --git a/lib/compiler-cranelift/src/config.rs b/lib/compiler-cranelift/src/config.rs index 397b405931d..86ad9ac059f 100644 --- a/lib/compiler-cranelift/src/config.rs +++ b/lib/compiler-cranelift/src/config.rs @@ -135,6 +135,12 @@ impl Cranelift { flags.enable("is_pic").expect("should be a valid flag"); } + // We set up libcall trampolines in engine-dylib and engine-universal. + // These trampolines are always reachable through short jumps. + flags + .enable("use_colocated_libcalls") + .expect("should be a valid flag"); + // Invert cranelift's default-on verification to instead default off. 
let enable_verifier = if self.enable_verifier { "true" diff --git a/lib/compiler-cranelift/src/func_environ.rs b/lib/compiler-cranelift/src/func_environ.rs index 2cdab38e1f9..95a39b13391 100644 --- a/lib/compiler-cranelift/src/func_environ.rs +++ b/lib/compiler-cranelift/src/func_environ.rs @@ -1132,9 +1132,7 @@ impl<'module_environment> BaseFuncEnvironment for FuncEnvironment<'module_enviro Ok(func.import_function(ir::ExtFuncData { name, signature, - // We currently allocate all code segments independently, so nothing - // is colocated. - colocated: false, + colocated: true, })) } diff --git a/lib/compiler-cranelift/src/translator/translation_utils.rs b/lib/compiler-cranelift/src/translator/translation_utils.rs index 81e940ac18c..7adbb97e95b 100644 --- a/lib/compiler-cranelift/src/translator/translation_utils.rs +++ b/lib/compiler-cranelift/src/translator/translation_utils.rs @@ -90,6 +90,7 @@ pub fn irreloc_to_relocationkind(reloc: Reloc) -> RelocationKind { Reloc::X86CallPCRel4 => RelocationKind::X86CallPCRel4, Reloc::X86CallPLTRel4 => RelocationKind::X86CallPLTRel4, Reloc::X86GOTPCRel4 => RelocationKind::X86GOTPCRel4, + Reloc::Arm64Call => RelocationKind::Arm64Call, _ => panic!("The relocation {} is not yet supported.", reloc), } } diff --git a/lib/compiler-llvm/Cargo.toml b/lib/compiler-llvm/Cargo.toml index 036146db95e..19baaed995c 100644 --- a/lib/compiler-llvm/Cargo.toml +++ b/lib/compiler-llvm/Cargo.toml @@ -12,12 +12,14 @@ readme = "README.md" edition = "2018" [dependencies] -wasmer-compiler = { path = "../compiler", version = "=2.1.1", features = ["translator"] } +wasmer-compiler = { path = "../compiler", version = "=2.1.1", features = [ + "translator", +] } wasmer-vm = { path = "../vm", version = "=2.1.1" } wasmer-types = { path = "../types", version = "=2.1.1" } target-lexicon = { version = "0.12.2", default-features = false } smallvec = "1.6" -object = { version = "0.27", default-features = false, features = ["read"] } +object = { version = "0.28.3", 
default-features = false, features = ["read"] } libc = { version = "^0.2", default-features = false } byteorder = "1" itertools = "0.10" diff --git a/lib/compiler-llvm/src/compiler.rs b/lib/compiler-llvm/src/compiler.rs index ce106898480..02df585301e 100644 --- a/lib/compiler-llvm/src/compiler.rs +++ b/lib/compiler-llvm/src/compiler.rs @@ -12,10 +12,9 @@ use rayon::iter::ParallelBridge; use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use std::sync::Arc; use wasmer_compiler::{ - Architecture, Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, - CustomSectionProtection, Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, - RelocationTarget, SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, - TrampolinesSection, + Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, CustomSectionProtection, + Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, RelocationTarget, + SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, }; use wasmer_types::entity::{EntityRef, PrimaryMap}; use wasmer_types::{FunctionIndex, LocalFunctionIndex, SignatureIndex}; @@ -305,37 +304,6 @@ impl Compiler for LLVMCompiler { }) .collect::>(); - let trampolines = match target.triple().architecture { - Architecture::Aarch64(_) => { - let nj = 16; - // We create a jump to an absolute 64bits address - // using x17 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register - // but Apple ask to just not use x16 - // LDR x17, #8 51 00 00 58 - // BR x17 20 02 1f d6 - // JMPADDR 00 00 00 00 00 00 00 00 - let onejump = [ - 0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0, - ]; - let trampolines = Some(TrampolinesSection::new( - SectionIndex::from_u32(module_custom_sections.len() as u32), - nj, - onejump.len(), - )); - let mut alljmps = vec![]; - for _ in 0..nj { - alljmps.extend(onejump.iter().copied()); - } - 
module_custom_sections.push(CustomSection { - protection: CustomSectionProtection::ReadExecute, - bytes: SectionBody::new_with_vec(alljmps), - relocations: vec![], - }); - trampolines - } - _ => None, - }; - let dwarf = if !frame_section_bytes.is_empty() { let dwarf = Some(Dwarf::new(SectionIndex::from_u32( module_custom_sections.len() as u32, @@ -400,7 +368,6 @@ impl Compiler for LLVMCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, - trampolines, )) } } diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index df9e9a8d63b..5599f9f3c65 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -187,7 +187,6 @@ impl Compiler for SinglepassCompiler { function_call_trampolines, dynamic_function_trampolines, None, - None, )) } } diff --git a/lib/compiler/src/function.rs b/lib/compiler/src/function.rs index 8eeafb1e1b6..c649ae0e3dd 100644 --- a/lib/compiler/src/function.rs +++ b/lib/compiler/src/function.rs @@ -109,33 +109,6 @@ impl Dwarf { } } -/// Trampolines section used by ARM short jump (26bits) -#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))] -#[cfg_attr( - feature = "enable-rkyv", - derive(RkyvSerialize, RkyvDeserialize, Archive) -)] -#[derive(Debug, PartialEq, Eq, Clone, MemoryUsage)] -pub struct TrampolinesSection { - /// SectionIndex for the actual Trampolines code - pub section_index: SectionIndex, - /// Number of jump slots in the section - pub slots: usize, - /// Slot size - pub size: usize, -} - -impl TrampolinesSection { - /// Creates a `Trampolines` struct with the indice for its section, and number of slots and size of slot - pub fn new(section_index: SectionIndex, slots: usize, size: usize) -> Self { - Self { - section_index, - slots, - size, - } - } -} - /// The result of compiling a WebAssembly module's functions. 
#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))] #[derive(Debug, PartialEq, Eq)] @@ -182,9 +155,6 @@ pub struct Compilation { /// Section ids corresponding to the Dwarf debug info debug: Option, - - /// Trampolines for the arch that needs it - trampolines: Option, } impl Compilation { @@ -195,7 +165,6 @@ impl Compilation { function_call_trampolines: PrimaryMap, dynamic_function_trampolines: PrimaryMap, debug: Option, - trampolines: Option, ) -> Self { Self { functions, @@ -203,7 +172,6 @@ impl Compilation { function_call_trampolines, dynamic_function_trampolines, debug, - trampolines, } } @@ -281,11 +249,6 @@ impl Compilation { pub fn get_debug(&self) -> Option { self.debug.clone() } - - /// Returns the Trampilines info. - pub fn get_trampolines(&self) -> Option { - self.trampolines.clone() - } } impl<'a> IntoIterator for &'a Compilation { diff --git a/lib/compiler/src/lib.rs b/lib/compiler/src/lib.rs index 27669f27c24..04aadd09be4 100644 --- a/lib/compiler/src/lib.rs +++ b/lib/compiler/src/lib.rs @@ -74,7 +74,7 @@ pub use crate::error::{ }; pub use crate::function::{ Compilation, CompiledFunction, CompiledFunctionFrameInfo, CustomSections, Dwarf, FunctionBody, - Functions, TrampolinesSection, + Functions, }; pub use crate::jump_table::{JumpTable, JumpTableOffsets}; pub use crate::module::CompileModuleInfo; diff --git a/lib/engine-dylib/Cargo.toml b/lib/engine-dylib/Cargo.toml index d7de2f80389..94f1712ca8c 100644 --- a/lib/engine-dylib/Cargo.toml +++ b/lib/engine-dylib/Cargo.toml @@ -26,6 +26,8 @@ which = "4.0" rkyv = "0.7.20" loupe = "0.1" enumset = "1.0" +enum-iterator = "0.7.0" +object = { version = "0.28.3", default-features = false, features = ["write"] } [features] # Enable the `compiler` feature if you want the engine to compile diff --git a/lib/engine-dylib/src/artifact.rs b/lib/engine-dylib/src/artifact.rs index 248a2d04cd3..081cf636b4f 100644 --- a/lib/engine-dylib/src/artifact.rs +++ b/lib/engine-dylib/src/artifact.rs @@ -3,9 +3,11 
@@ use crate::engine::{DylibEngine, DylibEngineInner}; use crate::serialize::ModuleMetadata; +use crate::trampoline::{emit_trampolines, fill_trampoline_table, WASMER_TRAMPOLINES_SYMBOL}; use enumset::EnumSet; use libloading::{Library, Symbol as LibrarySymbol}; use loupe::MemoryUsage; +use object::{write::CoffExportStyle, BinaryFormat}; use std::error::Error; use std::fs::{self, File}; use std::io::{Read, Write}; @@ -232,8 +234,31 @@ impl DylibArtifact { &metadata_binary, ); + let mut extra_filepath = None; let filepath = match maybe_obj_bytes { Some(obj_bytes) => { + extra_filepath = { + // Create a separate object file with the trampolines. + let mut obj = + get_object_for_target(&target_triple).map_err(to_compile_error)?; + emit_trampolines(&mut obj, engine.target()); + if obj.format() == BinaryFormat::Coff { + obj.add_coff_exports(CoffExportStyle::Gnu); + } + let file = tempfile::Builder::new() + .prefix("wasmer_dylib_") + .suffix(".o") + .tempfile() + .map_err(to_compile_error)?; + + // Re-open it. + let (mut file, filepath) = file.keep().map_err(to_compile_error)?; + let obj_bytes = obj.write().map_err(to_compile_error)?; + file.write_all(&obj_bytes).map_err(to_compile_error)?; + Some(filepath) + }; + + // Write the object file generated by the compiler. 
let obj_bytes = obj_bytes?; let file = tempfile::Builder::new() .prefix("wasmer_dylib_") @@ -254,6 +279,7 @@ impl DylibArtifact { function_body_inputs, )?; let mut obj = get_object_for_target(&target_triple).map_err(to_compile_error)?; + emit_trampolines(&mut obj, engine.target()); emit_data( &mut obj, WASMER_METADATA_SYMBOL, @@ -263,6 +289,9 @@ impl DylibArtifact { .map_err(to_compile_error)?; emit_compilation(&mut obj, compilation, &symbol_registry, &target_triple) .map_err(to_compile_error)?; + if obj.format() == BinaryFormat::Coff { + obj.add_coff_exports(CoffExportStyle::Gnu); + } let file = tempfile::Builder::new() .prefix("wasmer_dylib_") .suffix(".o") @@ -371,6 +400,7 @@ impl DylibArtifact { let linker = engine_inner.linker().executable(); let output = Command::new(linker) .arg(&filepath) + .args(&extra_filepath) .arg("-o") .arg(&output_filepath) .args(&target_args) @@ -385,6 +415,11 @@ impl DylibArtifact { if fs::metadata(&filepath).is_ok() { fs::remove_file(filepath).map_err(to_compile_error)?; } + if let Some(filepath) = extra_filepath { + if fs::metadata(&filepath).is_ok() { + fs::remove_file(filepath).map_err(to_compile_error)?; + } + } let output = output?; @@ -453,6 +488,13 @@ impl DylibArtifact { dylib_path: PathBuf, lib: Library, ) -> Result { + unsafe { + let trampolines_symbol: LibrarySymbol = lib + .get(WASMER_TRAMPOLINES_SYMBOL) + .expect("missing WASMER_TRAMPOLINES symbol"); + fill_trampoline_table(trampolines_symbol.into_raw().into_raw() as *mut usize); + } + let mut finished_functions: PrimaryMap = PrimaryMap::new(); for (function_local_index, _function_len) in metadata.function_body_lengths.iter() { @@ -621,7 +663,7 @@ impl DylibArtifact { DeserializeError::CorruptedBinary(format!("Library loading failed: {}", e)) })?; let shared_path: PathBuf = PathBuf::from(path); - let symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> = + let metadata_symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> = lib.get(WASMER_METADATA_SYMBOL).map_err(|e| 
{ DeserializeError::CorruptedBinary(format!( "The provided object file doesn't seem to be generated by Wasmer: {}", @@ -630,7 +672,7 @@ impl DylibArtifact { })?; use std::slice; - let metadata = &**symbol; + let metadata = &**metadata_symbol; let metadata_len = MetadataHeader::parse(metadata)?; let metadata_slice: &'static [u8] = slice::from_raw_parts(metadata.as_ptr().add(MetadataHeader::LEN), metadata_len); diff --git a/lib/engine-dylib/src/lib.rs b/lib/engine-dylib/src/lib.rs index 7aad4a628a7..b9366d99182 100644 --- a/lib/engine-dylib/src/lib.rs +++ b/lib/engine-dylib/src/lib.rs @@ -27,6 +27,7 @@ mod artifact; mod builder; mod engine; mod serialize; +mod trampoline; pub use crate::artifact::DylibArtifact; pub use crate::builder::Dylib; diff --git a/lib/engine-dylib/src/trampoline.rs b/lib/engine-dylib/src/trampoline.rs new file mode 100644 index 00000000000..12d60e2b6c3 --- /dev/null +++ b/lib/engine-dylib/src/trampoline.rs @@ -0,0 +1,166 @@ +//! Trampolines for libcalls. +//! +//! This is needed because the target of libcall relocations are not reachable +//! through normal branch instructions. +//! +//! There is an additional complexity for dynamic libraries: we can't just +//! import the symbol from the host executable because executables don't export +//! dynamic symbols (it's possible but requires special linker options). +//! +//! Instead, we export a table of function pointers in the data section which is +//! manually filled in by the runtime after the dylib is loaded. + +use enum_iterator::IntoEnumIterator; +use object::{ + elf, macho, + write::{Object, Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection}, + BinaryFormat, RelocationEncoding, RelocationKind, SymbolFlags, SymbolKind, SymbolScope, +}; +use wasmer_compiler::{Architecture, Target}; +use wasmer_vm::libcalls::LibCall; + +/// Symbol exported from the dynamic library which points to the trampoline table. 
+pub const WASMER_TRAMPOLINES_SYMBOL: &[u8] = b"WASMER_TRAMPOLINES"; + +// SystemV says that both x16 and x17 are available as intra-procedural scratch +// registers but Apple's ABI restricts us to use x17. +// ADRP x17, #... 11 00 00 90 +// LDR x17, [x17, #...] 31 02 40 f9 +// BR x17 20 02 1f d6 +const AARCH64_TRAMPOLINE: [u8; 12] = [ + 0x11, 0x00, 0x00, 0x90, 0x31, 0x02, 0x40, 0xf9, 0x20, 0x02, 0x1f, 0xd6, +]; + +// JMP [RIP + ...] FF 25 00 00 00 00 +const X86_64_TRAMPOLINE: [u8; 6] = [0xff, 0x25, 0x00, 0x00, 0x00, 0x00]; + +fn emit_trampoline( + obj: &mut Object, + text: SectionId, + trampoline_table_symbols: &[SymbolId], + libcall: LibCall, + target: &Target, +) { + let function_name = libcall.to_function_name(); + let libcall_symbol = obj.add_symbol(Symbol { + name: function_name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Linkage, + weak: false, + section: SymbolSection::Section(text), + flags: SymbolFlags::None, + }); + + match target.triple().architecture { + Architecture::Aarch64(_) => { + let (reloc1, reloc2) = match obj.format() { + BinaryFormat::Elf => ( + RelocationKind::Elf(elf::R_AARCH64_ADR_PREL_PG_HI21), + RelocationKind::Elf(elf::R_AARCH64_LDST64_ABS_LO12_NC), + ), + BinaryFormat::MachO => ( + RelocationKind::MachO { + value: macho::ARM64_RELOC_PAGE21, + relative: true, + }, + RelocationKind::MachO { + value: macho::ARM64_RELOC_PAGEOFF12, + relative: false, + }, + ), + _ => panic!("Unsupported binary format on AArch64"), + }; + let offset = obj.add_symbol_data(libcall_symbol, text, &AARCH64_TRAMPOLINE, 4); + obj.add_relocation( + text, + Relocation { + offset, + size: 32, + kind: reloc1, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as usize], + addend: 0, + }, + ) + .unwrap(); + obj.add_relocation( + text, + Relocation { + offset: offset + 4, + size: 32, + kind: reloc2, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as 
usize], + addend: 0, + }, + ) + .unwrap(); + } + Architecture::X86_64 => { + let offset = obj.add_symbol_data(libcall_symbol, text, &X86_64_TRAMPOLINE, 1); + obj.add_relocation( + text, + Relocation { + offset: offset + 2, + size: 32, + kind: RelocationKind::Relative, + encoding: RelocationEncoding::Generic, + symbol: trampoline_table_symbols[libcall as usize], + // -4 because RIP-relative addressing starts from the end of + // the instruction. + addend: -4, + }, + ) + .unwrap(); + } + arch => panic!("Unsupported architecture: {}", arch), + }; +} + +/// Emits the libcall trampolines and table to the object file. +pub fn emit_trampolines(obj: &mut Object, target: &Target) { + let text = obj.section_id(StandardSection::Text); + let bss = obj.section_id(StandardSection::UninitializedData); + + let trampoline_table = obj.add_symbol(Symbol { + name: WASMER_TRAMPOLINES_SYMBOL.to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Data, + scope: SymbolScope::Dynamic, + weak: false, + section: SymbolSection::Section(bss), + flags: SymbolFlags::None, + }); + let table_offset = + obj.add_symbol_bss(trampoline_table, bss, LibCall::VARIANT_COUNT as u64 * 8, 8); + + // Create a symbol for each entry in the table. We could avoid this and use + // an addend, but this isn't supported in all object formats. + let mut trampoline_table_symbols = vec![]; + for libcall in LibCall::into_enum_iter() { + trampoline_table_symbols.push(obj.add_symbol(Symbol { + name: format!("__WASMER_TRAMPOLINE{}", libcall as usize).into_bytes(), + value: table_offset + libcall as u64 * 8, + size: 0, + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(bss), + flags: SymbolFlags::None, + })); + } + + for libcall in LibCall::into_enum_iter() { + emit_trampoline(obj, text, &trampoline_table_symbols, libcall, target); + } +} + +/// Fills in the libcall trampoline table at the given address. 
+pub unsafe fn fill_trampoline_table(table: *mut usize) { + for libcall in LibCall::into_enum_iter() { + *table.add(libcall as usize) = libcall.function_pointer(); + } +} diff --git a/lib/engine-universal/Cargo.toml b/lib/engine-universal/Cargo.toml index 48e675ecac5..1342fee967d 100644 --- a/lib/engine-universal/Cargo.toml +++ b/lib/engine-universal/Cargo.toml @@ -27,6 +27,7 @@ leb128 = "0.2" rkyv = "0.7.20" loupe = "0.1" enumset = "1.0" +enum-iterator = "0.7.0" [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["winnt", "impl-default"] } diff --git a/lib/engine-universal/src/artifact.rs b/lib/engine-universal/src/artifact.rs index 8163b8718d0..10c19ebaed4 100644 --- a/lib/engine-universal/src/artifact.rs +++ b/lib/engine-universal/src/artifact.rs @@ -6,6 +6,7 @@ use crate::link::link_module; #[cfg(feature = "compiler")] use crate::serialize::SerializableCompilation; use crate::serialize::SerializableModule; +use crate::trampoline::{libcall_trampoline_len, make_libcall_trampolines}; use enumset::EnumSet; use loupe::MemoryUsage; use std::mem; @@ -111,6 +112,14 @@ impl UniversalArtifact { let frame_infos = compilation.get_frame_info(); + // Synthesize a custom section to hold the libcall trampolines. 
+ let mut custom_sections = compilation.get_custom_sections(); + let mut custom_section_relocations = compilation.get_custom_section_relocations(); + let libcall_trampolines_section = make_libcall_trampolines(engine.target()); + custom_section_relocations.push(libcall_trampolines_section.relocations.clone()); + let libcall_trampolines = custom_sections.push(libcall_trampolines_section); + let libcall_trampoline_len = libcall_trampoline_len(engine.target()) as u32; + let serializable_compilation = SerializableCompilation { function_bodies: compilation.get_function_bodies(), function_relocations: compilation.get_relocations(), @@ -118,10 +127,11 @@ impl UniversalArtifact { function_frame_info: frame_infos, function_call_trampolines, dynamic_function_trampolines, - custom_sections: compilation.get_custom_sections(), - custom_section_relocations: compilation.get_custom_section_relocations(), + custom_sections, + custom_section_relocations, debug: compilation.get_debug(), - trampolines: compilation.get_trampolines(), + libcall_trampolines, + libcall_trampoline_len, }; let serializable = SerializableModule { compilation: serializable_compilation, @@ -187,7 +197,8 @@ impl UniversalArtifact { serializable.compilation.function_relocations.clone(), &custom_sections, &serializable.compilation.custom_section_relocations, - &serializable.compilation.trampolines, + serializable.compilation.libcall_trampolines, + serializable.compilation.libcall_trampoline_len as usize, ); // Compute indices into the shared signature table. 
diff --git a/lib/engine-universal/src/lib.rs b/lib/engine-universal/src/lib.rs index 2bbafadd4db..8685b846180 100644 --- a/lib/engine-universal/src/lib.rs +++ b/lib/engine-universal/src/lib.rs @@ -30,6 +30,7 @@ mod code_memory; mod engine; mod link; mod serialize; +mod trampoline; mod unwind; pub use crate::artifact::UniversalArtifact; diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs index cb7b7245194..4f2dd083399 100644 --- a/lib/engine-universal/src/link.rs +++ b/lib/engine-universal/src/link.rs @@ -1,86 +1,40 @@ //! Linking for Universal-compiled code. -use std::collections::HashMap; +use crate::trampoline::get_libcall_trampoline; use std::ptr::{read_unaligned, write_unaligned}; use wasmer_compiler::{ JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations, - SectionIndex, TrampolinesSection, + SectionIndex, }; use wasmer_engine::FunctionExtent; use wasmer_types::entity::{EntityRef, PrimaryMap}; use wasmer_types::{LocalFunctionIndex, ModuleInfo}; use wasmer_vm::SectionBodyPtr; -/// Add a new trampoline address, given the base adress of the Section. 
Return the address of the jump -/// The trampoline itself still have to be writen -fn trampolines_add( - map: &mut HashMap, - trampoline: &TrampolinesSection, - address: usize, - baseaddress: usize, -) -> usize { - if let Some(target) = map.get(&address) { - return *target; - } - let ret = map.len(); - if ret == trampoline.slots { - panic!("No more slot in Trampolines"); - } - map.insert(address, baseaddress + ret * trampoline.size); - baseaddress + ret * trampoline.size -} - -fn use_trampoline( - address: usize, - allocated_sections: &PrimaryMap, - trampolines: &Option, - map: &mut HashMap, -) -> Option { - match trampolines { - Some(trampolines) => Some(trampolines_add( - map, - trampolines, - address, - *allocated_sections[trampolines.section_index] as usize, - )), - _ => None, - } -} - -fn fill_trampolin_map( - allocated_sections: &PrimaryMap, - trampolines: &Option, -) -> HashMap { - let mut map: HashMap = HashMap::new(); - match trampolines { - Some(trampolines) => { - let baseaddress = *allocated_sections[trampolines.section_index] as usize; - for i in 0..trampolines.size { - let jmpslot: usize = unsafe { - read_unaligned((baseaddress + i * trampolines.size + 8) as *mut usize) - }; - if jmpslot != 0 { - map.insert(jmpslot, baseaddress + i * trampolines.size); - } - } - } - _ => {} - }; - map -} - fn apply_relocation( body: usize, r: &Relocation, allocated_functions: &PrimaryMap, jt_offsets: &PrimaryMap, allocated_sections: &PrimaryMap, - trampolines: &Option, - trampolines_map: &mut HashMap, + libcall_trampolines: SectionIndex, + libcall_trampoline_len: usize, ) { let target_func_address: usize = match r.reloc_target { RelocationTarget::LocalFunc(index) => *allocated_functions[index].ptr as usize, - RelocationTarget::LibCall(libcall) => libcall.function_pointer(), + RelocationTarget::LibCall(libcall) => { + // Use the direct target of the libcall if the relocation supports + // a full 64-bit address. Otherwise use a trampoline. 
+ if r.kind == RelocationKind::Abs8 || r.kind == RelocationKind::X86PCRel8 { + libcall.function_pointer() + } else { + get_libcall_trampoline( + libcall, + allocated_sections[libcall_trampolines].0 as usize, + libcall_trampoline_len, + ) + } + } RelocationTarget::CustomSection(custom_section) => { *allocated_sections[custom_section] as usize } @@ -115,26 +69,15 @@ fn apply_relocation( }, RelocationKind::X86PCRelRodata4 => {} RelocationKind::Arm64Call => unsafe { - let (reloc_address, mut reloc_delta) = r.for_address(body, target_func_address as u64); + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); if (reloc_delta as i64).abs() >= 0x1000_0000 { - let new_address = match use_trampoline( - target_func_address, - allocated_sections, - trampolines, - trampolines_map, - ) { - Some(new_address) => new_address, - _ => panic!( - "Relocation to big for {:?} for {:?} with {:x}, current val {:x}", - r.kind, - r.reloc_target, - reloc_delta, - read_unaligned(reloc_address as *mut u32) - ), - }; - write_unaligned((new_address + 8) as *mut u64, target_func_address as u64); // write the jump address - let (_, new_delta) = r.for_address(body, new_address as u64); - reloc_delta = new_delta; + panic!( + "Relocation to big for {:?} for {:?} with {:x}, current val {:x}", + r.kind, + r.reloc_target, + reloc_delta, + read_unaligned(reloc_address as *mut u32) + ) } let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff) | read_unaligned(reloc_address as *mut u32); @@ -180,9 +123,9 @@ pub fn link_module( function_relocations: Relocations, allocated_sections: &PrimaryMap, section_relocations: &PrimaryMap>, - trampolines: &Option, + libcall_trampolines: SectionIndex, + trampoline_len: usize, ) { - let mut trampolines_map = fill_trampolin_map(allocated_sections, trampolines); for (i, section_relocs) in section_relocations.iter() { let body = *allocated_sections[i] as usize; for r in section_relocs { @@ -192,8 +135,8 @@ pub fn link_module( 
allocated_functions, jt_offsets, allocated_sections, - trampolines, - &mut trampolines_map, + libcall_trampolines, + trampoline_len, ); } } @@ -206,8 +149,8 @@ pub fn link_module( allocated_functions, jt_offsets, allocated_sections, - trampolines, - &mut trampolines_map, + libcall_trampolines, + trampoline_len, ); } } diff --git a/lib/engine-universal/src/serialize.rs b/lib/engine-universal/src/serialize.rs index a15f68effa0..9bb450c1bdc 100644 --- a/lib/engine-universal/src/serialize.rs +++ b/lib/engine-universal/src/serialize.rs @@ -6,7 +6,7 @@ use rkyv::{ }; use wasmer_compiler::{ CompileModuleInfo, CompiledFunctionFrameInfo, CustomSection, Dwarf, FunctionBody, - JumpTableOffsets, Relocation, SectionIndex, TrampolinesSection, + JumpTableOffsets, Relocation, SectionIndex, }; use wasmer_engine::{DeserializeError, SerializeError}; use wasmer_types::entity::PrimaryMap; @@ -25,8 +25,10 @@ pub struct SerializableCompilation { pub custom_section_relocations: PrimaryMap>, // The section indices corresponding to the Dwarf debug info pub debug: Option, - // the Trampoline for Arm arch - pub trampolines: Option, + // Custom section containing libcall trampolines. + pub libcall_trampolines: SectionIndex, + // Length of each libcall trampoline. + pub libcall_trampoline_len: u32, } /// Serializable struct that is able to serialize from and to diff --git a/lib/engine-universal/src/trampoline.rs b/lib/engine-universal/src/trampoline.rs new file mode 100644 index 00000000000..e90a7c92f98 --- /dev/null +++ b/lib/engine-universal/src/trampoline.rs @@ -0,0 +1,88 @@ +//! Trampolines for libcalls. +//! +//! This is needed because the target of libcall relocations are not reachable +//! through normal branch instructions. 
+
+use enum_iterator::IntoEnumIterator;
+use wasmer_compiler::{
+    Architecture, CustomSection, CustomSectionProtection, Relocation, RelocationKind,
+    RelocationTarget, SectionBody, Target,
+};
+use wasmer_vm::libcalls::LibCall;
+
+// SystemV says that both x16 and x17 are available as intra-procedural scratch
+// registers but Apple's ABI restricts us to use x17.
+// LDR x17, [PC, #8]  51 00 00 58
+// BR x17             20 02 1f d6
+// JMPADDR            00 00 00 00 00 00 00 00
+const AARCH64_TRAMPOLINE: [u8; 16] = [
+    0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+// 2 padding bytes are used to preserve alignment; the displacement (02) skips over them.
+// JMP [RIP + 2]   FF 25 02 00 00 00 [00 00]
+// 64-bit ADDR     00 00 00 00 00 00 00 00
+const X86_64_TRAMPOLINE: [u8; 16] = [
+    0xff, 0x25, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+];
+
+fn make_trampoline(
+    target: &Target,
+    libcall: LibCall,
+    code: &mut Vec<u8>,
+    relocations: &mut Vec<Relocation>,
+) {
+    match target.triple().architecture {
+        Architecture::Aarch64(_) => {
+            code.extend(&AARCH64_TRAMPOLINE);
+            relocations.push(Relocation {
+                kind: RelocationKind::Abs8,
+                reloc_target: RelocationTarget::LibCall(libcall),
+                offset: code.len() as u32 - 8, // the address slot is the last 8 bytes just appended
+                addend: 0,
+            });
+        }
+        Architecture::X86_64 => {
+            code.extend(&X86_64_TRAMPOLINE);
+            relocations.push(Relocation {
+                kind: RelocationKind::Abs8,
+                reloc_target: RelocationTarget::LibCall(libcall),
+                offset: code.len() as u32 - 8, // the address slot is the last 8 bytes just appended
+                addend: 0,
+            });
+        }
+        arch => panic!("Unsupported architecture: {}", arch),
+    };
+}
+
+/// Returns the length of a libcall trampoline.
+pub fn libcall_trampoline_len(target: &Target) -> usize {
+    match target.triple().architecture {
+        Architecture::Aarch64(_) => AARCH64_TRAMPOLINE.len(),
+        Architecture::X86_64 => X86_64_TRAMPOLINE.len(),
+        arch => panic!("Unsupported architecture: {}", arch),
+    }
+}
+
+/// Creates a custom section containing the libcall trampolines.
+pub fn make_libcall_trampolines(target: &Target) -> CustomSection { + let mut code = vec![]; + let mut relocations = vec![]; + for libcall in LibCall::into_enum_iter() { + make_trampoline(target, libcall, &mut code, &mut relocations); + } + CustomSection { + protection: CustomSectionProtection::ReadExecute, + bytes: SectionBody::new_with_vec(code), + relocations, + } +} + +/// Returns the address of a trampoline in the libcall trampolines section. +pub fn get_libcall_trampoline( + libcall: LibCall, + libcall_trampolines: usize, + libcall_trampoline_len: usize, +) -> usize { + libcall_trampolines + libcall as usize * libcall_trampoline_len +} diff --git a/lib/object/Cargo.toml b/lib/object/Cargo.toml index a90376a6eed..8979f4ae7ef 100644 --- a/lib/object/Cargo.toml +++ b/lib/object/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" wasmer-types = { path = "../types", version = "=2.1.1" } wasmer-compiler = { path = "../compiler", version = "=2.1.1", default-features = false, features = [ "std", - "translator" + "translator", ] } -object = { version = "0.27", default-features = false, features = ["write"] } +object = { version = "0.28.3", default-features = false, features = ["write"] } thiserror = "1.0" diff --git a/lib/object/src/module.rs b/lib/object/src/module.rs index 4094ca08752..65fd8b490a2 100644 --- a/lib/object/src/module.rs +++ b/lib/object/src/module.rs @@ -3,7 +3,8 @@ use object::write::{ Object, Relocation, StandardSection, StandardSegment, Symbol as ObjSymbol, SymbolSection, }; use object::{ - elf, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope, + elf, macho, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, + SymbolScope, }; use wasmer_compiler::{ Architecture, BinaryFormat, Compilation, CustomSectionProtection, Endianness, @@ -292,7 +293,14 @@ pub fn emit_compilation( // Reloc::X86PCRelRodata4 => { // } Reloc::Arm64Call => ( - RelocationKind::Elf(elf::R_AARCH64_CALL26), + match obj.format() { 
+ object::BinaryFormat::Elf => RelocationKind::Elf(elf::R_AARCH64_CALL26), + object::BinaryFormat::MachO => RelocationKind::MachO { + value: macho::ARM64_RELOC_BRANCH26, + relative: true, + }, + fmt => panic!("unsupported binary format {:?}", fmt), + }, RelocationEncoding::Generic, 32, ), diff --git a/lib/vm/Cargo.toml b/lib/vm/Cargo.toml index f3e3d8a3cce..cbd8085f319 100644 --- a/lib/vm/Cargo.toml +++ b/lib/vm/Cargo.toml @@ -23,6 +23,7 @@ backtrace = "0.3" serde = { version = "1.0", features = ["derive", "rc"] } rkyv = { version = "0.7.20", optional = true } loupe = { version = "0.1", features = ["enable-indexmap"] } +enum-iterator = "0.7.0" [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["winbase", "memoryapi", "errhandlingapi"] } diff --git a/lib/vm/src/libcalls.rs b/lib/vm/src/libcalls.rs index 301568e4b2e..89e7ffd5e25 100644 --- a/lib/vm/src/libcalls.rs +++ b/lib/vm/src/libcalls.rs @@ -43,6 +43,7 @@ use crate::table::{RawTableElement, TableElement}; use crate::trap::{raise_lib_trap, Trap, TrapCode}; use crate::vmcontext::VMContext; use crate::VMExternRef; +use enum_iterator::IntoEnumIterator; use loupe::MemoryUsage; #[cfg(feature = "enable-rkyv")] use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize}; @@ -688,7 +689,9 @@ pub static wasmer_vm_probestack: unsafe extern "C" fn() = PROBESTACK; feature = "enable-rkyv", derive(RkyvSerialize, RkyvDeserialize, Archive) )] -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage, IntoEnumIterator, +)] pub enum LibCall { /// ceil.f32 CeilF32, diff --git a/tests/ignores.txt b/tests/ignores.txt index 7f72118a661..5e1dd68bfee 100644 --- a/tests/ignores.txt +++ b/tests/ignores.txt @@ -33,14 +33,9 @@ llvm traps::start_trap_pretty dylib traps::start_trap_pretty aarch64 traps::start_trap_pretty -cranelift multi_value_imports::dylib # 
Needs investigation singlepass multi_value_imports::dylib # Singlepass doesn't support multivalue singlepass multi_value_imports::dynamic # Singlepass doesn't support multivalue -# LLVM doesn't fully work in macOS M1 -llvm+universal+macos+aarch64 * # We are using the object crate, it was not fully supporting aarch64 relocations emitted by LLVM. Needs reassesment -llvm+dylib+macos+aarch64 * # Tests seem to be randomly failing - # TODO: We need to fix this in ARM. The issue is caused by libunwind overflowing # the stack while creating the stacktrace. # https://github.com/rust-lang/backtrace-rs/issues/356