From 61fe12e5d149668203b5f721bbbc5f3c97fa40a2 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 21 Oct 2019 18:59:06 -0500 Subject: [PATCH] Track original wasm instructions location --- crates/macro/src/lib.rs | 4 +- crates/tests/tests/custom_sections.rs | 65 ++++++- crates/tests/tests/spec-tests.rs | 4 +- examples/round-trip.rs | 8 +- src/dot.rs | 4 +- src/emit.rs | 3 +- src/function_builder.rs | 10 +- src/ir/mod.rs | 40 ++++- src/ir/traversals.rs | 8 +- src/module/config.rs | 29 ++++ src/module/custom.rs | 21 +++ .../functions/local_function/context.rs | 18 +- src/module/functions/local_function/emit.rs | 13 +- src/module/functions/local_function/mod.rs | 163 ++++++++++-------- src/module/functions/mod.rs | 37 +++- src/module/mod.rs | 33 +++- 16 files changed, 350 insertions(+), 110 deletions(-) diff --git a/crates/macro/src/lib.rs b/crates/macro/src/lib.rs index 80c7785e..dc3eba51 100755 --- a/crates/macro/src/lib.rs +++ b/crates/macro/src/lib.rs @@ -509,7 +509,7 @@ fn create_visit(variants: &[WalrusVariant]) -> impl quote::ToTokens { /// Visit `Instr`. #[inline] - fn visit_instr(&mut self, instr: &'instr Instr) { + fn visit_instr(&mut self, instr: &'instr Instr, instr_loc: &'instr InstrLocId) { // ... } @@ -593,7 +593,7 @@ fn create_visit(variants: &[WalrusVariant]) -> impl quote::ToTokens { /// Visit `Instr`. #[inline] - fn visit_instr_mut(&mut self, instr: &mut Instr) { + fn visit_instr_mut(&mut self, instr: &mut Instr, instr_loc: &mut InstrLocId) { // ... } diff --git a/crates/tests/tests/custom_sections.rs b/crates/tests/tests/custom_sections.rs index 9ccd65ab..ec2122bb 100644 --- a/crates/tests/tests/custom_sections.rs +++ b/crates/tests/tests/custom_sections.rs @@ -1,7 +1,7 @@ //! Tests for working with custom sections that `walrus` doesn't know about. 
use std::borrow::Cow; -use walrus::{CustomSection, IdsToIndices, Module, ModuleConfig}; +use walrus::{CodeTransform, CustomSection, IdsToIndices, Module, ModuleConfig, ValType}; #[derive(Clone, Debug, Default, PartialEq, Eq)] struct HelloCustomSection(String); @@ -63,3 +63,66 @@ fn round_trip_unkown_custom_sections() { let new_wasm = module.emit_wasm(); assert_eq!(wasm, new_wasm); } + +// Insert a `(drop (i32.const 0))` at the start of the function and assert that +// all instructions are pushed down by the size of a `(drop (i32.const 0))`, +// which is 3. +#[test] +fn smoke_test_code_transform() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + static APPLIED_CODE_TRANSFORM: AtomicUsize = AtomicUsize::new(0); + + #[derive(Debug)] + struct CheckCodeTransform; + impl CustomSection for CheckCodeTransform { + fn name(&self) -> &str { + "check-code-transform" + } + + fn data(&self, _: &IdsToIndices) -> Cow<[u8]> { + vec![].into() + } + + fn apply_code_transform(&mut self, transform: &CodeTransform) { + APPLIED_CODE_TRANSFORM.store(1, Ordering::SeqCst); + assert!(!transform.is_empty()); + for (input_offset, output_offset) in transform.iter().cloned() { + assert_eq!(input_offset.data() as usize + 3, output_offset); + } + } + } + + let mut config = ModuleConfig::new(); + config.generate_producers_section(false); + + let wasm = { + let mut module = Module::with_config(config.clone()); + + let mut builder = walrus::FunctionBuilder::new(&mut module.types, &[], &[ValType::I32]); + builder.func_body().i32_const(1337); + let locals = vec![]; + let f_id = builder.finish(locals, &mut module.funcs); + + module.exports.add("f", f_id); + + module.emit_wasm() + }; + + config.preserve_code_transform(true); + + let mut module = config.parse(&wasm).unwrap(); + module.customs.add(CheckCodeTransform); + + for (_id, f) in module.funcs.iter_local_mut() { + let builder = f.builder_mut(); + builder.func_body().const_at(0, walrus::ir::Value::I32(0)); + builder.func_body().drop_at(1); + } 
+ + // Emit the new, transformed wasm. This should trigger the + // `apply_code_transform` method to be called. + let _wasm = module.emit_wasm(); + + assert_eq!(APPLIED_CODE_TRANSFORM.load(Ordering::SeqCst), 1); +} diff --git a/crates/tests/tests/spec-tests.rs b/crates/tests/tests/spec-tests.rs index 9eff5c08..cc13dcc1 100644 --- a/crates/tests/tests/spec-tests.rs +++ b/crates/tests/tests/spec-tests.rs @@ -100,14 +100,14 @@ fn run(wast: &Path) -> Result<(), anyhow::Error> { } cmd => { let wasm = fs::read(&path)?; - let wasm = config + let mut wasm = config .parse(&wasm) .context(format!("error parsing wasm (line {})", line))?; let wasm1 = wasm.emit_wasm(); fs::write(&path, &wasm1)?; let wasm2 = config .parse(&wasm1) - .map(|m| m.emit_wasm()) + .map(|mut m| m.emit_wasm()) .context(format!("error re-parsing wasm (line {})", line))?; if wasm1 != wasm2 { panic!("wasm module at line {} isn't deterministic", line); diff --git a/examples/round-trip.rs b/examples/round-trip.rs index 0dd75d89..716f22fc 100644 --- a/examples/round-trip.rs +++ b/examples/round-trip.rs @@ -2,10 +2,10 @@ fn main() -> anyhow::Result<()> { env_logger::init(); - let a = std::env::args().nth(1).ok_or_else(|| { - anyhow::anyhow!("must provide the input wasm file as the first argument") - })?; - let m = walrus::Module::from_file(&a)?; + let a = std::env::args() + .nth(1) + .ok_or_else(|| anyhow::anyhow!("must provide the input wasm file as the first argument"))?; + let mut m = walrus::Module::from_file(&a)?; let wasm = m.emit_wasm(); if let Some(destination) = std::env::args().nth(2) { std::fs::write(destination, wasm)?; diff --git a/src/dot.rs b/src/dot.rs index b56aea9b..cd237720 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -385,13 +385,13 @@ impl Dot for LocalFunction { impl DotNode for InstrSeq { fn fields(&self, fields: &mut impl FieldAggregator) { - for (i, instr) in self.instrs.iter().enumerate() { + for (i, (instr, _)) in self.instrs.iter().enumerate() { 
fields.add_field_with_port(&i.to_string(), &format!("{:?}", instr)); } } fn edges(&self, edges: &mut impl EdgeAggregator) { - for (i, instr) in self.instrs.iter().enumerate() { + for (i, (instr, _)) in self.instrs.iter().enumerate() { let port = i.to_string(); instr.visit(&mut DotVisitor { port, edges }); } diff --git a/src/emit.rs b/src/emit.rs index 98fdf3b6..25367b78 100644 --- a/src/emit.rs +++ b/src/emit.rs @@ -5,8 +5,8 @@ use crate::encode::{Encoder, MAX_U32_LENGTH}; use crate::ir::Local; use crate::map::{IdHashMap, IdHashSet}; +use crate::{CodeTransform, Global, GlobalId, Memory, MemoryId, Module, Table, TableId}; use crate::{Data, DataId, Element, ElementId, Function, FunctionId}; -use crate::{Global, GlobalId, Memory, MemoryId, Module, Table, TableId}; use crate::{Type, TypeId}; use std::ops::{Deref, DerefMut}; @@ -15,6 +15,7 @@ pub struct EmitContext<'a> { pub indices: &'a mut IdsToIndices, pub encoder: Encoder<'a>, pub locals: IdHashMap>, + pub code_transform: CodeTransform, } pub struct SubContext<'a, 'cx> { diff --git a/src/function_builder.rs b/src/function_builder.rs index d71a3744..ecd6f345 100644 --- a/src/function_builder.rs +++ b/src/function_builder.rs @@ -164,19 +164,21 @@ impl InstrSeqBuilder<'_> { } /// Get this instruction sequence's instructions. - pub fn instrs(&self) -> &[Instr] { + pub fn instrs(&self) -> &[(Instr, InstrLocId)] { &self.builder.arena[self.id] } /// Get this instruction sequence's instructions mutably. - pub fn instrs_mut(&mut self) -> &mut Vec { + pub fn instrs_mut(&mut self) -> &mut Vec<(Instr, InstrLocId)> { &mut self.builder.arena[self.id].instrs } /// Pushes a new instruction onto this builder's sequence. 
#[inline] pub fn instr(&mut self, instr: impl Into) -> &mut Self { - self.builder.arena[self.id].instrs.push(instr.into()); + self.builder.arena[self.id] + .instrs + .push((instr.into(), Default::default())); self } @@ -189,7 +191,7 @@ impl InstrSeqBuilder<'_> { pub fn instr_at(&mut self, position: usize, instr: impl Into) -> &mut Self { self.builder.arena[self.id] .instrs - .insert(position, instr.into()); + .insert(position, (instr.into(), Default::default())); self } diff --git a/src/ir/mod.rs b/src/ir/mod.rs index 87978b38..56c82f8d 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -119,6 +119,38 @@ impl From for InstrSeqType { } } +/// A symbolic original wasm operator source location. +#[derive(Debug, Copy, Clone)] +pub struct InstrLocId(u32); + +const DEFAULT_INSTR_LOC_ID: u32 = 0xffff_ffff; + +impl InstrLocId { + /// Create `InstrLocId` from provided data. Normally the data is + /// wasm bytecode offset. (0xffff_ffff is reserved for default value). + pub fn new(data: u32) -> Self { + assert!(data != DEFAULT_INSTR_LOC_ID); + InstrLocId(data) + } + + /// Check if default value. + pub fn is_default(&self) -> bool { + self.0 == DEFAULT_INSTR_LOC_ID + } + + /// The data + pub fn data(&self) -> u32 { + assert!(self.0 != DEFAULT_INSTR_LOC_ID); + self.0 + } +} + +impl Default for InstrLocId { + fn default() -> Self { + InstrLocId(DEFAULT_INSTR_LOC_ID) + } +} + /// A sequence of instructions. #[derive(Debug)] pub struct InstrSeq { @@ -130,21 +162,21 @@ pub struct InstrSeq { pub ty: InstrSeqType, /// The instructions that make up the body of this block.
- pub instrs: Vec<Instr>, + pub instrs: Vec<(Instr, InstrLocId)>, } impl Deref for InstrSeq { - type Target = Vec<Instr>; + type Target = Vec<(Instr, InstrLocId)>; #[inline] - fn deref(&self) -> &Vec<Instr> { + fn deref(&self) -> &Vec<(Instr, InstrLocId)> { &self.instrs } } impl DerefMut for InstrSeq { #[inline] - fn deref_mut(&mut self) -> &mut Vec<Instr> { + fn deref_mut(&mut self) -> &mut Vec<(Instr, InstrLocId)> { &mut self.instrs } } diff --git a/src/ir/traversals.rs b/src/ir/traversals.rs index 6a984725..6362bdbd 100644 --- a/src/ir/traversals.rs +++ b/src/ir/traversals.rs @@ -78,10 +78,10 @@ pub fn dfs_in_order<'instr>( seq.visit(visitor); } - 'traversing_instrs: for (index, instr) in seq.instrs.iter().enumerate().skip(index) { + 'traversing_instrs: for (index, (instr, loc)) in seq.instrs.iter().enumerate().skip(index) { // Visit this instruction. log::trace!("dfs_in_order: visit_instr({:?})", instr); - visitor.visit_instr(instr); + visitor.visit_instr(instr, loc); // Visit every other resource that this instruction references, // e.g. `MemoryId`s, `FunctionId`s and all that.
@@ -192,8 +192,8 @@ pub fn dfs_pre_order_mut( visitor.start_instr_seq_mut(seq); seq.visit_mut(visitor); - for instr in &mut seq.instrs { - visitor.visit_instr_mut(instr); + for (instr, loc) in &mut seq.instrs { + visitor.visit_instr_mut(instr, loc); instr.visit_mut(visitor); match instr { diff --git a/src/module/config.rs b/src/module/config.rs index 5fb1fb7a..38dea1d0 100644 --- a/src/module/config.rs +++ b/src/module/config.rs @@ -1,4 +1,5 @@ use crate::error::Result; +use crate::ir::InstrLocId; use crate::module::Module; use crate::parse::IndicesToIds; use std::fmt; @@ -13,8 +14,10 @@ pub struct ModuleConfig { pub(crate) skip_strict_validate: bool, pub(crate) skip_producers_section: bool, pub(crate) skip_name_section: bool, + pub(crate) preserve_code_transform: bool, pub(crate) on_parse: Option Result<()> + Sync + Send + 'static>>, + pub(crate) on_instr_loc: Option InstrLocId + Sync + Send + 'static>>, } impl Clone for ModuleConfig { @@ -28,9 +31,11 @@ impl Clone for ModuleConfig { skip_strict_validate: self.skip_strict_validate, skip_producers_section: self.skip_producers_section, skip_name_section: self.skip_name_section, + preserve_code_transform: self.preserve_code_transform, // ... and this is left empty. 
on_parse: None, + on_instr_loc: None, } } } @@ -46,7 +51,9 @@ impl fmt::Debug for ModuleConfig { ref skip_strict_validate, ref skip_producers_section, ref skip_name_section, + ref preserve_code_transform, ref on_parse, + ref on_instr_loc, } = self; f.debug_struct("ModuleConfig") @@ -59,7 +66,9 @@ impl fmt::Debug for ModuleConfig { .field("skip_strict_validate", skip_strict_validate) .field("skip_producers_section", skip_producers_section) .field("skip_name_section", skip_name_section) + .field("preserve_code_transform", preserve_code_transform) .field("on_parse", &on_parse.as_ref().map(|_| "..")) + .field("on_instr_loc", &on_instr_loc.as_ref().map(|_| "..")) .finish() } } @@ -171,6 +180,26 @@ impl ModuleConfig { self } + /// Provide a function that is invoked on source location ID step. + /// + /// Note that cloning a `ModuleConfig` will result in a config that does not + /// have an `on_instr_loc` function, even if the original did. + pub fn on_instr_loc(&mut self, f: F) -> &mut ModuleConfig + where + F: Fn(&usize) -> InstrLocId + Send + Sync + 'static, + { + self.on_instr_loc = Some(Box::new(f) as _); + self + } + + /// Sets a flag controlling whether the code transform is preserved during parsing. + /// + /// By default this flag is `false`. + pub fn preserve_code_transform(&mut self, preserve: bool) -> &mut ModuleConfig { + self.preserve_code_transform = preserve; + self + } + /// Parses an in-memory WebAssembly file into a `Module` using this /// configuration. pub fn parse(&self, wasm: &[u8]) -> Result { diff --git a/src/module/custom.rs b/src/module/custom.rs index cbae03a7..13cc9875 100644 --- a/src/module/custom.rs +++ b/src/module/custom.rs @@ -1,6 +1,7 @@ //! Working with custom sections.
use crate::tombstone_arena::{Id, Tombstone, TombstoneArena}; +use crate::CodeTransform; use crate::IdsToIndices; use std::any::Any; use std::borrow::Cow; @@ -27,6 +28,26 @@ pub trait CustomSection: WalrusAny + Debug + Send + Sync { /// section's name, or the count of how many bytes are in the /// payload. `walrus` will handle these for you. fn data(&self, ids_to_indices: &IdsToIndices) -> Cow<[u8]>; + + /// Apply the given code transformations to this custom section. + /// + /// If the module was not configured with `preserve_code_transform = true`, + /// then this method is never called. + /// + /// This method is called after we have emitted the non-custom Wasm + /// sections, just before a custom section's data is emitted into the Wasm + /// binary. If this custom section references offsets in the Wasm code, this + /// is a chance to update them so they are valid for the new, transformed + /// Wasm code that is being emitted. + /// + /// For example, DWARF debug info references Wasm instructions via offsets + /// into the code section, and we can use these transforms to fix those + /// offsets after having transformed various functions and instructions. + /// + /// The default provided method does nothing. + fn apply_code_transform(&mut self, transform: &CodeTransform) { + let _ = transform; + } } /// A wrapper trait around `any` but implemented for all types that already diff --git a/src/module/functions/local_function/context.rs b/src/module/functions/local_function/context.rs index 522bae9c..93aacf4c 100644 --- a/src/module/functions/local_function/context.rs +++ b/src/module/functions/local_function/context.rs @@ -1,7 +1,7 @@ //! Context needed when validating instructions and constructing our `Instr` IR. 
use crate::error::{ErrorKind, Result}; -use crate::ir::{BlockKind, Instr, InstrSeq, InstrSeqId, InstrSeqType}; +use crate::ir::{BlockKind, Instr, InstrLocId, InstrSeq, InstrSeqId, InstrSeqType}; use crate::module::functions::{FunctionId, LocalFunction}; use crate::module::Module; use crate::parse::IndicesToIds; @@ -179,26 +179,32 @@ impl<'a> ValidationContext<'a> { Ok(&self.controls[idx]) } - pub fn alloc_instr_in_block(&mut self, block: InstrSeqId, instr: impl Into) { - self.func.block_mut(block).instrs.push(instr.into()); + pub fn alloc_instr_in_block( + &mut self, + block: InstrSeqId, + instr: impl Into, + loc: InstrLocId, + ) { + self.func.block_mut(block).instrs.push((instr.into(), loc)); } pub fn alloc_instr_in_control( &mut self, control: usize, instr: impl Into, + loc: InstrLocId, ) -> Result<()> { let frame = self.control(control)?; if frame.unreachable { return Ok(()); } let block = frame.block; - self.alloc_instr_in_block(block, instr); + self.alloc_instr_in_block(block, instr, loc); Ok(()) } - pub fn alloc_instr(&mut self, instr: impl Into) { - self.alloc_instr_in_control(0, instr).unwrap(); + pub fn alloc_instr(&mut self, instr: impl Into, loc: InstrLocId) { + self.alloc_instr_in_control(0, instr, loc).unwrap(); } } diff --git a/src/module/functions/local_function/emit.rs b/src/module/functions/local_function/emit.rs index 4ba33eb8..77928142 100644 --- a/src/module/functions/local_function/emit.rs +++ b/src/module/functions/local_function/emit.rs @@ -10,6 +10,7 @@ pub(crate) fn run( indices: &IdsToIndices, local_indices: &IdHashMap, encoder: &mut Encoder, + map: Option<&mut Vec<(InstrLocId, usize)>>, ) { let v = &mut Emit { indices, @@ -17,6 +18,7 @@ pub(crate) fn run( block_kinds: vec![BlockKind::FunctionEntry], encoder, local_indices, + map, }; dfs_in_order(v, func, func.entry_block()); @@ -42,6 +44,9 @@ struct Emit<'a, 'b> { // The instruction sequence we are building up to emit. encoder: &'a mut Encoder<'b>, + + // Encoded ExprId -> offset map. 
+ map: Option<&'a mut Vec<(InstrLocId, usize)>>, } impl<'instr> Visitor<'instr> for Emit<'_, '_> { @@ -90,9 +95,15 @@ impl<'instr> Visitor<'instr> for Emit<'_, '_> { } } - fn visit_instr(&mut self, instr: &'instr Instr) { + fn visit_instr(&mut self, instr: &'instr Instr, instr_loc: &'instr InstrLocId) { use self::Instr::*; + if let Some(map) = self.map.as_mut() { + let pos = self.encoder.pos(); + // Save the encoded_at position for the specified ExprId. + map.push((instr_loc.clone(), pos)); + } + match instr { Block(_) => self.block_kinds.push(BlockKind::Block), Loop(_) => self.block_kinds.push(BlockKind::Loop), diff --git a/src/module/functions/local_function/mod.rs b/src/module/functions/local_function/mod.rs index a723df48..3d1f0c67 100644 --- a/src/module/functions/local_function/mod.rs +++ b/src/module/functions/local_function/mod.rs @@ -45,7 +45,8 @@ impl LocalFunction { id: FunctionId, ty: TypeId, args: Vec, - body: wasmparser::OperatorsReader, + mut body: wasmparser::OperatorsReader, + on_instr_pos: Option<&(dyn Fn(&usize) -> InstrLocId + Sync + Send + 'static)>, ) -> Result { let mut func = LocalFunction { builder: FunctionBuilder::without_entry(ty), @@ -66,9 +67,14 @@ impl LocalFunction { ); let entry = ctx.push_control_with_ty(BlockKind::FunctionEntry, ty); ctx.func.builder.entry = Some(entry); - for inst in body { - let inst = inst?; - validate_instruction(&mut ctx, inst)?; + while !body.eof() { + let (inst, pos) = body.read_with_offset()?; + let loc = if let Some(ref on_instr_pos) = on_instr_pos { + on_instr_pos(&pos) + } else { + InstrLocId::new(pos as u32) + }; + validate_instruction(&mut ctx, inst, loc)?; } if !ctx.controls.is_empty() { bail!("function failed to end with `end`"); @@ -144,7 +150,7 @@ impl LocalFunction { self.block(self.entry_block()) .instrs .iter() - .all(|e| e.is_const()) + .all(|(e, _)| e.is_const()) } /// Collect the set of data segments that are used in this function via @@ -245,8 +251,9 @@ impl LocalFunction { indices: 
&IdsToIndices, local_indices: &IdHashMap, dst: &mut Encoder, + map: Option<&mut Vec<(InstrLocId, usize)>>, ) { - emit::run(self, indices, local_indices, dst) + emit::run(self, indices, local_indices, dst, map) } } @@ -276,27 +283,31 @@ fn block_param_tys( } } -fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<()> { +fn validate_instruction<'context>( + ctx: &'context mut ValidationContext, + inst: Operator, + loc: InstrLocId, +) -> Result<()> { use crate::ir::ExtendedLoad::*; use crate::ValType::*; log::trace!("validate instruction: {:?}", inst); let const_ = |ctx: &mut ValidationContext, ty, value| { - ctx.alloc_instr(Const { value }); + ctx.alloc_instr(Const { value }, loc); ctx.push_operand(Some(ty)); }; let one_op = |ctx: &mut ValidationContext, input, output, op| -> Result<()> { ctx.pop_operand_expected(Some(input))?; - ctx.alloc_instr(Unop { op }); + ctx.alloc_instr(Unop { op }, loc); ctx.push_operand(Some(output)); Ok(()) }; let two_ops = |ctx: &mut ValidationContext, lhs, rhs, output, op| -> Result<()> { ctx.pop_operand_expected(Some(rhs))?; ctx.pop_operand_expected(Some(lhs))?; - ctx.alloc_instr(Binop { op }); + ctx.alloc_instr(Binop { op }, loc); ctx.push_operand(Some(output)); Ok(()) }; @@ -320,7 +331,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let arg = mem_arg(&arg)?; - ctx.alloc_instr(Load { arg, kind, memory }); + ctx.alloc_instr(Load { arg, kind, memory }, loc); ctx.push_operand(Some(ty)); Ok(()) }; @@ -330,7 +341,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let arg = mem_arg(&arg)?; - ctx.alloc_instr(Store { arg, kind, memory }); + ctx.alloc_instr(Store { arg, kind, memory }, loc); Ok(()) }; @@ -339,12 +350,15 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> 
Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let arg = mem_arg(&arg)?; - ctx.alloc_instr(AtomicRmw { - arg, - memory, - op, - width, - }); + ctx.alloc_instr( + AtomicRmw { + arg, + memory, + op, + width, + }, + loc, + ); ctx.push_operand(Some(ty)); Ok(()) }; @@ -355,7 +369,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let arg = mem_arg(&arg)?; - ctx.alloc_instr(Cmpxchg { arg, memory, width }); + ctx.alloc_instr(Cmpxchg { arg, memory, width }, loc); ctx.push_operand(Some(ty)); Ok(()) }; @@ -364,11 +378,10 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let arg = mem_arg(&arg)?; - ctx.alloc_instr(LoadSplat { memory, arg, kind }); + ctx.alloc_instr(LoadSplat { memory, arg, kind }, loc); ctx.push_operand(Some(V128)); Ok(()) }; - match inst { Operator::Call { function_index } => { let func = ctx @@ -378,7 +391,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( let ty_id = ctx.module.funcs.get(func).ty(); let fun_ty = ctx.module.types.get(ty_id); ctx.pop_operands(fun_ty.params())?; - ctx.alloc_instr(Call { func }); + ctx.alloc_instr(Call { func }, loc); ctx.push_operands(fun_ty.results()); } Operator::CallIndirect { index, table_index } => { @@ -393,26 +406,26 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( .context("invalid call_indirect")?; ctx.pop_operand_expected(Some(I32))?; ctx.pop_operands(ty.params())?; - ctx.alloc_instr(CallIndirect { table, ty: type_id }); + ctx.alloc_instr(CallIndirect { table, ty: type_id }, loc); ctx.push_operands(ty.results()); } Operator::GetLocal { local_index } => { let local = ctx.indices.get_local(ctx.func_id, local_index)?; let ty = ctx.module.locals.get(local).ty(); - ctx.alloc_instr(LocalGet 
{ local }); + ctx.alloc_instr(LocalGet { local }, loc); ctx.push_operand(Some(ty)); } Operator::SetLocal { local_index } => { let local = ctx.indices.get_local(ctx.func_id, local_index)?; let ty = ctx.module.locals.get(local).ty(); ctx.pop_operand_expected(Some(ty))?; - ctx.alloc_instr(LocalSet { local }); + ctx.alloc_instr(LocalSet { local }, loc); } Operator::TeeLocal { local_index } => { let local = ctx.indices.get_local(ctx.func_id, local_index)?; let ty = ctx.module.locals.get(local).ty(); ctx.pop_operand_expected(Some(ty))?; - ctx.alloc_instr(LocalTee { local }); + ctx.alloc_instr(LocalTee { local }, loc); ctx.push_operand(Some(ty)); } Operator::GetGlobal { global_index } => { @@ -421,7 +434,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( .get_global(global_index) .context("invalid global.get")?; let ty = ctx.module.globals.get(global).ty; - ctx.alloc_instr(GlobalGet { global }); + ctx.alloc_instr(GlobalGet { global }, loc); ctx.push_operand(Some(ty)); } Operator::SetGlobal { global_index } => { @@ -431,7 +444,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( .context("invalid global.set")?; let ty = ctx.module.globals.get(global).ty; ctx.pop_operand_expected(Some(ty))?; - ctx.alloc_instr(GlobalSet { global }); + ctx.alloc_instr(GlobalSet { global }, loc); } Operator::I32Const { value } => const_(ctx, I32, Value::I32(value)), Operator::I64Const { value } => const_(ctx, I64, Value::I64(value)), @@ -605,24 +618,24 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( Operator::Drop => { ctx.pop_operand()?; - ctx.alloc_instr(Drop {}); + ctx.alloc_instr(Drop {}, loc); } Operator::Select => { ctx.pop_operand_expected(Some(I32))?; let t1 = ctx.pop_operand()?; let t2 = ctx.pop_operand_expected(t1)?; - ctx.alloc_instr(Select {}); + ctx.alloc_instr(Select {}, loc); ctx.push_operand(t2); } Operator::Return => { let fn_ty = ctx.module.funcs.get(ctx.func_id).ty(); let 
expected = ctx.module.types.get(fn_ty).results(); ctx.pop_operands(expected)?; - ctx.alloc_instr(Return {}); + ctx.alloc_instr(Return {}, loc); ctx.unreachable(); } Operator::Unreachable => { - ctx.alloc_instr(Unreachable {}); + ctx.alloc_instr(Unreachable {}, loc); ctx.unreachable(); } Operator::Block { ty } => { @@ -630,14 +643,14 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( let result_tys = block_result_tys(ctx, ty)?; ctx.pop_operands(&param_tys)?; let seq = ctx.push_control(BlockKind::Block, param_tys, result_tys)?; - ctx.alloc_instr_in_control(1, Block { seq })?; + ctx.alloc_instr_in_control(1, Block { seq }, loc)?; } Operator::Loop { ty } => { let result_tys = block_result_tys(ctx, ty)?; let param_tys = block_param_tys(ctx, ty)?; ctx.pop_operands(&param_tys)?; let seq = ctx.push_control(BlockKind::Loop, param_tys, result_tys)?; - ctx.alloc_instr_in_control(1, Loop { seq })?; + ctx.alloc_instr_in_control(1, Loop { seq }, loc)?; } Operator::If { ty } => { let result_tys = block_result_tys(ctx, ty)?; @@ -683,10 +696,13 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( } }; - ctx.alloc_instr(IfElse { - consequent, - alternative, - }); + ctx.alloc_instr( + IfElse { + consequent, + alternative, + }, + loc, + ); } _ => {} } @@ -718,7 +734,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operands(&expected)?; let block = ctx.control(n)?.block; - ctx.alloc_instr(Br { block }); + ctx.alloc_instr(Br { block }, loc); ctx.unreachable(); } Operator::BrIf { relative_depth } => { @@ -729,7 +745,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operands(&expected)?; let block = ctx.control(n)?.block; - ctx.alloc_instr(BrIf { block }); + ctx.alloc_instr(BrIf { block }, loc); ctx.push_operands(&expected); } Operator::BrTable { table } => { @@ -767,7 +783,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator)
-> Result<( ctx.pop_operand_expected(Some(I32))?; ctx.pop_operands(&expected)?; - ctx.alloc_instr(BrTable { blocks, default }); + ctx.alloc_instr(BrTable { blocks, default }, loc); ctx.unreachable(); } @@ -777,7 +793,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( bail!("reserved byte isn't zero"); } let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(MemorySize { memory }); + ctx.alloc_instr(MemorySize { memory }, loc); ctx.push_operand(Some(I32)); } Operator::MemoryGrow { reserved } => { @@ -786,7 +802,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( } ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(MemoryGrow { memory }); + ctx.alloc_instr(MemoryGrow { memory }, loc); ctx.push_operand(Some(I32)); } Operator::MemoryInit { segment } => { @@ -795,28 +811,31 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; let data = ctx.indices.get_data(segment)?; - ctx.alloc_instr(MemoryInit { memory, data }); + ctx.alloc_instr(MemoryInit { memory, data }, loc); } Operator::DataDrop { segment } => { let data = ctx.indices.get_data(segment)?; - ctx.alloc_instr(DataDrop { data }); + ctx.alloc_instr(DataDrop { data }, loc); } Operator::MemoryCopy => { ctx.pop_operand_expected(Some(I32))?; ctx.pop_operand_expected(Some(I32))?; ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(MemoryCopy { - src: memory, - dst: memory, - }); + ctx.alloc_instr( + MemoryCopy { + src: memory, + dst: memory, + }, + loc, + ); } Operator::MemoryFill => { ctx.pop_operand_expected(Some(I32))?; ctx.pop_operand_expected(Some(I32))?; ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(MemoryFill { memory }); + ctx.alloc_instr(MemoryFill { memory }, loc); } Operator::Nop => {} @@ 
-882,7 +901,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( if flags != 0 { bail!("fence with nonzero flags not supported yet"); } - ctx.alloc_instr(AtomicFence {}); + ctx.alloc_instr(AtomicFence {}, loc); } Operator::I32AtomicLoad { memarg } => { @@ -1111,10 +1130,13 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(I32))?; ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(AtomicNotify { - memory, - arg: mem_arg(memarg)?, - }); + ctx.alloc_instr( + AtomicNotify { + memory, + arg: mem_arg(memarg)?, + }, + loc, + ); ctx.push_operand(Some(I32)); } Operator::I32Wait { ref memarg } | Operator::I64Wait { ref memarg } => { @@ -1126,18 +1148,21 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(ty))?; ctx.pop_operand_expected(Some(I32))?; let memory = ctx.indices.get_memory(0)?; - ctx.alloc_instr(AtomicWait { - sixty_four, - memory, - arg: mem_arg(memarg)?, - }); + ctx.alloc_instr( + AtomicWait { + sixty_four, + memory, + arg: mem_arg(memarg)?, + }, + loc, + ); ctx.push_operand(Some(I32)); } Operator::TableGet { table } => { let table = ctx.indices.get_table(table)?; ctx.pop_operand_expected(Some(I32))?; - ctx.alloc_instr(TableGet { table }); + ctx.alloc_instr(TableGet { table }, loc); ctx.push_operand(Some(Anyref)); } Operator::TableSet { table } => { @@ -1148,7 +1173,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( }; ctx.pop_operand_expected(Some(expected_ty))?; ctx.pop_operand_expected(Some(I32))?; - ctx.alloc_instr(TableSet { table }); + ctx.alloc_instr(TableSet { table }, loc); } Operator::TableGrow { table } => { let table = ctx.indices.get_table(table)?; @@ -1158,35 +1183,35 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( }; ctx.pop_operand_expected(Some(I32))?; 
ctx.pop_operand_expected(Some(expected_ty))?; - ctx.alloc_instr(TableGrow { table }); + ctx.alloc_instr(TableGrow { table }, loc); ctx.push_operand(Some(I32)); } Operator::TableSize { table } => { let table = ctx.indices.get_table(table)?; - ctx.alloc_instr(TableSize { table }); + ctx.alloc_instr(TableSize { table }, loc); ctx.push_operand(Some(I32)); } Operator::RefNull => { - ctx.alloc_instr(RefNull {}); + ctx.alloc_instr(RefNull {}, loc); ctx.push_operand(Some(Anyref)); } Operator::RefIsNull => { ctx.pop_operand_expected(Some(Anyref))?; - ctx.alloc_instr(RefIsNull {}); + ctx.alloc_instr(RefIsNull {}, loc); ctx.push_operand(Some(I32)); } Operator::V8x16Swizzle => { ctx.pop_operand_expected(Some(V128))?; ctx.pop_operand_expected(Some(V128))?; - ctx.alloc_instr(V128Swizzle {}); + ctx.alloc_instr(V128Swizzle {}, loc); ctx.push_operand(Some(V128)); } Operator::V8x16Shuffle { lanes } => { ctx.pop_operand_expected(Some(V128))?; ctx.pop_operand_expected(Some(V128))?; - ctx.alloc_instr(V128Shuffle { indices: lanes }); + ctx.alloc_instr(V128Shuffle { indices: lanes }, loc); ctx.push_operand(Some(V128)); } @@ -1291,7 +1316,7 @@ fn validate_instruction(ctx: &mut ValidationContext, inst: Operator) -> Result<( ctx.pop_operand_expected(Some(V128))?; ctx.pop_operand_expected(Some(V128))?; ctx.pop_operand_expected(Some(V128))?; - ctx.alloc_instr(V128Bitselect {}); + ctx.alloc_instr(V128Bitselect {}, loc); ctx.push_operand(Some(V128)); } diff --git a/src/module/functions/mod.rs b/src/module/functions/mod.rs index 0dfc50b2..5f89c78f 100644 --- a/src/module/functions/mod.rs +++ b/src/module/functions/mod.rs @@ -5,6 +5,7 @@ mod local_function; use crate::emit::{Emit, EmitContext, Section}; use crate::encode::Encoder; use crate::error::Result; +use crate::ir::InstrLocId; use crate::module::imports::ImportId; use crate::module::Module; use crate::parse::IndicesToIds; @@ -324,6 +325,7 @@ impl Module { section: wasmparser::CodeSectionReader, function_section_count: u32, indices: &mut 
IndicesToIds, + on_instr_pos: Option<&(dyn Fn(&usize) -> InstrLocId + Sync + Send + 'static)>, ) -> Result<()> { log::debug!("parse code section"); let amt = section.get_count(); @@ -401,7 +403,10 @@ impl Module { // take some time, so parse all function bodies in parallel. let results = maybe_parallel!(bodies.(into_iter | into_par_iter)) .map(|(id, body, args, ty)| { - (id, LocalFunction::parse(self, indices, id, ty, args, body)) + ( + id, + LocalFunction::parse(self, indices, id, ty, args, body, on_instr_pos), + ) }) .collect::>(); @@ -440,6 +445,19 @@ fn used_local_functions<'a>(cx: &mut EmitContext<'a>) -> Vec<(FunctionId, &'a Lo functions } +fn collect_non_default_code_offsets( + code_transform: &mut Vec<(InstrLocId, usize)>, + code_offset: usize, + map: Vec<(InstrLocId, usize)>, +) { + for (src, dst) in map { + let dst = dst + code_offset; + if !src.is_default() { + code_transform.push((src, dst)); + } + } +} + impl Emit for ModuleFunctions { fn emit(&self, cx: &mut EmitContext) { log::debug!("emit code section"); @@ -451,6 +469,8 @@ impl Emit for ModuleFunctions { let mut cx = cx.start_section(Section::Code); cx.encoder.usize(functions.len()); + let generate_map = cx.module.config.preserve_code_transform; + // Functions can typically take awhile to serialize, so serialize // everything in parallel. Afterwards we'll actually place all the // functions together. 
@@ -459,15 +479,22 @@ impl Emit for ModuleFunctions { log::debug!("emit function {:?} {:?}", id, cx.module.funcs.get(id).name); let mut wasm = Vec::new(); let mut encoder = Encoder::new(&mut wasm); + let mut map = if generate_map { Some(Vec::new()) } else { None }; + let (used_locals, local_indices) = func.emit_locals(cx.module, &mut encoder); - func.emit_instructions(cx.indices, &local_indices, &mut encoder); - (wasm, id, used_locals, local_indices) + func.emit_instructions(cx.indices, &local_indices, &mut encoder, map.as_mut()); + (wasm, id, used_locals, local_indices, map) }) .collect::<Vec<_>>(); cx.indices.locals.reserve(bytes.len()); - for (wasm, id, used_locals, local_indices) in bytes { - cx.encoder.bytes(&wasm); + for (wasm, id, used_locals, local_indices, map) in bytes { + cx.encoder.usize(wasm.len()); + let code_offset = cx.encoder.pos(); + cx.encoder.raw(&wasm); + if let Some(map) = map { + collect_non_default_code_offsets(&mut cx.code_transform, code_offset, map); + } cx.indices.locals.insert(id, local_indices); cx.locals.insert(id, used_locals); } diff --git a/src/module/mod.rs b/src/module/mod.rs index 16951b18..a6bab2c2 100644 --- a/src/module/mod.rs +++ b/src/module/mod.rs @@ -17,6 +17,7 @@ mod types; use crate::emit::{Emit, EmitContext, IdsToIndices, Section}; use crate::encode::Encoder; use crate::error::Result; +pub use crate::ir::InstrLocId; pub use crate::module::custom::{ CustomSection, CustomSectionId, ModuleCustomSections, RawCustomSection, TypedCustomSectionId, UntypedCustomSectionId, @@ -70,6 +71,14 @@ pub struct Module { pub(crate) config: ModuleConfig, } +/// Maps from an offset of an instruction in the input Wasm to its offset in the +/// output Wasm. +/// +/// Note that an input offset may be mapped to multiple output offsets, and vice +/// versa, due to transformations like function inlining or constant +/// propagation.
+pub type CodeTransform = Vec<(InstrLocId, usize)>; + impl Module { /// Create a default, empty module that uses the given configuration. pub fn with_config(config: ModuleConfig) -> Self { @@ -173,8 +182,14 @@ impl Module { None => bail!("cannot have a code section without function section"), }; let reader = section.get_code_section_reader()?; - ret.parse_local_functions(reader, function_section_size, &mut indices) - .context("failed to parse code section")?; + let on_instr_loc = config.on_instr_loc.as_ref().map(|f| f.as_ref()); + ret.parse_local_functions( + reader, + function_section_size, + &mut indices, + on_instr_loc, + ) + .context("failed to parse code section")?; } wasmparser::SectionCode::DataCount => { let count = section.get_data_count_section_content()?; @@ -231,7 +246,7 @@ impl Module { } /// Emit this module into a `.wasm` file at the given path. - pub fn emit_wasm_file<P>(&self, path: P) -> Result<()> + pub fn emit_wasm_file<P>(&mut self, path: P) -> Result<()> where P: AsRef<Path>, { @@ -241,7 +256,7 @@ impl Module { } /// Emit this module into an in-memory wasm buffer. - pub fn emit_wasm(&self) -> Vec<u8> { + pub fn emit_wasm(&mut self) -> Vec<u8> { log::debug!("start emit"); let indices = &mut IdsToIndices::default(); @@ -249,11 +264,14 @@ impl Module { wasm.extend(&[0x00, 0x61, 0x73, 0x6d]); // magic wasm.extend(&[0x01, 0x00, 0x00, 0x00]); // version + let mut customs = mem::replace(&mut self.customs, ModuleCustomSections::default()); + let mut cx = EmitContext { module: self, indices, encoder: Encoder::new(&mut wasm), locals: Default::default(), + code_transform: Vec::new(), }; self.types.emit(&mut cx); self.imports.emit(&mut cx); @@ -280,13 +298,18 @@ impl Module { let indices = mem::replace(cx.indices, Default::default()); - for (_id, section) in self.customs.iter() { + for (_id, section) in customs.iter_mut() { if !self.config.generate_dwarf && section.name().starts_with(".debug") { log::debug!("skipping DWARF custom section {}", section.name()); continue; } log::debug!("emitting custom section {}", section.name()); + + if self.config.preserve_code_transform { + section.apply_code_transform(&cx.code_transform); + } + cx.custom_section(&section.name()) .encoder .raw(&section.data(&indices));