Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a standard header for metadata in all serialized modules #2747

Merged
merged 2 commits into from Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 13 additions & 15 deletions lib/engine-dylib/src/artifact.rs
Expand Up @@ -28,7 +28,7 @@ use wasmer_compiler::{
};
use wasmer_engine::{
register_frame_info, Artifact, DeserializeError, FunctionExtent, GlobalFrameInfoRegistration,
InstantiationError, SerializeError,
InstantiationError, MetadataHeader, SerializeError,
};
#[cfg(feature = "compiler")]
use wasmer_engine::{Engine, Tunables};
Expand Down Expand Up @@ -217,10 +217,8 @@ impl DylibArtifact {

let serialized_data = metadata.serialize()?;

let mut metadata_binary = vec![0; 16];
let mut writable = &mut metadata_binary[..];
leb128::write::unsigned(&mut writable, serialized_data.len() as u64)
.expect("Should write number");
let mut metadata_binary = vec![];
metadata_binary.extend(MetadataHeader::new(serialized_data.len()));
metadata_binary.extend(serialized_data);

let (compile_info, symbol_registry) = metadata.split();
Expand Down Expand Up @@ -256,8 +254,13 @@ impl DylibArtifact {
function_body_inputs,
)?;
let mut obj = get_object_for_target(&target_triple).map_err(to_compile_error)?;
emit_data(&mut obj, WASMER_METADATA_SYMBOL, &metadata_binary, 16)
.map_err(to_compile_error)?;
emit_data(
&mut obj,
WASMER_METADATA_SYMBOL,
&metadata_binary,
MetadataHeader::ALIGN as u64,
)
.map_err(to_compile_error)?;
emit_compilation(&mut obj, compilation, &symbol_registry, &target_triple)
.map_err(to_compile_error)?;
let file = tempfile::Builder::new()
Expand Down Expand Up @@ -618,9 +621,7 @@ impl DylibArtifact {
DeserializeError::CorruptedBinary(format!("Library loading failed: {}", e))
})?;
let shared_path: PathBuf = PathBuf::from(path);
// We reserve 16 bytes for the length because the rest of the metadata
// needs to be aligned to 16 bytes.
let symbol: LibrarySymbol<*mut [u8; 16]> =
let symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> =
lib.get(WASMER_METADATA_SYMBOL).map_err(|e| {
DeserializeError::CorruptedBinary(format!(
"The provided object file doesn't seem to be generated by Wasmer: {}",
Expand All @@ -630,12 +631,9 @@ impl DylibArtifact {
use std::slice;

let metadata = &**symbol;
let mut readable = &metadata[..];
let metadata_len = leb128::read::unsigned(&mut readable).map_err(|_e| {
DeserializeError::CorruptedBinary("Can't read metadata size".to_string())
})?;
let metadata_len = MetadataHeader::parse(metadata)?;
let metadata_slice: &'static [u8] =
slice::from_raw_parts(metadata.as_ptr().add(16), metadata_len as usize);
slice::from_raw_parts(metadata.as_ptr().add(MetadataHeader::LEN), metadata_len);

let metadata = ModuleMetadata::deserialize(metadata_slice)?;

Expand Down
18 changes: 9 additions & 9 deletions lib/engine-staticlib/src/artifact.rs
Expand Up @@ -17,7 +17,9 @@ use wasmer_compiler::{
CompileModuleInfo, Compiler, FunctionBodyData, ModuleEnvironment, ModuleMiddlewareChain,
ModuleTranslationState,
};
use wasmer_engine::{Artifact, DeserializeError, InstantiationError, SerializeError};
use wasmer_engine::{
Artifact, DeserializeError, InstantiationError, MetadataHeader, SerializeError,
};
#[cfg(feature = "compiler")]
use wasmer_engine::{Engine, Tunables};
#[cfg(feature = "compiler")]
Expand Down Expand Up @@ -206,10 +208,8 @@ impl StaticlibArtifact {
*/

let serialized_data = bincode::serialize(&metadata).map_err(to_compile_error)?;
let mut metadata_binary = vec![0; 10];
let mut writable = &mut metadata_binary[..];
leb128::write::unsigned(&mut writable, serialized_data.len() as u64)
.expect("Should write number");
let mut metadata_binary = vec![];
metadata_binary.extend(MetadataHeader::new(serialized_data.len()));
metadata_binary.extend(serialized_data);
let metadata_length = metadata_binary.len();

Expand Down Expand Up @@ -321,15 +321,15 @@ impl StaticlibArtifact {
engine: &StaticlibEngine,
bytes: &[u8],
) -> Result<Self, DeserializeError> {
let mut reader = bytes;
let data_len = leb128::read::unsigned(&mut reader).unwrap() as usize;
let metadata_len = MetadataHeader::parse(bytes)?;

let metadata: ModuleMetadata = bincode::deserialize(&bytes[10..(data_len + 10)]).unwrap();
let metadata: ModuleMetadata =
bincode::deserialize(&bytes[MetadataHeader::LEN..][..metadata_len]).unwrap();

const WORD_SIZE: usize = mem::size_of::<usize>();
let mut byte_buffer = [0u8; WORD_SIZE];

let mut cur_offset = data_len + 10;
let mut cur_offset = MetadataHeader::LEN + metadata_len;
byte_buffer[0..WORD_SIZE].clone_from_slice(&bytes[cur_offset..(cur_offset + WORD_SIZE)]);
cur_offset += WORD_SIZE;

Expand Down
77 changes: 10 additions & 67 deletions lib/engine-universal/src/artifact.rs
Expand Up @@ -8,13 +8,14 @@ use crate::serialize::SerializableCompilation;
use crate::serialize::SerializableModule;
use enumset::EnumSet;
use loupe::MemoryUsage;
use std::mem;
use std::sync::{Arc, Mutex};
use wasmer_compiler::{CompileError, CpuFeature, Features, Triple};
#[cfg(feature = "compiler")]
use wasmer_compiler::{CompileModuleInfo, ModuleEnvironment, ModuleMiddlewareChain};
use wasmer_engine::{
register_frame_info, Artifact, DeserializeError, FunctionExtent, GlobalFrameInfoRegistration,
SerializeError,
MetadataHeader, SerializeError,
};
#[cfg(feature = "compiler")]
use wasmer_engine::{Engine, Tunables};
Expand All @@ -28,9 +29,6 @@ use wasmer_vm::{
VMTrampoline,
};

const SERIALIZED_METADATA_LENGTH_OFFSET: usize = 22;
const SERIALIZED_METADATA_CONTENT_OFFSET: usize = 32;

/// A compiled wasm module, ready to be instantiated.
#[derive(MemoryUsage)]
pub struct UniversalArtifact {
Expand All @@ -46,11 +44,9 @@ pub struct UniversalArtifact {
}

impl UniversalArtifact {
const MAGIC_HEADER: &'static [u8; 22] = b"\0wasmer-universal\0\0\0\0\0";

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to preserve this. I see one thing as the metadata header, and the other is the file header.
Meaning: I think it will be useful to separate the file format from the metadata serialized version.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metadata header itself already has a magic number and a length, so I felt that this was somewhat redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also in the case of the universal engine, there is only metadata and nothing else.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean is that the magic is only relevant for the file format (thus, for the universal engine).
We don't need magic in the metadata itself, since the metadata field name in the dynamic library is already the identifier.

Therefore, I think a better abstraction is to put the magic in the engine artifact and the rest (metadata version, length, ...) on the metadata as the PR is doing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metadata header needs to be 16 bytes since the serialized metadata is required to be 16-byte aligned. Since the length only uses 8 bytes, we have 8 bytes left that we might as well use for a magic.

Replacing the custom header of wasmer-universal with the standard MetadataHeader was done to make the code more maintainable by centralizing header-related code.

/// Check if the provided bytes look like a serialized `UniversalArtifact`.
pub fn is_deserializable(bytes: &[u8]) -> bool {
bytes.starts_with(Self::MAGIC_HEADER)
MetadataHeader::parse(bytes).is_ok()
}

/// Compile a data buffer into a `UniversalArtifact`, which may then be instantiated.
Expand Down Expand Up @@ -151,22 +147,8 @@ impl UniversalArtifact {
universal: &UniversalEngine,
bytes: &[u8],
) -> Result<Self, DeserializeError> {
if !Self::is_deserializable(bytes) {
return Err(DeserializeError::Incompatible(
"The provided bytes are not wasmer-universal".to_string(),
));
}

let mut inner_bytes = &bytes[SERIALIZED_METADATA_LENGTH_OFFSET..];

let metadata_len = leb128::read::unsigned(&mut inner_bytes).map_err(|_e| {
DeserializeError::CorruptedBinary("Can't read metadata size".to_string())
})?;
let metadata_slice: &[u8] = std::slice::from_raw_parts(
&bytes[SERIALIZED_METADATA_CONTENT_OFFSET] as *const u8,
metadata_len as usize,
);

let metadata_len = MetadataHeader::parse(bytes)?;
let metadata_slice: &[u8] = &bytes[MetadataHeader::LEN..][..metadata_len];
let serializable = SerializableModule::deserialize(metadata_slice)?;
Self::from_parts(&mut universal.inner_mut(), serializable)
.map_err(DeserializeError::Compiler)
Expand Down Expand Up @@ -345,51 +327,12 @@ impl Artifact for UniversalArtifact {
&self.func_data_registry
}
fn serialize(&self) -> Result<Vec<u8>, SerializeError> {
// Prepend the header.
let mut serialized = Self::MAGIC_HEADER.to_vec();

serialized.resize(SERIALIZED_METADATA_CONTENT_OFFSET, 0);
let mut writable_leb = &mut serialized[SERIALIZED_METADATA_LENGTH_OFFSET..];
let serialized_data = self.serializable.serialize()?;
let length = serialized_data.len();
leb128::write::unsigned(&mut writable_leb, length as u64).expect("Should write number");

let offset = pad_and_extend::<SerializableModule>(&mut serialized, &serialized_data);
assert_eq!(offset, SERIALIZED_METADATA_CONTENT_OFFSET);
assert!(mem::align_of::<SerializableModule>() <= MetadataHeader::ALIGN);

Ok(serialized)
}
}

/// It pads the data with the desired alignment
pub fn pad_and_extend<T>(prev_data: &mut Vec<u8>, data: &[u8]) -> usize {
let align = std::mem::align_of::<T>();

let mut offset = prev_data.len();
if offset & (align - 1) != 0 {
offset += align - (offset & (align - 1));
prev_data.resize(offset, 0);
}
prev_data.extend(data);
offset
}

#[cfg(test)]
mod tests {
use super::pad_and_extend;

#[test]
fn test_pad_and_extend() {
let mut data: Vec<u8> = vec![];
let offset = pad_and_extend::<i64>(&mut data, &[1, 0, 0, 0, 0, 0, 0, 0]);
assert_eq!(offset, 0);
let offset = pad_and_extend::<i32>(&mut data, &[2, 0, 0, 0]);
assert_eq!(offset, 8);
let offset = pad_and_extend::<i64>(&mut data, &[3, 0, 0, 0, 0, 0, 0, 0]);
assert_eq!(offset, 16);
assert_eq!(
data,
&[1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0]
);
let mut metadata_binary = vec![];
metadata_binary.extend(MetadataHeader::new(serialized_data.len()));
metadata_binary.extend(serialized_data);
Ok(metadata_binary)
}
}
70 changes: 68 additions & 2 deletions lib/engine/src/artifact.rs
@@ -1,12 +1,14 @@
use crate::{
resolve_imports, InstantiationError, Resolver, RuntimeError, SerializeError, Tunables,
resolve_imports, DeserializeError, InstantiationError, Resolver, RuntimeError, SerializeError,
Tunables,
};
use enumset::EnumSet;
use loupe::MemoryUsage;
use std::any::Any;
use std::fs;
use std::convert::TryInto;
use std::path::Path;
use std::sync::Arc;
use std::{fs, mem};
use wasmer_compiler::{CpuFeature, Features};
use wasmer_types::entity::{BoxedSlice, PrimaryMap};
use wasmer_types::{
Expand Down Expand Up @@ -227,3 +229,67 @@ impl dyn Artifact + 'static {
self.upcast_any_mut().downcast_mut::<T>()
}
}

/// Metadata header which holds an ABI version and the length of the remaining
/// metadata.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct MetadataHeader {
magic: [u8; 8],
version: u32,
len: u32,
}

impl MetadataHeader {
/// Current ABI version. Increment this any time breaking changes are made
/// to the format of the serialized data.
const CURRENT_VERSION: u32 = 1;

/// Magic number to identify wasmer metadata.
const MAGIC: [u8; 8] = *b"WASMER\0\0";

/// Length of the metadata header.
pub const LEN: usize = 16;

/// Alignment of the metadata.
pub const ALIGN: usize = 16;

/// Creates a new header for metadata of the given length.
pub fn new(len: usize) -> [u8; 16] {
let header = MetadataHeader {
magic: Self::MAGIC,
version: Self::CURRENT_VERSION,
len: len.try_into().expect("metadata exceeds maximum length"),
};
unsafe { mem::transmute(header) }
}

/// Parses the header and returns the length of the metadata following it.
pub fn parse(bytes: &[u8]) -> Result<usize, DeserializeError> {
if bytes.as_ptr() as usize % 16 != 0 {
return Err(DeserializeError::CorruptedBinary(
"misaligned metadata".to_string(),
));
}
let bytes: [u8; 16] = bytes
.get(..16)
.ok_or_else(|| {
DeserializeError::CorruptedBinary("invalid metadata header".to_string())
})?
.try_into()
.unwrap();
let header: MetadataHeader = unsafe { mem::transmute(bytes) };
if header.magic != Self::MAGIC {
return Err(DeserializeError::Incompatible(
"The provided bytes were not serialized by Wasmer".to_string(),
));
}
if header.version != Self::CURRENT_VERSION {
return Err(DeserializeError::Incompatible(
"The provided bytes were serialized by an incompatible version of Wasmer"
.to_string(),
));
}
Ok(header.len as usize)
}
}
2 changes: 1 addition & 1 deletion lib/engine/src/lib.rs
Expand Up @@ -28,7 +28,7 @@ mod resolver;
mod trap;
mod tunables;

pub use crate::artifact::Artifact;
pub use crate::artifact::{Artifact, MetadataHeader};
pub use crate::engine::{Engine, EngineId};
pub use crate::error::{
DeserializeError, ImportError, InstantiationError, LinkError, SerializeError,
Expand Down