Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate unsafe code from both ser and de of u8 #26

Merged
merged 1 commit into from Mar 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
97 changes: 43 additions & 54 deletions borsh/src/de/mod.rs
Expand Up @@ -36,18 +36,20 @@ pub trait BorshDeserialize: Sized {
Ok(result)
}

/// Whether Self is u8.
/// NOTE: `Vec<u8>` is the most common use-case for serialization and deserialization, it's
/// worth handling it as a special case to improve performance.
/// It's a workaround for specific `Vec<u8>` implementation versus generic `Vec<T>`
/// implementation. See https://github.com/rust-lang/rfcs/pull/1210 for details.
///
/// It's marked unsafe, because if the type is not `u8` it leads to UB. See related issues:
/// - https://github.com/near/borsh-rs/issues/17
/// - https://github.com/near/borsh-rs/issues/18
#[inline]
unsafe fn is_u8() -> bool {
false
#[doc(hidden)]
fn vec_from_bytes(len: u32, buf: &mut &[u8]) -> Result<Option<Vec<Self>>> {
let _ = len;
let _ = buf;
Ok(None)
}

#[inline]
#[doc(hidden)]
fn copy_from_bytes(buf: &mut &[u8], out: &mut [Self]) -> Result<bool> {
let _ = buf;
let _ = out;
Ok(false)
}
}

Expand All @@ -66,8 +68,33 @@ impl BorshDeserialize for u8 {
}

#[inline]
unsafe fn is_u8() -> bool {
true
#[doc(hidden)]
fn vec_from_bytes(len: u32, buf: &mut &[u8]) -> Result<Option<Vec<Self>>> {
let len = len.try_into().map_err(|_| ErrorKind::InvalidInput)?;
if buf.len() < len {
return Err(Error::new(
ErrorKind::InvalidInput,
ERROR_UNEXPECTED_LENGTH_OF_INPUT,
));
}
let (front, rest) = buf.split_at(len);
*buf = rest;
Ok(Some(front.to_vec()))
}

#[inline]
#[doc(hidden)]
fn copy_from_bytes(buf: &mut &[u8], out: &mut [Self]) -> Result<bool> {
if buf.len() < out.len() {
return Err(Error::new(
ErrorKind::InvalidInput,
ERROR_UNEXPECTED_LENGTH_OF_INPUT,
));
}
let (front, rest) = buf.split_at(out.len());
out.copy_from_slice(front);
*buf = rest;
Ok(true)
}
}

Expand Down Expand Up @@ -246,35 +273,8 @@ where
let len = u32::deserialize(buf)?;
if len == 0 {
Ok(Vec::new())
} else if unsafe { T::is_u8() } && size_of::<T>() == size_of::<u8>() {
let len = len.try_into().map_err(|_| ErrorKind::InvalidInput)?;
if buf.len() < len {
return Err(Error::new(
ErrorKind::InvalidInput,
ERROR_UNEXPECTED_LENGTH_OF_INPUT,
));
}
let result = buf[..len].to_vec();
*buf = &buf[len..];
// See comment from https://doc.rust-lang.org/std/mem/fn.transmute.html
// The no-copy, unsafe way, still using transmute, but not UB.
// This is equivalent to the original, but safer, and reuses the
// same `Vec` internals. Therefore, the new inner type must have the
// exact same size, and the same alignment, as the old type.
//
// The size of the memory should match because `size_of::<T>() == size_of::<u8>()`.
//
// `T::is_u8()` is a workaround for not being able to implement `Vec<u8>` separately.
let result = unsafe {
// Ensure the original vector is not dropped.
let mut v_clone = core::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(
v_clone.as_mut_ptr() as *mut T,
v_clone.len(),
v_clone.capacity(),
)
};
Ok(result)
} else if let Some(vec_bytes) = T::vec_from_bytes(len, buf)? {
Ok(vec_bytes)
} else if size_of::<T>() == 0 {
let mut result = Vec::new();
result.push(T::deserialize(buf)?);
Expand Down Expand Up @@ -493,18 +493,7 @@ macro_rules! impl_arrays {
#[inline]
fn deserialize(buf: &mut &[u8]) -> Result<Self> {
let mut result = [T::default(); $len];
if unsafe { T::is_u8() } && size_of::<T>() == size_of::<u8>() {
if buf.len() < $len {
return Err(Error::new(
ErrorKind::InvalidInput,
ERROR_UNEXPECTED_LENGTH_OF_INPUT,
));
}
// The size of the memory should match because `size_of::<T>() == size_of::<u8>()`.
// `T::is_u8()` is a workaround for not being able to implement `[u8; *]` separately.
result.copy_from_slice(unsafe { core::slice::from_raw_parts(buf.as_ptr() as *const T, $len) });
*buf = &buf[$len..];
} else {
if !T::copy_from_bytes(buf, &mut result)? {
for i in 0..$len {
result[i] = T::deserialize(buf)?;
}
Expand Down
41 changes: 13 additions & 28 deletions borsh/src/ser/mod.rs
@@ -1,6 +1,5 @@
use core::convert::TryFrom;
use core::hash::BuildHasher;
use core::mem::size_of;

use crate::maybestd::{
borrow::{Cow, ToOwned},
Expand All @@ -24,18 +23,14 @@ pub trait BorshSerialize {
Ok(result)
}

/// Whether Self is u8.
/// NOTE: `Vec<u8>` is the most common use-case for serialization and deserialization, it's
/// worth handling it as a special case to improve performance.
/// It's a workaround for specific `Vec<u8>` implementation versus generic `Vec<T>`
/// implementation. See https://github.com/rust-lang/rfcs/pull/1210 for details.
///
/// It's marked unsafe, because if the type is not `u8` it leads to UB. See related issues:
/// - https://github.com/near/borsh-rs/issues/17
/// - https://github.com/near/borsh-rs/issues/18
#[inline]
unsafe fn is_u8() -> bool {
false
#[doc(hidden)]
fn u8_slice(slice: &[Self]) -> Option<&[u8]>
where
Self: Sized,
{
let _ = slice;
None
}
}

Expand All @@ -46,8 +41,8 @@ impl BorshSerialize for u8 {
}

#[inline]
unsafe fn is_u8() -> bool {
true
fn u8_slice(slice: &[Self]) -> Option<&[u8]> {
Some(slice)
}
}

Expand Down Expand Up @@ -159,13 +154,8 @@ impl BorshSerialize for String {
/// Helper method that is used to serialize a slice of data (without the length marker).
#[inline]
fn serialize_slice<T: BorshSerialize, W: Write>(data: &[T], writer: &mut W) -> Result<()> {
if unsafe { T::is_u8() } && size_of::<T>() == size_of::<u8>() {
// The code below uses unsafe memory representation from `&[T]` to `&[u8]`.
// The size of the memory should match because `size_of::<T>() == size_of::<u8>()`.
//
// `T::is_u8()` is a workaround for not being able to implement `Vec<u8>` separately.
let buf = unsafe { core::slice::from_raw_parts(data.as_ptr() as *const u8, data.len()) };
writer.write_all(buf)?;
if let Some(u8_slice) = T::u8_slice(data) {
writer.write_all(u8_slice)?;
} else {
for item in data {
item.serialize(writer)?;
Expand Down Expand Up @@ -408,13 +398,8 @@ macro_rules! impl_arrays {
{
#[inline]
fn serialize<W: Write>(&self, writer: &mut W) -> Result<()> {
if unsafe { T::is_u8() } && size_of::<T>() == size_of::<u8>() {
// The code below uses unsafe memory representation from `&[T]` to `&[u8]`.
// The size of the memory should match because `size_of::<T>() == size_of::<u8>()`.
//
// `T::is_u8()` is a workaround for not being able to implement `[u8; *]` separately.
let buf = unsafe { core::slice::from_raw_parts(self.as_ptr() as *const u8, self.len()) };
writer.write_all(buf)?;
if let Some(u8_slice) = T::u8_slice(self) {
writer.write_all(u8_slice)?;
} else {
for el in self.iter() {
el.serialize(writer)?;
Expand Down