Skip to content

Commit

Permalink
Reintroduce unsafe in encoding::string::merge
Browse files Browse the repository at this point in the history
3ab36fda9a1788e0 introduced a ~3% regression on some decoding
benchmarks. My best guess is that swapping the value temporarily with a
new string is not being optimized well. This commit reverts to using
String::as_mut_vec, but carefully clears the string on decode failure.
It also fixes up the associated regression test, which wasn't properly
covering the malformed UTF-8 case.

    // Before #194
    dataset/google_message3_2/decode
                            time:   [219.45 ms 219.48 ms 219.51 ms]
                            thrpt:  [458.64 MiB/s 458.72 MiB/s 458.78 MiB/s]

    // #194
    dataset/google_message3_2/decode
                            time:   [226.08 ms 226.11 ms 226.18 ms]
                            thrpt:  [445.12 MiB/s 445.25 MiB/s 445.32 MiB/s]

    // This commit
    dataset/google_message3_2/decode
                            time:   [219.05 ms 219.76 ms 221.78 ms]
                            thrpt:  [453.94 MiB/s 458.12 MiB/s 459.61 MiB/s]
  • Loading branch information
danburkert committed Jun 17, 2019
1 parent 5a25f27 commit ec0e8b6
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! Meant to be used only from `Message` implementations.

use std::cmp::min;
use std::mem;
use std::str;
use std::u32;
use std::usize;

Expand Down Expand Up @@ -728,11 +728,21 @@ pub mod string {
where
B: Buf,
{
let mut value_bytes = mem::replace(value, String::new()).into_bytes();
bytes::merge(wire_type, &mut value_bytes, buf)?;
*value = String::from_utf8(value_bytes)
.map_err(|_| DecodeError::new("invalid string value: data is not UTF-8 encoded"))?;
Ok(())
unsafe {
// String::as_mut_vec is unsafe because it doesn't check that the bytes
// inserted into it the resulting vec are valid UTF-8. We check
// explicitly in order to ensure this is safe.
let value = value.as_mut_vec();
super::bytes::merge(wire_type, value, buf)?;

if let Err(_) = str::from_utf8(value) {
// Clear the malformed data.
value.clear();
return Err(DecodeError::new("invalid string value: data is not UTF-8 encoded"));
}

Ok(())
}
}

length_delimited!(String);
Expand Down Expand Up @@ -1304,9 +1314,10 @@ mod test {
}

#[test]
fn string_merge_failure() {
fn string_merge_invalid_utf8() {
let mut s = String::new();
let mut buf = Cursor::new(b"\x80\x80");
let mut buf = Cursor::new(b"\x02\x80\x80");

let r = string::merge(WireType::LengthDelimited, &mut s, &mut buf);
r.expect_err("must be an error");
assert!(s.is_empty());
Expand Down

0 comments on commit ec0e8b6

Please sign in to comment.