From 42ca3a3ad1c575d121e9f97e6e73c415218ad4f9 Mon Sep 17 00:00:00 2001 From: Stepan Koltsov Date: Mon, 20 May 2019 04:07:28 +0100 Subject: [PATCH] Implement the same OOM fix for reading Bytes Nobody noticed likely because `with-bytes` feature is rarely used. CC: https://github.com/stepancheg/rust-protobuf/issues/411 --- protobuf/src/buf_read_iter.rs | 75 ++++++++++++++++++++++++++++++++--- protobuf/src/stream.rs | 8 +++- 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/protobuf/src/buf_read_iter.rs b/protobuf/src/buf_read_iter.rs index d7de43070..340438b1d 100644 --- a/protobuf/src/buf_read_iter.rs +++ b/protobuf/src/buf_read_iter.rs @@ -15,6 +15,8 @@ use bytes::BytesMut; use error::WireError; use ProtobufError; use ProtobufResult; +use stream::READ_RAW_BYTES_MAX_ALLOC; + // If an input stream is constructed with a `Read`, we create a // `BufReader` with an internal buffer of this size. @@ -221,6 +223,61 @@ impl<'ignore> BufReadIter<'ignore> { Ok(r) } + /// Read at most `max` bytes, append to `Vec`. + /// + /// Returns 0 when EOF or limit reached. + fn read_to_vec(&mut self, vec: &mut Vec, max: usize) -> ProtobufResult { + let rem = self.fill_buf()?; + + let len = cmp::min(rem.len(), max); + vec.extend_from_slice(&rem[..len]); + self.pos_within_buf += len; + Ok(len) + } + + /// Read exact number of bytes into `Vec`. + /// + /// `Vec` is cleared in the beginning. + pub fn read_exact_to_vec(&mut self, count: usize, target: &mut Vec) -> ProtobufResult<()> { + // TODO: also do some limits when reading from unlimited source + if count as u64 > self.bytes_until_limit() { + return Err(ProtobufError::WireError(WireError::TruncatedMessage)); + } + + target.clear(); + + if count >= READ_RAW_BYTES_MAX_ALLOC && count > target.capacity() { + // avoid calling `reserve` on buf with very large buffer: could be a malformed message + + target.reserve(READ_RAW_BYTES_MAX_ALLOC); + + while target.len() < count { + if count - target.len() <= target.len() { + target.reserve_exact(count - target.len()); + } else { + target.reserve(1); + } + + let max = cmp::min(target.capacity() - target.len(), count - target.len()); + let read = self.read_to_vec(target, max)?; + if read == 0 { + return Err(ProtobufError::WireError(WireError::TruncatedMessage)); + } + } + } else { + target.reserve_exact(count); + + unsafe { + self.read_exact(&mut target.get_unchecked_mut(..count))?; + target.set_len(count); + } + } + + debug_assert_eq!(count, target.len()); + + Ok(()) + } + #[cfg(feature = "bytes")] pub fn read_exact_bytes(&mut self, len: usize) -> ProtobufResult { if let InputSource::Bytes(bytes) = self.input_source { @@ -231,15 +288,21 @@ impl<'ignore> BufReadIter<'ignore> { self.pos_within_buf += len; Ok(r) } else { - let mut r = BytesMut::with_capacity(len); - unsafe { - { - let mut buf = &mut r.bytes_mut()[..len]; + if len >= READ_RAW_BYTES_MAX_ALLOC { + // We cannot trust `len` because protobuf message could be malformed. + // Reading should not result in OOM when allocating a buffer. + let mut v = Vec::new(); + self.read_exact_to_vec(len, &mut v)?; + Ok(Bytes::from(v)) + } else { + let mut r = BytesMut::with_capacity(len); + unsafe { + let buf = &mut r.bytes_mut()[..len]; self.read_exact(buf)?; + r.advance_mut(len); } - r.advance_mut(len); + Ok(r.freeze()) } - Ok(r.freeze()) } } diff --git a/protobuf/src/stream.rs b/protobuf/src/stream.rs index 5eec381be..b372eb18c 100644 --- a/protobuf/src/stream.rs +++ b/protobuf/src/stream.rs @@ -36,7 +36,7 @@ const OUTPUT_STREAM_BUFFER_SIZE: usize = 8 * 1024; const DEFAULT_RECURSION_LIMIT: u32 = 100; /// Max allocated vec when reading length-delimited from unknown input stream -const READ_RAW_BYTES_MAX_ALLOC: usize = 10_000_000; +pub(crate) const READ_RAW_BYTES_MAX_ALLOC: usize = 10_000_000; /// Serialization constants. pub mod wire_format { @@ -712,6 +712,12 @@ impl<'a> CodedInputStream<'a> { /// Read raw bytes into the supplied vector. The vector will be resized as needed and /// overwritten. pub fn read_raw_bytes_into(&mut self, count: u32, target: &mut Vec) -> ProtobufResult<()> { + if false { + // Master uses this version, but keep existing version for a while + // to avoid possible breakages. + return self.source.read_exact_to_vec(count as usize, target); + } + let count = count as usize; // TODO: also do some limits when reading from unlimited source