From 1d31d30798495d0669b4b1bd0c43e375c2cf1d73 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Wed, 8 Jun 2022 08:27:34 +0100 Subject: [PATCH] Don't overwrite existing data on snappy decompress (#1806) (#1807) * Don't trample existing data on snappy decompress (#1806) * Review feedback --- parquet/src/compression.rs | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs index f4aecbf4e86..a5e49360a28 100644 --- a/parquet/src/compression.rs +++ b/parquet/src/compression.rs @@ -49,13 +49,14 @@ use crate::errors::{ParquetError, Result}; /// Parquet compression codec interface. pub trait Codec: Send { - /// Compresses data stored in slice `input_buf` and writes the compressed result + /// Compresses data stored in slice `input_buf` and appends the compressed result /// to `output_buf`. + /// /// Note that you'll need to call `clear()` before reusing the same `output_buf` /// across different `compress` calls. fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec) -> Result<()>; - /// Decompresses data stored in slice `input_buf` and writes output to `output_buf`. + /// Decompresses data stored in slice `input_buf` and appends output to `output_buf`. /// Returns the total number of bytes written. fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec) -> Result; @@ -111,9 +112,10 @@ mod snappy_codec { output_buf: &mut Vec, ) -> Result { let len = decompress_len(input_buf)?; - output_buf.resize(len, 0); + let offset = output_buf.len(); + output_buf.resize(offset + len, 0); self.decoder - .decompress(input_buf, output_buf) + .decompress(input_buf, &mut output_buf[offset..]) .map_err(|e| e.into()) } @@ -340,13 +342,13 @@ mod tests { .expect("Error when compressing"); // Decompress with c2 - let mut decompressed_size = c2 + let decompressed_size = c2 .decompress(compressed.as_slice(), &mut decompressed) .expect("Error when decompressing"); assert_eq!(data.len(), decompressed_size); - decompressed.truncate(decompressed_size); assert_eq!(data, decompressed.as_slice()); + decompressed.clear(); compressed.clear(); // Compress with c2 @@ -354,12 +356,32 @@ mod tests { .expect("Error when compressing"); // Decompress with c1 - decompressed_size = c1 + let decompressed_size = c1 .decompress(compressed.as_slice(), &mut decompressed) .expect("Error when decompressing"); assert_eq!(data.len(), decompressed_size); - decompressed.truncate(decompressed_size); assert_eq!(data, decompressed.as_slice()); + + decompressed.clear(); + compressed.clear(); + + // Test does not trample existing data in output buffers + let prefix = &[0xDE, 0xAD, 0xBE, 0xEF]; + decompressed.extend_from_slice(prefix); + compressed.extend_from_slice(prefix); + + c2.compress(data, &mut compressed) + .expect("Error when compressing"); + + assert_eq!(&compressed[..4], prefix); + + let decompressed_size = c2 + .decompress(&compressed[4..], &mut decompressed) + .expect("Error when decompressing"); + + assert_eq!(data.len(), decompressed_size); + assert_eq!(data, &decompressed[4..]); + assert_eq!(&decompressed[..4], prefix); } fn test_codec(c: CodecType) {