From 57ebd7f5e678f3267f2987fe59ee882c0dc94c1a Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Wed, 8 Jun 2022 08:59:09 +0200 Subject: [PATCH 1/3] Add lz4 block de/compress_into and bound calc --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/lz4.rs | 81 ++++++++++++++++++++++++++++++++++++++++++ tests/test_variants.py | 22 ++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec38a8cb..b6da8279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,7 +78,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cramjam" -version = "2.5.0" +version = "2.6.0" dependencies = [ "brotli2", "bzip2", diff --git a/Cargo.toml b/Cargo.toml index d0d1a320..fde609f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam" -version = "2.5.0" +version = "2.6.0" authors = ["Miles Granger "] edition = "2018" license = "MIT" diff --git a/src/lz4.rs b/src/lz4.rs index 4f462c4b..569cf6e0 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -14,8 +14,14 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_block, m)?)?; m.add_function(wrap_pyfunction!(decompress_block, m)?)?; + m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_function(wrap_pyfunction!(compress_block_into, m)?)?; + m.add_function(wrap_pyfunction!(decompress_block_into, m)?)?; + + m.add_function(wrap_pyfunction!(compress_block_bound, m)?)?; + m.add_class::()?; Ok(()) } @@ -120,6 +126,81 @@ pub fn compress_block( Ok(RustyBuffer::from(out)) } +/// LZ4 _block_ decompression into a pre-allocated buffer. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer) +/// ``` +#[pyfunction] +pub fn decompress_block_into(input: BytesType, mut output: BytesType) -> PyResult { + use lz4::block; + to_py_err!(DecompressionError -> block::decompress_to_buffer(input.as_bytes(), None, output.as_bytes_mut())) +} + +/// LZ4 _block_ compression into a pre-allocated buffer. +/// +/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) +/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.lz4.compress_block_into( +/// ... b'some bytes here', +/// ... output=output_buffer, +/// ... mode=Option[str], +/// ... acceleration=Option[int], +/// ... compression=Option[int], +/// ... store_size=Option[bool] +/// ... ) +/// ``` +#[pyfunction] +#[allow(unused_variables)] +pub fn compress_block_into( + data: BytesType, + mut output: BytesType, + mode: Option<&str>, + acceleration: Option, + compression: Option, + store_size: Option, +) -> PyResult { + use lz4::{block, block::CompressionMode}; + + let store_size = store_size.unwrap_or(true); + let mode = match mode { + Some(m) => match m { + "default" => CompressionMode::DEFAULT, + "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), + "high_compression" => CompressionMode::HIGHCOMPRESSION(compression.unwrap_or(9)), + _ => return Err(DecompressionError::new_err(format!("Unrecognized mode '{}'", m))), + }, + None => CompressionMode::DEFAULT, + }; + to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut())) +} + +/// +/// Determine the size of a buffer which is guaranteed to hold the result of block compression, will error if +/// data is too long to be compressed by lz4. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.lz4.compress_block_into( +/// ... b'some bytes here', +/// ... output=output_buffer, +/// ... mode=Option[str], +/// ... acceleration=Option[int], +/// ... compression=Option[int], +/// ... store_size=Option[bool] +/// ... ) +/// ``` +#[pyfunction] +pub fn compress_block_bound(src: BytesType) -> PyResult { + lz4::block::compress_bound(src.len()).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) +} + /// Snappy Compressor object for streaming compression #[pyclass] pub struct Compressor { diff --git a/tests/test_variants.py b/tests/test_variants.py index 2ba4bde0..67796317 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -190,6 +190,28 @@ def test_variant_snappy_raw_into(data): assert same_same(decompressed_buffer[:n_bytes], data) +@given(data=st.binary()) +def test_variant_lz4_block_into(data): + """ + A little more special than other de/compress_into variants, as the underlying + lz4 block api makes a hard expectation that its calculated len is used. 
+ """ + + compressed = cramjam.lz4.compress_block(data) + compressed_size = cramjam.lz4.compress_block_bound(data) + compressed_buffer = np.zeros(compressed_size, dtype=np.uint8) + n_bytes = cramjam.lz4.compress_block_into(data, compressed_buffer) + assert n_bytes == len(compressed) + + decompressed_buffer = np.zeros(len(data), dtype=np.uint8) + n_bytes = cramjam.lz4.decompress_block_into( + compressed_buffer[:n_bytes].tobytes(), decompressed_buffer + ) + assert n_bytes == len(data) + + assert same_same(decompressed_buffer[:n_bytes], data) + + @pytest.mark.parametrize("Obj", (cramjam.File, cramjam.Buffer)) @given(data=st.binary()) def test_dunders(Obj, tmp_path_factory, data): From deefc854684bcb1f13914773f29f2b1fd3c81416 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Wed, 8 Jun 2022 09:10:56 +0200 Subject: [PATCH 2/3] Fixup: refactor out compressionmode from parts --- src/lz4.rs | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/lz4.rs b/src/lz4.rs index 569cf6e0..08baf179 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -2,6 +2,7 @@ use crate::exceptions::{CompressionError, DecompressionError}; use crate::io::{AsBytes, RustyBuffer}; use crate::{to_py_err, BytesType}; +use lz4::{block, block::CompressionMode}; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::PyResult; @@ -79,7 +80,6 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult) -> PyResult { - use lz4::block; let out = to_py_err!(DecompressionError -> block::decompress(data.as_bytes(), output_len.map(|v| v as i32)))?; Ok(RustyBuffer::from(out)) } @@ -110,18 +110,8 @@ pub fn compress_block( compression: Option, store_size: Option, ) -> PyResult { - use lz4::{block, block::CompressionMode}; - let store_size = store_size.unwrap_or(true); - let mode = match mode { - Some(m) => match m { - "default" => CompressionMode::DEFAULT, - "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), - "high_compression" => 
CompressionMode::HIGHCOMPRESSION(compression.unwrap_or(9)), - _ => return Err(DecompressionError::new_err(format!("Unrecognized mode '{}'", m))), - }, - None => CompressionMode::DEFAULT, - }; + let mode = compression_mode(mode, compression, acceleration)?; let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?; Ok(RustyBuffer::from(out)) } @@ -135,7 +125,6 @@ pub fn compress_block( /// ``` #[pyfunction] pub fn decompress_block_into(input: BytesType, mut output: BytesType) -> PyResult { - use lz4::block; to_py_err!(DecompressionError -> block::decompress_to_buffer(input.as_bytes(), None, output.as_bytes_mut())) } @@ -165,10 +154,18 @@ pub fn compress_block_into( compression: Option, store_size: Option, ) -> PyResult { - use lz4::{block, block::CompressionMode}; - let store_size = store_size.unwrap_or(true); - let mode = match mode { + let mode = compression_mode(mode, compression, acceleration)?; + to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut())) +} + +#[inline] +fn compression_mode( + mode: Option<&str>, + compression: Option, + acceleration: Option, +) -> PyResult { + let m = match mode { Some(m) => match m { "default" => CompressionMode::DEFAULT, "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), @@ -177,7 +174,7 @@ pub fn compress_block_into( }, None => CompressionMode::DEFAULT, }; - to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut())) + Ok(m) } /// @@ -198,7 +195,7 @@ pub fn compress_block_into( /// ``` #[pyfunction] pub fn compress_block_bound(src: BytesType) -> PyResult { - lz4::block::compress_bound(src.len()).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + block::compress_bound(src.len()).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) } /// Snappy Compressor object for streaming compression From 
d266289339cf10050ec98ced70e7238352a0d08d Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Wed, 8 Jun 2022 09:20:52 +0200 Subject: [PATCH 3/3] Fixup: check compressed matches in both cases --- tests/test_variants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_variants.py b/tests/test_variants.py index 67796317..65f7178b 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -202,6 +202,7 @@ def test_variant_lz4_block_into(data): compressed_buffer = np.zeros(compressed_size, dtype=np.uint8) n_bytes = cramjam.lz4.compress_block_into(data, compressed_buffer) assert n_bytes == len(compressed) + assert same_same(compressed, compressed_buffer[:n_bytes]) decompressed_buffer = np.zeros(len(data), dtype=np.uint8) n_bytes = cramjam.lz4.decompress_block_into(