diff --git a/Cargo.lock b/Cargo.lock index ec38a8cb..b6da8279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,7 +78,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cramjam" -version = "2.5.0" +version = "2.6.0" dependencies = [ "brotli2", "bzip2", diff --git a/Cargo.toml b/Cargo.toml index d0d1a320..fde609f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam" -version = "2.5.0" +version = "2.6.0" authors = ["Miles Granger "] edition = "2018" license = "MIT" diff --git a/src/lz4.rs b/src/lz4.rs index 4f462c4b..08baf179 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -2,6 +2,7 @@ use crate::exceptions::{CompressionError, DecompressionError}; use crate::io::{AsBytes, RustyBuffer}; use crate::{to_py_err, BytesType}; +use lz4::{block, block::CompressionMode}; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::PyResult; @@ -14,8 +15,14 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_block, m)?)?; m.add_function(wrap_pyfunction!(decompress_block, m)?)?; + m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_function(wrap_pyfunction!(compress_block_into, m)?)?; + m.add_function(wrap_pyfunction!(decompress_block_into, m)?)?; + + m.add_function(wrap_pyfunction!(compress_block_bound, m)?)?; + m.add_class::()?; Ok(()) } @@ -73,7 +80,6 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult) -> PyResult { - use lz4::block; let out = to_py_err!(DecompressionError -> block::decompress(data.as_bytes(), output_len.map(|v| v as i32)))?; Ok(RustyBuffer::from(out)) } @@ -104,10 +110,62 @@ pub fn compress_block( compression: Option, store_size: Option, ) -> PyResult { - use lz4::{block, block::CompressionMode}; + let store_size = store_size.unwrap_or(true); + let mode = compression_mode(mode, 
compression, acceleration)?; + let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?; + Ok(RustyBuffer::from(out)) +} + +/// LZ4 _block_ decompression into a pre-allocated buffer. +/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer) +/// ``` +#[pyfunction] +pub fn decompress_block_into(input: BytesType, mut output: BytesType) -> PyResult { + to_py_err!(DecompressionError -> block::decompress_to_buffer(input.as_bytes(), None, output.as_bytes_mut())) +} +/// LZ4 _block_ compression into a pre-allocated buffer. +/// +/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) +/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.lz4.compress_block_into( +/// ... b'some bytes here', +/// ... output=output_buffer, +/// ... mode=Option[str], +/// ... acceleration=Option[int], +/// ... compression=Option[int], +/// ... store_size=Option[bool] +/// ... 
) +/// ``` +#[pyfunction] +#[allow(unused_variables)] +pub fn compress_block_into( + data: BytesType, + mut output: BytesType, + mode: Option<&str>, + acceleration: Option, + compression: Option, + store_size: Option, +) -> PyResult { let store_size = store_size.unwrap_or(true); - let mode = match mode { + let mode = compression_mode(mode, compression, acceleration)?; + to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut())) +} + +#[inline] +fn compression_mode( + mode: Option<&str>, + compression: Option, + acceleration: Option, +) -> PyResult { + let m = match mode { Some(m) => match m { "default" => CompressionMode::DEFAULT, "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), @@ -116,8 +174,28 @@ pub fn compress_block( }, None => CompressionMode::DEFAULT, }; - let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?; - Ok(RustyBuffer::from(out)) + Ok(m) +} + +/// +/// Determine the size of a buffer which is guaranteed to hold the result of block compression; errors if +/// the data is too long to be compressed by lz4. +/// +/// Python Example +/// -------------- +/// ```python +/// >>> import numpy as np +/// >>> data = b'some bytes here' +/// >>> bound = cramjam.lz4.compress_block_bound(data) +/// >>> output_buffer = np.zeros(bound, dtype=np.uint8) +/// >>> n_bytes = cramjam.lz4.compress_block_into(data, output_buffer) +/// >>> n_bytes <= bound +/// True 
+/// >>> compressed = output_buffer[:n_bytes] +/// ``` +#[pyfunction] +pub fn compress_block_bound(src: BytesType) -> PyResult { + block::compress_bound(src.len()).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) } /// Snappy Compressor object for streaming compression diff --git a/tests/test_variants.py b/tests/test_variants.py index 2ba4bde0..65f7178b 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -190,6 +190,29 @@ def test_variant_snappy_raw_into(data): assert same_same(decompressed_buffer[:n_bytes], data) +@given(data=st.binary()) +def test_variant_lz4_block_into(data): + """ + Round-trips data through the lz4 block `*_into` variants, sizing the compression + output buffer with `compress_block_bound` instead of guessing its length. + """ + + compressed = cramjam.lz4.compress_block(data) + compressed_size = cramjam.lz4.compress_block_bound(data) + compressed_buffer = np.zeros(compressed_size, dtype=np.uint8) + n_bytes = cramjam.lz4.compress_block_into(data, compressed_buffer) + assert n_bytes == len(compressed) + assert same_same(compressed, compressed_buffer[:n_bytes]) + + decompressed_buffer = np.zeros(len(data), dtype=np.uint8) + n_bytes = cramjam.lz4.decompress_block_into( + compressed_buffer[:n_bytes].tobytes(), decompressed_buffer + ) + assert n_bytes == len(data) + + assert same_same(decompressed_buffer[:n_bytes], data) + + @pytest.mark.parametrize("Obj", (cramjam.File, cramjam.Buffer)) @given(data=st.binary()) def test_dunders(Obj, tmp_path_factory, data):