Skip to content

Commit

Permalink
Support Lz4 de/compress_block_into functions (#80)
Browse files Browse the repository at this point in the history
* Add lz4 block de/compress_into and bound calc
  • Loading branch information
milesgranger committed Jun 10, 2022
1 parent dc1775b commit ca69184
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "2.5.0"
version = "2.6.0"
authors = ["Miles Granger <miles59923@gmail.com>"]
edition = "2018"
license = "MIT"
Expand Down
88 changes: 83 additions & 5 deletions src/lz4.rs
Expand Up @@ -2,6 +2,7 @@
use crate::exceptions::{CompressionError, DecompressionError};
use crate::io::{AsBytes, RustyBuffer};
use crate::{to_py_err, BytesType};
use lz4::{block, block::CompressionMode};
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::PyResult;
Expand All @@ -14,8 +15,14 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_block, m)?)?;
m.add_function(wrap_pyfunction!(decompress_block, m)?)?;

m.add_function(wrap_pyfunction!(compress_into, m)?)?;
m.add_function(wrap_pyfunction!(decompress_into, m)?)?;
m.add_function(wrap_pyfunction!(compress_block_into, m)?)?;
m.add_function(wrap_pyfunction!(decompress_block_into, m)?)?;

m.add_function(wrap_pyfunction!(compress_block_bound, m)?)?;

m.add_class::<Compressor>()?;
Ok(())
}
Expand Down Expand Up @@ -73,7 +80,6 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult<usiz
/// ```
#[pyfunction]
pub fn decompress_block(data: BytesType, output_len: Option<usize>) -> PyResult<RustyBuffer> {
use lz4::block;
let out = to_py_err!(DecompressionError -> block::decompress(data.as_bytes(), output_len.map(|v| v as i32)))?;
Ok(RustyBuffer::from(out))
}
Expand Down Expand Up @@ -104,10 +110,62 @@ pub fn compress_block(
compression: Option<i32>,
store_size: Option<bool>,
) -> PyResult<RustyBuffer> {
use lz4::{block, block::CompressionMode};
let store_size = store_size.unwrap_or(true);
let mode = compression_mode(mode, compression, acceleration)?;
let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?;
Ok(RustyBuffer::from(out))
}

/// LZ4 _block_ decompression, writing the result into a caller-supplied buffer.
///
/// On success the value returned by the underlying lz4 call is passed through
/// unchanged; failures are raised as `DecompressionError`.
///
/// NOTE(review): no explicit output length is passed to lz4 here, so this
/// presumably relies on the size prefix written by the block compressors when
/// `store_size` is true -- confirm against the lz4 crate documentation.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer)
/// ```
#[pyfunction]
pub fn decompress_block_into(input: BytesType, mut output: BytesType) -> PyResult<usize> {
    let src = input.as_bytes();
    let dst = output.as_bytes_mut();
    to_py_err!(DecompressionError -> block::decompress_to_buffer(src, None, dst))
}

/// lZ4 _block_ compression into pre-allocated buffer.
///
/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block)
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.compress_block_into(
/// ... b'some bytes here',
/// ... output=output_buffer,
/// ... mode=Option[str],
/// ... acceleration=Option[int],
/// ... compression=Option[int],
/// ... store_size=Option[bool]
/// ... )
/// ```
#[pyfunction]
#[allow(unused_variables)]
pub fn compress_block_into(
data: BytesType,
mut output: BytesType,
mode: Option<&str>,
acceleration: Option<i32>,
compression: Option<i32>,
store_size: Option<bool>,
) -> PyResult<usize> {
let store_size = store_size.unwrap_or(true);
let mode = match mode {
let mode = compression_mode(mode, compression, acceleration)?;
to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut()))
}

#[inline]
fn compression_mode(
mode: Option<&str>,
compression: Option<i32>,
acceleration: Option<i32>,
) -> PyResult<CompressionMode> {
let m = match mode {
Some(m) => match m {
"default" => CompressionMode::DEFAULT,
"fast" => CompressionMode::FAST(acceleration.unwrap_or(1)),
Expand All @@ -116,8 +174,28 @@ pub fn compress_block(
},
None => CompressionMode::DEFAULT,
};
let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?;
Ok(RustyBuffer::from(out))
Ok(m)
}

/// Determine the size of a buffer which is guaranteed to hold the result of block
/// compression of `src`.
///
/// Raises `ValueError` if the data is too long to be compressed by lz4.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.compress_block_bound(b'some bytes here')
/// ```
#[pyfunction]
pub fn compress_block_bound(src: BytesType) -> PyResult<usize> {
    match block::compress_bound(src.len()) {
        Ok(bound) => Ok(bound),
        // Surface the lz4 error message to Python as a ValueError.
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(err.to_string())),
    }
}

/// Snappy Compressor object for streaming compression
Expand Down
23 changes: 23 additions & 0 deletions tests/test_variants.py
Expand Up @@ -190,6 +190,29 @@ def test_variant_snappy_raw_into(data):
assert same_same(decompressed_buffer[:n_bytes], data)


@given(data=st.binary())
def test_variant_lz4_block_into(data):
    """
    Round-trip the lz4 block ``*_into`` variants through pre-allocated numpy buffers.

    The compression buffer is sized with ``compress_block_bound``; the bytes written
    by ``compress_block_into`` must match the output of ``compress_block``, and
    decompressing them into a ``len(data)``-sized buffer must reproduce ``data``.
    """
    expected = cramjam.lz4.compress_block(data)

    # Size the output buffer from the bound reported by the library.
    bound = cramjam.lz4.compress_block_bound(data)
    compressed_buffer = np.zeros(bound, dtype=np.uint8)
    n_compressed = cramjam.lz4.compress_block_into(data, compressed_buffer)
    assert n_compressed == len(expected)
    assert same_same(expected, compressed_buffer[:n_compressed])

    # Only the bytes actually written are fed back into decompression.
    payload = compressed_buffer[:n_compressed].tobytes()
    decompressed_buffer = np.zeros(len(data), dtype=np.uint8)
    n_decompressed = cramjam.lz4.decompress_block_into(payload, decompressed_buffer)
    assert n_decompressed == len(data)

    assert same_same(decompressed_buffer[:n_decompressed], data)


@pytest.mark.parametrize("Obj", (cramjam.File, cramjam.Buffer))
@given(data=st.binary())
def test_dunders(Obj, tmp_path_factory, data):
Expand Down

0 comments on commit ca69184

Please sign in to comment.