Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Lz4 de/compress_block_into functions #80

Merged
merged 3 commits into from Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "2.5.0"
version = "2.6.0"
authors = ["Miles Granger <miles59923@gmail.com>"]
edition = "2018"
license = "MIT"
Expand Down
88 changes: 83 additions & 5 deletions src/lz4.rs
Expand Up @@ -2,6 +2,7 @@
use crate::exceptions::{CompressionError, DecompressionError};
use crate::io::{AsBytes, RustyBuffer};
use crate::{to_py_err, BytesType};
use lz4::{block, block::CompressionMode};
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::PyResult;
Expand All @@ -14,8 +15,14 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_block, m)?)?;
m.add_function(wrap_pyfunction!(decompress_block, m)?)?;

m.add_function(wrap_pyfunction!(compress_into, m)?)?;
m.add_function(wrap_pyfunction!(decompress_into, m)?)?;
m.add_function(wrap_pyfunction!(compress_block_into, m)?)?;
m.add_function(wrap_pyfunction!(decompress_block_into, m)?)?;

m.add_function(wrap_pyfunction!(compress_block_bound, m)?)?;

m.add_class::<Compressor>()?;
Ok(())
}
Expand Down Expand Up @@ -73,7 +80,6 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult<usiz
/// ```
#[pyfunction]
pub fn decompress_block(data: BytesType, output_len: Option<usize>) -> PyResult<RustyBuffer> {
    use lz4::block;

    // The lz4 block API takes the optional expected output size as an `i32`.
    // NOTE(review): `as i32` silently truncates values above `i32::MAX` — confirm callers never
    // pass such lengths, or switch to a checked conversion.
    let expected_len = output_len.map(|len| len as i32);

    let decompressed = to_py_err!(DecompressionError -> block::decompress(data.as_bytes(), expected_len))?;
    Ok(RustyBuffer::from(decompressed))
}
Expand Down Expand Up @@ -104,10 +110,62 @@ pub fn compress_block(
compression: Option<i32>,
store_size: Option<bool>,
) -> PyResult<RustyBuffer> {
use lz4::{block, block::CompressionMode};
let store_size = store_size.unwrap_or(true);
let mode = compression_mode(mode, compression, acceleration)?;
let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?;
Ok(RustyBuffer::from(out))
}

/// LZ4 _block_ decompression, writing the result into a caller-provided, pre-allocated buffer.
///
/// No explicit output size is handed to the underlying lz4 call (`None` is passed), so the
/// uncompressed size has to be determined by lz4 itself — presumably from a size prefix stored
/// at compression time; verify against the lz4 crate documentation.
///
/// On success, returns the value reported by `block::decompress_to_buffer`; failures are
/// converted through `to_py_err!` with `DecompressionError`.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer)
/// ```
#[pyfunction]
pub fn decompress_block_into(input: BytesType, mut output: BytesType) -> PyResult<usize> {
    let src = input.as_bytes();
    let dst = output.as_bytes_mut();
    to_py_err!(DecompressionError -> block::decompress_to_buffer(src, None, dst))
}

/// lZ4 _block_ compression into pre-allocated buffer.
///
/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block)
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.compress_block_into(
/// ... b'some bytes here',
/// ... output=output_buffer,
/// ... mode=Option[str],
/// ... acceleration=Option[int],
/// ... compression=Option[int],
/// ... store_size=Option[bool]
/// ... )
/// ```
#[pyfunction]
#[allow(unused_variables)]
pub fn compress_block_into(
data: BytesType,
mut output: BytesType,
mode: Option<&str>,
acceleration: Option<i32>,
compression: Option<i32>,
store_size: Option<bool>,
) -> PyResult<usize> {
let store_size = store_size.unwrap_or(true);
let mode = match mode {
let mode = compression_mode(mode, compression, acceleration)?;
to_py_err!(CompressionError -> block::compress_to_buffer(data.as_bytes(), Some(mode), store_size, output.as_bytes_mut()))
}

#[inline]
fn compression_mode(
mode: Option<&str>,
compression: Option<i32>,
acceleration: Option<i32>,
) -> PyResult<CompressionMode> {
let m = match mode {
Some(m) => match m {
"default" => CompressionMode::DEFAULT,
"fast" => CompressionMode::FAST(acceleration.unwrap_or(1)),
Expand All @@ -116,8 +174,28 @@ pub fn compress_block(
},
None => CompressionMode::DEFAULT,
};
let out = to_py_err!(CompressionError -> block::compress(data.as_bytes(), Some(mode), store_size))?;
Ok(RustyBuffer::from(out))
Ok(m)
}

/// Compute the size of an output buffer that is guaranteed to be large enough to hold the
/// block-compressed form of `src`.
///
/// Raises a `ValueError` when the underlying bound calculation reports an error, e.g. when the
/// data is too long to be compressed by lz4.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lz4.compress_block_bound(b'some bytes here')
/// ```
#[pyfunction]
pub fn compress_block_bound(src: BytesType) -> PyResult<usize> {
    match block::compress_bound(src.len()) {
        Ok(bound) => Ok(bound),
        // Surface the lz4 error text to Python as a ValueError.
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(err.to_string())),
    }
}

/// LZ4 Compressor object for streaming compression
Expand Down
23 changes: 23 additions & 0 deletions tests/test_variants.py
Expand Up @@ -190,6 +190,29 @@ def test_variant_snappy_raw_into(data):
assert same_same(decompressed_buffer[:n_bytes], data)


@given(data=st.binary())
def test_variant_lz4_block_into(data):
    """
    Round-trip arbitrary bytes through the LZ4 block ``*_into`` variants.

    The compression output buffer is sized with ``compress_block_bound`` and the
    bytes written into it must match what ``compress_block`` returns; decompressing
    those bytes into a buffer of ``len(data)`` must reproduce the input.
    """
    # Reference result from the allocating API.
    expected = cramjam.lz4.compress_block(data)

    # Compress into a pre-allocated buffer sized by the reported bound.
    bound = cramjam.lz4.compress_block_bound(data)
    packed = np.zeros(bound, dtype=np.uint8)
    n_written = cramjam.lz4.compress_block_into(data, packed)
    assert n_written == len(expected)
    assert same_same(expected, packed[:n_written])

    # Decompress only the bytes actually written, into a buffer of the original size.
    unpacked = np.zeros(len(data), dtype=np.uint8)
    n_restored = cramjam.lz4.decompress_block_into(
        packed[:n_written].tobytes(), unpacked
    )
    assert n_restored == len(data)

    assert same_same(unpacked[:n_restored], data)


@pytest.mark.parametrize("Obj", (cramjam.File, cramjam.Buffer))
@given(data=st.binary())
def test_dunders(Obj, tmp_path_factory, data):
Expand Down