Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement compress for NEON #369

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ fn bench_single_compression_avx512(b: &mut Bencher) {
}
}

/// Benchmarks single-block compression on the NEON platform.
///
/// Only compiled when the `neon` cargo feature is enabled. If
/// `Platform::neon()` returns `None`, the benchmark body is skipped and the
/// function returns without measuring anything.
#[bench]
#[cfg(feature = "neon")]
fn bench_single_compression_neon(b: &mut Bencher) {
    // Bail out early when no NEON platform is reported.
    let neon_platform = match Platform::neon() {
        Some(found) => found,
        None => return,
    };
    bench_single_compression_fn(b, neon_platform);
}

fn bench_many_chunks_fn(b: &mut Bencher, platform: Platform) {
let degree = platform.simd_degree();
let mut inputs = Vec::new();
Expand Down
15 changes: 15 additions & 0 deletions c/blake3_c_rust_bindings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,21 @@ pub mod ffi {
pub mod neon {
extern "C" {
// NEON low level functions
// Foreign declaration of the C function `blake3_compress_xof_neon`.
//
// The matching C prototype is
// `void blake3_compress_xof_neon(const uint32_t cv[8],
//      const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
//      uint64_t counter, uint8_t flags, uint8_t out[64])`,
// so per that prototype `cv` points at 8 `u32` words, `block` at
// `BLAKE3_BLOCK_LEN` bytes, and `out` at 64 writable bytes. `cv` is
// `*const` here (read-only input); results go to `out`.
//
// As with any item in an `extern "C"` block, calling this is `unsafe`; the
// caller is responsible for the pointers being valid for those sizes.
pub fn blake3_compress_xof_neon(
cv: *const u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
out: *mut u8,
);
// Foreign declaration of the C function `blake3_compress_in_place_neon`.
//
// The matching C prototype is
// `void blake3_compress_in_place_neon(uint32_t cv[8],
//      const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
//      uint64_t counter, uint8_t flags)`,
// so per that prototype `cv` points at 8 `u32` words and `block` at
// `BLAKE3_BLOCK_LEN` bytes. Unlike the XOF variant there is no separate
// output pointer and `cv` is `*mut` — presumably the result is written back
// into `cv` (as the "in place" name suggests); confirm against the C source.
//
// As with any item in an `extern "C"` block, calling this is `unsafe`; the
// caller is responsible for the pointers being valid for those sizes.
pub fn blake3_compress_in_place_neon(
cv: *mut u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
);
pub fn blake3_hash_many_neon(
inputs: *const *const u8,
num_inputs: usize,
Expand Down
12 changes: 12 additions & 0 deletions c/blake3_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ void blake3_compress_in_place(uint32_t cv[8],
}
#endif
#endif

#if BLAKE3_USE_NEON == 1
blake3_compress_in_place_neon(cv, block, block_len, counter, flags);
return;
#endif

blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
}

Expand Down Expand Up @@ -217,6 +223,12 @@ void blake3_compress_xof(const uint32_t cv[8],
}
#endif
#endif

#if BLAKE3_USE_NEON == 1
blake3_compress_xof_neon(cv, block, block_len, counter, flags, out);
return;
#endif

blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
}

Expand Down
10 changes: 10 additions & 0 deletions c/blake3_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,16 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
#endif

#if BLAKE3_USE_NEON == 1
// NEON variant of the in-place compression function. Takes the same
// arguments as blake3_compress_in_place_portable(); blake3_compress_in_place()
// in blake3_dispatch.c calls it when BLAKE3_USE_NEON == 1.
// `cv` is non-const — presumably it is updated with the compression result
// (TODO confirm against the implementation in blake3_neon.c).
void blake3_compress_in_place_neon(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags);

// NEON variant of the XOF compression function. Takes the same arguments as
// blake3_compress_xof_portable(); blake3_compress_xof() in blake3_dispatch.c
// calls it when BLAKE3_USE_NEON == 1.
// `cv` is const (input only); the 64-byte `out` buffer is the only non-const
// argument, so it is presumably where the result is written (TODO confirm).
void blake3_compress_xof_neon(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags, uint8_t out[64]);

void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
Expand Down