Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to efficiently hash entire non-root subtrees to guts #329

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
85 changes: 85 additions & 0 deletions src/guts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,38 @@
/// Length of a block. NOTE(review): presumably measured in bytes — confirm.
pub const BLOCK_LEN: usize = 64;
/// Length of a chunk in bytes; byte lengths are divided by this value to
/// obtain chunk counts.
pub const CHUNK_LEN: usize = 1024;

/// Returns `true` if the byte range of length `len` starting at chunk index
/// `start_chunk` is aligned like a single subtree.
///
/// The number of chunks covered by `len` (a trailing partial chunk counts as
/// a whole one) is rounded up to the next power of two, and `start_chunk`
/// must be a multiple of that power of two. A `len` of zero is treated like
/// a single chunk, so it is accepted at any `start_chunk`.
fn is_subtree(start_chunk: u64, len: u64) -> bool {
    const CHUNK_LEN_U64: u64 = CHUNK_LEN as u64;
    // Ceiling division without risking overflow in `len + CHUNK_LEN - 1`.
    let whole_chunks = len / CHUNK_LEN_U64;
    let chunk_count = if len % CHUNK_LEN_U64 == 0 {
        whole_chunks
    } else {
        whole_chunks + 1
    };
    // `0u64.next_power_of_two()` is 1, so the mask is 0 for empty input.
    let alignment_mask = chunk_count.next_power_of_two() - 1;
    (start_chunk & alignment_mask) == 0
}

/// Hash a subtree made up of one or more chunks.
///
/// `start_chunk` is the index of the first chunk of `data`, and `is_root`
/// says whether this subtree is the root of the whole tree. With
/// `is_root == false` the result is the subtree's chaining value; with
/// `is_root == true` it is the root hash.
///
/// # Preconditions
///
/// These are checked with `debug_assert!` only, so they are not enforced in
/// builds without debug assertions:
///
/// * `start_chunk` and `data.len()` must describe a single subtree, i.e.
///   `is_subtree(start_chunk, data.len() as u64)` must be true.
/// * A subtree that starts at a nonzero chunk cannot be the root, so
///   `is_root` requires `start_chunk == 0`.
pub fn hash_subtree(start_chunk: u64, data: &[u8], is_root: bool) -> crate::Hash {
    let byte_len = data.len() as u64;
    debug_assert!(is_subtree(start_chunk, byte_len));
    // Equivalent to "is_root implies start_chunk == 0".
    debug_assert!(!is_root || start_chunk == 0);

    let mut state = crate::Hasher::new_with_start_chunk(start_chunk);
    state.update(data);
    state.finalize_node(is_root)
}

/// Rayon parallel version of [`hash_subtree`].
///
/// Takes the same arguments and has the same preconditions as
/// [`hash_subtree`], but feeds `data` to the hasher through `update_rayon`
/// instead of `update`.
///
/// # Preconditions
///
/// These are checked with `debug_assert!` only:
///
/// * `is_subtree(start_chunk, data.len() as u64)` must be true.
/// * `is_root` requires `start_chunk == 0`.
#[cfg(feature = "rayon")]
pub fn hash_subtree_rayon(start_chunk: u64, data: &[u8], is_root: bool) -> crate::Hash {
    debug_assert!(is_subtree(start_chunk, data.len() as u64));
    debug_assert!(start_chunk == 0 || !is_root);
    let mut hasher = crate::Hasher::new_with_start_chunk(start_chunk);
    hasher.update_rayon(data);
    hasher.finalize_node(is_root)
}

/// Newtype wrapper around the crate-level `ChunkState`.
///
/// The wrapped state is the struct's only field.
#[derive(Clone, Debug)]
pub struct ChunkState(crate::ChunkState);

Expand Down Expand Up @@ -98,4 +130,57 @@ mod test {
let root = parent_cv(&parent, &chunk2_cv, true);
assert_eq!(hasher.finalize(), root);
}

/// Reference implementation of [`hash_subtree`] that walks the tree
/// recursively. Only used to cross-check results in tests.
///
/// Input of at most `CHUNK_LEN` bytes is hashed as a single chunk. Longer
/// input is split so that the left half holds half of the chunk count
/// rounded up to a power of two; both halves are hashed as non-root nodes
/// and combined with `parent_cv`.
fn recursive_hash_subtree(start_chunk: u64, data: &[u8], is_root: bool) -> crate::Hash {
    // Base case: everything fits into one chunk.
    if data.len() <= CHUNK_LEN {
        let mut chunk = ChunkState::new(start_chunk);
        chunk.update(data);
        return chunk.finalize(is_root);
    }

    // Number of chunks, counting a trailing partial chunk as a whole one.
    let whole_chunks = data.len() / CHUNK_LEN;
    let chunk_count = if data.len() % CHUNK_LEN == 0 {
        whole_chunks
    } else {
        whole_chunks + 1
    };
    let left_chunks = chunk_count.next_power_of_two() / 2;
    let (left_data, right_data) = data.split_at(left_chunks * CHUNK_LEN);

    // Only the outermost call may be the root; both children never are.
    let left_cv = recursive_hash_subtree(start_chunk, left_data, false);
    let right_cv = recursive_hash_subtree(start_chunk + left_chunks as u64, right_data, false);
    parent_cv(&left_cv, &right_cv, is_root)
}

/// Checks `hash_subtree` against the regular hash for a root subtree, spot
/// checks `is_subtree`, and compares `hash_subtree` with the recursive
/// reference for subtrees of 1, 2, 4 and 8 chunks at aligned start chunks.
#[test]
fn test_hash_subtree() {
    // A root subtree starting at chunk 0 must equal the ordinary hash.
    assert_eq!(crate::hash(b"foo"), hash_subtree(0, b"foo", true));

    assert!(is_subtree(4, 1024 * 4 - 1));
    assert!(!is_subtree(1, 1024 * 4));

    let data: Vec<u8> = (0..1024 << 4).map(|i| i as u8).collect();
    let single_chunk = &data[..CHUNK_LEN];
    for block_log in 0..4 {
        let chunks_per_block = 1usize << block_log;
        let stride = chunks_per_block as u64;
        let block = &data[..chunks_per_block * CHUNK_LEN];
        for i in 0..100 {
            // Multiples of the block size are always valid subtree starts.
            let start_chunk = i * stride;
            assert_eq!(
                recursive_hash_subtree(start_chunk, single_chunk, false),
                hash_subtree(start_chunk, single_chunk, false)
            );
            assert_eq!(
                recursive_hash_subtree(start_chunk, block, false),
                hash_subtree(start_chunk, block, false)
            );
        }
    }
}

/// Compares `hash_subtree` with the recursive reference for a non-root
/// subtree whose length is not a whole number of chunks: 8448 bytes is eight
/// full chunks plus 256 bytes, starting at chunk 97648 (a multiple of 16).
#[test]
fn wrong_hash_small() {
    let start_chunk: u64 = 97648;
    let data: Vec<u8> = (0..8448).map(|i| i as u8).collect();

    let expected = recursive_hash_subtree(start_chunk, &data, false);
    let actual = hash_subtree(start_chunk, &data, false);
    assert_eq!(expected, actual);
}
}
25 changes: 18 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
#[cfg(feature = "zeroize")]
extern crate zeroize_crate as zeroize; // Needed because `zeroize::Zeroize` assumes the crate is named `zeroize`.


#[cfg(test)]
mod test;

Expand Down Expand Up @@ -977,6 +976,15 @@ impl Hasher {
Self::new_internal(IV, 0)
}

/// Construct a new `Hasher` whose chunk state begins at `start_chunk`.
///
/// The hasher uses `IV` as its key and starts with an empty CV stack. The
/// `0` passed to `ChunkState::new` is presumably the flags word, matching
/// `new_internal(IV, 0)` — confirm against `ChunkState::new`.
fn new_with_start_chunk(start_chunk: u64) -> Self {
    let platform = Platform::detect();
    let chunk_state = ChunkState::new(IV, start_chunk, 0, platform);
    Self {
        key: *IV,
        chunk_state,
        cv_stack: ArrayVec::new(),
    }
}

/// Construct a new `Hasher` for the keyed hash function. See
/// [`keyed_hash`].
///
Expand Down Expand Up @@ -1246,7 +1254,6 @@ impl Hasher {
// also. Convert it directly into an Output. Otherwise, we need to
// merge subtrees below.
if self.cv_stack.is_empty() {
debug_assert_eq!(self.chunk_state.chunk_counter, 0);
return self.chunk_state.output();
}

Expand All @@ -1265,11 +1272,6 @@ impl Hasher {
let mut output: Output;
let mut num_cvs_remaining = self.cv_stack.len();
if self.chunk_state.len() > 0 {
debug_assert_eq!(
self.cv_stack.len(),
self.chunk_state.chunk_counter.count_ones() as usize,
"cv stack does not need a merge"
);
output = self.chunk_state.output();
} else {
debug_assert!(self.cv_stack.len() >= 2);
Expand Down Expand Up @@ -1304,6 +1306,15 @@ impl Hasher {
self.final_output().root_hash()
}

/// Finalize the current state as a tree node.
///
/// Returns the root hash when `is_root` is true; otherwise returns the
/// node's chaining value converted into a `Hash`. This exposes a way to
/// create non-root hashes.
fn finalize_node(&self, is_root: bool) -> Hash {
    let node_output = self.final_output();
    if !is_root {
        return node_output.chaining_value().into();
    }
    node_output.root_hash()
}

/// Finalize the hash state and return an [`OutputReader`], which can
/// supply any number of output bytes.
///
Expand Down