Skip to content

Commit

Permalink
Add functions for counting the results of a set operation (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
james7132 committed Mar 20, 2024
1 parent ce8313c commit 7507a73
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 8 deletions.
112 changes: 112 additions & 0 deletions benches/benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,110 @@ fn grow_and_insert(c: &mut Criterion) {
});
}

fn iter_union_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("iter_union_count/1m", |b| {
b.iter(|| black_box(fb_a.union(&fb_b).count()))
});
}

fn iter_intersect_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("iter_intersection_count/1m", |b| {
b.iter(|| black_box(fb_a.intersection(&fb_b).count()));
});
}

fn iter_difference_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("iter_difference_count/1m", |b| {
b.iter(|| black_box(fb_a.difference(&fb_b).count()));
});
}

fn iter_symmetric_difference_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("iter_symmetric_difference_count/1m", |b| {
b.iter(|| black_box(fb_a.symmetric_difference(&fb_b).count()));
});
}

fn union_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("union_count/1m", |b| {
b.iter(|| black_box(fb_a.union_count(&fb_b)))
});
}

fn intersect_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("intersection_count/1m", |b| {
b.iter(|| black_box(fb_a.intersection_count(&fb_b)));
});
}

fn difference_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("difference_count/1m", |b| {
b.iter(|| black_box(fb_a.difference_count(&fb_b)));
});
}

fn symmetric_difference_count(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
let mut fb_b = FixedBitSet::with_capacity(N);

fb_a.insert_range(..);
fb_b.insert_range(..);

c.bench_function("symmetric_difference_count/1m", |b| {
b.iter(|| black_box(fb_a.symmetric_difference_count(&fb_b)));
});
}

fn union_with(c: &mut Criterion) {
const N: usize = 1_000_000;
let mut fb_a = FixedBitSet::with_capacity(N);
Expand Down Expand Up @@ -201,6 +305,14 @@ criterion_group!(
iter_ones_sparse,
iter_ones_all_ones,
iter_ones_all_ones_rev,
iter_union_count,
iter_intersect_count,
iter_difference_count,
iter_symmetric_difference_count,
union_count,
intersect_count,
difference_count,
symmetric_difference_count,
insert_range,
insert,
intersect_with,
Expand Down
129 changes: 121 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ impl FixedBitSet {
blocks
}

#[inline]
fn batch_count_ones(blocks: impl IntoIterator<Item = Block>) -> usize {
blocks.into_iter().map(|x| x.count_ones() as usize).sum()
}

/// Grows the internal size of the bitset before inserting a bit
///
/// Unlike `insert`, this cannot panic, but may allocate if the bit is outside of the existing buffer's range.
Expand Down Expand Up @@ -428,18 +433,16 @@ impl FixedBitSet {

/// Count the number of set bits in the given bit range.
///
/// This function is potentially much faster than using `ones(other).count()`.
/// Use `..` to count the whole content of the bitset.
///
/// **Panics** if the range extends past the end of the bitset.
#[inline]
pub fn count_ones<T: IndexRange>(&self, range: T) -> usize {
Masks::new(range, self.length)
.map(|(block, mask)| {
// SAFETY: Masks cannot return a block index that is out of range.
let value = unsafe { *self.get_unchecked(block) };
(value & mask).count_ones() as usize
})
.sum()
Self::batch_count_ones(Masks::new(range, self.length).map(|(block, mask)| {
// SAFETY: Masks cannot return a block index that is out of range.
unsafe { *self.get_unchecked(block) & mask }
}))
}

/// Sets every bit in the given range to the given state (`enabled`)
Expand Down Expand Up @@ -669,6 +672,73 @@ impl FixedBitSet {
}
}

/// Computes how many bits would be set in the union between two bitsets.
///
/// This is potentially much faster than using `union(other).count()`. Unlike
/// other methods like using [`union_with`] followed by [`count_ones`], this
/// does not mutate in place or require separate allocations.
#[inline]
pub fn union_count(&self, other: &FixedBitSet) -> usize {
let me = self.as_slice();
let other = other.as_slice();
let mut count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x | *y)));
if other.len() > me.len() {
count += Self::batch_count_ones(other[me.len()..].iter().copied());
} else if self.len() > other.len() {
count += Self::batch_count_ones(me[other.len()..].iter().copied());
}
count
}

/// Computes how many bits would be set in the intersection between two bitsets.
///
/// This is potentially much faster than using `intersection(other).count()`. Unlike
/// other methods like using [`intersect_with`] followed by [`count_ones`], this
/// does not mutate in place or require separate allocations.
#[inline]
pub fn intersection_count(&self, other: &FixedBitSet) -> usize {
Self::batch_count_ones(
self.as_slice()
.iter()
.zip(other.as_slice())
.map(|(x, y)| (*x & *y)),
)
}

/// Computes how many bits would be set in the difference between two bitsets.
///
/// This is potentially much faster than using `difference(other).count()`. Unlike
/// other methods like using [`difference_with`] followed by [`count_ones`], this
/// does not mutate in place or require separate allocations.
#[inline]
pub fn difference_count(&self, other: &FixedBitSet) -> usize {
Self::batch_count_ones(
self.as_slice()
.iter()
.zip(other.as_slice().iter())
.map(|(x, y)| (*x & !*y)),
)
}

/// Computes how many bits would be set in the symmetric difference between two bitsets.
///
/// This is potentially much faster than using `symmetric_difference(other).count()`. Unlike
/// other methods like using [`symmetric_difference_with`] followed by [`count_ones`], this
/// does not mutate in place or require separate allocations.
#[inline]
pub fn symmetric_difference_count(&self, other: &FixedBitSet) -> usize {
let me = self.as_slice();
let other = other.as_slice();
let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x ^ *y)));
if other.len() > me.len() {
count + Self::batch_count_ones(other[me.len()..].iter().copied())
} else if me.len() > other.len() {
count + Self::batch_count_ones(me[other.len()..].iter().copied())
} else {
count
}
}

/// Returns `true` if `self` has no elements in common with `other`. This
/// is equivalent to checking for an empty intersection.
pub fn is_disjoint(&self, other: &FixedBitSet) -> bool {
Expand Down Expand Up @@ -1830,7 +1900,8 @@ mod tests {
let mut b = FixedBitSet::with_capacity(len);
a.set_range(..a_end, true);
b.set_range(b_start.., true);

let count = a.intersection_count(&b);
let iterator_count = a.intersection(&b).count();
let mut ab = a.intersection(&b).collect::<FixedBitSet>();

for i in 0..b_start {
Expand All @@ -1850,6 +1921,15 @@ mod tests {
ab, a,
"intersection and intersect_with produce the same results"
);
assert_eq!(
ab.count_ones(..),
count,
"intersection and intersection_count produce the same results"
);
assert_eq!(
count, iterator_count,
"intersection and intersection_count produce the same results"
);
}

#[test]
Expand All @@ -1862,6 +1942,8 @@ mod tests {
let mut b = FixedBitSet::with_capacity(b_len);
a.set_range(a_start.., true);
b.set_range(..b_end, true);
let count = a.union_count(&b);
let iterator_count = a.union(&b).count();
let ab = a.union(&b).collect::<FixedBitSet>();
for i in a_start..a_len {
assert!(ab.contains(i));
Expand All @@ -1875,6 +1957,15 @@ mod tests {

a.union_with(&b);
assert_eq!(ab, a, "union and union_with produce the same results");
assert_eq!(
count,
ab.count_ones(..),
"union and union_count produce the same results"
);
assert_eq!(
count, iterator_count,
"union and union_count produce the same results"
);
}

#[test]
Expand All @@ -1888,6 +1979,8 @@ mod tests {
let mut b = FixedBitSet::with_capacity(b_len);
a.set_range(a_start..a_end, true);
b.set_range(b_start..b_len, true);
let count = a.difference_count(&b);
let iterator_count = a.difference(&b).count();
let mut a_diff_b = a.difference(&b).collect::<FixedBitSet>();
for i in a_start..b_start {
assert!(a_diff_b.contains(i));
Expand All @@ -1903,6 +1996,15 @@ mod tests {
a_diff_b, a,
"difference and difference_with produce the same results"
);
assert_eq!(
a_diff_b.count_ones(..),
count,
"difference and difference_count produce the same results"
);
assert_eq!(
count, iterator_count,
"intersection and intersection_count produce the same results"
);
}

#[test]
Expand All @@ -1916,6 +2018,8 @@ mod tests {
let mut b = FixedBitSet::with_capacity(b_len);
a.set_range(a_start..a_end, true);
b.set_range(b_start..b_len, true);
let count = a.symmetric_difference_count(&b);
let iterator_count = a.symmetric_difference(&b).count();
let a_sym_diff_b = a.symmetric_difference(&b).collect::<FixedBitSet>();
for i in 0..a_start {
assert!(!a_sym_diff_b.contains(i));
Expand All @@ -1935,6 +2039,15 @@ mod tests {
a_sym_diff_b, a,
"symmetric_difference and _with produce the same results"
);
assert_eq!(
a_sym_diff_b.count_ones(..),
count,
"symmetric_difference and _count produce the same results"
);
assert_eq!(
count, iterator_count,
"symmetric_difference and _count produce the same results"
);
}

#[test]
Expand Down

0 comments on commit 7507a73

Please sign in to comment.