From 843a2e5699a36537ff962d921d1c9aa0712a8d7c Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Sat, 29 Oct 2022 07:09:00 +1300
Subject: [PATCH] Add BooleanArray::true_count and BooleanArray::false_count (#2957)

* Add BooleanArray::true_count and BooleanArray::false_count

* Review feedback
---
 arrow-array/src/array/boolean_array.rs | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index c7a44c7d5f9..31dde3a3dda 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -103,6 +103,33 @@ impl BooleanArray {
         &self.data.buffers()[0]
     }
 
+    /// Returns the number of non null, true values within this array
+    pub fn true_count(&self) -> usize {
+        match self.data.null_buffer() {
+            Some(nulls) => {
+                let null_chunks = nulls.bit_chunks(self.offset(), self.len());
+                let value_chunks = self.values().bit_chunks(self.offset(), self.len());
+                null_chunks
+                    .iter()
+                    .zip(value_chunks.iter())
+                    .chain(std::iter::once((
+                        null_chunks.remainder_bits(),
+                        value_chunks.remainder_bits(),
+                    )))
+                    .map(|(a, b)| (a & b).count_ones() as usize)
+                    .sum()
+            }
+            None => self
+                .values()
+                .count_set_bits_offset(self.offset(), self.len()),
+        }
+    }
+
+    /// Returns the number of non null, false values within this array
+    pub fn false_count(&self) -> usize {
+        self.len() - self.null_count() - self.true_count()
+    }
+
     /// Returns the boolean value at index `i`.
     ///
     /// # Safety
@@ -285,6 +312,7 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray
 #[cfg(test)]
 mod tests {
     use super::*;
+    use rand::{thread_rng, Rng};
 
     #[test]
     fn test_boolean_fmt_debug() {
@@ -431,4 +459,31 @@ mod tests {
     fn test_from_array_data_validation() {
         let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
     }
+
+    #[test]
+    fn test_true_false_count() {
+        let mut rng = thread_rng();
+
+        for _ in 0..10 {
+            // No nulls
+            let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect();
+            let b = BooleanArray::from(d.clone());
+
+            let expected_true = d.iter().filter(|x| **x).count();
+            assert_eq!(b.true_count(), expected_true);
+            assert_eq!(b.false_count(), d.len() - expected_true);
+
+            // With nulls
+            let d: Vec<_> = (0..2000)
+                .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
+                .collect();
+            let b = BooleanArray::from(d.clone());
+
+            let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
+            assert_eq!(b.true_count(), expected_true);
+
+            let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
+            assert_eq!(b.false_count(), expected_false);
+        }
+    }
 }