Skip to content

Commit

Permalink
Add BooleanArray::from_unary and BooleanArray::from_binary (#3258)
Browse files Browse the repository at this point in the history
* Add BooleanArray::from_unary and BooleanArray::from_binary

* Add docs

* Tweak signatures

* Remove fallibility from combine_option_bitmap

* Remove unused compare_option_bitmap

* Remove fallibility

* Fix doc
  • Loading branch information
tustvold committed Dec 2, 2022
1 parent 9abdb55 commit ecbb8c2
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 307 deletions.
87 changes: 87 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::iterator::BooleanIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
Expand Down Expand Up @@ -173,6 +174,92 @@ impl BooleanArray {
) -> impl Iterator<Item = Option<bool>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}

/// Build a [`BooleanArray`] by applying a predicate to every element
/// of `left`
///
/// The validity bitmap of `left` (if any) is carried over to the result, so
/// positions that are null in the input remain null in the output. Note that
/// this function does not consult the validity bitmap before invoking `op`.
///
/// ```
/// # use arrow_array::{BooleanArray, Int32Array};
///
/// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
/// let r = BooleanArray::from_unary(&array, |x| x > 2);
/// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true]));
/// ```
pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
where
    F: FnMut(T::Item) -> bool,
{
    let len = left.len();

    // Re-slice the input validity so that it starts at bit 0; the array
    // constructed below is created with an offset of 0.
    let nulls = left
        .data()
        .null_buffer()
        .map(|bits| bits.bit_slice(left.offset(), len));

    let values = MutableBuffer::collect_bool(len, |idx| {
        // SAFETY: idx in range 0..len
        unsafe { op(left.value_unchecked(idx)) }
    });

    // The values buffer was requested for `len` entries and the validity was
    // re-sliced to `len` bits above, matching the length and offset given here.
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            nulls,
            0,
            vec![Buffer::from(values)],
            vec![],
        )
    };

    Self::from(data)
}

/// Build a [`BooleanArray`] by applying a binary predicate to each pair of
/// elements drawn from `left` and `right`
///
/// The validity of the result is produced by `combine_option_bitmap` over
/// the data of both inputs. Note that this function does not consult the
/// validity bitmaps before invoking `op`.
///
/// ```
/// # use arrow_array::{BooleanArray, Int32Array};
///
/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
/// let b = Int32Array::from(vec![1, 2, 0, 2, 5]);
/// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b);
/// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true]));
/// ```
///
/// # Panics
///
/// This function panics if left and right are not the same length
///
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(
    left: T,
    right: S,
    mut op: F,
) -> Self
where
    F: FnMut(T::Item, S::Item) -> bool,
{
    let len = left.len();
    // Both inputs are indexed with the same `0..len` range below, so they
    // must agree on length before any unchecked access happens.
    assert_eq!(len, right.len());

    let nulls = combine_option_bitmap(&[left.data_ref(), right.data_ref()], len);

    let values = MutableBuffer::collect_bool(len, |idx| {
        // SAFETY: idx in range 0..len
        unsafe { op(left.value_unchecked(idx), right.value_unchecked(idx)) }
    });

    // The values buffer was requested for `len` entries and the combined
    // validity was computed for `len` bits, matching the length given here.
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            nulls,
            0,
            vec![Buffer::from(values)],
            vec![],
        )
    };

    Self::from(data)
}
}

impl Array for BooleanArray {
Expand Down
141 changes: 141 additions & 0 deletions arrow-data/src/bit_mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@

//! Utils for working with packed bit masks

use crate::ArrayData;
use arrow_buffer::bit_chunk_iterator::BitChunks;
use arrow_buffer::bit_util::{ceil, get_bit, set_bit};
use arrow_buffer::buffer::buffer_bin_and;
use arrow_buffer::Buffer;

/// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the
/// bits in `data` in the range `[offset_read..offset_read+len]`
Expand Down Expand Up @@ -62,9 +65,41 @@ pub fn set_bits(
null_count as usize
}

/// Intersects the null bitmaps of several arrays with a bitwise `and`.
///
/// Arrays without a null buffer do not contribute to the result. `None` is
/// returned when `arrays` is empty or when none of the arrays has a null
/// buffer; otherwise the returned buffer is the combined bitmap, sliced so
/// that it begins at bit 0 and covers `len_in_bits` bits.
///
/// This function is useful when implementing operations on higher level arrays.
pub fn combine_option_bitmap(
    arrays: &[&ArrayData],
    len_in_bits: usize,
) -> Option<Buffer> {
    // Running result: the bitmap accumulated so far together with the bit
    // offset at which its relevant bits start.
    let mut combined: Option<(Buffer, usize)> = None;

    for data in arrays {
        if let Some(bitmap) = data.null_buffer() {
            combined = Some(match combined {
                // First bitmap encountered: keep it along with its own offset
                None => (bitmap.clone(), data.offset()),
                // Otherwise `and` it into the running result; the offset
                // of the freshly computed buffer is recorded as 0
                Some((so_far, so_far_offset)) => (
                    buffer_bin_and(
                        &so_far,
                        so_far_offset,
                        bitmap,
                        data.offset(),
                        len_in_bits,
                    ),
                    0,
                ),
            });
        }
    }

    combined.map(|(bitmap, offset)| bitmap.bit_slice(offset, len_in_bits))
}

#[cfg(test)]
mod tests {
use super::*;
use arrow_schema::DataType;
use std::sync::Arc;

#[test]
fn test_set_bits_aligned() {
Expand Down Expand Up @@ -187,4 +222,110 @@ mod tests {
assert_eq!(destination, expected_data);
assert_eq!(result, expected_null_count);
}

/// Test helper: builds `UInt8` array data of `len` elements at `offset`,
/// with the supplied (optional) validity bitmap.
fn make_data_with_null_bit_buffer(
    len: usize,
    offset: usize,
    null_bit_buffer: Option<Buffer>,
) -> Arc<ArrayData> {
    // The values buffer is sized to cover the offset as well as the `len`
    // elements; the actual value (11) is arbitrary.
    let values = Buffer::from(&vec![11; len + offset]);

    let data = ArrayData::try_new(
        DataType::UInt8,
        len,
        null_bit_buffer,
        offset,
        vec![values],
        vec![],
    )
    .unwrap();

    Arc::new(data)
}

#[test]
fn test_combine_option_bitmap() {
    let no_nulls = make_data_with_null_bit_buffer(8, 0, None);
    let base = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
    let complement =
        make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
    let other = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111])));

    // Nothing to combine, or no input carries a bitmap
    assert_eq!(None, combine_option_bitmap(&[], 8));
    assert_eq!(None, combine_option_bitmap(&[&no_nulls, &no_nulls], 8));

    // Exactly one input contributes a bitmap
    assert_eq!(
        Some(Buffer::from([0b01001010])),
        combine_option_bitmap(&[&base], 8)
    );
    assert_eq!(
        Some(Buffer::from([0b01001010])),
        combine_option_bitmap(&[&base, &no_nulls], 8)
    );
    assert_eq!(
        Some(Buffer::from([0b11010111])),
        combine_option_bitmap(&[&no_nulls, &other], 8)
    );

    // Several bitmaps are and-ed together
    assert_eq!(
        Some(Buffer::from([0b01001010])),
        combine_option_bitmap(&[&base, &base], 8)
    );
    assert_eq!(
        Some(Buffer::from([0b0])),
        combine_option_bitmap(&[&base, &complement], 8)
    );
    assert_eq!(
        Some(Buffer::from([0b01000010])),
        combine_option_bitmap(&[&base, &other, &no_nulls], 8)
    );

    // Sliced inputs, each starting at a different offset
    let base_view = base.slice(3, 5);
    let complement_view = complement.slice(2, 5);
    let other_view = other.slice(1, 5);
    assert_eq!(
        Some(Buffer::from([0b00001001])),
        combine_option_bitmap(&[&base_view, &complement_view, &other_view], 5)
    );
}

#[test]
fn test_combine_option_bitmap_with_offsets() {
    let no_nulls = make_data_with_null_bit_buffer(8, 0, None);
    let at_offset_0 =
        make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010])));
    let at_offset_1 =
        make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1])));
    let at_offset_2 =
        make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10])));

    // Every combination below is expected to produce this same bitmap
    let expected = Some(Buffer::from([0b10101010]));

    // A single offset input
    assert_eq!(expected, combine_option_bitmap(&[&at_offset_1], 8));
    assert_eq!(expected, combine_option_bitmap(&[&at_offset_2], 8));

    // An offset input paired with an input that has no bitmap
    assert_eq!(expected, combine_option_bitmap(&[&at_offset_1, &no_nulls], 8));
    assert_eq!(expected, combine_option_bitmap(&[&no_nulls, &at_offset_2], 8));

    // Two bitmaps with different offsets
    assert_eq!(
        expected,
        combine_option_bitmap(&[&at_offset_0, &at_offset_1], 8)
    );
    assert_eq!(
        expected,
        combine_option_bitmap(&[&at_offset_1, &at_offset_2], 8)
    );
}
}
8 changes: 4 additions & 4 deletions arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,10 @@ where
}

// Create the combined `Bitmap`
let null_bit_buffer = crate::compute::util::combine_option_bitmap(
let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
&[left.data_ref(), right.data_ref()],
left.len(),
)?;
);

let lanes = T::lanes();
let buffer_size = left.len() * std::mem::size_of::<T::Native>();
Expand Down Expand Up @@ -660,10 +660,10 @@ where
)));
}

let null_bit_buffer = crate::compute::util::combine_option_bitmap(
let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
&[left.data_ref(), right.data_ref()],
left.len(),
)?;
);

// Safety justification: Since the inputs are valid Arrow arrays, all values are
// valid indexes into the dictionary (which is verified during construction)
Expand Down
10 changes: 5 additions & 5 deletions arrow/src/compute/kernels/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ use crate::array::{
PrimitiveArray,
};
use crate::buffer::Buffer;
use crate::compute::util::combine_option_bitmap;
use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType};
use crate::downcast_dictionary_array;
use crate::error::{ArrowError, Result};
use crate::util::bit_iterator::try_for_each_valid_idx;
use arrow_buffer::MutableBuffer;
use arrow_data::bit_mask::combine_option_bitmap;
use std::sync::Arc;

#[inline]
Expand Down Expand Up @@ -215,7 +215,7 @@ where
return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
}

let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
Expand Down Expand Up @@ -275,7 +275,7 @@ where

let len = a.len();

let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
Expand Down Expand Up @@ -333,7 +333,7 @@ where
if a.null_count() == 0 && b.null_count() == 0 {
try_binary_no_nulls(len, a, b, op)
} else {
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);

let null_count = null_buffer
.as_ref()
Expand Down Expand Up @@ -401,7 +401,7 @@ where
if a.null_count() == 0 && b.null_count() == 0 {
try_binary_no_nulls_mut(len, a, b, op)
} else {
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/compute/kernels/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ use crate::buffer::{
bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or,
buffer_unary_not, Buffer, MutableBuffer,
};
use crate::compute::util::combine_option_bitmap;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
use crate::util::bit_util::ceil;
use arrow_data::bit_mask::combine_option_bitmap;

/// Updates null buffer based on data buffer and null buffer of the operand at other side
/// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false
Expand Down Expand Up @@ -108,7 +108,7 @@ pub(crate) fn build_null_buffer_for_and_or(
len_in_bits: usize,
) -> Option<Buffer> {
// `arrays` are not empty, so safely do `unwrap` directly.
combine_option_bitmap(&[left_data, right_data], len_in_bits).unwrap()
combine_option_bitmap(&[left_data, right_data], len_in_bits)
}

/// Updates null buffer based on data buffer and null buffer of the operand at other side
Expand Down

0 comments on commit ecbb8c2

Please sign in to comment.