Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arbitrary size combine option bitmap #1781

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ where
}

let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;

let values = left
.values()
Expand Down Expand Up @@ -117,7 +117,7 @@ where
}

let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;

let buffer = if let Some(b) = &null_bit_buffer {
let values = left.values().iter().zip(right.values()).enumerate().map(
Expand Down Expand Up @@ -316,7 +316,7 @@ where

// Create the combined `Bitmap`
let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;

let lanes = T::lanes();
let buffer_size = left.len() * std::mem::size_of::<T::Native>();
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/compute/kernels/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ where

let left_data = left.data_ref();
let right_data = right.data_ref();
let null_bit_buffer = combine_option_bitmap(left_data, right_data, len)?;
let null_bit_buffer = combine_option_bitmap(&[left_data, right_data], len)?;

let left_buffer = &left_data.buffers()[0];
let right_buffer = &right_data.buffers()[0];
Expand Down
15 changes: 8 additions & 7 deletions arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ macro_rules! compare_op {
}

let null_bit_buffer =
combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
combine_option_bitmap(&[$left.data_ref(), $right.data_ref()], $left.len())?;

// Safety:
// `i < $left.len()` and $left.len() == $right.len()
Expand Down Expand Up @@ -86,7 +86,7 @@ macro_rules! compare_op_primitive {
}

let null_bit_buffer =
combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
combine_option_bitmap(&[$left.data_ref(), $right.data_ref()], $left.len())?;

let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8);
let lhs_chunks_iter = $left.values().chunks_exact(8);
Expand Down Expand Up @@ -258,7 +258,7 @@ where
}

let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;

let mut result = BooleanBufferBuilder::new(left.len());
for i in 0..left.len() {
Expand Down Expand Up @@ -567,7 +567,7 @@ pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
));
}
let null_bit_buffer =
combine_option_bitmap(array.data_ref(), regex_array.data_ref(), array.len())?;
combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len())?;

let mut patterns: HashMap<String, Regex> = HashMap::new();
let mut result = BooleanBufferBuilder::new(array.len());
Expand Down Expand Up @@ -1676,7 +1676,8 @@ where
));
}

let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?;
let null_bit_buffer =
combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?;

// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
Expand Down Expand Up @@ -2617,7 +2618,7 @@ where
let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
Expand Down Expand Up @@ -2674,7 +2675,7 @@ where
let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/compute/kernels/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
)));
}

let output_bitmap = combine_option_bitmap(left.data(), right.data(), left.len())?;
let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], left.len())?;

let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();
Expand Down
92 changes: 60 additions & 32 deletions arrow/src/compute/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,39 @@ use crate::error::{ArrowError, Result};
use num::{One, ToPrimitive, Zero};
use std::ops::Add;

/// Combines the null bitmaps of two arrays using a bitwise `and` operation.
/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation.
///
/// This function is useful when implementing operations on higher level arrays.
ismailmaj marked this conversation as resolved.
Show resolved Hide resolved
#[allow(clippy::unnecessary_wraps)]
pub(super) fn combine_option_bitmap(
left_data: &ArrayData,
right_data: &ArrayData,
arrays: &[&ArrayData],
len_in_bits: usize,
) -> Result<Option<Buffer>> {
let left_offset_in_bits = left_data.offset();
let right_offset_in_bits = right_data.offset();

let left = left_data.null_buffer();
let right = right_data.null_buffer();

match left {
None => match right {
None => Ok(None),
Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
},
Some(l) => match right {
None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),

Some(r) => Ok(Some(buffer_bin_and(
l,
left_offset_in_bits,
r,
right_offset_in_bits,
len_in_bits,
))),
},
}
arrays
.iter()
.map(|array| (array.null_buffer().cloned(), array.offset()))
.reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) {
((None, _), (None, _)) => (None, 0),
((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => {
(Some(buffer), offset)
}
((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => (
Some(buffer_bin_and(
&buffer_left,
offset_left,
&buffer_right,
offset_right,
len_in_bits,
)),
0,
),
})
.map_or(
Err(ArrowError::ComputeError(
"Arrays must not be empty".to_string(),
)),
|(buffer, offset)| Ok(buffer.map(|buffer| buffer.slice(offset))),
)
}

/// Takes/filters a list array's inner data using the offsets of the list array.
Expand Down Expand Up @@ -206,25 +207,52 @@ pub(super) mod tests {
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
let inverse_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
let some_other_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111])));
assert_eq!(
None,
combine_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
combine_option_bitmap(&[], 8).unwrap_err().to_string(),
"Compute error: Arrays must not be empty",
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
combine_option_bitmap(&[&some_bitmap], 8).unwrap()
);
assert_eq!(
None,
combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b11010111])),
combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b0])),
combine_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01000010])),
combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,)
.unwrap()
);
assert_eq!(
Some(Buffer::from([0b00001001])),
combine_option_bitmap(
&[
&some_bitmap.slice(3, 5),
&inverse_bitmap.slice(2, 5),
&some_other_bitmap.slice(1, 5)
],
5,
)
.unwrap()
);
}

Expand Down