Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement FromIterator for Builders #2038

33 changes: 1 addition & 32 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,38 +222,7 @@ impl<'a> BooleanArray {

impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
let iter = iter.into_iter();
let (_, data_len) = iter.size_hint();
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tustvold I noticed some implementations panic if the upper size hint is not available and others pick whatever is available. Is this a deliberate decision? AFAIK we use size hint for slight optimization and it should not be enforced such a way. Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be optional for the non-trusted length variants


let num_bytes = bit_util::ceil(data_len, 8);
let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);

let data = val_buf.as_slice_mut();

let null_slice = null_buf.as_slice_mut();
iter.enumerate().for_each(|(i, item)| {
if let Some(a) = item.borrow() {
bit_util::set_bit(null_slice, i);
if *a {
bit_util::set_bit(data, i);
}
}
});

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
data_len,
None,
Some(null_buf.into()),
0,
vec![val_buf.into()],
vec![],
)
};
BooleanArray::from(data)
iter.into_iter().collect::<BooleanBuilder>().finish()
}
}

Expand Down
82 changes: 71 additions & 11 deletions arrow/src/array/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use std::any::Any;
use std::borrow::Borrow;
use std::sync::Arc;

use crate::array::ArrayBuilder;
Expand Down Expand Up @@ -57,6 +58,24 @@ use super::BooleanBufferBuilder;
/// assert!(arr.is_valid(3));
/// assert!(!arr.is_null(3));
/// ```
///
/// Using `from_iter`
/// ```
/// use arrow::array::{Array, BooleanBuilder};
/// let v = vec![Some(false), Some(true), Some(false), Some(true)];
/// let arr = v.into_iter().collect::<BooleanBuilder>().finish();
/// assert_eq!(4, arr.len());
/// assert_eq!(0, arr.offset());
/// assert_eq!(0, arr.null_count());
/// assert!(arr.is_valid(0));
/// assert_eq!(false, arr.value(0));
/// assert!(arr.is_valid(1));
/// assert_eq!(true, arr.value(1));
/// assert!(arr.is_valid(2));
/// assert_eq!(false, arr.value(2));
/// assert!(arr.is_valid(3));
/// assert_eq!(true, arr.value(3));
/// ```
#[derive(Debug)]
pub struct BooleanBuilder {
values_builder: BooleanBufferBuilder,
Expand Down Expand Up @@ -140,6 +159,21 @@ impl BooleanBuilder {
}

impl ArrayBuilder for BooleanBuilder {
/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.values_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.values_builder.is_empty()
}

/// Builds the array and reset this builder.
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}

/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
self
Expand All @@ -154,26 +188,38 @@ impl ArrayBuilder for BooleanBuilder {
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.values_builder.len()
}
impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanBuilder {
fn from_iter<T: IntoIterator<Item = Ptr>>(iter: T) -> Self {
let iter = iter.into_iter();
let (lower, upper) = iter.size_hint();
let size_hint = upper.unwrap_or(lower);

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.values_builder.is_empty()
}
let mut bitmap_builder = BooleanBufferBuilder::new(size_hint);
let mut values_builder = BooleanBufferBuilder::new(size_hint);

/// Builds the array and reset this builder.
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
iter.for_each(|item| {
if let Some(a) = item.borrow() {
bitmap_builder.append(true);
values_builder.append(*a);
} else {
bitmap_builder.append(false);
values_builder.append(false);
}
});

BooleanBuilder {
values_builder,
bitmap_builder,
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::array::Array;

#[test]
fn test_boolean_array_builder_append_slice() {
Expand All @@ -200,4 +246,18 @@ mod tests {

assert_eq!(arr1, arr2);
}

#[test]
fn test_boolean_array_builder_from_iter() {
let v = vec![Some(false), Some(true), Some(false), Some(true)];
let arr = v.into_iter().collect::<BooleanBuilder>().finish();
assert_eq!(4, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
for i in 0..3 {
assert!(!arr.is_null(i));
assert!(arr.is_valid(i));
assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i)
}
}
}