Skip to content

Commit

Permalink
Omit validity buffer in PrimitiveArray::from_iter when all values are…
Browse files Browse the repository at this point in the history
… valid (#1859)

* Omit validity buffer in PrimitiveArray::from_iter when all values are valid

* Use bool::then instead of if

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
  • Loading branch information
jhorstmann and tustvold committed Jun 13, 2022
1 parent fcf655e commit 9bcd052
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
4 changes: 3 additions & 1 deletion arrow/src/array/array.rs
Expand Up @@ -873,7 +873,9 @@ mod tests {

#[test]
fn test_memory_size_primitive_nullable() {
let arr: PrimitiveArray<Int64Type> = (0..128).map(Some).collect();
let arr: PrimitiveArray<Int64Type> = (0..128)
.map(|i| if i % 20 == 0 { Some(i) } else { None })
.collect();
let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
ArrayData::builder(arr.data_type().clone())
.add_buffer(MutableBuffer::new(0).into())
Expand Down
30 changes: 23 additions & 7 deletions arrow/src/array/array_primitive.rs
Expand Up @@ -397,29 +397,35 @@ impl<'a, T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
let iter = iter.into_iter();
let (lower, _) = iter.size_hint();

let mut null_buf = BooleanBufferBuilder::new(lower);
let mut null_builder = BooleanBufferBuilder::new(lower);

let buffer: Buffer = iter
.map(|item| {
if let Some(a) = item.into().native {
null_buf.append(true);
null_builder.append(true);
a
} else {
null_buf.append(false);
null_builder.append(false);
// this ensures that null items on the buffer are not arbitrary.
// This is important because falible operations can use null values (e.g. a vectorized "add")
// This is important because fallible operations can use null values (e.g. a vectorized "add")
// which may panic (e.g. overflow if the number on the slots happen to be very large).
T::Native::default()
}
})
.collect();

let len = null_builder.len();
let null_buf: Buffer = null_builder.into();
let valid_count = null_buf.count_set_bits();
let null_count = len - valid_count;
let opt_null_buf = (null_count != 0).then(|| null_buf);

let data = unsafe {
ArrayData::new_unchecked(
T::DATA_TYPE,
null_buf.len(),
None,
Some(null_buf.into()),
len,
Some(null_count),
opt_null_buf,
0,
vec![buffer],
vec![],
Expand Down Expand Up @@ -1025,6 +1031,16 @@ mod tests {
assert_eq!(primitive_array.len(), 10);
}

#[test]
fn test_primitive_array_from_non_null_iter() {
let iter = (0..10_i32).map(Some);
let primitive_array = PrimitiveArray::<Int32Type>::from_iter(iter);
assert_eq!(primitive_array.len(), 10);
assert_eq!(primitive_array.null_count(), 0);
assert_eq!(primitive_array.data().null_buffer(), None);
assert_eq!(primitive_array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
}

#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
(values buffer)")]
Expand Down

0 comments on commit 9bcd052

Please sign in to comment.