Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Omit validity buffer in PrimitiveArray::from_iter when all values are valid #1859

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion arrow/src/array/array.rs
Expand Up @@ -873,7 +873,9 @@ mod tests {

#[test]
fn test_memory_size_primitive_nullable() {
let arr: PrimitiveArray<Int64Type> = (0..128).map(Some).collect();
let arr: PrimitiveArray<Int64Type> = (0..128)
.map(|i| if i % 20 == 0 { Some(i) } else { None })
.collect();
let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
ArrayData::builder(arr.data_type().clone())
.add_buffer(MutableBuffer::new(0).into())
Expand Down
30 changes: 23 additions & 7 deletions arrow/src/array/array_primitive.rs
Expand Up @@ -397,29 +397,35 @@ impl<'a, T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
let iter = iter.into_iter();
let (lower, _) = iter.size_hint();

let mut null_buf = BooleanBufferBuilder::new(lower);
let mut null_builder = BooleanBufferBuilder::new(lower);

let buffer: Buffer = iter
.map(|item| {
if let Some(a) = item.into().native {
null_buf.append(true);
null_builder.append(true);
a
} else {
null_buf.append(false);
null_builder.append(false);
// this ensures that null items on the buffer are not arbitrary.
// This is important because falible operations can use null values (e.g. a vectorized "add")
// This is important because fallible operations can use null values (e.g. a vectorized "add")
// which may panic (e.g. overflow if the number on the slots happen to be very large).
T::Native::default()
}
})
.collect();

let len = null_builder.len();
let null_buf: Buffer = null_builder.into();
let valid_count = null_buf.count_set_bits();
let null_count = len - valid_count;
let opt_null_buf = (null_count != 0).then(|| null_buf);

let data = unsafe {
ArrayData::new_unchecked(
T::DATA_TYPE,
null_buf.len(),
None,
Some(null_buf.into()),
len,
Some(null_count),
opt_null_buf,
0,
vec![buffer],
vec![],
Expand Down Expand Up @@ -1025,6 +1031,16 @@ mod tests {
assert_eq!(primitive_array.len(), 10);
}

#[test]
fn test_primitive_array_from_non_null_iter() {
let iter = (0..10_i32).map(Some);
let primitive_array = PrimitiveArray::<Int32Type>::from_iter(iter);
assert_eq!(primitive_array.len(), 10);
assert_eq!(primitive_array.null_count(), 0);
assert_eq!(primitive_array.data().null_buffer(), None);
assert_eq!(primitive_array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
}

#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
(values buffer)")]
Expand Down