From 9bcd052bd4ecb4a90463c7ffe7f84bd6a37f8d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Mon, 13 Jun 2022 20:32:55 +0200 Subject: [PATCH] Omit validity buffer in PrimitiveArray::from_iter when all values are valid (#1859) * Omit validity buffer in PrimitiveArray::from_iter when all values are valid * Use bool::then instead of if Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow/src/array/array.rs | 4 +++- arrow/src/array/array_primitive.rs | 30 +++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index f28aba59d73..c566ff99f12 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -873,7 +873,9 @@ mod tests { #[test] fn test_memory_size_primitive_nullable() { - let arr: PrimitiveArray = (0..128).map(Some).collect(); + let arr: PrimitiveArray = (0..128) + .map(|i| if i % 20 == 0 { Some(i) } else { None }) + .collect(); let empty_with_bitmap = PrimitiveArray::::from( ArrayData::builder(arr.data_type().clone()) .add_buffer(MutableBuffer::new(0).into()) diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs index 8893703aa85..6f496562f89 100644 --- a/arrow/src/array/array_primitive.rs +++ b/arrow/src/array/array_primitive.rs @@ -397,29 +397,35 @@ impl<'a, T: ArrowPrimitiveType, Ptr: Into>> FromIterator let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); - let mut null_buf = BooleanBufferBuilder::new(lower); + let mut null_builder = BooleanBufferBuilder::new(lower); let buffer: Buffer = iter .map(|item| { if let Some(a) = item.into().native { - null_buf.append(true); + null_builder.append(true); a } else { - null_buf.append(false); + null_builder.append(false); // this ensures that null items on the buffer are not arbitrary. - // This is important because falible operations can use null values (e.g. a vectorized "add") + // This is important because fallible operations can use null values (e.g. a vectorized "add") // which may panic (e.g. overflow if the number on the slots happen to be very large). T::Native::default() } }) .collect(); + let len = null_builder.len(); + let null_buf: Buffer = null_builder.into(); + let valid_count = null_buf.count_set_bits(); + let null_count = len - valid_count; + let opt_null_buf = (null_count != 0).then(|| null_buf); + let data = unsafe { ArrayData::new_unchecked( T::DATA_TYPE, - null_buf.len(), - None, - Some(null_buf.into()), + len, + Some(null_count), + opt_null_buf, 0, vec![buffer], vec![], @@ -1025,6 +1031,16 @@ mod tests { assert_eq!(primitive_array.len(), 10); } + #[test] + fn test_primitive_array_from_non_null_iter() { + let iter = (0..10_i32).map(Some); + let primitive_array = PrimitiveArray::::from_iter(iter); + assert_eq!(primitive_array.len(), 10); + assert_eq!(primitive_array.null_count(), 0); + assert_eq!(primitive_array.data().null_buffer(), None); + assert_eq!(primitive_array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + } + #[test] #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \ (values buffer)")]