diff --git a/arrow/src/array/builder/boolean_builder.rs b/arrow/src/array/builder/boolean_builder.rs index 1e052b644c8..e28e37bc9e2 100644 --- a/arrow/src/array/builder/boolean_builder.rs +++ b/arrow/src/array/builder/boolean_builder.rs @@ -28,6 +28,7 @@ use crate::error::ArrowError; use crate::error::Result; use super::BooleanBufferBuilder; +use super::NullBufferBuilder; /// Array builder for fixed-width primitive types /// @@ -62,9 +63,7 @@ use super::BooleanBufferBuilder; #[derive(Debug)] pub struct BooleanBuilder { values_builder: BooleanBufferBuilder, - /// We only materialize the builder when we add `false`. - /// This optimization is **very** important for the performance. - bitmap_builder: Option, + null_buffer_builder: NullBufferBuilder, } impl BooleanBuilder { @@ -72,7 +71,7 @@ impl BooleanBuilder { pub fn new(capacity: usize) -> Self { Self { values_builder: BooleanBufferBuilder::new(capacity), - bitmap_builder: None, + null_buffer_builder: NullBufferBuilder::new(capacity), } } @@ -85,19 +84,23 @@ impl BooleanBuilder { #[inline] pub fn append_value(&mut self, v: bool) { self.values_builder.append(v); - if let Some(b) = self.bitmap_builder.as_mut() { - b.append(true) - } + self.null_buffer_builder.append_non_null(); } /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { - self.materialize_bitmap_builder(); - self.bitmap_builder.as_mut().unwrap().append(false); + self.null_buffer_builder.append_null(); self.values_builder.advance(1); } + /// Appends `n` `null`s into the builder. + #[inline] + pub fn append_nulls(&mut self, n: usize) { + self.null_buffer_builder.append_n_nulls(n); + self.values_builder.advance(n); + } + /// Appends an `Option` into the builder #[inline] pub fn append_option(&mut self, v: Option) { @@ -110,10 +113,8 @@ impl BooleanBuilder { /// Appends a slice of type `T` into the builder #[inline] pub fn append_slice(&mut self, v: &[bool]) { - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_n(v.len(), true) - } self.values_builder.append_slice(v); + self.null_buffer_builder.append_n_non_nulls(v.len()); } /// Appends values from a slice of type `T` and a validity boolean slice. @@ -126,13 +127,7 @@ impl BooleanBuilder { "Value and validity lengths must be equal".to_string(), )) } else { - is_valid - .iter() - .any(|v| !*v) - .then(|| self.materialize_bitmap_builder()); - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_slice(is_valid) - } + self.null_buffer_builder.append_slice(is_valid); self.values_builder.append_slice(values); Ok(()) } @@ -141,7 +136,7 @@ impl BooleanBuilder { /// Builds the [BooleanArray] and reset this builder. pub fn finish(&mut self) -> BooleanArray { let len = self.len(); - let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); + let null_bit_buffer = self.null_buffer_builder.finish(); let builder = ArrayData::builder(DataType::Boolean) .len(len) .add_buffer(self.values_builder.finish()) @@ -150,15 +145,6 @@ impl BooleanBuilder { let array_data = unsafe { builder.build_unchecked() }; BooleanArray::from(array_data) } - - fn materialize_bitmap_builder(&mut self) { - if self.bitmap_builder.is_none() { - let mut b = BooleanBufferBuilder::new(0); - b.reserve(self.values_builder.capacity()); - b.append_n(self.values_builder.len(), true); - self.bitmap_builder = Some(b); - } - } } impl ArrayBuilder for BooleanBuilder { diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index 5ef89d8f460..96f90c4cae2 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -23,12 +23,12 @@ use crate::error::{ArrowError, Result}; use std::any::Any; use std::sync::Arc; -use super::BooleanBufferBuilder; +use super::NullBufferBuilder; #[derive(Debug)] pub struct FixedSizeBinaryBuilder { values_builder: UInt8BufferBuilder, - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, value_length: i32, } @@ -43,7 +43,7 @@ impl FixedSizeBinaryBuilder { ); Self { values_builder: UInt8BufferBuilder::new(capacity), - bitmap_builder: BooleanBufferBuilder::new(if byte_width > 0 { + null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 { capacity / byte_width as usize } else { 0 @@ -64,7 +64,7 @@ impl FixedSizeBinaryBuilder { )) } else { self.values_builder.append_slice(value.as_ref()); - self.bitmap_builder.append(true); + self.null_buffer_builder.append_non_null(); Ok(()) } } @@ -74,7 +74,7 @@ impl FixedSizeBinaryBuilder { pub fn append_null(&mut self) { self.values_builder .append_slice(&vec![0u8; self.value_length as usize][..]); - self.bitmap_builder.append(false); + self.null_buffer_builder.append_null(); } /// Builds the [`FixedSizeBinaryArray`] and reset this builder. @@ -83,7 +83,7 @@ impl FixedSizeBinaryBuilder { let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) .add_buffer(self.values_builder.finish()) - .null_bit_buffer(Some(self.bitmap_builder.finish())) + .null_bit_buffer(self.null_buffer_builder.finish()) .len(array_length); let array_data = unsafe { array_data_builder.build_unchecked() }; FixedSizeBinaryArray::from(array_data) @@ -108,12 +108,12 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { /// Returns the number of array slots in the builder fn len(&self) -> usize { - self.bitmap_builder.len() + self.null_buffer_builder.len() } /// Returns whether the number of array slots is zero fn is_empty(&self) -> bool { - self.bitmap_builder.is_empty() + self.null_buffer_builder.is_empty() } /// Builds the array and reset this builder. diff --git a/arrow/src/array/builder/fixed_size_list_builder.rs b/arrow/src/array/builder/fixed_size_list_builder.rs index 4e80e585a94..343ce3657b3 100644 --- a/arrow/src/array/builder/fixed_size_list_builder.rs +++ b/arrow/src/array/builder/fixed_size_list_builder.rs @@ -25,12 +25,12 @@ use crate::datatypes::DataType; use crate::datatypes::Field; use super::ArrayBuilder; -use super::BooleanBufferBuilder; +use super::NullBufferBuilder; /// Array builder for [`FixedSizeListArray`] #[derive(Debug)] pub struct FixedSizeListBuilder { - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, values_builder: T, list_len: i32, } @@ -48,7 +48,7 @@ impl FixedSizeListBuilder { /// `capacity` is the number of items to pre-allocate space for in this builder pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self { Self { - bitmap_builder: BooleanBufferBuilder::new(capacity), + null_buffer_builder: NullBufferBuilder::new(capacity), values_builder, list_len: value_length, } @@ -76,12 +76,12 @@ where /// Returns the number of array slots in the builder fn len(&self) -> usize { - self.bitmap_builder.len() + self.null_buffer_builder.len() } /// Returns whether the number of array slots is zero fn is_empty(&self) -> bool { - self.bitmap_builder.is_empty() + self.null_buffer_builder.is_empty() } /// Builds the array and reset this builder. @@ -109,7 +109,7 @@ where /// Finish the current fixed-length list array slot #[inline] pub fn append(&mut self, is_valid: bool) { - self.bitmap_builder.append(is_valid); + self.null_buffer_builder.append(is_valid); } /// Builds the [`FixedSizeListBuilder`] and reset this builder. @@ -131,14 +131,14 @@ where len, ); - let null_bit_buffer = self.bitmap_builder.finish(); + let null_bit_buffer = self.null_buffer_builder.finish(); let array_data = ArrayData::builder(DataType::FixedSizeList( Box::new(Field::new("item", values_data.data_type().clone(), true)), self.list_len, )) .len(len) .add_child_data(values_data.clone()) - .null_bit_buffer(Some(null_bit_buffer)); + .null_bit_buffer(null_bit_buffer); let array_data = unsafe { array_data.build_unchecked() }; diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs index 54c1855a1b7..1e29c470cac 100644 --- a/arrow/src/array/builder/generic_binary_builder.rs +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -25,14 +25,14 @@ use crate::{ use std::any::Any; use std::sync::Arc; -use super::{BooleanBufferBuilder, BufferBuilder}; +use super::{BufferBuilder, NullBufferBuilder}; /// Array builder for [`GenericBinaryArray`] #[derive(Debug)] pub struct GenericBinaryBuilder { value_builder: UInt8BufferBuilder, offsets_builder: BufferBuilder, - null_buffer_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, } impl GenericBinaryBuilder { @@ -44,7 +44,7 @@ impl GenericBinaryBuilder { Self { value_builder: UInt8BufferBuilder::new(capacity), offsets_builder, - null_buffer_builder: BooleanBufferBuilder::new(1024), + null_buffer_builder: NullBufferBuilder::new(1024), } } @@ -76,7 +76,7 @@ impl GenericBinaryBuilder { .len(self.len()) .add_buffer(self.offsets_builder.finish()) .add_buffer(self.value_builder.finish()) - .null_bit_buffer(Some(self.null_buffer_builder.finish())); + .null_bit_buffer(self.null_buffer_builder.finish()); self.offsets_builder.append(OffsetSize::zero()); let array_data = unsafe { array_builder.build_unchecked() }; diff --git a/arrow/src/array/builder/generic_list_builder.rs b/arrow/src/array/builder/generic_list_builder.rs index e478cc7a3b1..911182f6571 100644 --- a/arrow/src/array/builder/generic_list_builder.rs +++ b/arrow/src/array/builder/generic_list_builder.rs @@ -25,13 +25,13 @@ use crate::array::OffsetSizeTrait; use crate::datatypes::DataType; use crate::datatypes::Field; -use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; +use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; /// Array builder for [`GenericListArray`] #[derive(Debug)] pub struct GenericListBuilder { offsets_builder: BufferBuilder, - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, values_builder: T, } @@ -49,7 +49,7 @@ impl GenericListBuilder usize { - self.bitmap_builder.len() + self.null_buffer_builder.len() } /// Returns whether the number of array slots is zero fn is_empty(&self) -> bool { - self.bitmap_builder.is_empty() + self.null_buffer_builder.is_empty() } /// Builds the array and reset this builder. @@ -113,7 +113,7 @@ where pub fn append(&mut self, is_valid: bool) { self.offsets_builder .append(OffsetSize::from_usize(self.values_builder.len()).unwrap()); - self.bitmap_builder.append(is_valid); + self.null_buffer_builder.append(is_valid); } /// Builds the [`GenericListArray`] and reset this builder. @@ -128,7 +128,7 @@ where let values_data = values_arr.data(); let offset_buffer = self.offsets_builder.finish(); - let null_bit_buffer = self.bitmap_builder.finish(); + let null_bit_buffer = self.null_buffer_builder.finish(); self.offsets_builder.append(OffsetSize::zero()); let field = Box::new(Field::new( "item", @@ -144,7 +144,7 @@ where .len(len) .add_buffer(offset_buffer) .add_child_data(values_data.clone()) - .null_bit_buffer(Some(null_bit_buffer)); + .null_bit_buffer(null_bit_buffer); let array_data = unsafe { array_data_builder.build_unchecked() }; diff --git a/arrow/src/array/builder/map_builder.rs b/arrow/src/array/builder/map_builder.rs index 6078f8f2251..7e68abd5f1a 100644 --- a/arrow/src/array/builder/map_builder.rs +++ b/arrow/src/array/builder/map_builder.rs @@ -18,7 +18,7 @@ use std::any::Any; use std::sync::Arc; -use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; +use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; use crate::array::array::Array; use crate::array::ArrayData; use crate::array::ArrayRef; @@ -32,7 +32,7 @@ use crate::error::Result; #[derive(Debug)] pub struct MapBuilder { offsets_builder: BufferBuilder, - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, field_names: MapFieldNames, key_builder: K, value_builder: V, @@ -78,7 +78,7 @@ impl MapBuilder { offsets_builder.append(len); Self { offsets_builder, - bitmap_builder: BooleanBufferBuilder::new(capacity), + null_buffer_builder: NullBufferBuilder::new(capacity), field_names: field_names.unwrap_or_default(), key_builder, value_builder, @@ -107,7 +107,7 @@ impl MapBuilder { ))); } self.offsets_builder.append(self.key_builder.len() as i32); - self.bitmap_builder.append(is_valid); + self.null_buffer_builder.append(is_valid); self.len += 1; Ok(()) } @@ -145,7 +145,7 @@ impl MapBuilder { StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]); let offset_buffer = self.offsets_builder.finish(); - let null_bit_buffer = self.bitmap_builder.finish(); + let null_bit_buffer = self.null_buffer_builder.finish(); self.offsets_builder.append(self.len); let map_field = Box::new(Field::new( self.field_names.entry.as_str(), @@ -156,7 +156,7 @@ impl MapBuilder { .len(len) .add_buffer(offset_buffer) .add_child_data(struct_array.into_data()) - .null_bit_buffer(Some(null_bit_buffer)); + .null_bit_buffer(null_bit_buffer); let array_data = unsafe { array_data.build_unchecked() }; diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs index f97ff4cac67..77dd907f6ee 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow/src/array/builder/mod.rs @@ -30,6 +30,7 @@ mod generic_binary_builder; mod generic_list_builder; mod generic_string_builder; mod map_builder; +mod null_buffer_builder; mod primitive_builder; mod primitive_dictionary_builder; mod string_dictionary_builder; @@ -53,6 +54,7 @@ pub use generic_binary_builder::GenericBinaryBuilder; pub use generic_list_builder::GenericListBuilder; pub use generic_string_builder::GenericStringBuilder; pub use map_builder::{MapBuilder, MapFieldNames}; +use null_buffer_builder::NullBufferBuilder; pub use primitive_builder::PrimitiveBuilder; pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder; pub use string_dictionary_builder::StringDictionaryBuilder; diff --git a/arrow/src/array/builder/null_buffer_builder.rs b/arrow/src/array/builder/null_buffer_builder.rs new file mode 100644 index 00000000000..ef2e4c50ab9 --- /dev/null +++ b/arrow/src/array/builder/null_buffer_builder.rs @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::buffer::Buffer; + +use super::BooleanBufferBuilder; + +/// Builder for creating the null bit buffer. +/// This builder only materializes the buffer when we append `false`. +/// If you only append `true`s to the builder, what you get will be +/// `None` when calling [`finish`](#method.finish). +/// This optimization is **very** important for the performance. +#[derive(Debug)] +pub(super) struct NullBufferBuilder { + bitmap_builder: Option, + /// Store the length of the buffer before materializing. + len: usize, + capacity: usize, +} + +impl NullBufferBuilder { + /// Creates a new empty builder. + /// `capacity` is the number of bits in the null buffer. + pub fn new(capacity: usize) -> Self { + Self { + bitmap_builder: None, + len: 0, + capacity, + } + } + + /// Appends `n` `true`s into the builder + /// to indicate that these `n` items are not nulls. + #[inline] + pub fn append_n_non_nulls(&mut self, n: usize) { + if let Some(buf) = self.bitmap_builder.as_mut() { + buf.append_n(n, true) + } else { + self.len += n; + } + } + + /// Appends a `true` into the builder + /// to indicate that this item is not null. + #[inline] + pub fn append_non_null(&mut self) { + if let Some(buf) = self.bitmap_builder.as_mut() { + buf.append(true) + } else { + self.len += 1; + } + } + + /// Appends `n` `false`s into the builder + /// to indicate that these `n` items are nulls. + #[inline] + pub fn append_n_nulls(&mut self, n: usize) { + self.materialize_if_needed(); + self.bitmap_builder.as_mut().unwrap().append_n(n, false); + } + + /// Appends a `false` into the builder + /// to indicate that this item is null. + #[inline] + pub fn append_null(&mut self) { + self.materialize_if_needed(); + self.bitmap_builder.as_mut().unwrap().append(false); + } + + /// Appends a boolean value into the builder. + #[inline] + pub fn append(&mut self, not_null: bool) { + if not_null { + self.append_non_null() + } else { + self.append_null() + } + } + + /// Appends a boolean slice into the builder + /// to indicate the validations of these items. + pub fn append_slice(&mut self, slice: &[bool]) { + if slice.iter().any(|v| !v) { + self.materialize_if_needed() + } + if let Some(buf) = self.bitmap_builder.as_mut() { + buf.append_slice(slice) + } else { + self.len += slice.len(); + } + } + + /// Builds the null buffer and resets the builder. + /// Returns `None` if the builder only contains `true`s. + pub fn finish(&mut self) -> Option { + let buf = self.bitmap_builder.as_mut().map(|b| b.finish()); + self.bitmap_builder = None; + self.len = 0; + buf + } + + #[inline] + fn materialize_if_needed(&mut self) { + if self.bitmap_builder.is_none() { + self.materialize() + } + } + + #[cold] + fn materialize(&mut self) { + if self.bitmap_builder.is_none() { + let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity)); + b.append_n(self.len, true); + self.bitmap_builder = Some(b); + } + } +} + +impl NullBufferBuilder { + pub fn len(&self) -> usize { + if let Some(b) = &self.bitmap_builder { + b.len() + } else { + self.len + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_null_buffer_builder() { + let mut builder = NullBufferBuilder::new(0); + builder.append_null(); + builder.append_non_null(); + builder.append_n_nulls(2); + builder.append_n_non_nulls(2); + assert_eq!(6, builder.len()); + + let buf = builder.finish().unwrap(); + assert_eq!(Buffer::from(&[0b110010_u8]), buf); + } + + #[test] + fn test_null_buffer_builder_all_nulls() { + let mut builder = NullBufferBuilder::new(0); + builder.append_null(); + builder.append_n_nulls(2); + builder.append_slice(&[false, false, false]); + assert_eq!(6, builder.len()); + + let buf = builder.finish().unwrap(); + assert_eq!(Buffer::from(&[0b0_u8]), buf); + } + + #[test] + fn test_null_buffer_builder_no_null() { + let mut builder = NullBufferBuilder::new(0); + builder.append_non_null(); + builder.append_n_non_nulls(2); + builder.append_slice(&[true, true, true]); + assert_eq!(6, builder.len()); + + let buf = builder.finish(); + assert!(buf.is_none()); + } + + #[test] + fn test_null_buffer_builder_reset() { + let mut builder = NullBufferBuilder::new(0); + builder.append_slice(&[true, false, true]); + builder.finish(); + assert!(builder.is_empty()); + + builder.append_slice(&[true, true, true]); + assert!(builder.finish().is_none()); + assert!(builder.is_empty()); + + builder.append_slice(&[true, true, false, true]); + + let buf = builder.finish().unwrap(); + assert_eq!(Buffer::from(&[0b1011_u8]), buf); + } +} diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow/src/array/builder/primitive_builder.rs index 36f4ae2b7d9..3b9db1f01e6 100644 --- a/arrow/src/array/builder/primitive_builder.rs +++ b/arrow/src/array/builder/primitive_builder.rs @@ -23,15 +23,13 @@ use crate::array::ArrayRef; use crate::array::PrimitiveArray; use crate::datatypes::ArrowPrimitiveType; -use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; +use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; /// Array builder for fixed-width primitive types #[derive(Debug)] pub struct PrimitiveBuilder { values_builder: BufferBuilder, - /// We only materialize the builder when we add `false`. - /// This optimization is **very** important for performance of `StringBuilder`. - bitmap_builder: Option, + null_buffer_builder: NullBufferBuilder, } impl ArrayBuilder for PrimitiveBuilder { @@ -71,7 +69,7 @@ impl PrimitiveBuilder { pub fn new(capacity: usize) -> Self { Self { values_builder: BufferBuilder::::new(capacity), - bitmap_builder: None, + null_buffer_builder: NullBufferBuilder::new(capacity), } } @@ -83,24 +81,20 @@ impl PrimitiveBuilder { /// Appends a value of type `T` into the builder #[inline] pub fn append_value(&mut self, v: T::Native) { - if let Some(b) = self.bitmap_builder.as_mut() { - b.append(true); - } + self.null_buffer_builder.append_non_null(); self.values_builder.append(v); } /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { - self.materialize_bitmap_builder_if_needed(); - self.bitmap_builder.as_mut().unwrap().append(false); + self.null_buffer_builder.append_null(); self.values_builder.advance(1); } #[inline] pub fn append_nulls(&mut self, n: usize) { - self.materialize_bitmap_builder_if_needed(); - self.bitmap_builder.as_mut().unwrap().append_n(n, false); + self.null_buffer_builder.append_n_nulls(n); self.values_builder.advance(n); } @@ -116,9 +110,7 @@ impl PrimitiveBuilder { /// Appends a slice of type `T` into the builder #[inline] pub fn append_slice(&mut self, v: &[T::Native]) { - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_n(v.len(), true); - } + self.null_buffer_builder.append_n_non_nulls(v.len()); self.values_builder.append_slice(v); } @@ -130,12 +122,7 @@ impl PrimitiveBuilder { is_valid.len(), "Value and validity lengths must be equal" ); - if is_valid.iter().any(|v| !*v) { - self.materialize_bitmap_builder_if_needed(); - } - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_slice(is_valid); - } + self.null_buffer_builder.append_slice(is_valid); self.values_builder.append_slice(values); } @@ -155,50 +142,23 @@ impl PrimitiveBuilder { .1 .expect("append_trusted_len_iter requires an upper bound"); - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_n(len, true); - } + self.null_buffer_builder.append_n_non_nulls(len); self.values_builder.append_trusted_len_iter(iter); } - /// Builds the `PrimitiveArray` and reset this builder. + /// Builds the [`PrimitiveArray`] and reset this builder. pub fn finish(&mut self) -> PrimitiveArray { let len = self.len(); - let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); - let null_count = len - - null_bit_buffer - .as_ref() - .map(|b| b.count_set_bits()) - .unwrap_or(len); + let null_bit_buffer = self.null_buffer_builder.finish(); let builder = ArrayData::builder(T::DATA_TYPE) .len(len) .add_buffer(self.values_builder.finish()) - .null_bit_buffer(if null_count > 0 { - null_bit_buffer - } else { - None - }); + .null_bit_buffer(null_bit_buffer); let array_data = unsafe { builder.build_unchecked() }; PrimitiveArray::::from(array_data) } - #[inline] - fn materialize_bitmap_builder_if_needed(&mut self) { - if self.bitmap_builder.is_some() { - return; - } - self.materialize_bitmap_builder() - } - - #[cold] - fn materialize_bitmap_builder(&mut self) { - let mut b = BooleanBufferBuilder::new(0); - b.reserve(self.values_builder.capacity()); - b.append_n(self.values_builder.len(), true); - self.bitmap_builder = Some(b); - } - /// Returns the current values buffer as a slice pub fn values_slice(&self) -> &[T::Native] { self.values_builder.as_slice() diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index c679dd6217e..373a8458283 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -24,6 +24,8 @@ use crate::array::*; use crate::datatypes::DataType; use crate::datatypes::Field; +use super::NullBufferBuilder; + /// Array builder for Struct types. /// /// Note that callers should make sure that methods of all the child field builders are @@ -31,7 +33,7 @@ use crate::datatypes::Field; pub struct StructBuilder { fields: Vec, field_builders: Vec>, - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, len: usize, } @@ -39,7 +41,7 @@ impl fmt::Debug for StructBuilder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("StructBuilder") .field("fields", &self.fields) - .field("bitmap_builder", &self.bitmap_builder) + .field("bitmap_builder", &self.null_buffer_builder) .field("len", &self.len) .finish() } @@ -173,7 +175,7 @@ impl StructBuilder { Self { fields, field_builders, - bitmap_builder: BooleanBufferBuilder::new(0), + null_buffer_builder: NullBufferBuilder::new(0), len: 0, } } @@ -202,7 +204,7 @@ impl StructBuilder { /// should be appended for each child sub-array in a consistent way. #[inline] pub fn append(&mut self, is_valid: bool) { - self.bitmap_builder.append(is_valid); + self.null_buffer_builder.append(is_valid); self.len += 1; } @@ -220,14 +222,11 @@ impl StructBuilder { child_data.push(arr.into_data()); } - let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = self.len - null_bit_buffer.count_set_bits(); - let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) + let null_bit_buffer = self.null_buffer_builder.finish(); + let builder = ArrayData::builder(DataType::Struct(self.fields.clone())) .len(self.len) - .child_data(child_data); - if null_count > 0 { - builder = builder.null_bit_buffer(Some(null_bit_buffer)); - } + .child_data(child_data) + .null_bit_buffer(null_bit_buffer); self.len = 0; diff --git a/arrow/src/array/builder/union_builder.rs b/arrow/src/array/builder/union_builder.rs index 95d9ea40a3d..8ef5028afc8 100644 --- a/arrow/src/array/builder/union_builder.rs +++ b/arrow/src/array/builder/union_builder.rs @@ -29,7 +29,7 @@ use crate::datatypes::Field; use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType}; use crate::error::{ArrowError, Result}; -use super::{BooleanBufferBuilder, BufferBuilder}; +use super::{BufferBuilder, NullBufferBuilder}; use crate::array::make_array; @@ -45,7 +45,7 @@ struct FieldData { /// The number of array slots represented by the buffer slots: usize, /// A builder for the null bitmap - bitmap_builder: BooleanBufferBuilder, + null_buffer_builder: NullBufferBuilder, } /// A type-erased [`BufferBuilder`] used by [`FieldData`] @@ -79,7 +79,7 @@ impl FieldData { data_type, slots: 0, values_buffer: Box::new(BufferBuilder::::new(1)), - bitmap_builder: BooleanBufferBuilder::new(1), + null_buffer_builder: NullBufferBuilder::new(1), } } @@ -91,14 +91,14 @@ impl FieldData { .expect("Tried to append unexpected type") .append(v); - self.bitmap_builder.append(true); + self.null_buffer_builder.append(true); self.slots += 1; } /// Appends a null to this `FieldData`. fn append_null(&mut self) { self.values_buffer.append_null(); - self.bitmap_builder.append(false); + self.null_buffer_builder.append(false); self.slots += 1; } } @@ -264,7 +264,7 @@ impl UnionBuilder { data_type, mut values_buffer, slots, - mut bitmap_builder, + null_buffer_builder: mut bitmap_builder, }, ) in self.fields.into_iter() { @@ -272,7 +272,7 @@ impl UnionBuilder { let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) .add_buffer(buffer) .len(slots) - .null_bit_buffer(Some(bitmap_builder.finish())); + .null_bit_buffer(bitmap_builder.finish()); let arr_data_ref = unsafe { arr_data_builder.build_unchecked() }; let array_ref = make_array(arr_data_ref);