Skip to content

Commit

Permalink
Lazily materialize the null buffer builder for all array builders. (#…
Browse files Browse the repository at this point in the history
…2127)

* create null buffer builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* use null buffer builder in boolean array builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* add capacity field and update docs

Signed-off-by: remzi <13716567376yh@gmail.com>

* update fixed size binary builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update fixed size list builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update binary builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update list builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update map builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update struct builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* update union builder

Signed-off-by: remzi <13716567376yh@gmail.com>

* rename

Signed-off-by: remzi <13716567376yh@gmail.com>

* add tests

Signed-off-by: remzi <13716567376yh@gmail.com>

* optimize

Signed-off-by: remzi <13716567376yh@gmail.com>

* expose null buffer builder in builder mod

Signed-off-by: remzi <13716567376yh@gmail.com>

* add docs

Signed-off-by: remzi <13716567376yh@gmail.com>

* reduce redundant computation

Signed-off-by: remzi <13716567376yh@gmail.com>

* inline methods to achieve better performance

Signed-off-by: remzi <13716567376yh@gmail.com>

* use instead of pub(self) use

Signed-off-by: remzi <13716567376yh@gmail.com>

* Update arrow/src/array/builder/null_buffer_builder.rs

fix spelling mistake

Co-authored-by: Jörn Horstmann <git@jhorstmann.net>

* reorder

Signed-off-by: remzi <13716567376yh@gmail.com>

* rename methods and update docs

Signed-off-by: remzi <13716567376yh@gmail.com>

Co-authored-by: Jörn Horstmann <git@jhorstmann.net>
  • Loading branch information
HaoYang670 and jhorstmann committed Jul 26, 2022
1 parent 19fd885 commit 9c70e4a
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 133 deletions.
44 changes: 15 additions & 29 deletions arrow/src/array/builder/boolean_builder.rs
Expand Up @@ -28,6 +28,7 @@ use crate::error::ArrowError;
use crate::error::Result;

use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

/// Array builder for fixed-width primitive types
///
Expand Down Expand Up @@ -62,17 +63,15 @@ use super::BooleanBufferBuilder;
#[derive(Debug)]
pub struct BooleanBuilder {
values_builder: BooleanBufferBuilder,
/// We only materialize the builder when we add `false`.
/// This optimization is **very** important for the performance.
bitmap_builder: Option<BooleanBufferBuilder>,
null_buffer_builder: NullBufferBuilder,
}

impl BooleanBuilder {
/// Creates a new primitive array builder
pub fn new(capacity: usize) -> Self {
Self {
values_builder: BooleanBufferBuilder::new(capacity),
bitmap_builder: None,
null_buffer_builder: NullBufferBuilder::new(capacity),
}
}

Expand All @@ -85,19 +84,23 @@ impl BooleanBuilder {
#[inline]
pub fn append_value(&mut self, v: bool) {
self.values_builder.append(v);
if let Some(b) = self.bitmap_builder.as_mut() {
b.append(true)
}
self.null_buffer_builder.append_non_null();
}

/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.materialize_bitmap_builder();
self.bitmap_builder.as_mut().unwrap().append(false);
self.null_buffer_builder.append_null();
self.values_builder.advance(1);
}

/// Appends `n` `null`s into the builder.
#[inline]
pub fn append_nulls(&mut self, n: usize) {
self.null_buffer_builder.append_n_nulls(n);
self.values_builder.advance(n);
}

/// Appends an `Option<T>` into the builder
#[inline]
pub fn append_option(&mut self, v: Option<bool>) {
Expand All @@ -110,10 +113,8 @@ impl BooleanBuilder {
/// Appends a slice of type `T` into the builder
#[inline]
pub fn append_slice(&mut self, v: &[bool]) {
if let Some(b) = self.bitmap_builder.as_mut() {
b.append_n(v.len(), true)
}
self.values_builder.append_slice(v);
self.null_buffer_builder.append_n_non_nulls(v.len());
}

/// Appends values from a slice of type `T` and a validity boolean slice.
Expand All @@ -126,13 +127,7 @@ impl BooleanBuilder {
"Value and validity lengths must be equal".to_string(),
))
} else {
is_valid
.iter()
.any(|v| !*v)
.then(|| self.materialize_bitmap_builder());
if let Some(b) = self.bitmap_builder.as_mut() {
b.append_slice(is_valid)
}
self.null_buffer_builder.append_slice(is_valid);
self.values_builder.append_slice(values);
Ok(())
}
Expand All @@ -141,7 +136,7 @@ impl BooleanBuilder {
/// Builds the [BooleanArray] and reset this builder.
pub fn finish(&mut self) -> BooleanArray {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
let null_bit_buffer = self.null_buffer_builder.finish();
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
.add_buffer(self.values_builder.finish())
Expand All @@ -150,15 +145,6 @@ impl BooleanBuilder {
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}

fn materialize_bitmap_builder(&mut self) {
if self.bitmap_builder.is_none() {
let mut b = BooleanBufferBuilder::new(0);
b.reserve(self.values_builder.capacity());
b.append_n(self.values_builder.len(), true);
self.bitmap_builder = Some(b);
}
}
}

impl ArrayBuilder for BooleanBuilder {
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/fixed_size_binary_builder.rs
Expand Up @@ -23,12 +23,12 @@ use crate::error::{ArrowError, Result};
use std::any::Any;
use std::sync::Arc;

use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
values_builder: UInt8BufferBuilder,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
value_length: i32,
}

Expand All @@ -43,7 +43,7 @@ impl FixedSizeBinaryBuilder {
);
Self {
values_builder: UInt8BufferBuilder::new(capacity),
bitmap_builder: BooleanBufferBuilder::new(if byte_width > 0 {
null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
capacity / byte_width as usize
} else {
0
Expand All @@ -64,7 +64,7 @@ impl FixedSizeBinaryBuilder {
))
} else {
self.values_builder.append_slice(value.as_ref());
self.bitmap_builder.append(true);
self.null_buffer_builder.append_non_null();
Ok(())
}
}
Expand All @@ -74,7 +74,7 @@ impl FixedSizeBinaryBuilder {
pub fn append_null(&mut self) {
self.values_builder
.append_slice(&vec![0u8; self.value_length as usize][..]);
self.bitmap_builder.append(false);
self.null_buffer_builder.append_null();
}

/// Builds the [`FixedSizeBinaryArray`] and reset this builder.
Expand All @@ -83,7 +83,7 @@ impl FixedSizeBinaryBuilder {
let array_data_builder =
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(self.values_builder.finish())
.null_bit_buffer(Some(self.bitmap_builder.finish()))
.null_bit_buffer(self.null_buffer_builder.finish())
.len(array_length);
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
Expand All @@ -108,12 +108,12 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/fixed_size_list_builder.rs
Expand Up @@ -25,12 +25,12 @@ use crate::datatypes::DataType;
use crate::datatypes::Field;

use super::ArrayBuilder;
use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

/// Array builder for [`FixedSizeListArray`]
#[derive(Debug)]
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: T,
list_len: i32,
}
Expand All @@ -48,7 +48,7 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
/// `capacity` is the number of items to pre-allocate space for in this builder
pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
Self {
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
list_len: value_length,
}
Expand Down Expand Up @@ -76,12 +76,12 @@ where

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down Expand Up @@ -109,7 +109,7 @@ where
/// Finish the current fixed-length list array slot
#[inline]
pub fn append(&mut self, is_valid: bool) {
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
}

/// Builds the [`FixedSizeListBuilder`] and reset this builder.
Expand All @@ -131,14 +131,14 @@ where
len,
);

let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
let array_data = ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
.add_child_data(values_data.clone())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data.build_unchecked() };

Expand Down
8 changes: 4 additions & 4 deletions arrow/src/array/builder/generic_binary_builder.rs
Expand Up @@ -25,14 +25,14 @@ use crate::{
use std::any::Any;
use std::sync::Arc;

use super::{BooleanBufferBuilder, BufferBuilder};
use super::{BufferBuilder, NullBufferBuilder};

/// Array builder for [`GenericBinaryArray`]
#[derive(Debug)]
pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
value_builder: UInt8BufferBuilder,
offsets_builder: BufferBuilder<OffsetSize>,
null_buffer_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
}

impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
Expand All @@ -44,7 +44,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
Self {
value_builder: UInt8BufferBuilder::new(capacity),
offsets_builder,
null_buffer_builder: BooleanBufferBuilder::new(1024),
null_buffer_builder: NullBufferBuilder::new(1024),
}
}

Expand Down Expand Up @@ -76,7 +76,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
.len(self.len())
.add_buffer(self.offsets_builder.finish())
.add_buffer(self.value_builder.finish())
.null_bit_buffer(Some(self.null_buffer_builder.finish()));
.null_bit_buffer(self.null_buffer_builder.finish());

self.offsets_builder.append(OffsetSize::zero());
let array_data = unsafe { array_builder.build_unchecked() };
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/generic_list_builder.rs
Expand Up @@ -25,13 +25,13 @@ use crate::array::OffsetSizeTrait;
use crate::datatypes::DataType;
use crate::datatypes::Field;

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};

/// Array builder for [`GenericListArray`]
#[derive(Debug)]
pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
offsets_builder: BufferBuilder<OffsetSize>,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: T,
}

Expand All @@ -49,7 +49,7 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize
offsets_builder.append(OffsetSize::zero());
Self {
offsets_builder,
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
}
}
Expand Down Expand Up @@ -77,12 +77,12 @@ where

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down Expand Up @@ -113,7 +113,7 @@ where
pub fn append(&mut self, is_valid: bool) {
self.offsets_builder
.append(OffsetSize::from_usize(self.values_builder.len()).unwrap());
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
}

/// Builds the [`GenericListArray`] and reset this builder.
Expand All @@ -128,7 +128,7 @@ where
let values_data = values_arr.data();

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
self.offsets_builder.append(OffsetSize::zero());
let field = Box::new(Field::new(
"item",
Expand All @@ -144,7 +144,7 @@ where
.len(len)
.add_buffer(offset_buffer)
.add_child_data(values_data.clone())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data_builder.build_unchecked() };

Expand Down
12 changes: 6 additions & 6 deletions arrow/src/array/builder/map_builder.rs
Expand Up @@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};
use crate::array::array::Array;
use crate::array::ArrayData;
use crate::array::ArrayRef;
Expand All @@ -32,7 +32,7 @@ use crate::error::Result;
#[derive(Debug)]
pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
offsets_builder: BufferBuilder<i32>,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
field_names: MapFieldNames,
key_builder: K,
value_builder: V,
Expand Down Expand Up @@ -78,7 +78,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
offsets_builder.append(len);
Self {
offsets_builder,
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
field_names: field_names.unwrap_or_default(),
key_builder,
value_builder,
Expand Down Expand Up @@ -107,7 +107,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
)));
}
self.offsets_builder.append(self.key_builder.len() as i32);
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
self.len += 1;
Ok(())
}
Expand Down Expand Up @@ -145,7 +145,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
self.offsets_builder.append(self.len);
let map_field = Box::new(Field::new(
self.field_names.entry.as_str(),
Expand All @@ -156,7 +156,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
.len(len)
.add_buffer(offset_buffer)
.add_child_data(struct_array.into_data())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data.build_unchecked() };

Expand Down

0 comments on commit 9c70e4a

Please sign in to comment.