Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazily materialize the null buffer builder for all array builders. #2127

Merged
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3354e33
create null buffer builder
HaoYang670 Jul 22, 2022
118417d
use null buffer builder in boolean array builder
HaoYang670 Jul 22, 2022
6f13da6
add capacity field and update docs
HaoYang670 Jul 22, 2022
6b83eb1
update fixed size binary builder
HaoYang670 Jul 22, 2022
22b6a33
update fixed size list builder
HaoYang670 Jul 22, 2022
5c78441
update binary builder
HaoYang670 Jul 22, 2022
98cd443
update list builder
HaoYang670 Jul 22, 2022
092cb30
update map builder
HaoYang670 Jul 22, 2022
caa4c13
update struct builder
HaoYang670 Jul 22, 2022
a54d2f6
update union builder
HaoYang670 Jul 22, 2022
bd2e642
rename
HaoYang670 Jul 22, 2022
b473be3
add tests
HaoYang670 Jul 22, 2022
1674093
optimize
HaoYang670 Jul 22, 2022
04c43cc
expose null buffer builder in builder mod
HaoYang670 Jul 22, 2022
dd49d30
add docs
HaoYang670 Jul 22, 2022
9b6d984
reduce redundant computation
HaoYang670 Jul 23, 2022
0c0031f
inline methods to achieve better performance
HaoYang670 Jul 23, 2022
b09081f
use instead of pub(self) use
HaoYang670 Jul 23, 2022
584aa22
merge master
HaoYang670 Jul 23, 2022
1444d7b
Update arrow/src/array/builder/null_buffer_builder.rs
HaoYang670 Jul 25, 2022
f15c088
Merge branch 'master' into lazily_materialize_null_buffer_builder
HaoYang670 Jul 25, 2022
c073b32
reorder
HaoYang670 Jul 25, 2022
28be7d8
Merge branch 'master' into lazily_materialize_null_buffer_builder
HaoYang670 Jul 25, 2022
0895a80
Merge branch 'lazily_materialize_null_buffer_builder' of https://gith…
HaoYang670 Jul 25, 2022
49c963c
rename methods and update docs
HaoYang670 Jul 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 15 additions & 30 deletions arrow/src/array/builder/boolean_builder.rs
Expand Up @@ -28,6 +28,7 @@ use crate::error::ArrowError;
use crate::error::Result;

use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

/// Array builder for fixed-width primitive types
///
Expand Down Expand Up @@ -62,17 +63,15 @@ use super::BooleanBufferBuilder;
#[derive(Debug)]
pub struct BooleanBuilder {
values_builder: BooleanBufferBuilder,
/// We only materialize the builder when we add `false`.
/// This optimization is **very** important for the performance.
bitmap_builder: Option<BooleanBufferBuilder>,
null_buffer_builder: NullBufferBuilder,
}

impl BooleanBuilder {
/// Creates a new primitive array builder
pub fn new(capacity: usize) -> Self {
Self {
values_builder: BooleanBufferBuilder::new(capacity),
bitmap_builder: None,
null_buffer_builder: NullBufferBuilder::new(capacity),
}
}

Expand All @@ -85,17 +84,20 @@ impl BooleanBuilder {
#[inline]
pub fn append_value(&mut self, v: bool) {
self.values_builder.append(v);
if let Some(b) = self.bitmap_builder.as_mut() {
b.append(true)
}
self.null_buffer_builder.append_n_true(1);
}

/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.materialize_bitmap_builder();
self.bitmap_builder.as_mut().unwrap().append(false);
self.values_builder.advance(1);
self.append_nulls(1)
}

/// Appends `n` `null`s into the builder.
#[inline]
pub fn append_nulls(&mut self, n: usize) {
Comment on lines +98 to +99
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a simple doc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added!

self.null_buffer_builder.append_n_false(n);
self.values_builder.advance(n);
}

/// Appends an `Option<T>` into the builder
Expand All @@ -110,9 +112,7 @@ impl BooleanBuilder {
/// Appends a slice of type `T` into the builder
#[inline]
pub fn append_slice(&mut self, v: &[bool]) {
if let Some(b) = self.bitmap_builder.as_mut() {
b.append_n(v.len(), true)
}
self.null_buffer_builder.append_n_true(v.len());
self.values_builder.append_slice(v);
}

Expand All @@ -126,13 +126,7 @@ impl BooleanBuilder {
"Value and validity lengths must be equal".to_string(),
))
} else {
is_valid
.iter()
.any(|v| !*v)
.then(|| self.materialize_bitmap_builder());
if let Some(b) = self.bitmap_builder.as_mut() {
b.append_slice(is_valid)
}
self.null_buffer_builder.append_slice(is_valid);
self.values_builder.append_slice(values);
Ok(())
}
Expand All @@ -141,7 +135,7 @@ impl BooleanBuilder {
/// Builds the [BooleanArray] and reset this builder.
pub fn finish(&mut self) -> BooleanArray {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
let null_bit_buffer = self.null_buffer_builder.finish();
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
.add_buffer(self.values_builder.finish())
Expand All @@ -150,15 +144,6 @@ impl BooleanBuilder {
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}

fn materialize_bitmap_builder(&mut self) {
if self.bitmap_builder.is_none() {
let mut b = BooleanBufferBuilder::new(0);
b.reserve(self.values_builder.capacity());
b.append_n(self.values_builder.len(), true);
self.bitmap_builder = Some(b);
}
}
}

impl ArrayBuilder for BooleanBuilder {
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/fixed_size_binary_builder.rs
Expand Up @@ -23,12 +23,12 @@ use crate::error::{ArrowError, Result};
use std::any::Any;
use std::sync::Arc;

use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
values_builder: UInt8BufferBuilder,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
value_length: i32,
}

Expand All @@ -43,7 +43,7 @@ impl FixedSizeBinaryBuilder {
);
Self {
values_builder: UInt8BufferBuilder::new(capacity),
bitmap_builder: BooleanBufferBuilder::new(if byte_width > 0 {
null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
capacity / byte_width as usize
} else {
0
Expand All @@ -64,7 +64,7 @@ impl FixedSizeBinaryBuilder {
))
} else {
self.values_builder.append_slice(value.as_ref());
self.bitmap_builder.append(true);
self.null_buffer_builder.append_true();
Ok(())
}
}
Expand All @@ -74,7 +74,7 @@ impl FixedSizeBinaryBuilder {
pub fn append_null(&mut self) {
self.values_builder
.append_slice(&vec![0u8; self.value_length as usize][..]);
self.bitmap_builder.append(false);
self.null_buffer_builder.append_false();
}

/// Builds the [`FixedSizeBinaryArray`] and reset this builder.
Expand All @@ -83,7 +83,7 @@ impl FixedSizeBinaryBuilder {
let array_data_builder =
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(self.values_builder.finish())
.null_bit_buffer(Some(self.bitmap_builder.finish()))
.null_bit_buffer(self.null_buffer_builder.finish())
.len(array_length);
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
Expand All @@ -108,12 +108,12 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/fixed_size_list_builder.rs
Expand Up @@ -25,12 +25,12 @@ use crate::datatypes::DataType;
use crate::datatypes::Field;

use super::ArrayBuilder;
use super::BooleanBufferBuilder;
use super::NullBufferBuilder;

/// Array builder for [`FixedSizeListArray`]
#[derive(Debug)]
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: T,
list_len: i32,
}
Expand All @@ -48,7 +48,7 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
/// `capacity` is the number of items to pre-allocate space for in this builder
pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
Self {
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
list_len: value_length,
}
Expand Down Expand Up @@ -76,12 +76,12 @@ where

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down Expand Up @@ -109,7 +109,7 @@ where
/// Finish the current fixed-length list array slot
#[inline]
pub fn append(&mut self, is_valid: bool) {
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
}

/// Builds the [`FixedSizeListBuilder`] and reset this builder.
Expand All @@ -131,14 +131,14 @@ where
len,
);

let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
let array_data = ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
.add_child_data(values_data.clone())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data.build_unchecked() };

Expand Down
8 changes: 4 additions & 4 deletions arrow/src/array/builder/generic_binary_builder.rs
Expand Up @@ -25,14 +25,14 @@ use crate::{
use std::any::Any;
use std::sync::Arc;

use super::{BooleanBufferBuilder, BufferBuilder};
use super::{BufferBuilder, NullBufferBuilder};

/// Array builder for [`GenericBinaryArray`]
#[derive(Debug)]
pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
value_builder: UInt8BufferBuilder,
offsets_builder: BufferBuilder<OffsetSize>,
null_buffer_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
}

impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
Expand All @@ -44,7 +44,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
Self {
value_builder: UInt8BufferBuilder::new(capacity),
offsets_builder,
null_buffer_builder: BooleanBufferBuilder::new(1024),
null_buffer_builder: NullBufferBuilder::new(1024),
}
}

Expand Down Expand Up @@ -76,7 +76,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
.len(self.len())
.add_buffer(self.offsets_builder.finish())
.add_buffer(self.value_builder.finish())
.null_bit_buffer(Some(self.null_buffer_builder.finish()));
.null_bit_buffer(self.null_buffer_builder.finish());

self.offsets_builder.append(OffsetSize::zero());
let array_data = unsafe { array_builder.build_unchecked() };
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/builder/generic_list_builder.rs
Expand Up @@ -25,13 +25,13 @@ use crate::array::OffsetSizeTrait;
use crate::datatypes::DataType;
use crate::datatypes::Field;

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};

/// Array builder for [`GenericListArray`]
#[derive(Debug)]
pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
offsets_builder: BufferBuilder<OffsetSize>,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: T,
}

Expand All @@ -49,7 +49,7 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize
offsets_builder.append(OffsetSize::zero());
Self {
offsets_builder,
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
}
}
Expand Down Expand Up @@ -77,12 +77,12 @@ where

/// Returns the number of array slots in the builder
fn len(&self) -> usize {
self.bitmap_builder.len()
self.null_buffer_builder.len()
}

/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
self.bitmap_builder.is_empty()
self.null_buffer_builder.is_empty()
}

/// Builds the array and reset this builder.
Expand Down Expand Up @@ -113,7 +113,7 @@ where
pub fn append(&mut self, is_valid: bool) {
self.offsets_builder
.append(OffsetSize::from_usize(self.values_builder.len()).unwrap());
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
}

/// Builds the [`GenericListArray`] and reset this builder.
Expand All @@ -128,7 +128,7 @@ where
let values_data = values_arr.data();

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
self.offsets_builder.append(OffsetSize::zero());
let field = Box::new(Field::new(
"item",
Expand All @@ -144,7 +144,7 @@ where
.len(len)
.add_buffer(offset_buffer)
.add_child_data(values_data.clone())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data_builder.build_unchecked() };

Expand Down
12 changes: 6 additions & 6 deletions arrow/src/array/builder/map_builder.rs
Expand Up @@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};
use crate::array::array::Array;
use crate::array::ArrayData;
use crate::array::ArrayRef;
Expand All @@ -32,7 +32,7 @@ use crate::error::Result;
#[derive(Debug)]
pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
offsets_builder: BufferBuilder<i32>,
bitmap_builder: BooleanBufferBuilder,
null_buffer_builder: NullBufferBuilder,
field_names: MapFieldNames,
key_builder: K,
value_builder: V,
Expand Down Expand Up @@ -78,7 +78,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
offsets_builder.append(len);
Self {
offsets_builder,
bitmap_builder: BooleanBufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(capacity),
field_names: field_names.unwrap_or_default(),
key_builder,
value_builder,
Expand Down Expand Up @@ -107,7 +107,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
)));
}
self.offsets_builder.append(self.key_builder.len() as i32);
self.bitmap_builder.append(is_valid);
self.null_buffer_builder.append(is_valid);
self.len += 1;
Ok(())
}
Expand Down Expand Up @@ -145,7 +145,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
self.offsets_builder.append(self.len);
let map_field = Box::new(Field::new(
self.field_names.entry.as_str(),
Expand All @@ -156,7 +156,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
.len(len)
.add_buffer(offset_buffer)
.add_child_data(struct_array.into_data())
.null_bit_buffer(Some(null_bit_buffer));
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data.build_unchecked() };

Expand Down