Skip to content

Commit

Permalink
struct UnionBuilder will create child buffers with capacity (#2560)
Browse files Browse the repository at this point in the history
* UnionBuilder Create Children With Capacity

* Changed to initial capacity

* Using usize max function

* added commas per cargo fmt

* Format

Co-authored-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
  • Loading branch information
kastolars and tustvold committed Aug 24, 2022
1 parent d36f072 commit 87290ff
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions arrow/src/array/builder/union_builder.rs
Expand Up @@ -73,13 +73,17 @@ impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {

impl FieldData {
/// Creates a new `FieldData`.
fn new<T: ArrowPrimitiveType>(type_id: i8, data_type: DataType) -> Self {
fn new<T: ArrowPrimitiveType>(
type_id: i8,
data_type: DataType,
capacity: usize,
) -> Self {
Self {
type_id,
data_type,
slots: 0,
values_buffer: Box::new(BufferBuilder::<T::Native>::new(1)),
null_buffer_builder: NullBufferBuilder::new(1),
values_buffer: Box::new(BufferBuilder::<T::Native>::new(capacity)),
null_buffer_builder: NullBufferBuilder::new(capacity),
}
}

Expand Down Expand Up @@ -155,6 +159,7 @@ pub struct UnionBuilder {
type_id_builder: Int8BufferBuilder,
/// Builder to keep track of offsets (`None` for sparse unions)
value_offset_builder: Option<Int32BufferBuilder>,
initial_capacity: usize,
}

impl UnionBuilder {
Expand All @@ -175,6 +180,7 @@ impl UnionBuilder {
fields: HashMap::default(),
type_id_builder: Int8BufferBuilder::new(capacity),
value_offset_builder: Some(Int32BufferBuilder::new(capacity)),
initial_capacity: capacity,
}
}

Expand All @@ -185,6 +191,7 @@ impl UnionBuilder {
fields: HashMap::default(),
type_id_builder: Int8BufferBuilder::new(capacity),
value_offset_builder: None,
initial_capacity: capacity,
}
}

Expand Down Expand Up @@ -225,10 +232,18 @@ impl UnionBuilder {
data
}
None => match self.value_offset_builder {
Some(_) => FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE),
Some(_) => FieldData::new::<T>(
self.fields.len() as i8,
T::DATA_TYPE,
self.initial_capacity,
),
// In the case of a sparse union, we should pass the maximum of the current length and the initial capacity.
None => {
let mut fd =
FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE);
let mut fd = FieldData::new::<T>(
self.fields.len() as i8,
T::DATA_TYPE,
self.len.max(self.initial_capacity),
);
for _ in 0..self.len {
fd.append_null();
}
Expand Down

0 comments on commit 87290ff

Please sign in to comment.