diff --git a/arrow/src/array/builder/union_builder.rs b/arrow/src/array/builder/union_builder.rs index f263fecf6f2..c0ae76853dd 100644 --- a/arrow/src/array/builder/union_builder.rs +++ b/arrow/src/array/builder/union_builder.rs @@ -73,13 +73,17 @@ impl FieldDataValues for BufferBuilder { impl FieldData { /// Creates a new `FieldData`. - fn new(type_id: i8, data_type: DataType) -> Self { + fn new( + type_id: i8, + data_type: DataType, + capacity: usize, + ) -> Self { Self { type_id, data_type, slots: 0, - values_buffer: Box::new(BufferBuilder::::new(1)), - null_buffer_builder: NullBufferBuilder::new(1), + values_buffer: Box::new(BufferBuilder::::new(capacity)), + null_buffer_builder: NullBufferBuilder::new(capacity), } } @@ -155,6 +159,7 @@ pub struct UnionBuilder { type_id_builder: Int8BufferBuilder, /// Builder to keep track of offsets (`None` for sparse unions) value_offset_builder: Option, + initial_capacity: usize, } impl UnionBuilder { @@ -175,6 +180,7 @@ impl UnionBuilder { fields: HashMap::default(), type_id_builder: Int8BufferBuilder::new(capacity), value_offset_builder: Some(Int32BufferBuilder::new(capacity)), + initial_capacity: capacity, } } @@ -185,6 +191,7 @@ impl UnionBuilder { fields: HashMap::default(), type_id_builder: Int8BufferBuilder::new(capacity), value_offset_builder: None, + initial_capacity: capacity, } } @@ -225,10 +232,18 @@ impl UnionBuilder { data } None => match self.value_offset_builder { - Some(_) => FieldData::new::(self.fields.len() as i8, T::DATA_TYPE), + Some(_) => FieldData::new::( + self.fields.len() as i8, + T::DATA_TYPE, + self.initial_capacity, + ), + // In the case of a sparse union, we should pass the maximum of the currently length and the capacity. None => { - let mut fd = - FieldData::new::(self.fields.len() as i8, T::DATA_TYPE); + let mut fd = FieldData::new::( + self.fields.len() as i8, + T::DATA_TYPE, + self.len.max(self.initial_capacity), + ); for _ in 0..self.len { fd.append_null(); }