Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove PrimitiveBuilder::finish_dict (#1978) #1980

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions arrow/src/array/array_primitive.rs
Expand Up @@ -166,6 +166,17 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
) -> impl Iterator<Item = Option<T::Native>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}

/// Returns the backing [`ArrayData`] of this [`PrimitiveArray`]
pub fn into_data(self) -> ArrayData {
self.into()
}
}

impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData {
fn from(a: PrimitiveArray<T>) -> Self {
a.data
}
}

impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
Expand Down
11 changes: 11 additions & 0 deletions arrow/src/array/array_string.rs
Expand Up @@ -195,6 +195,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
) -> impl Iterator<Item = Option<&str>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}

/// Returns the backing [`ArrayData`] of this [`GenericStringArray`]
pub fn into_data(self) -> ArrayData {
self.into()
}
}

impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<&'a Option<Ptr>>
Expand Down Expand Up @@ -336,6 +341,12 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for GenericStringArray<Offse
}
}

impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>> for ArrayData {
fn from(a: GenericStringArray<OffsetSize>) -> Self {
a.data
}
}

/// An array where each element is a variable-sized sequence of bytes representing a string
/// whose maximum length (in bytes) is represented by a i32.
///
Expand Down
26 changes: 0 additions & 26 deletions arrow/src/array/builder/primitive_builder.rs
Expand Up @@ -20,10 +20,8 @@ use std::sync::Arc;

use crate::array::ArrayData;
use crate::array::ArrayRef;
use crate::array::DictionaryArray;
use crate::array::PrimitiveArray;
use crate::datatypes::ArrowPrimitiveType;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
Expand Down Expand Up @@ -197,30 +195,6 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
PrimitiveArray::<T>::from(array_data)
}

/// Builds the `DictionaryArray` and reset this builder.
pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
Comment on lines -200 to -201
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like an API should not public.

let len = self.len();
let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
let null_count = len
- null_bit_buffer
.as_ref()
.map(|b| b.count_set_bits())
.unwrap_or(len);
let data_type = DataType::Dictionary(
Box::new(T::DATA_TYPE),
Box::new(values.data_type().clone()),
);
let mut builder = ArrayData::builder(data_type)
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer);
}
builder = builder.add_child_data(values.data().clone());
let array_data = unsafe { builder.build_unchecked() };
DictionaryArray::<T>::from(array_data)
}

fn materialize_bitmap_builder(&mut self) {
if self.bitmap_builder.is_some() {
return;
Expand Down
22 changes: 15 additions & 7 deletions arrow/src/array/builder/primitive_dictionary_builder.rs
Expand Up @@ -19,11 +19,8 @@ use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;

use crate::array::ArrayRef;
use crate::array::ArrowPrimitiveType;
use crate::array::DictionaryArray;
use crate::datatypes::ArrowNativeType;
use crate::datatypes::ToByteSlice;
use crate::array::{ArrayRef, ArrowPrimitiveType, DictionaryArray};
use crate::datatypes::{ArrowNativeType, DataType, ToByteSlice};
use crate::error::{ArrowError, Result};

use super::ArrayBuilder;
Expand Down Expand Up @@ -164,8 +161,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.map.clear();
let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
self.keys_builder.finish_dict(value_ref)
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));

let builder = keys
.into_data()
.into_builder()
.data_type(data_type)
.child_data(vec![values.into_data()]);

DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}

Expand Down
17 changes: 14 additions & 3 deletions arrow/src/array/builder/string_dictionary_builder.rs
Expand Up @@ -19,7 +19,7 @@ use super::PrimitiveBuilder;
use crate::array::{
Array, ArrayBuilder, ArrayRef, DictionaryArray, StringArray, StringBuilder,
};
use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType};
use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType};
use crate::error::{ArrowError, Result};
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
Expand Down Expand Up @@ -250,8 +250,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.dedup.clear();
let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
self.keys_builder.finish_dict(value_ref)
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));

let builder = keys
.into_data()
.into_builder()
.data_type(data_type)
.child_data(vec![values.into_data()]);

DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}

Expand Down
25 changes: 25 additions & 0 deletions arrow/src/array/data.rs
Expand Up @@ -1217,6 +1217,11 @@ impl ArrayData {
.zip(other.child_data.iter())
.all(|(a, b)| a.ptr_eq(b))
}

/// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
pub fn into_builder(self) -> ArrayDataBuilder {
self.into()
}
}

/// Return the expected [`DataTypeLayout`] Arrays of this data
Expand Down Expand Up @@ -1408,6 +1413,10 @@ impl ArrayDataBuilder {
}
}

pub fn data_type(self, data_type: DataType) -> Self {
Self { data_type, ..self }
}

#[inline]
#[allow(clippy::len_without_is_empty)]
pub const fn len(mut self, n: usize) -> Self {
Expand Down Expand Up @@ -1482,6 +1491,22 @@ impl ArrayDataBuilder {
}
}

impl From<ArrayData> for ArrayDataBuilder {
fn from(d: ArrayData) -> Self {
// TODO: Store Bitmap on ArrayData (#1799)
let null_bit_buffer = d.null_buffer().cloned();
Self {
null_bit_buffer,
data_type: d.data_type,
len: d.len,
null_count: Some(d.null_count),
offset: d.offset,
buffers: d.buffers,
child_data: d.child_data,
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down