From 47c0d19d12183b048bf7860b8a09d17e93b7f61a Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 8 Sep 2022 13:14:20 +0100 Subject: [PATCH] Simplify DictionaryBuilder constructors (#2684) (#2054) --- arrow/benches/string_dictionary_builder.rs | 8 +-- arrow/src/array/array_dictionary.rs | 22 +++---- .../builder/primitive_dictionary_builder.rs | 45 +++++++++----- .../builder/string_dictionary_builder.rs | 54 ++++++++++++----- arrow/src/array/equal/mod.rs | 8 +-- arrow/src/array/transform/mod.rs | 8 +-- arrow/src/compute/kernels/arithmetic.rs | 58 +++++-------------- arrow/src/compute/kernels/arity.rs | 12 +--- arrow/src/compute/kernels/cast.rs | 26 +++------ arrow/src/compute/kernels/comparison.rs | 54 ++++++----------- arrow/src/compute/kernels/take.rs | 5 +- arrow/src/json/reader.rs | 4 +- arrow/src/util/pretty.rs | 12 ++-- parquet/src/arrow/arrow_writer/mod.rs | 4 +- 14 files changed, 140 insertions(+), 180 deletions(-) diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs index 1a3b9591720..411df3d69b5 100644 --- a/arrow/benches/string_dictionary_builder.rs +++ b/arrow/benches/string_dictionary_builder.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Int32Builder, StringBuilder, StringDictionaryBuilder}; +use arrow::array::StringDictionaryBuilder; +use arrow::datatypes::Int32Type; use criterion::{criterion_group, criterion_main, Criterion}; use rand::{thread_rng, Rng}; @@ -43,12 +44,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { let strings = build_strings(dict_size, total_size, key_len); b.iter(|| { - let keys = Int32Builder::with_capacity(strings.len()); - let values = StringBuilder::with_capacity( + let mut builder = StringDictionaryBuilder::::with_capacity( + strings.len(), key_len + 1, (key_len + 1) * dict_size, ); - let mut builder = StringDictionaryBuilder::new(keys, values); for val in &strings { builder.append(val).unwrap(); diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 79f2969df68..acdb427a22a 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -22,8 +22,8 @@ use std::iter::IntoIterator; use std::{convert::From, iter::FromIterator}; use super::{ - make_array, Array, ArrayData, ArrayRef, PrimitiveArray, PrimitiveBuilder, - StringArray, StringBuilder, StringDictionaryBuilder, + make_array, Array, ArrayData, ArrayRef, PrimitiveArray, StringArray, + StringDictionaryBuilder, }; use crate::datatypes::{ ArrowDictionaryKeyType, ArrowNativeType, ArrowPrimitiveType, DataType, @@ -329,9 +329,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator> for Dictionary fn from_iter>>(iter: I) -> Self { let it = iter.into_iter(); let (lower, _) = it.size_hint(); - let key_builder = PrimitiveBuilder::::with_capacity(lower); - let value_builder = StringBuilder::with_capacity(256, 1024); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024); it.for_each(|i| { if let Some(i) = i { // Note: impl ... for Result> fails with @@ -367,9 +365,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray fn from_iter>(iter: I) -> Self { let it = iter.into_iter(); let (lower, _) = it.size_hint(); - let key_builder = PrimitiveBuilder::::with_capacity(lower); - let value_builder = StringBuilder::with_capacity(256, 1024); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024); it.for_each(|i| { builder .append(i) @@ -589,9 +585,8 @@ mod tests { #[test] fn test_dictionary_array_fmt_debug() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(12345678).unwrap(); builder.append_null(); builder.append(22345678).unwrap(); @@ -601,9 +596,8 @@ mod tests { format!("{:?}", array) ); - let key_builder = PrimitiveBuilder::::with_capacity(20); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(20, 2); for _ in 0..20 { builder.append(1).unwrap(); } diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs b/arrow/src/array/builder/primitive_dictionary_builder.rs index 71223c68828..ffd782480d5 100644 --- a/arrow/src/array/builder/primitive_dictionary_builder.rs +++ b/arrow/src/array/builder/primitive_dictionary_builder.rs @@ -60,9 +60,7 @@ impl Eq for Value {} /// }; /// use arrow::datatypes::{UInt8Type, UInt32Type}; /// -/// let key_builder = PrimitiveBuilder::::with_capacity(3); -/// let value_builder = PrimitiveBuilder::::with_capacity(2); -/// let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); +/// let mut builder = PrimitiveDictionaryBuilder::::new(); /// builder.append(12345678).unwrap(); /// builder.append_null(); /// builder.append(22345678).unwrap(); @@ -95,22 +93,41 @@ where map: HashMap, K::Native>, } +impl Default for PrimitiveDictionaryBuilder +where + K: ArrowPrimitiveType, + V: ArrowPrimitiveType, +{ + fn default() -> Self { + Self::new() + } +} + impl PrimitiveDictionaryBuilder where K: ArrowPrimitiveType, V: ArrowPrimitiveType, { /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder. - pub fn new( - keys_builder: PrimitiveBuilder, - values_builder: PrimitiveBuilder, - ) -> Self { + pub fn new() -> Self { Self { - keys_builder, - values_builder, + keys_builder: PrimitiveBuilder::new(), + values_builder: PrimitiveBuilder::new(), map: HashMap::new(), } } + + /// Creates a new `PrimitiveDictionaryBuilder` with the provided capacities + /// + /// `keys_capacity`: the number of keys, i.e. length of array to build + /// `values_capacity`: the number of distinct dictionary values, i.e. size of dictionary + pub fn with_capacity(keys_capacity: usize, values_capacity: usize) -> Self { + Self { + keys_builder: PrimitiveBuilder::with_capacity(keys_capacity), + values_builder: PrimitiveBuilder::with_capacity(values_capacity), + map: HashMap::with_capacity(values_capacity), + } + } } impl ArrayBuilder for PrimitiveDictionaryBuilder @@ -211,9 +228,8 @@ mod tests { #[test] fn test_primitive_dictionary_builder() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(12345678).unwrap(); builder.append_null(); builder.append(22345678).unwrap(); @@ -239,9 +255,8 @@ mod tests { #[test] #[should_panic(expected = "DictionaryKeyOverflowError")] fn test_primitive_dictionary_overflow() { - let key_builder = PrimitiveBuilder::::with_capacity(257); - let value_builder = PrimitiveBuilder::::with_capacity(257); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(257, 257); // 256 unique keys. for i in 0..256 { builder.append(i + 1000).unwrap(); diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs index 6ad4e907552..6e0e4638ff6 100644 --- a/arrow/src/array/builder/string_dictionary_builder.rs +++ b/arrow/src/array/builder/string_dictionary_builder.rs @@ -42,9 +42,7 @@ use std::sync::Arc; /// // Create a dictionary array indexed by bytes whose values are Strings. /// // It can thus hold up to 256 distinct string values. /// -/// let key_builder = PrimitiveBuilder::::with_capacity(100); -/// let value_builder = StringBuilder::new(); -/// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); +/// let mut builder = StringDictionaryBuilder::::new(); /// /// // The builder builds the dictionary value by value /// builder.append("abc").unwrap(); @@ -84,12 +82,23 @@ where values_builder: StringBuilder, } +impl Default for StringDictionaryBuilder +where + K: ArrowDictionaryKeyType, +{ + fn default() -> Self { + Self::new() + } +} + impl StringDictionaryBuilder where K: ArrowDictionaryKeyType, { - /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder. - pub fn new(keys_builder: PrimitiveBuilder, values_builder: StringBuilder) -> Self { + /// Creates a new `StringDictionaryBuilder` + pub fn new() -> Self { + let keys_builder = PrimitiveBuilder::new(); + let values_builder = StringBuilder::new(); Self { state: Default::default(), dedup: HashMap::with_capacity_and_hasher(keys_builder.capacity(), ()), @@ -98,6 +107,24 @@ where } } + /// Creates a new `StringDictionaryBuilder` with the provided capacities + /// + /// `keys_capacity`: the number of keys, i.e. length of array to build + /// `value_capacity`: the number of distinct dictionary values, i.e. size of dictionary + /// `string_capacity`: the total number of bytes of all distinct strings in the dictionary + pub fn with_capacity( + keys_capacity: usize, + value_capacity: usize, + string_capacity: usize, + ) -> Self { + Self { + state: Default::default(), + dedup: Default::default(), + keys_builder: PrimitiveBuilder::with_capacity(keys_capacity), + values_builder: StringBuilder::with_capacity(value_capacity, string_capacity), + } + } + /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary /// which is initialized with the given values. /// The indices of those dictionary values are used as keys. @@ -111,7 +138,7 @@ where /// /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); /// - /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::::with_capacity(3), &dictionary_values).unwrap(); + /// let mut builder = StringDictionaryBuilder::new_with_dictionary(3, &dictionary_values).unwrap(); /// builder.append("def").unwrap(); /// builder.append_null(); /// builder.append("abc").unwrap(); @@ -123,7 +150,7 @@ where /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)])); /// ``` pub fn new_with_dictionary( - keys_builder: PrimitiveBuilder, + keys_capacity: usize, dictionary_values: &StringArray, ) -> Result { let state = ahash::RandomState::default(); @@ -162,7 +189,7 @@ where Ok(Self { state, dedup, - keys_builder, + keys_builder: PrimitiveBuilder::with_capacity(keys_capacity), values_builder, }) } @@ -290,9 +317,7 @@ mod tests { #[test] fn test_string_dictionary_builder() { - let key_builder = PrimitiveBuilder::::with_capacity(5); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -317,10 +342,8 @@ mod tests { fn test_string_dictionary_builder_with_existing_dictionary() { let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); - let key_builder = PrimitiveBuilder::::with_capacity(6); let mut builder = - StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) - .unwrap(); + StringDictionaryBuilder::new_with_dictionary(6, &dictionary).unwrap(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -349,9 +372,8 @@ mod tests { let dictionary: Vec> = vec![None]; let dictionary = StringArray::from(dictionary); - let key_builder = PrimitiveBuilder::::with_capacity(4); let mut builder = - StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) + StringDictionaryBuilder::::new_with_dictionary(4, &dictionary) .unwrap(); builder.append("abc").unwrap(); builder.append_null(); diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index 34df0bda0b1..52be64a3fa7 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -265,8 +265,8 @@ mod tests { use crate::array::{ array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BooleanArray, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, Int32Builder, - ListBuilder, NullArray, PrimitiveBuilder, StringArray, StringDictionaryBuilder, - StructArray, UnionBuilder, + ListBuilder, NullArray, StringArray, StringDictionaryBuilder, StructArray, + UnionBuilder, }; use crate::array::{GenericStringArray, Int32Array}; use crate::buffer::Buffer; @@ -1245,8 +1245,8 @@ mod tests { fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); - let mut builder = StringDictionaryBuilder::new_with_dictionary( - PrimitiveBuilder::::with_capacity(3), + let mut builder = StringDictionaryBuilder::::new_with_dictionary( + keys.len(), &values, ) .unwrap(); diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index 48859922a26..29d4434aafa 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -675,8 +675,8 @@ mod tests { array::{ Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray, FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array, - Int64Builder, ListBuilder, MapBuilder, NullArray, PrimitiveBuilder, - StringArray, StringDictionaryBuilder, StructArray, UInt8Array, + Int64Builder, ListBuilder, MapBuilder, NullArray, StringArray, + StringDictionaryBuilder, StructArray, UInt8Array, }, buffer::Buffer, datatypes::Field, @@ -963,8 +963,8 @@ mod tests { fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); - let mut builder = StringDictionaryBuilder::new_with_dictionary( - PrimitiveBuilder::::with_capacity(3), + let mut builder = StringDictionaryBuilder::::new_with_dictionary( + keys.len(), &values, ) .unwrap(); diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index eab4d2136aa..9bf4b00c313 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1367,9 +1367,7 @@ mod tests { #[test] fn test_primitive_array_add_dyn_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -1377,9 +1375,7 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(6).unwrap(); builder.append(7).unwrap(); builder.append(8).unwrap(); @@ -1408,9 +1404,7 @@ mod tests { assert!(c.is_null(3)); assert_eq!(10, c.value(4)); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append_null(); builder.append(7).unwrap(); @@ -1451,9 +1445,7 @@ mod tests { #[test] fn test_primitive_array_subtract_dyn_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(15).unwrap(); builder.append(8).unwrap(); builder.append(7).unwrap(); @@ -1461,9 +1453,7 @@ mod tests { builder.append(20).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(6).unwrap(); builder.append(7).unwrap(); builder.append(8).unwrap(); @@ -1492,9 +1482,7 @@ mod tests { assert!(c.is_null(3)); assert_eq!(8, c.value(4)); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append_null(); builder.append(7).unwrap(); @@ -1535,9 +1523,7 @@ mod tests { #[test] fn test_primitive_array_multiply_dyn_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -1545,9 +1531,7 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(6).unwrap(); builder.append(7).unwrap(); builder.append(8).unwrap(); @@ -1579,9 +1563,7 @@ mod tests { #[test] fn test_primitive_array_divide_dyn_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(15).unwrap(); builder.append(6).unwrap(); builder.append(1).unwrap(); @@ -1589,9 +1571,7 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append(3).unwrap(); builder.append(1).unwrap(); @@ -1620,9 +1600,7 @@ mod tests { assert!(c.is_null(3)); assert_eq!(18, c.value(4)); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append_null(); builder.append(7).unwrap(); @@ -1806,9 +1784,7 @@ mod tests { assert!(c.is_null(3)); assert_eq!(4, c.value(4)); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append_null(); builder.append(7).unwrap(); @@ -2082,15 +2058,13 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_divide_dyn_by_zero_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(1); - let value_builder = PrimitiveBuilder::::with_capacity(1); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); builder.append(15).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(1); - let value_builder = PrimitiveBuilder::::with_capacity(1); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); builder.append(0).unwrap(); let b = builder.finish(); diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index 89151c28634..1251baf52fd 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -122,9 +122,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::array::{ - as_primitive_array, Float64Array, PrimitiveBuilder, PrimitiveDictionaryBuilder, - }; + use crate::array::{as_primitive_array, Float64Array, PrimitiveDictionaryBuilder}; use crate::datatypes::{Float64Type, Int32Type, Int8Type}; #[test] @@ -149,9 +147,7 @@ mod tests { #[test] fn test_unary_dict_and_unary_dyn() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(5).unwrap(); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -160,9 +156,7 @@ mod tests { builder.append(9).unwrap(); let dictionary_array = builder.finish(); - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(6).unwrap(); builder.append(7).unwrap(); builder.append(8).unwrap(); diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 71a4fcc955b..7d67bffdf4e 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -2563,9 +2563,8 @@ where .downcast_ref::>() .unwrap(); - let keys_builder = PrimitiveBuilder::::with_capacity(values.len()); - let values_builder = PrimitiveBuilder::::with_capacity(values.len()); - let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); + let mut b = + PrimitiveDictionaryBuilder::::with_capacity(values.len(), values.len()); // copy each element one at a time for i in 0..values.len() { @@ -2589,10 +2588,7 @@ where { let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?; let values = cast_values.as_any().downcast_ref::().unwrap(); - - let keys_builder = PrimitiveBuilder::::with_capacity(values.len()); - let values_builder = StringBuilder::with_capacity(1024, values.len()); - let mut b = StringDictionaryBuilder::new(keys_builder, values_builder); + let mut b = StringDictionaryBuilder::::with_capacity(values.len(), 1024, 1024); // copy each element one at a time for i in 0..values.len() { @@ -5001,9 +4997,7 @@ mod tests { // FROM a dictionary with of Utf8 values use DataType::*; - let keys_builder = PrimitiveBuilder::::new(); - let values_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("one").unwrap(); builder.append_null(); builder.append("three").unwrap(); @@ -5062,9 +5056,7 @@ mod tests { // that are out of bounds for a particular other kind of // index. - let keys_builder = PrimitiveBuilder::::new(); - let values_builder = PrimitiveBuilder::::new(); - let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); // add 200 distinct values (which can be stored by a // dictionary indexed by int32, but not a dictionary indexed @@ -5093,9 +5085,7 @@ mod tests { // Same test as test_cast_dict_to_dict_bad_index_value but use // string values (and encode the expected behavior here); - let keys_builder = PrimitiveBuilder::::new(); - let values_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); + let mut builder = StringDictionaryBuilder::::new(); // add 200 distinct values (which can be stored by a // dictionary indexed by int32, but not a dictionary indexed @@ -5124,9 +5114,7 @@ mod tests { // FROM a dictionary with of INT32 values use DataType::*; - let keys_builder = PrimitiveBuilder::::new(); - let values_builder = PrimitiveBuilder::::new(); - let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(1).unwrap(); builder.append_null(); builder.append(3).unwrap(); diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 978a2d9f4d3..5a79c2e82df 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -4884,9 +4884,8 @@ mod tests { #[test] fn test_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(123).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -4928,9 +4927,8 @@ mod tests { #[test] fn test_lt_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(123).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -4971,9 +4969,8 @@ mod tests { } #[test] fn test_lt_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = PrimitiveBuilder::::new(); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(123).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -5015,9 +5012,8 @@ mod tests { #[test] fn test_gt_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(123).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -5059,9 +5055,8 @@ mod tests { #[test] fn test_gt_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = PrimitiveBuilder::::new(); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(22).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -5103,9 +5098,8 @@ mod tests { #[test] fn test_neq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = PrimitiveBuilder::::new(); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(22).unwrap(); builder.append_null(); builder.append(23).unwrap(); @@ -5247,9 +5241,7 @@ mod tests { #[test] fn test_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -5275,9 +5267,7 @@ mod tests { } #[test] fn test_lt_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -5304,9 +5294,7 @@ mod tests { } #[test] fn test_lt_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -5333,9 +5321,7 @@ mod tests { } #[test] fn test_gt_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -5363,9 +5349,7 @@ mod tests { #[test] fn test_gt_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); @@ -5392,9 +5376,7 @@ mod tests { } #[test] fn test_neq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(); - let value_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("abc").unwrap(); builder.append_null(); builder.append("def").unwrap(); diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 19eb1b17ca2..8f1aab27b53 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -1960,10 +1960,7 @@ mod tests { #[test] fn test_take_dict() { - let keys_builder = Int16Builder::new(); - let values_builder = StringBuilder::new(); - - let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder); + let mut dict_builder = StringDictionaryBuilder::::new(); dict_builder.append("foo").unwrap(); dict_builder.append("bar").unwrap(); diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs index fb8f6cfab47..c32e5ca1848 100644 --- a/arrow/src/json/reader.rs +++ b/arrow/src/json/reader.rs @@ -914,9 +914,7 @@ impl Decoder { where T: ArrowPrimitiveType + ArrowDictionaryKeyType, { - let key_builder = PrimitiveBuilder::::with_capacity(row_len); - let values_builder = StringBuilder::with_capacity(row_len, row_len * 5); - StringDictionaryBuilder::new(key_builder, values_builder) + StringDictionaryBuilder::with_capacity(row_len, row_len, row_len * 5) } #[inline(always)] diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs index b0013619b50..f819e389f96 100644 --- a/arrow/src/util/pretty.rs +++ b/arrow/src/util/pretty.rs @@ -108,10 +108,10 @@ mod tests { use crate::{ array::{ self, new_null_array, Array, Date32Array, Date64Array, - FixedSizeBinaryBuilder, Float16Array, Int32Array, PrimitiveBuilder, - StringArray, StringBuilder, StringDictionaryBuilder, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + FixedSizeBinaryBuilder, Float16Array, Int32Array, StringArray, + StringDictionaryBuilder, StructArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UnionArray, UnionBuilder, }, buffer::Buffer, @@ -241,9 +241,7 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); - let keys_builder = PrimitiveBuilder::::with_capacity(10); - let values_builder = StringBuilder::new(); - let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); + let mut builder = StringDictionaryBuilder::::new(); builder.append("one")?; builder.append_null(); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 6f9d5b3aff8..1fef695dc47 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1653,9 +1653,7 @@ mod tests { )])); // create some data - let key_builder = PrimitiveBuilder::::with_capacity(3); - let value_builder = PrimitiveBuilder::::with_capacity(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(12345678).unwrap(); builder.append_null(); builder.append(22345678).unwrap();