From 737df461a0857a47f8570d17276a0bfffa4788c3 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Sat, 29 Oct 2022 15:18:12 +1300 Subject: [PATCH] Specialize interleave for byte arrays (#2864) --- arrow-select/src/interleave.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 9b3de850132..95b694aba73 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -16,11 +16,9 @@ // under the License. use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder}; -use arrow_array::{ - downcast_primitive, make_array, new_empty_array, Array, ArrayRef, ArrowPrimitiveType, - GenericStringArray, OffsetSizeTrait, PrimitiveArray, -}; -use arrow_buffer::{Buffer, MutableBuffer}; +use arrow_array::types::*; +use arrow_array::*; +use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer}; use arrow_data::transform::MutableArrayData; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType}; @@ -85,8 +83,10 @@ pub fn interleave( downcast_primitive! 
{ data_type => (primitive_helper, values, indices, data_type), - DataType::Utf8 => interleave_string::<i32>(values, indices, data_type), - DataType::LargeUtf8 => interleave_string::<i64>(values, indices, data_type), + DataType::Utf8 => interleave_bytes::<Utf8Type>(values, indices), + DataType::LargeUtf8 => interleave_bytes::<LargeUtf8Type>(values, indices), + DataType::Binary => interleave_bytes::<BinaryType>(values, indices), + DataType::LargeBinary => interleave_bytes::<LargeBinaryType>(values, indices), _ => interleave_fallback(values, indices) } } @@ -156,29 +156,28 @@ fn interleave_primitive<T: ArrowPrimitiveType>( Ok(Arc::new(PrimitiveArray::<T>::from(data))) } -fn interleave_string<O: OffsetSizeTrait>( +fn interleave_bytes<T: ByteArrayType>( values: &[&dyn Array], indices: &[(usize, usize)], - data_type: &DataType, ) -> Result<ArrayRef, ArrowError> { - let interleaved = Interleave::<'_, GenericStringArray<O>>::new(values, indices); + let interleaved = Interleave::<'_, GenericByteArray<T>>::new(values, indices); let mut capacity = 0; - let mut offsets = BufferBuilder::<O>::new(indices.len() + 1); - offsets.append(O::from_usize(0).unwrap()); + let mut offsets = BufferBuilder::<T::Offset>::new(indices.len() + 1); + offsets.append(T::Offset::from_usize(0).unwrap()); for (a, b) in indices { let o = interleaved.arrays[*a].value_offsets(); let element_len = o[*b + 1].as_usize() - o[*b].as_usize(); capacity += element_len; - offsets.append(O::from_usize(capacity).expect("overflow")); + offsets.append(T::Offset::from_usize(capacity).expect("overflow")); } let mut values = MutableBuffer::new(capacity); for (a, b) in indices { - values.extend_from_slice(interleaved.arrays[*a].value(*b).as_bytes()); + values.extend_from_slice(interleaved.arrays[*a].value(*b).as_ref()); } - let builder = ArrayDataBuilder::new(data_type.clone()) + let builder = ArrayDataBuilder::new(T::DATA_TYPE) .len(indices.len()) .add_buffer(offsets.finish()) .add_buffer(values.into()) @@ -186,7 +185,7 @@ fn interleave_string<O: OffsetSizeTrait>( .null_count(interleaved.null_count); let data = unsafe { builder.build_unchecked() }; - Ok(Arc::new(GenericStringArray::<O>::from(data))) +
Ok(Arc::new(GenericByteArray::<T>::from(data))) } /// Fallback implementation of interleave using [`MutableArrayData`]