From 66ea66bde115a3479efa879eaf8b437d8a11bfc2 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 28 Oct 2022 08:38:33 +1300 Subject: [PATCH] Specialize interleave string ~2-3x faster (#2944) * Add interleave string benchmark * Specialize interleave strings (#2864) * Review feedback --- arrow-array/src/array/string_array.rs | 10 +-- arrow-select/src/interleave.rs | 107 ++++++++++++++++++++------ arrow/benches/interleave_kernels.rs | 43 +++++------ 3 files changed, 108 insertions(+), 52 deletions(-) diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 7e2ed3667e2..0cf45a44859 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -90,8 +90,8 @@ impl GenericStringArray { /// caller is responsible for ensuring that index is within the array bounds #[inline] pub unsafe fn value_unchecked(&self, i: usize) -> &str { - let end = self.value_offsets().get_unchecked(i + 1); - let start = self.value_offsets().get_unchecked(i); + let end = self.value_offsets().get_unchecked(i + 1).as_usize(); + let start = self.value_offsets().get_unchecked(i).as_usize(); // Soundness // pointer alignment & location is ensured by RawPtrBox @@ -103,10 +103,8 @@ impl GenericStringArray { // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, // both of which should cleanly cast to isize on an architecture that supports // 32/64-bit offsets - let slice = std::slice::from_raw_parts( - self.value_data.as_ptr().offset(start.to_isize().unwrap()), - (*end - *start).to_usize().unwrap(), - ); + let slice = + std::slice::from_raw_parts(self.value_data.as_ptr().add(start), end - start); std::str::from_utf8_unchecked(slice) } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 29f75894dcb..9b3de850132 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -16,11 +16,11 @@ // under the License. use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder}; -use arrow_array::cast::as_primitive_array; use arrow_array::{ downcast_primitive, make_array, new_empty_array, Array, ArrayRef, ArrowPrimitiveType, - PrimitiveArray, + GenericStringArray, OffsetSizeTrait, PrimitiveArray, }; +use arrow_buffer::{Buffer, MutableBuffer}; use arrow_data::transform::MutableArrayData; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType}; @@ -85,51 +85,110 @@ pub fn interleave( downcast_primitive! { data_type => (primitive_helper, values, indices, data_type), + DataType::Utf8 => interleave_string::(values, indices, data_type), + DataType::LargeUtf8 => interleave_string::(values, indices, data_type), _ => interleave_fallback(values, indices) } } +/// Common functionality for interleaving arrays +/// +/// T is the concrete Array type +struct Interleave<'a, T> { + /// The input arrays downcast to T + arrays: Vec<&'a T>, + /// The number of nulls in the interleaved output + null_count: usize, + /// The null buffer of the interleaved output + nulls: Option, +} + +impl<'a, T: Array + 'static> Interleave<'a, T> { + fn new(values: &[&'a dyn Array], indices: &'a [(usize, usize)]) -> Self { + let mut has_nulls = false; + let arrays: Vec<&T> = values + .iter() + .map(|x| { + has_nulls = has_nulls || x.null_count() != 0; + x.as_any().downcast_ref().unwrap() + }) + .collect(); + + let mut null_count = 0; + let nulls = has_nulls.then(|| { + let mut builder = BooleanBufferBuilder::new(indices.len()); + for (a, b) in indices { + let v = arrays[*a].is_valid(*b); + null_count += !v as usize; + builder.append(v) + } + builder.finish() + }); + + Self { + arrays, + null_count, + nulls, + } + } +} + fn interleave_primitive( values: &[&dyn Array], indices: &[(usize, usize)], data_type: &DataType, ) -> Result { - let mut has_nulls = false; - let cast: Vec<_> = values - .iter() - .map(|x| { - has_nulls = has_nulls || x.null_count() != 0; - as_primitive_array::(*x) - }) - .collect(); + let interleaved = Interleave::<'_, PrimitiveArray>::new(values, indices); let mut values = BufferBuilder::::new(indices.len()); for (a, b) in indices { - let v = cast[*a].value(*b); + let v = interleaved.arrays[*a].value(*b); values.append(v) } - let mut null_count = 0; - let nulls = has_nulls.then(|| { - let mut builder = BooleanBufferBuilder::new(indices.len()); - for (a, b) in indices { - let v = cast[*a].is_valid(*b); - null_count += !v as usize; - builder.append(v) - } - builder.finish() - }); - let builder = ArrayDataBuilder::new(data_type.clone()) .len(indices.len()) .add_buffer(values.finish()) - .null_bit_buffer(nulls) - .null_count(null_count); + .null_bit_buffer(interleaved.nulls) + .null_count(interleaved.null_count); let data = unsafe { builder.build_unchecked() }; Ok(Arc::new(PrimitiveArray::::from(data))) } +fn interleave_string( + values: &[&dyn Array], + indices: &[(usize, usize)], + data_type: &DataType, +) -> Result { + let interleaved = Interleave::<'_, GenericStringArray>::new(values, indices); + + let mut capacity = 0; + let mut offsets = BufferBuilder::::new(indices.len() + 1); + offsets.append(O::from_usize(0).unwrap()); + for (a, b) in indices { + let o = interleaved.arrays[*a].value_offsets(); + let element_len = o[*b + 1].as_usize() - o[*b].as_usize(); + capacity += element_len; + offsets.append(O::from_usize(capacity).expect("overflow")); + } + + let mut values = MutableBuffer::new(capacity); + for (a, b) in indices { + values.extend_from_slice(interleaved.arrays[*a].value(*b).as_bytes()); + } + + let builder = ArrayDataBuilder::new(data_type.clone()) + .len(indices.len()) + .add_buffer(offsets.finish()) + .add_buffer(values.into()) + .null_bit_buffer(interleaved.nulls) + .null_count(interleaved.null_count); + + let data = unsafe { builder.build_unchecked() }; + Ok(Arc::new(GenericStringArray::::from(data))) +} + /// Fallback implementation of interleave using [`MutableArrayData`] fn interleave_fallback( values: &[&dyn Array], diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs index 6cf56eb9895..0c3eec60c0c 100644 --- a/arrow/benches/interleave_kernels.rs +++ b/arrow/benches/interleave_kernels.rs @@ -60,31 +60,30 @@ fn do_bench( } fn add_benchmark(c: &mut Criterion) { - let a = create_primitive_array::(1024, 0.); + let i32 = create_primitive_array::(1024, 0.); + let i32_opt = create_primitive_array::(1024, 0.5); + let string = create_string_array_with_len::(1024, 0., 20); + let string_opt = create_string_array_with_len::(1024, 0.5, 20); - do_bench(c, "i32(0.0)", 100, &a, &[0..100, 100..230, 450..1000]); - do_bench(c, "i32(0.0)", 400, &a, &[0..100, 100..230, 450..1000]); - do_bench(c, "i32(0.0)", 1024, &a, &[0..100, 100..230, 450..1000]); - do_bench( - c, - "i32(0.0)", - 1024, - &a, - &[0..100, 100..230, 450..1000, 0..1000], - ); + let cases: &[(&str, &dyn Array)] = &[ + ("i32(0.0)", &i32), + ("i32(0.5)", &i32_opt), + ("str(20, 0.0)", &string), + ("str(20, 0.5)", &string_opt), + ]; - let a = create_primitive_array::(1024, 0.5); + for (prefix, base) in cases { + let slices: &[(usize, &[_])] = &[ + (100, &[0..100, 100..230, 450..1000]), + (400, &[0..100, 100..230, 450..1000]), + (1024, &[0..100, 100..230, 450..1000]), + (1024, &[0..100, 100..230, 450..1000, 0..1000]), + ]; - do_bench(c, "i32(0.5)", 100, &a, &[0..100, 100..230, 450..1000]); - do_bench(c, "i32(0.5)", 400, &a, &[0..100, 100..230, 450..1000]); - do_bench(c, "i32(0.5)", 1024, &a, &[0..100, 100..230, 450..1000]); - do_bench( - c, - "i32(0.5)", - 1024, - &a, - &[0..100, 100..230, 450..1000, 0..1000], - ); + for (len, slice) in slices { + do_bench(c, prefix, *len, *base, slice); + } + } } criterion_group!(benches, add_benchmark);