From 76da62443aeaf0b5085f56be511eabe067e25597 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Mon, 3 Oct 2022 16:00:13 +0100
Subject: [PATCH] Add string_dictionary benches for row format (#2677) (#2816)

* Add string_dictionary benches for row format (#2677)

* Fix copy-pasta
---
 arrow/benches/row_format.rs | 69 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs
index 2802aa6ece0..ec872c12706 100644
--- a/arrow/benches/row_format.rs
+++ b/arrow/benches/row_format.rs
@@ -22,7 +22,10 @@ extern crate core;
 use arrow::array::ArrayRef;
 use arrow::datatypes::{DataType, Int64Type, UInt64Type};
 use arrow::row::{RowConverter, SortField};
-use arrow::util::bench_util::{create_primitive_array, create_string_array_with_len};
+use arrow::util::bench_util::{
+    create_primitive_array, create_string_array_with_len, create_string_dict_array,
+};
+use arrow_array::types::Int32Type;
 use criterion::{black_box, Criterion};
 use std::sync::Arc;

@@ -85,6 +88,46 @@ fn row_bench(c: &mut Criterion) {
         });
     });

+    let cols =
+        vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 10)) as ArrayRef];
+
+    c.bench_function("row_batch 4096 string_dictionary(10, 0)", |b| {
+        b.iter(|| {
+            let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
+            black_box(converter.convert_columns(&cols))
+        });
+    });
+
+    let cols =
+        vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef];
+
+    c.bench_function("row_batch 4096 string_dictionary(30, 0)", |b| {
+        b.iter(|| {
+            let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
+            black_box(converter.convert_columns(&cols))
+        });
+    });
+
+    let cols =
+        vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef];
+
+    c.bench_function("row_batch 4096 string_dictionary(100, 0)", |b| {
+        b.iter(|| {
+            let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
+            black_box(converter.convert_columns(&cols))
+        });
+    });
+
+    let cols =
+        vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 100)) as ArrayRef];
+
+    c.bench_function("row_batch 4096 string_dictionary(100, 0.5)", |b| {
+        b.iter(|| {
+            let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
+            black_box(converter.convert_columns(&cols))
+        });
+    });
+
     let cols = [
         Arc::new(create_string_array_with_len::<i32>(4096, 0.5, 20)) as ArrayRef,
         Arc::new(create_string_array_with_len::<i32>(4096, 0., 30)) as ArrayRef,
@@ -108,6 +151,30 @@ fn row_bench(c: &mut Criterion) {
             });
         },
     );
+
+    let cols = [
+        Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 20)) as ArrayRef,
+        Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef,
+        Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef,
+        Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
+    ];
+
+    let fields = [
+        SortField::new(DataType::Utf8),
+        SortField::new(DataType::Utf8),
+        SortField::new(DataType::Utf8),
+        SortField::new(DataType::Int64),
+    ];
+
+    c.bench_function(
+        "row_batch 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)",
+        |b| {
+            b.iter(|| {
+                let mut converter = RowConverter::new(fields.to_vec());
+                black_box(converter.convert_columns(&cols))
+            });
+        },
+    );
 }

 criterion_group!(benches, row_bench);