Skip to content

Commit

Permalink
Convert rows to arrays (#2677) (#2826)
Browse files Browse the repository at this point in the history
* Convert rows to arrays (#2677)

* Review feedback

* Clippy
  • Loading branch information
tustvold committed Oct 6, 2022
1 parent a0a263f commit f8c4037
Show file tree
Hide file tree
Showing 6 changed files with 1,158 additions and 230 deletions.
140 changes: 37 additions & 103 deletions arrow/benches/row_format.rs
Expand Up @@ -20,161 +20,95 @@ extern crate criterion;
extern crate core;

use arrow::array::ArrayRef;
use arrow::datatypes::{DataType, Int64Type, UInt64Type};
use arrow::datatypes::{Int64Type, UInt64Type};
use arrow::row::{RowConverter, SortField};
use arrow::util::bench_util::{
create_primitive_array, create_string_array_with_len, create_string_dict_array,
};
use arrow_array::types::Int32Type;
use arrow_array::Array;
use criterion::{black_box, Criterion};
use std::sync::Arc;

fn row_bench(c: &mut Criterion) {
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
/// Registers a `convert_columns` and a `convert_rows` benchmark for `cols`,
/// with `name` appended to each benchmark label.
///
/// NOTE(review): this listing is a rendered diff without +/- markers, so the
/// pre-change `row_batch ...` benchmark lines appear interleaved with the new body.
fn do_bench(c: &mut Criterion, name: &str, cols: Vec<ArrayRef>) {
// One `SortField` per input column, built from that column's own data type.
let fields: Vec<_> = cols
.iter()
.map(|x| SortField::new(x.data_type().clone()))
.collect();

c.bench_function("row_batch 4096 u64(0)", |b| {
// Times `convert_columns`; a new `RowConverter` is constructed on every iteration.
c.bench_function(&format!("convert_columns {}", name), |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::UInt64)]);
black_box(converter.convert_columns(&cols))
let mut converter = RowConverter::new(fields.clone());
black_box(converter.convert_columns(&cols).unwrap())
});
});

let cols = vec![Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef];
// Convert once outside the timed closure so only `convert_rows` is measured below.
let mut converter = RowConverter::new(fields);
let rows = converter.convert_columns(&cols).unwrap();

c.bench_function("row_batch 4096 i64(0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Int64)]);
black_box(converter.convert_columns(&cols))
});
// Times `convert_rows` on the rows prepared above, reusing the same converter.
c.bench_function(&format!("convert_rows {}", name), |b| {
b.iter(|| black_box(converter.convert_rows(&rows).unwrap()));
});
}

fn row_bench(c: &mut Criterion) {
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
do_bench(c, "4096 u64(0)", cols);

let cols = vec![Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef];
do_bench(c, "4096 i64(0)", cols);

let cols =
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 10)) as ArrayRef];

c.bench_function("row_batch 4096 string(10, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string(10, 0)", cols);

let cols =
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 30)) as ArrayRef];

c.bench_function("row_batch 4096 string(30, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string(30, 0)", cols);

let cols =
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 100)) as ArrayRef];

c.bench_function("row_batch 4096 string(100, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string(100, 0)", cols);

let cols =
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0.5, 100)) as ArrayRef];

c.bench_function("row_batch 4096 string(100, 0.5)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string(100, 0.5)", cols);

let cols =
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 10)) as ArrayRef];

c.bench_function("row_batch 4096 string_dictionary(10, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string_dictionary(10, 0)", cols);

let cols =
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef];

c.bench_function("row_batch 4096 string_dictionary(30, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string_dictionary(30, 0)", cols);

let cols =
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef];

c.bench_function("row_batch 4096 string_dictionary(100, 0)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});
do_bench(c, "4096 string_dictionary(100, 0)", cols);

let cols =
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 100)) as ArrayRef];
do_bench(c, "4096 string_dictionary(100, 0.5)", cols);

c.bench_function("row_batch 4096 string_dictionary(100, 0.5)", |b| {
b.iter(|| {
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
black_box(converter.convert_columns(&cols))
});
});

let cols = [
let cols = vec![
Arc::new(create_string_array_with_len::<i32>(4096, 0.5, 20)) as ArrayRef,
Arc::new(create_string_array_with_len::<i32>(4096, 0., 30)) as ArrayRef,
Arc::new(create_string_array_with_len::<i32>(4096, 0., 100)) as ArrayRef,
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
];

let fields = [
SortField::new(DataType::Utf8),
SortField::new(DataType::Utf8),
SortField::new(DataType::Utf8),
SortField::new(DataType::Int64),
];

c.bench_function(
"row_batch 4096 string(20, 0.5), string(30, 0), string(100, 0), i64(0)",
|b| {
b.iter(|| {
let mut converter = RowConverter::new(fields.to_vec());
black_box(converter.convert_columns(&cols))
});
},
do_bench(
c,
"4096 string(20, 0.5), string(30, 0), string(100, 0), i64(0)",
cols,
);

let cols = [
let cols = vec![
Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 20)) as ArrayRef,
Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef,
Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef,
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
];

let fields = [
SortField::new(DataType::Utf8),
SortField::new(DataType::Utf8),
SortField::new(DataType::Utf8),
SortField::new(DataType::Int64),
];

c.bench_function(
"row_batch 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)",
|b| {
b.iter(|| {
let mut converter = RowConverter::new(fields.to_vec());
black_box(converter.convert_columns(&cols))
});
},
);
// Label follows the "4096 <column spec>" pattern used by every other case;
// the row-count prefix was previously written twice ("4096 4096 ...").
do_bench(c, "4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols);
}

criterion_group!(benches, row_bench);
Expand Down

0 comments on commit f8c4037

Please sign in to comment.