Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use required-features for CSV benchmarks/examples #2602

Merged
merged 1 commit into from Sep 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions arrow/Cargo.toml
Expand Up @@ -107,6 +107,16 @@ name = "dynamic_types"
required-features = ["prettyprint"]
path = "./examples/dynamic_types.rs"

[[example]]
name = "read_csv"
required-features = ["prettyprint", "csv"]
path = "./examples/read_csv.rs"

[[example]]
name = "read_csv_infer_schema"
required-features = ["prettyprint", "csv"]
path = "./examples/read_csv_infer_schema.rs"

[[bench]]
name = "aggregate_kernels"
harness = false
Expand Down Expand Up @@ -180,6 +190,7 @@ required-features = ["test_utils"]
[[bench]]
name = "csv_writer"
harness = false
required-features = ["csv"]

[[bench]]
name = "json_reader"
Expand Down
70 changes: 33 additions & 37 deletions arrow/benches/csv_writer.rs
Expand Up @@ -21,7 +21,6 @@ extern crate criterion;
use criterion::*;

use arrow::array::*;
#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
Expand All @@ -30,47 +29,44 @@ use std::fs::File;
use std::sync::Arc;

fn criterion_benchmark(c: &mut Criterion) {
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
Field::new("c4", DataType::Boolean, true),
]);
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
Field::new("c4", DataType::Boolean, true),
]);

let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);

let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
let path = env::temp_dir().join("bench_write_csv.csv");
let file = File::create(path).unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
let path = env::temp_dir().join("bench_write_csv.csv");
let file = File::create(path).unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];

c.bench_function("record_batches_to_csv", |b| {
b.iter(|| {
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in &batches {
writer.write(batch).unwrap()
});
c.bench_function("record_batches_to_csv", |b| {
b.iter(|| {
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in &batches {
writer.write(batch).unwrap()
});
});
}
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
30 changes: 11 additions & 19 deletions arrow/examples/read_csv.rs
Expand Up @@ -20,30 +20,22 @@ extern crate arrow;
use std::fs::File;
use std::sync::Arc;

#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::{DataType, Field, Schema};
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;

fn main() {
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);

let path = format!("{}/test/data/uk_cities.csv", env!("CARGO_MANIFEST_DIR"));
let file = File::open(path).unwrap();
let path = format!("{}/test/data/uk_cities.csv", env!("CARGO_MANIFEST_DIR"));
let file = File::open(path).unwrap();

let mut csv =
csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None, None);
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
}
}
let mut csv =
csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None, None);
let batch = csv.next().unwrap().unwrap();
print_batches(&[batch]).unwrap();
}
30 changes: 11 additions & 19 deletions arrow/examples/read_csv_infer_schema.rs
Expand Up @@ -17,28 +17,20 @@

extern crate arrow;

#[cfg(feature = "csv")]
use arrow::csv;
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;
use std::fs::File;

/// Example: read `test/data/uk_cities_with_headers.csv`, inferring the schema
/// from up to 100 records, and pretty-print the first batch.
///
/// Feature gating (`csv`, `prettyprint`) is declared in Cargo.toml via
/// `required-features`, so the body needs no `#[cfg(...)]` guards. Panics
/// (`unwrap`) are acceptable here — this is example code, not a library API.
fn main() {
    // Resolve the test fixture relative to this crate's manifest directory.
    let path = format!(
        "{}/test/data/uk_cities_with_headers.csv",
        env!("CARGO_MANIFEST_DIR")
    );
    let file = File::open(path).unwrap();
    // Treat the first row as a header and sample up to 100 rows to infer types.
    let builder = csv::ReaderBuilder::new()
        .has_header(true)
        .infer_schema(Some(100));
    let mut csv = builder.build(file).unwrap();
    let batch = csv.next().unwrap().unwrap();
    print_batches(&[batch]).unwrap();
}