Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into split-out-arrow-s…
Browse files Browse the repository at this point in the history
…chema
  • Loading branch information
tustvold committed Sep 20, 2022
2 parents 3c3faf2 + 5b601b3 commit 6f62bb6
Show file tree
Hide file tree
Showing 33 changed files with 1,343 additions and 567 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -20,7 +20,7 @@ __blobstorage__

# .bak files
*.bak

*.bak2
# OS-specific .gitignores

# Mac .gitignore
Expand Down
115 changes: 114 additions & 1 deletion CHANGELOG-old.md

Large diffs are not rendered by default.

183 changes: 86 additions & 97 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion arrow-buffer/Cargo.toml
Expand Up @@ -17,7 +17,7 @@

[package]
name = "arrow-buffer"
version = "22.0.0"
version = "23.0.0"
description = "Buffer abstractions for Apache Arrow"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
Expand Down
9 changes: 7 additions & 2 deletions arrow-buffer/src/lib.rs
Expand Up @@ -19,6 +19,11 @@

pub mod alloc;
pub mod buffer;
pub use buffer::{Buffer, MutableBuffer};

mod bytes;
pub mod native;
pub mod util;
mod native;

pub use native::*;
mod util;
pub use util::*;
4 changes: 2 additions & 2 deletions arrow-flight/Cargo.toml
Expand Up @@ -18,7 +18,7 @@
[package]
name = "arrow-flight"
description = "Apache Arrow Flight"
version = "22.0.0"
version = "23.0.0"
edition = "2021"
rust-version = "1.62"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
Expand All @@ -27,7 +27,7 @@ repository = "https://github.com/apache/arrow-rs"
license = "Apache-2.0"

[dependencies]
arrow = { path = "../arrow", version = "22.0.0", default-features = false, features = ["ipc"] }
arrow = { path = "../arrow", version = "23.0.0", default-features = false, features = ["ipc"] }
base64 = { version = "0.13", default-features = false }
tonic = { version = "0.8", default-features = false, features = ["transport", "codegen", "prost"] }
bytes = { version = "1", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/README.md
Expand Up @@ -27,7 +27,7 @@ Add this to your Cargo.toml:

```toml
[dependencies]
arrow-flight = "22.0.0"
arrow-flight = "23.0.0"
```

Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information.
Expand Down
4 changes: 2 additions & 2 deletions arrow-pyarrow-integration-testing/Cargo.toml
Expand Up @@ -18,7 +18,7 @@
[package]
name = "arrow-pyarrow-integration-testing"
description = ""
version = "22.0.0"
version = "23.0.0"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
Expand All @@ -32,7 +32,7 @@ name = "arrow_pyarrow_integration_testing"
crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", version = "22.0.0", features = ["pyarrow"] }
arrow = { path = "../arrow", version = "23.0.0", features = ["pyarrow"] }
pyo3 = { version = "0.17", features = ["extension-module"] }

[package.metadata.maturin]
Expand Down
2 changes: 1 addition & 1 deletion arrow-schema/Cargo.toml
Expand Up @@ -17,7 +17,7 @@

[package]
name = "arrow-schema"
version = "22.0.0"
version = "23.0.0"
description = "Defines the logical types for arrow arrays"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
Expand Down
13 changes: 9 additions & 4 deletions arrow/Cargo.toml
Expand Up @@ -17,7 +17,7 @@

[package]
name = "arrow"
version = "22.0.0"
version = "23.0.0"
description = "Rust implementation of Apache Arrow"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
Expand All @@ -44,13 +44,13 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

[dependencies]
arrow-buffer = { path = "../arrow-buffer", version = "22.0.0" }
arrow-schema = { version = "22.0.0", path = "../arrow-schema" }
arrow-buffer = { version = "23.0.0", path = "../arrow-buffer" }
arrow-schema = { version = "23.0.0", path = "../arrow-schema" }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
indexmap = { version = "1.9", default-features = false, features = ["std"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
num = { version = "0.4", default-features = false, features = ["std"] }
half = { version = "2.0", default-features = false }
half = { version = "2.0", default-features = false, features = ["num-traits"]}
hashbrown = { version = "0.12", default-features = false }
csv_crate = { version = "1.1", default-features = false, optional = true, package = "csv" }
regex = { version = "1.5.6", default-features = false, features = ["std", "unicode"] }
Expand Down Expand Up @@ -239,3 +239,8 @@ harness = false
name = "row_format"
harness = false
required-features = ["test_utils"]

[[bench]]
name = "bitwise_kernel"
harness = false
required-features = ["test_utils"]
4 changes: 2 additions & 2 deletions arrow/README.md
Expand Up @@ -35,7 +35,7 @@ This crate is tested with the latest stable version of Rust. We do not currently

The arrow crate follows the [SemVer standard](https://doc.rust-lang.org/cargo/reference/semver.html) defined by Cargo and works well within the Rust crate ecosystem.

However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `22.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes.
However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `23.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes.

## Feature Flags

Expand All @@ -61,7 +61,7 @@ The [Apache Arrow Status](https://arrow.apache.org/docs/status.html) page lists

## Safety

Arrow seeks to uphold the Rust Soundness Pledge as articulated eloquently [here](https://raphlinus.github.io/rust/22.0.01/18/soundness-pledge.html). Specifically:
Arrow seeks to uphold the Rust Soundness Pledge as articulated eloquently [here](https://raphlinus.github.io/rust/2020/01/18/soundness-pledge.html). Specifically:

> The intent of this crate is to be free of soundness bugs. The developers will do their best to avoid them, and welcome help in analyzing and fixing them
Expand Down
121 changes: 121 additions & 0 deletions arrow/benches/bitwise_kernel.rs
@@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;

use arrow::compute::kernels::bitwise::{
bitwise_and, bitwise_and_scalar, bitwise_not, bitwise_or, bitwise_or_scalar,
bitwise_xor, bitwise_xor_scalar,
};
use arrow::datatypes::Int64Type;
use criterion::{black_box, Criterion};
use rand::RngCore;

extern crate arrow;

use arrow::util::bench_util::create_primitive_array;
use arrow::util::test_util::seedable_rng;

/// Registers Criterion benchmarks for the array-vs-array bitwise kernels
/// (`bitwise_and`, `bitwise_or`, `bitwise_xor`) and the unary `bitwise_not`
/// kernel over `Int64Type` arrays of `64 * 1024` elements.
///
/// Each kernel is measured twice: once on inputs built with a `0.0` second
/// argument to `create_primitive_array` and once with `0.2` (labelled
/// "no nulls" and "20% nulls" respectively).
fn bitwise_array_benchmark(c: &mut Criterion) {
    let len = 64 * 1024_usize;

    // Inputs are created in a fixed order so the generated data does not
    // depend on which benchmark is selected.
    let lhs_dense = create_primitive_array::<Int64Type>(len, 0.0_f32);
    let rhs_dense = create_primitive_array::<Int64Type>(len, 0.0_f32);
    let lhs_sparse = create_primitive_array::<Int64Type>(len, 0.2_f32);
    let rhs_sparse = create_primitive_array::<Int64Type>(len, 0.2_f32);

    // array and
    {
        let mut and_group = c.benchmark_group("bench bitwise array: and");
        and_group.bench_function("bitwise array and, no nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_and(&lhs_dense, &rhs_dense).unwrap();
                black_box(out)
            })
        });
        and_group.bench_function("bitwise array and, 20% nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_and(&lhs_sparse, &rhs_sparse).unwrap();
                black_box(out)
            })
        });
        and_group.finish();
    }

    // array or
    {
        let mut or_group = c.benchmark_group("bench bitwise: or");
        or_group.bench_function("bitwise array or, no nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_or(&lhs_dense, &rhs_dense).unwrap();
                black_box(out)
            })
        });
        or_group.bench_function("bitwise array or, 20% nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_or(&lhs_sparse, &rhs_sparse).unwrap();
                black_box(out)
            })
        });
        or_group.finish();
    }

    // xor
    {
        let mut xor_group = c.benchmark_group("bench bitwise: xor");
        xor_group.bench_function("bitwise array xor, no nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_xor(&lhs_dense, &rhs_dense).unwrap();
                black_box(out)
            })
        });
        xor_group.bench_function("bitwise array xor, 20% nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_xor(&lhs_sparse, &rhs_sparse).unwrap();
                black_box(out)
            })
        });
        xor_group.finish();
    }

    // not (unary, so only the left-hand inputs are used)
    {
        let mut not_group = c.benchmark_group("bench bitwise: not");
        not_group.bench_function("bitwise array not, no nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_not(&lhs_dense).unwrap();
                black_box(out)
            })
        });
        not_group.bench_function("bitwise array not, 20% nulls", |bencher| {
            bencher.iter(|| {
                let out = bitwise_not(&lhs_sparse).unwrap();
                black_box(out)
            })
        });
        not_group.finish();
    }
}

/// Registers Criterion benchmarks for the array-vs-scalar bitwise kernels
/// (`bitwise_and_scalar`, `bitwise_or_scalar`, `bitwise_xor_scalar`) over
/// `Int64Type` arrays of `64 * 1024` elements.
///
/// Each kernel is measured on an input labelled "no nulls" (built with `0.0`)
/// and one labelled "20% nulls" (built with `0.2`). All benchmarks share a
/// single scalar operand.
fn bitwise_array_scalar_benchmark(c: &mut Criterion) {
    let size = 64 * 1024_usize;
    let array_without_null = create_primitive_array::<Int64Type>(size, 0.0_f32);
    let array_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
    // One scalar drawn from `seedable_rng()`. The `as i64` cast reinterprets
    // the bits of the `u64`; any bit pattern is an acceptable operand here.
    let scalar = seedable_rng().next_u64() as i64;

    // array scalar and
    let mut group = c.benchmark_group("bench bitwise array scalar: and");
    group.bench_function("bitwise array scalar and, no nulls", |b| {
        b.iter(|| black_box(bitwise_and_scalar(&array_without_null, scalar).unwrap()))
    });
    // The label includes "scalar" so it matches the rest of this function and
    // does not reuse the id of the array/array `and` benchmark.
    group.bench_function("bitwise array scalar and, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_and_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();

    // array scalar or
    let mut group = c.benchmark_group("bench bitwise array scalar: or");
    group.bench_function("bitwise array scalar or, no nulls", |b| {
        b.iter(|| black_box(bitwise_or_scalar(&array_without_null, scalar).unwrap()))
    });
    group.bench_function("bitwise array scalar or, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_or_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();

    // array scalar xor
    let mut group = c.benchmark_group("bench bitwise array scalar: xor");
    group.bench_function("bitwise array scalar xor, no nulls", |b| {
        b.iter(|| black_box(bitwise_xor_scalar(&array_without_null, scalar).unwrap()))
    });
    group.bench_function("bitwise array scalar xor, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_xor_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();
}

criterion_group!(
benches,
bitwise_array_benchmark,
bitwise_array_scalar_benchmark
);
criterion_main!(benches);
10 changes: 10 additions & 0 deletions arrow/benches/equal.rs
Expand Up @@ -20,6 +20,7 @@

#[macro_use]
extern crate criterion;
use arrow::compute::eq_utf8_scalar;
use criterion::Criterion;

extern crate arrow;
Expand All @@ -31,6 +32,10 @@ fn bench_equal<A: Array + PartialEq<A>>(arr_a: &A) {
criterion::black_box(arr_a == arr_a);
}

/// Compares every element of `arr_a` against the scalar `right` with
/// `eq_utf8_scalar` and passes the result through `black_box` so the
/// comparison is not optimized away. Panics if the kernel returns an error.
fn bench_equal_utf8_scalar(arr_a: &GenericStringArray<i32>, right: &str) {
    let mask = eq_utf8_scalar(arr_a, right).unwrap();
    criterion::black_box(mask);
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_primitive_array::<Float32Type>(512, 0.0);
c.bench_function("equal_512", |b| b.iter(|| bench_equal(&arr_a)));
Expand All @@ -41,6 +46,11 @@ fn add_benchmark(c: &mut Criterion) {
let arr_a = create_string_array::<i32>(512, 0.0);
c.bench_function("equal_string_512", |b| b.iter(|| bench_equal(&arr_a)));

let arr_a = create_string_array::<i32>(512, 0.0);
c.bench_function("equal_string_scalar_empty_512", |b| {
b.iter(|| bench_equal_utf8_scalar(&arr_a, ""))
});

let arr_a_nulls = create_string_array::<i32>(512, 0.5);
c.bench_function("equal_string_nulls_512", |b| {
b.iter(|| bench_equal(&arr_a_nulls))
Expand Down
3 changes: 1 addition & 2 deletions arrow/src/bitmap.rs
Expand Up @@ -17,12 +17,11 @@

//! Defines [Bitmap] for tracking validity bitmaps

use crate::buffer::Buffer;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use std::mem;

use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or};
use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or, Buffer};
use std::ops::{BitAnd, BitOr};

#[derive(Debug, Clone)]
Expand Down

0 comments on commit 6f62bb6

Please sign in to comment.