Merge remote-tracking branch 'upstream/master' into split-out-arrow-data
tustvold committed Sep 16, 2022
2 parents 3c3faf2 + 43d912c commit 1df1e57
Showing 12 changed files with 211 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -20,7 +20,7 @@ __blobstorage__

# .bak files
*.bak

*.bak2
# OS-specific .gitignores

# Mac .gitignore
9 changes: 7 additions & 2 deletions arrow-buffer/src/lib.rs
@@ -19,6 +19,11 @@

pub mod alloc;
pub mod buffer;
pub use buffer::{Buffer, MutableBuffer};

mod bytes;
pub mod native;
pub mod util;
mod native;

pub use native::*;
mod util;
pub use util::*;
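
With these crate-root re-exports, downstream code can import the common types directly from arrow_buffer rather than from the buffer, native, and util submodules, which is exactly what the arrow crate changes further down in this commit do. A minimal sketch of the flattened paths (my own example, not part of the commit, assuming a dependency on arrow-buffer):

use arrow_buffer::{bit_util, Buffer, MutableBuffer};

fn main() {
    // Build a Buffer from two i64 values; push() appends the raw bytes
    // of any value implementing ToByteSlice.
    let mut mutable = MutableBuffer::new(64);
    mutable.push(1_i64);
    mutable.push(2_i64);
    let buffer: Buffer = mutable.into();
    assert_eq!(buffer.len(), 16); // 2 * size_of::<i64>() bytes

    // bit_util is likewise available at the crate root.
    assert_eq!(bit_util::ceil(10, 8), 2);
}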
5 changes: 5 additions & 0 deletions arrow/Cargo.toml
@@ -239,3 +239,8 @@ harness = false
name = "row_format"
harness = false
required-features = ["test_utils"]

[[bench]]
name = "bitwise_kernel"
harness = false
required-features = ["test_utils"]
121 changes: 121 additions & 0 deletions arrow/benches/bitwise_kernel.rs
@@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;

use arrow::compute::kernels::bitwise::{
    bitwise_and, bitwise_and_scalar, bitwise_not, bitwise_or, bitwise_or_scalar,
    bitwise_xor, bitwise_xor_scalar,
};
use arrow::datatypes::Int64Type;
use criterion::{black_box, Criterion};
use rand::RngCore;

extern crate arrow;

use arrow::util::bench_util::create_primitive_array;
use arrow::util::test_util::seedable_rng;

fn bitwise_array_benchmark(c: &mut Criterion) {
    let size = 64 * 1024_usize;
    let left_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
    let right_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
    let left_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
    let right_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
    // array and
    let mut group = c.benchmark_group("bench bitwise array: and");
    group.bench_function("bitwise array and, no nulls", |b| {
        b.iter(|| {
            black_box(bitwise_and(&left_without_null, &right_without_null).unwrap())
        })
    });
    group.bench_function("bitwise array and, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_and(&left_with_null, &right_with_null).unwrap()))
    });
    group.finish();
    // array or
    let mut group = c.benchmark_group("bench bitwise: or");
    group.bench_function("bitwise array or, no nulls", |b| {
        b.iter(|| black_box(bitwise_or(&left_without_null, &right_without_null).unwrap()))
    });
    group.bench_function("bitwise array or, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_or(&left_with_null, &right_with_null).unwrap()))
    });
    group.finish();
    // xor
    let mut group = c.benchmark_group("bench bitwise: xor");
    group.bench_function("bitwise array xor, no nulls", |b| {
        b.iter(|| {
            black_box(bitwise_xor(&left_without_null, &right_without_null).unwrap())
        })
    });
    group.bench_function("bitwise array xor, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_xor(&left_with_null, &right_with_null).unwrap()))
    });
    group.finish();
    // not
    let mut group = c.benchmark_group("bench bitwise: not");
    group.bench_function("bitwise array not, no nulls", |b| {
        b.iter(|| black_box(bitwise_not(&left_without_null).unwrap()))
    });
    group.bench_function("bitwise array not, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_not(&left_with_null).unwrap()))
    });
    group.finish();
}

fn bitwise_array_scalar_benchmark(c: &mut Criterion) {
    let size = 64 * 1024_usize;
    let array_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
    let array_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
    let scalar = seedable_rng().next_u64() as i64;
    // array scalar and
    let mut group = c.benchmark_group("bench bitwise array scalar: and");
    group.bench_function("bitwise array scalar and, no nulls", |b| {
        b.iter(|| black_box(bitwise_and_scalar(&array_without_null, scalar).unwrap()))
    });
    group.bench_function("bitwise array scalar and, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_and_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();
    // array scalar or
    let mut group = c.benchmark_group("bench bitwise array scalar: or");
    group.bench_function("bitwise array scalar or, no nulls", |b| {
        b.iter(|| black_box(bitwise_or_scalar(&array_without_null, scalar).unwrap()))
    });
    group.bench_function("bitwise array scalar or, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_or_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();
    // array scalar xor
    let mut group = c.benchmark_group("bench bitwise array scalar: xor");
    group.bench_function("bitwise array scalar xor, no nulls", |b| {
        b.iter(|| black_box(bitwise_xor_scalar(&array_without_null, scalar).unwrap()))
    });
    group.bench_function("bitwise array scalar xor, 20% nulls", |b| {
        b.iter(|| black_box(bitwise_xor_scalar(&array_with_null, scalar).unwrap()))
    });
    group.finish();
}

criterion_group!(
    benches,
    bitwise_array_benchmark,
    bitwise_array_scalar_benchmark
);
criterion_main!(benches);
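
Because the [[bench]] entry added to arrow/Cargo.toml above sets required-features = ["test_utils"], the new benchmark needs that feature enabled when it is run. From the arrow crate directory, something along these lines should work:

cargo bench --features test_utils --bench bitwise_kernel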
3 changes: 1 addition & 2 deletions arrow/src/bitmap.rs
@@ -17,12 +17,11 @@

//! Defines [Bitmap] for tracking validity bitmaps

use crate::buffer::Buffer;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use std::mem;

use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or};
use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or, Buffer};
use std::ops::{BitAnd, BitOr};

#[derive(Debug, Clone)]
2 changes: 1 addition & 1 deletion arrow/src/datatypes/native.rs
@@ -16,7 +16,7 @@
// under the License.

use super::DataType;
pub use arrow_buffer::native::{ArrowNativeType, ToByteSlice};
pub use arrow_buffer::{ArrowNativeType, ToByteSlice};
use half::f16;

/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
12 changes: 4 additions & 8 deletions arrow/src/ipc/reader.rs
@@ -578,10 +578,7 @@ pub fn read_record_batch(
let mut node_index = 0;
let mut arrays = vec![];

let options = RecordBatchOptions {
row_count: Some(batch.length() as usize),
..Default::default()
};
let options = RecordBatchOptions::new().with_row_count(Some(batch.length() as usize));

if let Some(projection) = projection {
// project fields
@@ -1692,10 +1689,9 @@ mod tests {
#[test]
fn test_no_columns_batch() {
let schema = Arc::new(Schema::new(vec![]));
let options = RecordBatchOptions {
match_field_names: true,
row_count: Some(10),
};
let options = RecordBatchOptions::new()
.with_match_field_names(true)
.with_row_count(Some(10));
let input_batch =
RecordBatch::try_new_with_options(schema, vec![], &options).unwrap();
let output_batch = roundtrip_ipc_stream(&input_batch);
35 changes: 27 additions & 8 deletions arrow/src/record_batch.rs
@@ -80,7 +80,7 @@ impl RecordBatch {
/// # }
/// ```
pub fn try_new(schema: SchemaRef, columns: Vec<ArrayRef>) -> Result<Self> {
let options = RecordBatchOptions::default();
let options = RecordBatchOptions::new();
Self::try_new_impl(schema, columns, &options)
}

@@ -413,15 +413,29 @@ pub struct RecordBatchOptions {
pub row_count: Option<usize>,
}

impl Default for RecordBatchOptions {
fn default() -> Self {
impl RecordBatchOptions {
pub fn new() -> Self {
Self {
match_field_names: true,
row_count: None,
}
}
/// Sets the row_count of RecordBatchOptions and returns self
pub fn with_row_count(mut self, row_count: Option<usize>) -> Self {
self.row_count = row_count;
self
}
/// Sets the match_field_names of RecordBatchOptions and returns self
pub fn with_match_field_names(mut self, match_field_names: bool) -> Self {
self.match_field_names = match_field_names;
self
}
}
impl Default for RecordBatchOptions {
fn default() -> Self {
Self::new()
}
}

impl From<&StructArray> for RecordBatch {
/// Create a record batch from struct array, where each field of
/// the `StructArray` becomes a `Field` in the schema.
@@ -901,10 +915,7 @@ mod tests {
.to_string()
.contains("must either specify a row count or at least one column"));

let options = RecordBatchOptions {
row_count: Some(10),
..Default::default()
};
let options = RecordBatchOptions::new().with_row_count(Some(10));

let ok =
RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
@@ -929,4 +940,12 @@
);
assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap()));
}
#[test]
fn test_record_batch_options() {
let options = RecordBatchOptions::new()
.with_match_field_names(false)
.with_row_count(Some(20));
assert!(!options.match_field_names);
assert_eq!(options.row_count.unwrap(), 20)
}
}
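
The builder-style RecordBatchOptions API introduced here replaces the struct-literal construction removed from ipc/reader.rs above, so further optional fields can be added later without breaking callers. A small usage sketch of my own, mirroring the updated ipc/reader.rs test:

use std::sync::Arc;
use arrow::datatypes::Schema;
use arrow::record_batch::{RecordBatch, RecordBatchOptions};

fn main() {
    // A column-less batch must carry an explicit row count.
    let schema = Arc::new(Schema::new(vec![]));
    let options = RecordBatchOptions::new()
        .with_match_field_names(true)
        .with_row_count(Some(10));
    let batch = RecordBatch::try_new_with_options(schema, vec![], &options).unwrap();
    assert_eq!(batch.num_rows(), 10);
}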
2 changes: 1 addition & 1 deletion arrow/src/util/mod.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

pub use arrow_buffer::util::{bit_chunk_iterator, bit_util};
pub use arrow_buffer::{bit_chunk_iterator, bit_util};

#[cfg(feature = "test_utils")]
pub mod bench_util;
35 changes: 32 additions & 3 deletions dev/release/update_change_log.sh
@@ -29,16 +29,45 @@

set -e

SINCE_TAG="21.0.0"
FUTURE_RELEASE="22.0.0"
SINCE_TAG="22.0.0"
FUTURE_RELEASE="23.0.0"

SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"

OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md"
OLD_OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG-old.md"

# remove license header so github-changelog-generator has a clean base to append
sed -i.bak '1,18d' "${OUTPUT_PATH}"
sed -i.bak '1,21d' "${OUTPUT_PATH}"
sed -i.bak '1,21d' "${OLD_OUTPUT_PATH}"
# remove the github-changelog-generator footer from the old CHANGELOG.md
LINE_COUNT=$(wc -l <"${OUTPUT_PATH}")
sed -i.bak2 "$(( $LINE_COUNT-4+1 )),$ d" "${OUTPUT_PATH}"

# Copy the previous CHANGELOG.md to CHANGELOG-old.md
echo '<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# Historical Changelog
' | cat - "${OUTPUT_PATH}" "${OLD_OUTPUT_PATH}" > "${OLD_OUTPUT_PATH}".tmp
mv "${OLD_OUTPUT_PATH}".tmp "${OLD_OUTPUT_PATH}"

# use exclude-tags-regex to filter out tags used for object_store
# crates and only look at tags that DO NOT begin with `object_store_`
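
For reference, the footer trimming above is plain line arithmetic: for an N-line CHANGELOG.md, $(( N-4+1 )) is the first of the last four lines, so the sed range deletes the github-changelog-generator footer at the end of the file. Illustrative numbers only, not taken from the repository:

# If CHANGELOG.md had 100 lines, $(( 100-4+1 )) evaluates to 97, so the
# command below deletes lines 97 through the end of the file.
LINE_COUNT=100
sed -i.bak2 "$(( $LINE_COUNT-4+1 )),$ d" CHANGELOG.md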
2 changes: 1 addition & 1 deletion object_store/src/aws/client.rs
@@ -411,7 +411,7 @@ impl S3Client {
pub async fn create_multipart(&self, location: &Path) -> Result<MultipartId> {
let credential = self.get_credential().await?;
let url = format!(
"{}/{}/{}?uploads",
"{}/{}/{}?uploads=",
self.config.endpoint,
self.config.bucket,
encode_path(location)
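
The query string here targets S3's CreateMultipartUpload operation; after this change the generated URL ends in "?uploads=" rather than "?uploads". Purely illustrative, with made-up endpoint, bucket, and object path:

// Hypothetical values, not from the commit:
//   endpoint = "https://s3.us-east-1.amazonaws.com"
//   bucket   = "my-bucket"
//   location = "data/file.parquet"
// Resulting request URL:
//   https://s3.us-east-1.amazonaws.com/my-bucket/data/file.parquet?uploads=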
