From 5f441eedff2b7621c46aded8b1caf3b665b8e8a9 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 15 Sep 2022 17:22:53 +0100 Subject: [PATCH 01/16] Fix multipart uploads on Minio (#2731) The official Minio SDK uses "uploads=" as the URL when it initiates a multipart upload instead of "uploads". This affects the AWSV4 signature and causes object_store to fail a signature check when initiating the upload to Minio. It's possible that this contradicts the AWS S3 API docs: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html#API_CreateMultipartUpload_RequestSyntax and we need to instead keep the URL as `?uploads` and change the URL that goes into the signature instead. --- object_store/src/aws/client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index d8ab3bba8f2..f800fec3dc5 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -411,7 +411,7 @@ impl S3Client { pub async fn create_multipart(&self, location: &Path) -> Result { let credential = self.get_credential().await?; let url = format!( - "{}/{}/{}?uploads", + "{}/{}/{}?uploads=", self.config.endpoint, self.config.bucket, encode_path(location) From a7a93295bd4a143d55fa31a1c6ac92045d73dc05 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 15 Sep 2022 17:23:22 +0100 Subject: [PATCH 02/16] Update read parquet example in parquet/arrow home (#2730) * Update example to read parquet * Remove outdated comment --- parquet/src/arrow/mod.rs | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index c0de656bf9c..c5fe0fa2a62 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -66,26 +66,23 @@ //! # Example of reading parquet file into arrow record batch //! //! ```rust -//! use arrow::record_batch::RecordBatchReader; -//! use parquet::file::reader::{FileReader, SerializedFileReader}; -//! use parquet::arrow::{ParquetFileArrowReader, ArrowReader, ProjectionMask}; -//! use std::sync::Arc; //! use std::fs::File; +//! use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; //! +//! # use std::sync::Arc; //! # use arrow::array::Int32Array; //! # use arrow::datatypes::{DataType, Field, Schema}; //! # use arrow::record_batch::RecordBatch; //! # use parquet::arrow::arrow_writer::ArrowWriter; +//! # //! # let ids = Int32Array::from(vec![1, 2, 3, 4]); //! # let schema = Arc::new(Schema::new(vec![ -//! # Field::new("id", DataType::Int32, false), +//! # Field::new("id", DataType::Int32, false), //! # ])); //! # -//! # // Write to a memory buffer (can also write to a File) //! # let file = File::create("data.parquet").unwrap(); //! # -//! # let batch = -//! # RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids)]).unwrap(); +//! # let batch = RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids)]).unwrap(); //! # let batches = vec![batch]; //! # //! # let mut writer = ArrowWriter::try_new(file, Arc::clone(&schema), None).unwrap(); @@ -97,26 +94,14 @@ //! //! let file = File::open("data.parquet").unwrap(); //! -//! let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); -//! let mask = ProjectionMask::leaves(arrow_reader.parquet_schema(), [0]); -//! -//! println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap()); -//! println!("Arrow schema after projection is: {}", -//! arrow_reader.get_schema_by_columns(mask.clone()).unwrap()); +//! 
let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); +//! println!("Converted arrow schema is: {}", builder.schema()); //! -//! let mut unprojected = arrow_reader.get_record_reader(2048).unwrap(); -//! println!("Unprojected reader schema: {}", unprojected.schema()); +//! let mut reader = builder.build().unwrap(); //! -//! let mut record_batch_reader = arrow_reader.get_record_reader_by_columns(mask, 2048).unwrap(); +//! let record_batch = reader.next().unwrap().unwrap(); //! -//! for maybe_record_batch in record_batch_reader { -//! let record_batch = maybe_record_batch.unwrap(); -//! if record_batch.num_rows() > 0 { -//! println!("Read {} records.", record_batch.num_rows()); -//! } else { -//! println!("End of file!"); -//! } -//!} +//! println!("Read {} records.", record_batch.num_rows()); //! ``` experimental!(mod array_reader); From eb9b456fdde92d4ca12c7573fb38faf6e6657fc3 Mon Sep 17 00:00:00 2001 From: Kun Liu Date: Fri, 16 Sep 2022 03:12:06 +0800 Subject: [PATCH 03/16] benchmark: bitwise operation (#2718) * add benchmark for bitwise operation * add bench for bitwise or xor not --- arrow/Cargo.toml | 5 ++ arrow/benches/bitwise_kernel.rs | 121 ++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 arrow/benches/bitwise_kernel.rs diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index c66cef61202..e52940b4fc4 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -240,3 +240,8 @@ harness = false name = "row_format" harness = false required-features = ["test_utils"] + +[[bench]] +name = "bitwise_kernel" +harness = false +required-features = ["test_utils"] diff --git a/arrow/benches/bitwise_kernel.rs b/arrow/benches/bitwise_kernel.rs new file mode 100644 index 00000000000..741eb96125a --- /dev/null +++ b/arrow/benches/bitwise_kernel.rs @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
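+
+// Benchmarks for the bitwise kernels (`and`, `or`, `xor`, their `_scalar`
+// variants, and `not`) over 64K-element Int64 arrays, at 0% and 20% null
+// density. The bench target is gated on the `test_utils` feature (see the
+// `[[bench]]` entry added to `arrow/Cargo.toml` above), so a typical
+// invocation is presumably:
+//
+//     cargo bench --features test_utils --bench bitwise_kernel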
+
+#[macro_use]
+extern crate criterion;
+
+use arrow::compute::kernels::bitwise::{
+    bitwise_and, bitwise_and_scalar, bitwise_not, bitwise_or, bitwise_or_scalar,
+    bitwise_xor, bitwise_xor_scalar,
+};
+use arrow::datatypes::Int64Type;
+use criterion::{black_box, Criterion};
+use rand::RngCore;
+
+extern crate arrow;
+
+use arrow::util::bench_util::create_primitive_array;
+use arrow::util::test_util::seedable_rng;
+
+fn bitwise_array_benchmark(c: &mut Criterion) {
+    let size = 64 * 1024_usize;
+    let left_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
+    let right_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
+    let left_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
+    let right_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
+    // array and
+    let mut group = c.benchmark_group("bench bitwise array: and");
+    group.bench_function("bitwise array and, no nulls", |b| {
+        b.iter(|| {
+            black_box(bitwise_and(&left_without_null, &right_without_null).unwrap())
+        })
+    });
+    group.bench_function("bitwise array and, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_and(&left_with_null, &right_with_null).unwrap()))
+    });
+    group.finish();
+    // array or
+    let mut group = c.benchmark_group("bench bitwise: or");
+    group.bench_function("bitwise array or, no nulls", |b| {
+        b.iter(|| black_box(bitwise_or(&left_without_null, &right_without_null).unwrap()))
+    });
+    group.bench_function("bitwise array or, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_or(&left_with_null, &right_with_null).unwrap()))
+    });
+    group.finish();
+    // xor
+    let mut group = c.benchmark_group("bench bitwise: xor");
+    group.bench_function("bitwise array xor, no nulls", |b| {
+        b.iter(|| {
+            black_box(bitwise_xor(&left_without_null, &right_without_null).unwrap())
+        })
+    });
+    group.bench_function("bitwise array xor, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_xor(&left_with_null, &right_with_null).unwrap()))
+    });
+    group.finish();
+    // not
+    let mut group = c.benchmark_group("bench bitwise: not");
+    group.bench_function("bitwise array not, no nulls", |b| {
+        b.iter(|| black_box(bitwise_not(&left_without_null).unwrap()))
+    });
+    group.bench_function("bitwise array not, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_not(&left_with_null).unwrap()))
+    });
+    group.finish();
+}
+
+fn bitwise_array_scalar_benchmark(c: &mut Criterion) {
+    let size = 64 * 1024_usize;
+    let array_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
+    let array_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
+    let scalar = seedable_rng().next_u64() as i64;
+    // array scalar and
+    let mut group = c.benchmark_group("bench bitwise array scalar: and");
+    group.bench_function("bitwise array scalar and, no nulls", |b| {
+        b.iter(|| black_box(bitwise_and_scalar(&array_without_null, scalar).unwrap()))
+    });
+    group.bench_function("bitwise array scalar and, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_and_scalar(&array_with_null, scalar).unwrap()))
+    });
+    group.finish();
+    // array scalar or
+    let mut group = c.benchmark_group("bench bitwise array scalar: or");
+    group.bench_function("bitwise array scalar or, no nulls", |b| {
+        b.iter(|| black_box(bitwise_or_scalar(&array_without_null, scalar).unwrap()))
+    });
+    group.bench_function("bitwise array scalar or, 20% nulls", |b| {
+        b.iter(|| black_box(bitwise_or_scalar(&array_with_null, scalar).unwrap()))
+    });
+    group.finish();
+    // array scalar xor
+    let mut group = c.benchmark_group("bench bitwise array scalar: xor");
+    group.bench_function("bitwise array scalar xor, no nulls", |b| {
b.iter(|| black_box(bitwise_xor_scalar(&array_without_null, scalar).unwrap())) + }); + group.bench_function("bitwise array scalar xor, 20% nulls", |b| { + b.iter(|| black_box(bitwise_xor_scalar(&array_with_null, scalar).unwrap())) + }); + group.finish(); +} + +criterion_group!( + benches, + bitwise_array_benchmark, + bitwise_array_scalar_benchmark +); +criterion_main!(benches); From 5238789244be27380347b19b0747c9dcd9938470 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Thu, 15 Sep 2022 15:21:11 -0400 Subject: [PATCH 04/16] Automate updates to `CHANGELOG-old.md` (#2732) * feature complete * fix footer issue * fix duplicate changelog issue * use tac instead of head for head -n - is not universal * adjust blank lines * fix footer dropping * line adj * add .bak2 to gitignore --- .gitignore | 2 +- dev/release/update_change_log.sh | 35 +++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 2a21776aa54..b8506ea06cb 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ __blobstorage__ # .bak files *.bak - +*.bak2 # OS-specific .gitignores # Mac .gitignore diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index 252cd285d92..a3af50a8a6e 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,16 +29,45 @@ set -e -SINCE_TAG="21.0.0" -FUTURE_RELEASE="22.0.0" +SINCE_TAG="22.0.0" +FUTURE_RELEASE="23.0.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md" +OLD_OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG-old.md" # remove license header so github-changelog-generator has a clean base to append -sed -i.bak '1,18d' "${OUTPUT_PATH}" +sed -i.bak '1,21d' "${OUTPUT_PATH}" +sed -i.bak '1,21d' "${OLD_OUTPUT_PATH}" +# remove the github-changelog-generator footer from the old CHANGELOG.md +LINE_COUNT=$(wc -l <"${OUTPUT_PATH}") +sed -i.bak2 "$(( $LINE_COUNT-4+1 )),$ d" "${OUTPUT_PATH}" + +# Copy the previous CHANGELOG.md to CHANGELOG-old.md +echo ' + +# Historical Changelog +' | cat - "${OUTPUT_PATH}" "${OLD_OUTPUT_PATH}" > "${OLD_OUTPUT_PATH}".tmp +mv "${OLD_OUTPUT_PATH}".tmp "${OLD_OUTPUT_PATH}" # use exclude-tags-regex to filter out tags used for object_store # crates and only only look at tags that DO NOT begin with `object_store_` From 0ebd71e0d3d132250a2e5743f24f952c58c236d3 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Thu, 15 Sep 2022 22:44:22 +0100 Subject: [PATCH 05/16] Partially flatten arrow-buffer (#2737) * Partially flatten arrow-buffer * Format --- arrow-buffer/src/lib.rs | 9 +++++++-- arrow/src/bitmap.rs | 3 +-- arrow/src/datatypes/native.rs | 2 +- arrow/src/util/mod.rs | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs index a8aca7c3dad..74d2bd5ec86 100644 --- a/arrow-buffer/src/lib.rs +++ b/arrow-buffer/src/lib.rs @@ -19,6 +19,11 @@ pub mod alloc; pub mod buffer; +pub use buffer::{Buffer, MutableBuffer}; + mod bytes; -pub mod native; -pub mod util; +mod native; + +pub use native::*; +mod util; +pub use util::*; diff --git a/arrow/src/bitmap.rs b/arrow/src/bitmap.rs index 4491da4632b..dbf9706677a 100644 --- a/arrow/src/bitmap.rs +++ b/arrow/src/bitmap.rs @@ -17,12 +17,11 @@ //! 
Defines [Bitmap] for tracking validity bitmaps -use crate::buffer::Buffer; use crate::error::{ArrowError, Result}; use crate::util::bit_util; use std::mem; -use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or}; +use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or, Buffer}; use std::ops::{BitAnd, BitOr}; #[derive(Debug, Clone)] diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 8c329a066e5..de35c4804fa 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -16,7 +16,7 @@ // under the License. use super::DataType; -pub use arrow_buffer::native::{ArrowNativeType, ToByteSlice}; +pub use arrow_buffer::{ArrowNativeType, ToByteSlice}; use half::f16; /// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index 5453c11ab8a..adafc9f5053 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -pub use arrow_buffer::util::{bit_chunk_iterator, bit_util}; +pub use arrow_buffer::{bit_chunk_iterator, bit_util}; #[cfg(feature = "test_utils")] pub mod bench_util; From 43d912c010b7374ceb3a632eedda4f55f90545d0 Mon Sep 17 00:00:00 2001 From: askoa <112126368+askoa@users.noreply.github.com> Date: Fri, 16 Sep 2022 05:59:39 -0400 Subject: [PATCH 06/16] Better construction of RecordBatchOptions (#2729) * include builder for RecordBatchOptions * fix clippy warnings * fix clippy warnings * remove builder struct * removed a wrong comment * Update comment in arrow/src/record_batch.rs Co-authored-by: Andrew Lamb * Update comment in arrow/src/record_batch.rs Co-authored-by: Andrew Lamb Co-authored-by: askoa Co-authored-by: Andrew Lamb --- arrow/src/ipc/reader.rs | 12 ++++-------- arrow/src/record_batch.rs | 35 +++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs index 969c8c43f02..a784f54e20c 100644 --- a/arrow/src/ipc/reader.rs +++ b/arrow/src/ipc/reader.rs @@ -578,10 +578,7 @@ pub fn read_record_batch( let mut node_index = 0; let mut arrays = vec![]; - let options = RecordBatchOptions { - row_count: Some(batch.length() as usize), - ..Default::default() - }; + let options = RecordBatchOptions::new().with_row_count(Some(batch.length() as usize)); if let Some(projection) = projection { // project fields @@ -1692,10 +1689,9 @@ mod tests { #[test] fn test_no_columns_batch() { let schema = Arc::new(Schema::new(vec![])); - let options = RecordBatchOptions { - match_field_names: true, - row_count: Some(10), - }; + let options = RecordBatchOptions::new() + .with_match_field_names(true) + .with_row_count(Some(10)); let input_batch = RecordBatch::try_new_with_options(schema, vec![], &options).unwrap(); let output_batch = roundtrip_ipc_stream(&input_batch); diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 4b0d36a43e5..f71c67fe774 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -80,7 +80,7 @@ impl RecordBatch { /// # } /// ``` pub fn try_new(schema: SchemaRef, columns: Vec) -> Result { - let options = RecordBatchOptions::default(); + let options = RecordBatchOptions::new(); Self::try_new_impl(schema, columns, &options) } @@ -413,15 +413,29 @@ pub struct RecordBatchOptions { pub row_count: Option, } -impl Default for RecordBatchOptions { - fn default() -> Self { +impl RecordBatchOptions { + pub fn new() -> Self { Self { 
match_field_names: true, row_count: None, } } + /// Sets the row_count of RecordBatchOptions and returns self + pub fn with_row_count(mut self, row_count: Option) -> Self { + self.row_count = row_count; + self + } + /// Sets the match_field_names of RecordBatchOptions and returns self + pub fn with_match_field_names(mut self, match_field_names: bool) -> Self { + self.match_field_names = match_field_names; + self + } +} +impl Default for RecordBatchOptions { + fn default() -> Self { + Self::new() + } } - impl From<&StructArray> for RecordBatch { /// Create a record batch from struct array, where each field of /// the `StructArray` becomes a `Field` in the schema. @@ -901,10 +915,7 @@ mod tests { .to_string() .contains("must either specify a row count or at least one column")); - let options = RecordBatchOptions { - row_count: Some(10), - ..Default::default() - }; + let options = RecordBatchOptions::new().with_row_count(Some(10)); let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); @@ -929,4 +940,12 @@ mod tests { ); assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap())); } + #[test] + fn test_record_batch_options() { + let options = RecordBatchOptions::new() + .with_match_field_names(false) + .with_row_count(Some(20)); + assert!(!options.match_field_names); + assert_eq!(options.row_count.unwrap(), 20) + } } From f572ec1bef4a66a00b78f1d80a39992d63444ec2 Mon Sep 17 00:00:00 2001 From: Remzi Yang <59198230+HaoYang670@users.noreply.github.com> Date: Fri, 16 Sep 2022 18:47:20 +0800 Subject: [PATCH 07/16] Update `try_binary` and `checked_ops`, and remove `math_checked_op` (#2717) * update try_binary delete math_checked_op update the return type of checked ops Signed-off-by: remzi <13716567376yh@gmail.com> * float div not panic on zero Signed-off-by: remzi <13716567376yh@gmail.com> * fix nan test Signed-off-by: remzi <13716567376yh@gmail.com> * add float divide by zero Signed-off-by: remzi <13716567376yh@gmail.com> * add float tests Signed-off-by: remzi <13716567376yh@gmail.com> * fix compile error Signed-off-by: remzi <13716567376yh@gmail.com> Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/Cargo.toml | 2 +- arrow/src/compute/kernels/arithmetic.rs | 220 ++++++++++-------------- arrow/src/compute/kernels/arity.rs | 14 +- arrow/src/datatypes/native.rs | 66 +++++-- 4 files changed, 153 insertions(+), 149 deletions(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index e52940b4fc4..1580856dfc0 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -51,7 +51,7 @@ serde_json = { version = "1.0", default-features = false, features = ["std"], op indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } -half = { version = "2.0", default-features = false } +half = { version = "2.0", default-features = false, features = ["num-traits"]} hashbrown = { version = "0.12", default-features = false } csv_crate = { version = "1.1", default-features = false, optional = true, package = "csv" } regex = { version = "1.5.6", default-features = false, features = ["std", "unicode"] } diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 04fe2393ec4..7b91a261c7e 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ 
b/arrow/src/compute/kernels/arithmetic.rs @@ -78,32 +78,6 @@ where Ok(binary(left, right, op)) } -/// This is similar to `math_op` as it performs given operation between two input primitive arrays. -/// But the given operation can return `None` if overflow is detected. For the case, this function -/// returns an `Err`. -fn math_checked_op( - left: &PrimitiveArray, - right: &PrimitiveArray, - op: F, -) -> Result> -where - LT: ArrowNumericType, - RT: ArrowNumericType, - F: Fn(LT::Native, RT::Native) -> Option, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - try_binary(left, right, |a, b| { - op(a, b).ok_or_else(|| { - ArrowError::ComputeError(format!("Overflow happened on: {:?}, {:?}", a, b)) - }) - }) -} - /// Helper function for operations where a valid `0` on the right array should /// result in an [ArrowError::DivideByZero], namely the division and modulo operations /// @@ -121,26 +95,9 @@ where LT: ArrowNumericType, RT: ArrowNumericType, RT::Native: One + Zero, - F: Fn(LT::Native, RT::Native) -> Option, + F: Fn(LT::Native, RT::Native) -> Result, { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - try_binary(left, right, |l, r| { - if r.is_zero() { - Err(ArrowError::DivideByZero) - } else { - op(l, r).ok_or_else(|| { - ArrowError::ComputeError(format!( - "Overflow happened on: {:?}, {:?}", - l, r - )) - }) - } - }) + try_binary(left, right, op) } /// Helper function for operations where a valid `0` on the right array should @@ -161,16 +118,12 @@ fn math_checked_divide_op_on_iters( where T: ArrowNumericType, T::Native: One + Zero, - F: Fn(T::Native, T::Native) -> T::Native, + F: Fn(T::Native, T::Native) -> Result, { let buffer = if null_bit_buffer.is_some() { let values = left.zip(right).map(|(left, right)| { if let (Some(l), Some(r)) = (left, right) { - if r.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(op(l, r)) - } + op(l, r) } else { Ok(T::default_value()) } @@ -179,15 +132,10 @@ where unsafe { Buffer::try_from_trusted_len_iter(values) } } else { // no value is null - let values = left.map(|l| l.unwrap()).zip(right.map(|r| r.unwrap())).map( - |(left, right)| { - if right.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(op(left, right)) - } - }, - ); + let values = left + .map(|l| l.unwrap()) + .zip(right.map(|r| r.unwrap())) + .map(|(left, right)| op(left, right)); // Safety: Iterator comes from a PrimitiveArray which reports its size correctly unsafe { Buffer::try_from_trusted_len_iter(values) } }?; @@ -654,7 +602,7 @@ where K: ArrowNumericType, T: ArrowNumericType, T::Native: One + Zero, - F: Fn(T::Native, T::Native) -> T::Native, + F: Fn(T::Native, T::Native) -> Result, { if left.len() != right.len() { return Err(ArrowError::ComputeError(format!( @@ -725,7 +673,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.add_checked(b)) + try_binary(left, right, |a, b| a.add_checked(b)) } /// Perform `left + right` operation on two arrays. 
If either left or right value is null @@ -826,11 +774,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary(array, |value| { - value.add_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!("Overflow: adding {:?} to {:?}", scalar, value)) - }) - }) + try_unary(array, |value| value.add_checked(scalar)) } /// Add every value in an array by a scalar. If any value in the array is null then the @@ -863,12 +807,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.add_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!("Overflow: adding {:?} to {:?}", scalar, value)) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.add_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -900,7 +840,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.sub_checked(b)) + try_binary(left, right, |a, b| a.sub_checked(b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -953,14 +893,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp + Zero, { - try_unary(array, |value| { - value.sub_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: subtracting {:?} from {:?}", - scalar, value - )) - }) - }) + try_unary(array, |value| value.sub_checked(scalar)) } /// Subtract every value in an array by a scalar. If any value in the array is null then the @@ -991,15 +924,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.sub_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: subtracting {:?} from {:?}", - scalar, value - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.sub_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `-` operation on an array. If value is null then the result is also null. @@ -1052,7 +978,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.mul_checked(b)) + try_binary(left, right, |a, b| a.mul_checked(b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1105,14 +1031,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp + Zero + One, { - try_unary(array, |value| { - value.mul_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: multiplying {:?} by {:?}", - value, scalar, - )) - }) - }) + try_unary(array, |value| value.mul_checked(scalar)) } /// Multiply every value in an array by a scalar. If any value in the array is null then the @@ -1143,15 +1062,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.mul_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: multiplying {:?} by {:?}", - value, scalar - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.mul_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `left % right` operation on two arrays. 
If either left or right value is null @@ -1170,7 +1082,13 @@ where a % b }); #[cfg(not(feature = "simd"))] - return math_checked_divide_op(left, right, |a, b| Some(a % b)); + return try_binary(left, right, |a, b| { + if b.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(a % b) + } + }); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1225,12 +1143,17 @@ where pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { - typed_dict_math_op!(left, right, |a, b| a / b, math_divide_checked_op_dict) + typed_dict_math_op!( + left, + right, + |a, b| a.div_checked(b), + math_divide_checked_op_dict + ) } _ => { downcast_primitive_array!( (left, right) => { - math_checked_divide_op(left, right, |a, b| Some(a / b)).map(|a| Arc::new(a) as ArrayRef) + math_checked_divide_op(left, right, |a, b| a.div_checked(b)).map(|a| Arc::new(a) as ArrayRef) } _ => Err(ArrowError::CastError(format!( "Unsupported data type {}, {}", @@ -1331,15 +1254,8 @@ where return Err(ArrowError::DivideByZero); } - try_unary_dyn::<_, T>(array, |value| { - value.div_checked(divisor).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: dividing {:?} by {:?}", - value, divisor - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.div_checked(divisor)) + .map(|a| Arc::new(a) as ArrayRef) } #[cfg(test)] @@ -2134,23 +2050,41 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_by_zero_with_checked() { + fn test_int_array_divide_by_zero_with_checked() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide_checked(&a, &b).unwrap(); } + #[test] + #[should_panic(expected = "DivideByZero")] + fn test_f32_array_divide_by_zero_with_checked() { + let a = Float32Array::from(vec![15.0]); + let b = Float32Array::from(vec![0.0]); + divide_checked(&a, &b).unwrap(); + } + #[test] #[should_panic(expected = "attempt to divide by zero")] - fn test_primitive_array_divide_by_zero() { + fn test_int_array_divide_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide(&a, &b).unwrap(); } + #[test] + fn test_f32_array_divide_by_zero() { + let a = Float32Array::from(vec![1.5, 0.0, -1.5]); + let b = Float32Array::from(vec![0.0, 0.0, 0.0]); + let result = divide(&a, &b).unwrap(); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); + } + #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_dyn_by_zero() { + fn test_int_array_divide_dyn_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide_dyn(&a, &b).unwrap(); @@ -2158,7 +2092,15 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_dyn_by_zero_dict() { + fn test_f32_array_divide_dyn_by_zero() { + let a = Float32Array::from(vec![1.5]); + let b = Float32Array::from(vec![0.0]); + divide_dyn(&a, &b).unwrap(); + } + + #[test] + #[should_panic(expected = "DivideByZero")] + fn test_int_array_divide_dyn_by_zero_dict() { let mut builder = PrimitiveDictionaryBuilder::::with_capacity(1, 1); builder.append(15).unwrap(); @@ -2174,14 +2116,38 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_modulus_by_zero() { + fn test_f32_dict_array_divide_dyn_by_zero() { + let mut builder = + 
PrimitiveDictionaryBuilder::<Int8Type, Float32Type>::with_capacity(1, 1);
+        builder.append(1.5).unwrap();
+        let a = builder.finish();
+
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Float32Type>::with_capacity(1, 1);
+        builder.append(0.0).unwrap();
+        let b = builder.finish();
+
+        divide_dyn(&a, &b).unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "DivideByZero")]
+    fn test_i32_array_modulus_by_zero() {
         let a = Int32Array::from(vec![15]);
         let b = Int32Array::from(vec![0]);
         modulus(&a, &b).unwrap();
     }
 
     #[test]
-    fn test_primitive_array_divide_f64() {
+    #[should_panic(expected = "DivideByZero")]
+    fn test_f32_array_modulus_by_zero() {
+        let a = Float32Array::from(vec![1.5]);
+        let b = Float32Array::from(vec![0.0]);
+        modulus(&a, &b).unwrap();
+    }
+
+    #[test]
+    fn test_f64_array_divide() {
         let a = Float64Array::from(vec![15.0, 15.0, 8.0]);
         let b = Float64Array::from(vec![5.0, 6.0, 8.0]);
         let c = divide(&a, &b).unwrap();
diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index 21c633116ee..5060234c71b 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -261,9 +261,10 @@ where
 ///
 /// Like [`try_unary`] the function is only evaluated for non-null indices
 ///
-/// # Panic
+/// # Errors
 ///
-/// Panics if the arrays have different lengths
+/// Returns an error if the arrays have different lengths or
+/// if the operation itself errors
 pub fn try_binary<A, B, F, O>(
     a: &PrimitiveArray<A>,
     b: &PrimitiveArray<B>,
@@ -275,13 +276,16 @@ where
     O: ArrowPrimitiveType,
     F: Fn(A::Native, B::Native) -> Result<O::Native>,
 {
-    assert_eq!(a.len(), b.len());
-    let len = a.len();
-
+    if a.len() != b.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform a binary operation on arrays of different length".to_string(),
+        ));
+    }
     if a.is_empty() {
         return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
     }
 
+    let len = a.len();
     let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
     let null_count = null_buffer
         .as_ref()
diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs
index de35c4804fa..dec0cc4b53b 100644
--- a/arrow/src/datatypes/native.rs
+++ b/arrow/src/datatypes/native.rs
@@ -16,8 +16,10 @@
 // under the License.
 
 use super::DataType;
+use crate::error::{ArrowError, Result};
 pub use arrow_buffer::{ArrowNativeType, ToByteSlice};
 use half::f16;
+use num::Zero;
 
 /// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
 /// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
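A minimal sketch (not part of the patch) of the behavior these `Result`-based checked ops encode, assuming the `divide` and `divide_checked` kernels re-exported from `arrow::compute`: division by zero is an error in both kernels for integers, but only in the checked kernel for floats.

```rust
use arrow::array::{Float32Array, Int32Array};
use arrow::compute::{divide, divide_checked};

fn main() {
    // Integer checked division by zero surfaces ArrowError::DivideByZero
    // rather than panicking inside the kernel.
    let a = Int32Array::from(vec![15]);
    let b = Int32Array::from(vec![0]);
    assert!(divide_checked(&a, &b).is_err());

    // Unchecked float division follows IEEE 754 and yields infinities / NaN...
    let x = Float32Array::from(vec![1.5, 0.0]);
    let y = Float32Array::from(vec![0.0, 0.0]);
    let d = divide(&x, &y).unwrap();
    assert_eq!(d.value(0), f32::INFINITY);
    assert!(d.value(1).is_nan());

    // ...while the checked variant reports DivideByZero for floats as well,
    // matching test_f32_array_divide_by_zero_with_checked above.
    assert!(divide_checked(&x, &y).is_err());
}
```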
@@ -43,6 +45,8 @@ pub trait ArrowPrimitiveType: 'static {
 
 pub(crate) mod native_op {
     use super::ArrowNativeType;
+    use crate::error::{ArrowError, Result};
+    use num::Zero;
     use std::ops::{Add, Div, Mul, Sub};
 
     /// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking
@@ -61,33 +65,38 @@
         + Sub<Output = Self>
         + Mul<Output = Self>
         + Div<Output = Self>
+        + Zero
     {
-        fn add_checked(self, rhs: Self) -> Option<Self> {
-            Some(self + rhs)
+        fn add_checked(self, rhs: Self) -> Result<Self> {
+            Ok(self + rhs)
         }
 
         fn add_wrapping(self, rhs: Self) -> Self {
             self + rhs
         }
 
-        fn sub_checked(self, rhs: Self) -> Option<Self> {
-            Some(self - rhs)
+        fn sub_checked(self, rhs: Self) -> Result<Self> {
+            Ok(self - rhs)
         }
 
         fn sub_wrapping(self, rhs: Self) -> Self {
             self - rhs
         }
 
-        fn mul_checked(self, rhs: Self) -> Option<Self> {
-            Some(self * rhs)
+        fn mul_checked(self, rhs: Self) -> Result<Self> {
+            Ok(self * rhs)
        }
 
         fn mul_wrapping(self, rhs: Self) -> Self {
             self * rhs
         }
 
-        fn div_checked(self, rhs: Self) -> Option<Self> {
-            Some(self / rhs)
+        fn div_checked(self, rhs: Self) -> Result<Self> {
+            if rhs.is_zero() {
+                Err(ArrowError::DivideByZero)
+            } else {
+                Ok(self / rhs)
+            }
         }
 
         fn div_wrapping(self, rhs: Self) -> Self {
@@ -99,32 +108,56 @@ pub(crate) mod native_op {
 macro_rules! native_type_op {
     ($t:tt) => {
         impl native_op::ArrowNativeTypeOp for $t {
-            fn add_checked(self, rhs: Self) -> Option<Self> {
-                self.checked_add(rhs)
+            fn add_checked(self, rhs: Self) -> Result<Self> {
+                self.checked_add(rhs).ok_or_else(|| {
+                    ArrowError::ComputeError(format!(
+                        "Overflow happened on: {:?} + {:?}",
+                        self, rhs
+                    ))
+                })
             }
 
             fn add_wrapping(self, rhs: Self) -> Self {
                 self.wrapping_add(rhs)
             }
 
-            fn sub_checked(self, rhs: Self) -> Option<Self> {
-                self.checked_sub(rhs)
+            fn sub_checked(self, rhs: Self) -> Result<Self> {
+                self.checked_sub(rhs).ok_or_else(|| {
+                    ArrowError::ComputeError(format!(
+                        "Overflow happened on: {:?} - {:?}",
+                        self, rhs
+                    ))
+                })
             }
 
             fn sub_wrapping(self, rhs: Self) -> Self {
                 self.wrapping_sub(rhs)
             }
 
-            fn mul_checked(self, rhs: Self) -> Option<Self> {
-                self.checked_mul(rhs)
+            fn mul_checked(self, rhs: Self) -> Result<Self> {
+                self.checked_mul(rhs).ok_or_else(|| {
+                    ArrowError::ComputeError(format!(
+                        "Overflow happened on: {:?} * {:?}",
+                        self, rhs
+                    ))
+                })
             }
 
             fn mul_wrapping(self, rhs: Self) -> Self {
                 self.wrapping_mul(rhs)
             }
 
-            fn div_checked(self, rhs: Self) -> Option<Self> {
-                self.checked_div(rhs)
+            fn div_checked(self, rhs: Self) -> Result<Self> {
+                if rhs.is_zero() {
+                    Err(ArrowError::DivideByZero)
+                } else {
+                    self.checked_div(rhs).ok_or_else(|| {
+                        ArrowError::ComputeError(format!(
+                            "Overflow happened on: {:?} / {:?}",
+                            self, rhs
+                        ))
+                    })
+                }
             }
 
             fn div_wrapping(self, rhs: Self) -> Self {
@@ -138,6 +171,7 @@ native_type_op!(i8);
 native_type_op!(i16);
 native_type_op!(i32);
 native_type_op!(i64);
+native_type_op!(i128);
 native_type_op!(u8);
 native_type_op!(u16);
 native_type_op!(u32);

From 968a7673c7e1341431bc4d55a4f50e9fa6aff7d5 Mon Sep 17 00:00:00 2001
From: Daniël Heres
Date: Fri, 16 Sep 2022 14:52:48 +0200
Subject: [PATCH 08/16] Speedup string equal/not equal to empty string,
 cleanup like/ilike kernels, fix escape bug (#2743)

* Speedup string == ""
* neq too
* Simplify kernels
* Simplify kernels
* Fix test
* Escape contains
* Fmt
* Fix
---
 arrow/benches/equal.rs                  |  10 ++
 arrow/src/compute/kernels/comparison.rs | 216 +++++++++---------------
 2 files changed, 89 insertions(+), 137 deletions(-)

diff --git a/arrow/benches/equal.rs b/arrow/benches/equal.rs
index af535506e86..f54aff1b5cc 100644
--- a/arrow/benches/equal.rs
+++ b/arrow/benches/equal.rs
@@ -20,6 +20,7 @@
#[macro_use] extern crate criterion; +use arrow::compute::eq_utf8_scalar; use criterion::Criterion; extern crate arrow; @@ -31,6 +32,10 @@ fn bench_equal>(arr_a: &A) { criterion::black_box(arr_a == arr_a); } +fn bench_equal_utf8_scalar(arr_a: &GenericStringArray, right: &str) { + criterion::black_box(eq_utf8_scalar(arr_a, right).unwrap()); +} + fn add_benchmark(c: &mut Criterion) { let arr_a = create_primitive_array::(512, 0.0); c.bench_function("equal_512", |b| b.iter(|| bench_equal(&arr_a))); @@ -41,6 +46,11 @@ fn add_benchmark(c: &mut Criterion) { let arr_a = create_string_array::(512, 0.0); c.bench_function("equal_string_512", |b| b.iter(|| bench_equal(&arr_a))); + let arr_a = create_string_array::(512, 0.0); + c.bench_function("equal_string_scalar_empty_512", |b| { + b.iter(|| bench_equal_utf8_scalar(&arr_a, "")) + }); + let arr_a_nulls = create_string_array::(512, 0.5); c.bench_function("equal_string_nulls_512", |b| { b.iter(|| bench_equal(&arr_a_nulls)) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 5a79c2e82df..d4eb5a3e1d2 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -233,61 +233,35 @@ pub fn like_utf8( } #[inline] -fn like_scalar<'a, L: ArrayAccessor>( +fn like_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor>( left: L, right: &str, + op: F, ) -> Result { - let null_bit_buffer = left.data().null_buffer().cloned(); - let bytes = bit_util::ceil(left.len(), 8); - let mut bool_buf = MutableBuffer::from_len_zeroed(bytes); - let bool_slice = bool_buf.as_slice_mut(); - if !right.contains(is_like_pattern) { // fast path, can use equals - for i in 0..left.len() { - unsafe { - if left.value_unchecked(i) == right { - bit_util::set_bit(bool_slice, i); - } - } - } + compare_op_scalar(left, |item| op(item == right)) } else if right.ends_with('%') && !right.ends_with("\\%") && !right[..right.len() - 1].contains(is_like_pattern) { // fast path, can use starts_with let starts_with = &right[..right.len() - 1]; - for i in 0..left.len() { - unsafe { - if left.value_unchecked(i).starts_with(starts_with) { - bit_util::set_bit(bool_slice, i); - } - } - } + + compare_op_scalar(left, |item| op(item.starts_with(starts_with))) } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { // fast path, can use ends_with let ends_with = &right[1..]; - for i in 0..left.len() { - unsafe { - if left.value_unchecked(i).ends_with(ends_with) { - bit_util::set_bit(bool_slice, i); - } - } - } + compare_op_scalar(left, |item| op(item.ends_with(ends_with))) } else if right.starts_with('%') && right.ends_with('%') + && !right.ends_with("\\%") && !right[1..right.len() - 1].contains(is_like_pattern) { - // fast path, can use contains let contains = &right[1..right.len() - 1]; - for i in 0..left.len() { - unsafe { - if left.value_unchecked(i).contains(contains) { - bit_util::set_bit(bool_slice, i); - } - } - } + + compare_op_scalar(left, |item| op(item.contains(contains))) } else { let re_pattern = replace_like_wildcards(right)?; let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| { @@ -297,26 +271,16 @@ fn like_scalar<'a, L: ArrayAccessor>( )) })?; - for i in 0..left.len() { - let haystack = unsafe { left.value_unchecked(i) }; - if re.is_match(haystack) { - bit_util::set_bit(bool_slice, i); - } - } - }; + compare_op_scalar(left, |item| op(re.is_match(item))) + } +} - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - left.len(), - None, - null_bit_buffer, - 0, - 
vec![bool_buf.into()], - vec![], - ) - }; - Ok(BooleanArray::from(data)) +#[inline] +fn like_scalar<'a, L: ArrayAccessor>( + left: L, + right: &str, +) -> Result { + like_scalar_op(left, right, |x| x) } /// Perform SQL `left LIKE right` operation on [`StringArray`] / @@ -415,86 +379,7 @@ fn nlike_scalar<'a, L: ArrayAccessor>( left: L, right: &str, ) -> Result { - let null_bit_buffer = left.data().null_buffer().cloned(); - let bytes = bit_util::ceil(left.len(), 8); - let mut bool_buf = MutableBuffer::from_len_zeroed(bytes); - let bool_slice = bool_buf.as_slice_mut(); - - if !right.contains(is_like_pattern) { - // fast path, can use equals - for i in 0..left.len() { - unsafe { - if left.value_unchecked(i) != right { - bit_util::set_bit(bool_slice, i); - } - } - } - } else if right.ends_with('%') - && !right.ends_with("\\%") - && !right[..right.len() - 1].contains(is_like_pattern) - { - // fast path, can use starts_with - let starts_with = &right[..right.len() - 1]; - for i in 0..left.len() { - unsafe { - if !(left.value_unchecked(i).starts_with(starts_with)) { - bit_util::set_bit(bool_slice, i); - } - } - } - } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { - // fast path, can use ends_with - let ends_with = &right[1..]; - - for i in 0..left.len() { - unsafe { - if !(left.value_unchecked(i).ends_with(ends_with)) { - bit_util::set_bit(bool_slice, i); - } - } - } - } else if right.starts_with('%') - && right.ends_with('%') - && !right[1..right.len() - 1].contains(is_like_pattern) - { - // fast path, can use contains - let contains = &right[1..right.len() - 1]; - for i in 0..left.len() { - unsafe { - if !(left.value_unchecked(i).contains(contains)) { - bit_util::set_bit(bool_slice, i); - } - } - } - } else { - let re_pattern = replace_like_wildcards(right)?; - let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| { - ArrowError::ComputeError(format!( - "Unable to build regex from LIKE pattern: {}", - e - )) - })?; - - for i in 0..left.len() { - let haystack = unsafe { left.value_unchecked(i) }; - if !re.is_match(haystack) { - bit_util::set_bit(bool_slice, i); - } - } - }; - - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - left.len(), - None, - null_bit_buffer, - 0, - vec![bool_buf.into()], - vec![], - ) - }; - Ok(BooleanArray::from(data)) + like_scalar_op(left, right, |x| !x) } /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] / @@ -966,11 +851,48 @@ pub fn eq_utf8( compare_op(left, right, |a, b| a == b) } +fn utf8_empty( + left: &GenericStringArray, +) -> Result { + let null_bit_buffer = left + .data() + .null_buffer() + .map(|b| b.bit_slice(left.offset(), left.len())); + + let buffer = unsafe { + MutableBuffer::from_trusted_len_iter_bool(left.value_offsets().windows(2).map( + |offset| { + if EQ { + offset[1].to_usize().unwrap() == offset[0].to_usize().unwrap() + } else { + offset[1].to_usize().unwrap() > offset[0].to_usize().unwrap() + } + }, + )) + }; + + let data = unsafe { + ArrayData::new_unchecked( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + 0, + vec![Buffer::from(buffer)], + vec![], + ) + }; + Ok(BooleanArray::from(data)) +} + /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar. 
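+///
+/// If `right` is the empty string, this takes a fast path (`utf8_empty`
+/// above) that just checks whether each value's start and end offsets are
+/// equal, i.e. whether the value is itself empty, instead of comparing
+/// string contents.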
pub fn eq_utf8_scalar( left: &GenericStringArray, right: &str, ) -> Result { + if right.is_empty() { + return utf8_empty::<_, true>(left); + } compare_op_scalar(left, |a| a == right) } @@ -1167,6 +1089,9 @@ pub fn neq_utf8_scalar( left: &GenericStringArray, right: &str, ) -> Result { + if right.is_empty() { + return utf8_empty::<_, false>(left); + } compare_op_scalar(left, |a| a != right) } @@ -4324,13 +4249,22 @@ mod tests { #[test] fn test_utf8_eq_scalar_on_slice() { - let a = StringArray::from(vec![Some("hi"), None, Some("hello"), Some("world")]); - let a = a.slice(1, 3); + let a = StringArray::from( + vec![Some("hi"), None, Some("hello"), Some("world"), Some("")], + ); + let a = a.slice(1, 4); let a = as_string_array(&a); let a_eq = eq_utf8_scalar(a, "hello").unwrap(); assert_eq!( a_eq, - BooleanArray::from(vec![None, Some(true), Some(false)]) + BooleanArray::from(vec![None, Some(true), Some(false), Some(false)]) + ); + + let a_eq2 = eq_utf8_scalar(a, "").unwrap(); + + assert_eq!( + a_eq2, + BooleanArray::from(vec![None, Some(false), Some(false), Some(true)]) ); } @@ -4528,6 +4462,14 @@ mod tests { vec![true, false] ); + test_utf8_scalar!( + test_utf8_scalar_like_escape_contains, + vec!["ba%", "ba\\x"], + "%a\\%", + like_utf8_scalar, + vec![true, false] + ); + test_utf8!( test_utf8_scalar_ilike_regex, vec!["%%%"], From 1da2bfbc82de12ac6fb699d2579d4a129929e004 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Fri, 16 Sep 2022 12:23:29 -0400 Subject: [PATCH 09/16] Update version to `23.0.0` and update `CHANGELOG`, add `label_issue.py` script (#2734) * feature complete * fix footer issue * fix duplicate changelog issue * use tac instead of head for head -n - is not universal * adjust blank lines * fix footer dropping * line adj * add .bak2 to gitignore * Create changelog * Update version * Add initial relabeling script * more script * tweaks * Runnable as a script * Update changelog * updates * remove overzealous api change labeling Co-authored-by: Andrew Lamb --- CHANGELOG-old.md | 115 +++++++++++- CHANGELOG.md | 183 +++++++++---------- arrow-flight/Cargo.toml | 4 +- arrow-flight/README.md | 2 +- arrow-pyarrow-integration-testing/Cargo.toml | 4 +- arrow/Cargo.toml | 2 +- arrow/README.md | 4 +- dev/release/README.md | 2 +- dev/release/label_issues.py | 153 ++++++++++++++++ integration-testing/Cargo.toml | 2 +- parquet/Cargo.toml | 6 +- parquet_derive/Cargo.toml | 4 +- parquet_derive/README.md | 4 +- parquet_derive_test/Cargo.toml | 6 +- 14 files changed, 373 insertions(+), 118 deletions(-) create mode 100755 dev/release/label_issues.py diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md index 70322b5cfd1..02cb7ec2449 100644 --- a/CHANGELOG-old.md +++ b/CHANGELOG-old.md @@ -17,9 +17,122 @@ under the License. 
--> - # Historical Changelog +## [22.0.0](https://github.com/apache/arrow-rs/tree/22.0.0) (2022-09-02) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/21.0.0...22.0.0) + +**Breaking changes:** + +- Use `total_cmp` for floating value ordering and remove `nan_ordering` feature flag [\#2614](https://github.com/apache/arrow-rs/pull/2614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Gate dyn comparison of dictionary arrays behind `dyn_cmp_dict` [\#2597](https://github.com/apache/arrow-rs/pull/2597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Move JsonSerializable to json module \(\#2300\) [\#2595](https://github.com/apache/arrow-rs/pull/2595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Decimal precision scale datatype change [\#2532](https://github.com/apache/arrow-rs/pull/2532) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Refactor PrimitiveBuilder Constructors [\#2518](https://github.com/apache/arrow-rs/pull/2518) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Refactoring DecimalBuilder constructors [\#2517](https://github.com/apache/arrow-rs/pull/2517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Refactor FixedSizeBinaryBuilder Constructors [\#2516](https://github.com/apache/arrow-rs/pull/2516) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Refactor BooleanBuilder Constructors [\#2515](https://github.com/apache/arrow-rs/pull/2515) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Refactor UnionBuilder Constructors [\#2488](https://github.com/apache/arrow-rs/pull/2488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) + +**Implemented enhancements:** + +- Add Macros to assist with static dispatch [\#2635](https://github.com/apache/arrow-rs/issues/2635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support comparison between DictionaryArray and BooleanArray [\#2617](https://github.com/apache/arrow-rs/issues/2617) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use `total_cmp` for floating value ordering and remove `nan_ordering` feature flag [\#2613](https://github.com/apache/arrow-rs/issues/2613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support empty projection in CSV, JSON readers [\#2603](https://github.com/apache/arrow-rs/issues/2603) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support SQL-compliant NaN ordering between for DictionaryArray and non-DictionaryArray [\#2599](https://github.com/apache/arrow-rs/issues/2599) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `dyn_cmp_dict` feature flag to gate dyn comparison of dictionary arrays [\#2596](https://github.com/apache/arrow-rs/issues/2596) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add max\_dyn and min\_dyn for max/min for dictionary array [\#2584](https://github.com/apache/arrow-rs/issues/2584) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Allow 
FlightSQL implementers to extend `do_get()` [\#2581](https://github.com/apache/arrow-rs/issues/2581) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Support SQL-compliant behavior on `eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#2569](https://github.com/apache/arrow-rs/issues/2569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add sql-compliant feature for enabling sql-compliant kernel behavior [\#2568](https://github.com/apache/arrow-rs/issues/2568) +- Calculate `sum` for dictionary array [\#2565](https://github.com/apache/arrow-rs/issues/2565) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add test for float nan comparison [\#2556](https://github.com/apache/arrow-rs/issues/2556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Compare dictionary with string array [\#2548](https://github.com/apache/arrow-rs/issues/2548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Compare dictionary with primitive array in `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#2538](https://github.com/apache/arrow-rs/issues/2538) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Compare dictionary with primitive array in `eq_dyn` and `neq_dyn` [\#2535](https://github.com/apache/arrow-rs/issues/2535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- UnionBuilder Create Children With Capacity [\#2523](https://github.com/apache/arrow-rs/issues/2523) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Speed up `like_utf8_scalar` for `%pat%` [\#2519](https://github.com/apache/arrow-rs/issues/2519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Replace macro with TypedDictionaryArray in comparison kernels [\#2513](https://github.com/apache/arrow-rs/issues/2513) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use same codebase for boolean kernels [\#2507](https://github.com/apache/arrow-rs/issues/2507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use u8 for Decimal Precision and Scale [\#2496](https://github.com/apache/arrow-rs/issues/2496) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Integrate skip row without pageIndex in SerializedPageReader in Fuzz Test [\#2475](https://github.com/apache/arrow-rs/issues/2475) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Avoid unecessary copies in Arrow IPC reader [\#2437](https://github.com/apache/arrow-rs/issues/2437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add GenericColumnReader::skip\_records Missing OffsetIndex Fallback [\#2433](https://github.com/apache/arrow-rs/issues/2433) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support Reading PageIndex with ParquetRecordBatchStream [\#2430](https://github.com/apache/arrow-rs/issues/2430) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Specialize FixedLenByteArrayReader for Parquet [\#2318](https://github.com/apache/arrow-rs/issues/2318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Make JSON support Optional via Feature Flag [\#2300](https://github.com/apache/arrow-rs/issues/2300) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Fixed bugs:** + +- Casting timestamp array to string should not ignore timezone [\#2607](https://github.com/apache/arrow-rs/issues/2607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Ilike\_ut8\_scalar kernals have incorrect logic 
[\#2544](https://github.com/apache/arrow-rs/issues/2544) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Always validate the array data when creating array in IPC reader [\#2541](https://github.com/apache/arrow-rs/issues/2541) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Int96Converter Truncates Timestamps [\#2480](https://github.com/apache/arrow-rs/issues/2480) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Error Reading Page Index When Not Available [\#2434](https://github.com/apache/arrow-rs/issues/2434) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `ParquetFileArrowReader::get_record_reader[_by_colum]` `batch_size` overallocates [\#2321](https://github.com/apache/arrow-rs/issues/2321) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Documentation updates:** + +- Document All Arrow Features in docs.rs [\#2633](https://github.com/apache/arrow-rs/issues/2633) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Closed issues:** + +- Add support for CAST from `Interval(DayTime)` to `Timestamp(Nanosecond, None)` [\#2606](https://github.com/apache/arrow-rs/issues/2606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Why do we check for null in TypedDictionaryArray value function [\#2564](https://github.com/apache/arrow-rs/issues/2564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add the `length` field for `Buffer` [\#2524](https://github.com/apache/arrow-rs/issues/2524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Avoid large over allocate buffer in async reader [\#2512](https://github.com/apache/arrow-rs/issues/2512) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Rewriting Decimal Builders using `const_generic`. 
[\#2390](https://github.com/apache/arrow-rs/issues/2390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Rewrite Decimal Array using `const_generic` [\#2384](https://github.com/apache/arrow-rs/issues/2384) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Merged pull requests:** + +- Add downcast macros \(\#2635\) [\#2636](https://github.com/apache/arrow-rs/pull/2636) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Document all arrow features in docs.rs \(\#2633\) [\#2634](https://github.com/apache/arrow-rs/pull/2634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Document dyn\_cmp\_dict [\#2624](https://github.com/apache/arrow-rs/pull/2624) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support comparison between DictionaryArray and BooleanArray [\#2618](https://github.com/apache/arrow-rs/pull/2618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Cast timestamp array to string array with timezone [\#2608](https://github.com/apache/arrow-rs/pull/2608) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Support empty projection in CSV and JSON readers [\#2604](https://github.com/apache/arrow-rs/pull/2604) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Make JSON support optional via a feature flag \(\#2300\) [\#2601](https://github.com/apache/arrow-rs/pull/2601) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support SQL-compliant NaN ordering for DictionaryArray and non-DictionaryArray [\#2600](https://github.com/apache/arrow-rs/pull/2600) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Split out integration test plumbing \(\#2594\) \(\#2300\) [\#2598](https://github.com/apache/arrow-rs/pull/2598) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Refactor Binary Builder and String Builder Constructors [\#2592](https://github.com/apache/arrow-rs/pull/2592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Dictionary like scalar kernels [\#2591](https://github.com/apache/arrow-rs/pull/2591) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Validate dictionary key in TypedDictionaryArray \(\#2578\) [\#2589](https://github.com/apache/arrow-rs/pull/2589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add max\_dyn and min\_dyn for max/min for dictionary array [\#2585](https://github.com/apache/arrow-rs/pull/2585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Code cleanup of array value functions [\#2583](https://github.com/apache/arrow-rs/pull/2583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Allow overriding of do\_get & export useful macro [\#2582](https://github.com/apache/arrow-rs/pull/2582) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] 
([avantgardnerio](https://github.com/avantgardnerio)) +- MINOR: Upgrade to pyo3 0.17 [\#2576](https://github.com/apache/arrow-rs/pull/2576) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) +- Support SQL-compliant NaN behavior on eq\_dyn, neq\_dyn, lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn [\#2570](https://github.com/apache/arrow-rs/pull/2570) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add sum\_dyn to calculate sum for dictionary array [\#2566](https://github.com/apache/arrow-rs/pull/2566) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- struct UnionBuilder will create child buffers with capacity [\#2560](https://github.com/apache/arrow-rs/pull/2560) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kastolars](https://github.com/kastolars)) +- Don't panic on RleValueEncoder::flush\_buffer if empty \(\#2558\) [\#2559](https://github.com/apache/arrow-rs/pull/2559) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add the `length` field for Buffer and use more `Buffer` in IPC reader to avoid memory copy. [\#2557](https://github.com/apache/arrow-rs/pull/2557) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([HaoYang670](https://github.com/HaoYang670)) +- Add test for float nan comparison [\#2555](https://github.com/apache/arrow-rs/pull/2555) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Compare dictionary array with string array [\#2549](https://github.com/apache/arrow-rs/pull/2549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Always validate the array data \(except the `Decimal`\) when creating array in IPC reader [\#2547](https://github.com/apache/arrow-rs/pull/2547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: Fix test\_row\_type\_validation test [\#2546](https://github.com/apache/arrow-rs/pull/2546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix ilike\_utf8\_scalar kernels [\#2545](https://github.com/apache/arrow-rs/pull/2545) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- fix typo [\#2540](https://github.com/apache/arrow-rs/pull/2540) ([00Masato](https://github.com/00Masato)) +- Compare dictionary array and primitive array in lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn kernels [\#2539](https://github.com/apache/arrow-rs/pull/2539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- \[MINOR\] Avoid large over allocate buffer in async reader [\#2537](https://github.com/apache/arrow-rs/pull/2537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Compare dictionary with primitive array in `eq_dyn` and `neq_dyn` [\#2533](https://github.com/apache/arrow-rs/pull/2533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add iterator for FixedSizeBinaryArray [\#2531](https://github.com/apache/arrow-rs/pull/2531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- add bench: decimal with byte array and
fixed length byte array [\#2529](https://github.com/apache/arrow-rs/pull/2529) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([liukun4515](https://github.com/liukun4515)) +- Add FixedLengthByteArrayReader Remove ComplexObjectArrayReader [\#2528](https://github.com/apache/arrow-rs/pull/2528) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Split out byte array decoders \(\#2318\) [\#2527](https://github.com/apache/arrow-rs/pull/2527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Use offset index in ParquetRecordBatchStream [\#2526](https://github.com/apache/arrow-rs/pull/2526) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([thinkharderdev](https://github.com/thinkharderdev)) +- Clean the `create_array` in IPC reader. [\#2525](https://github.com/apache/arrow-rs/pull/2525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove DecimalByteArrayConvert \(\#2480\) [\#2522](https://github.com/apache/arrow-rs/pull/2522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Improve performance of `%pat%` \(\>3x speedup\) [\#2521](https://github.com/apache/arrow-rs/pull/2521) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- remove len field from MapBuilder [\#2520](https://github.com/apache/arrow-rs/pull/2520) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Replace macro with TypedDictionaryArray in comparison kernels [\#2514](https://github.com/apache/arrow-rs/pull/2514) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Avoid large over allocate buffer in sync reader [\#2511](https://github.com/apache/arrow-rs/pull/2511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Avoid useless memory copies in IPC reader. 
[\#2510](https://github.com/apache/arrow-rs/pull/2510) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Refactor boolean kernels to use same codebase [\#2508](https://github.com/apache/arrow-rs/pull/2508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Remove Int96Converter \(\#2480\) [\#2481](https://github.com/apache/arrow-rs/pull/2481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + ## [21.0.0](https://github.com/apache/arrow-rs/tree/21.0.0) (2022-08-18) [Full Changelog](https://github.com/apache/arrow-rs/compare/20.0.0...21.0.0) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69f2b8af6cf..4a063594dc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,119 +19,108 @@ # Changelog -## [22.0.0](https://github.com/apache/arrow-rs/tree/22.0.0) (2022-09-02) +## [23.0.0](https://github.com/apache/arrow-rs/tree/23.0.0) (2022-09-16) -[Full Changelog](https://github.com/apache/arrow-rs/compare/21.0.0...22.0.0) +[Full Changelog](https://github.com/apache/arrow-rs/compare/22.0.0...23.0.0) **Breaking changes:** -- Use `total_cmp` for floating value ordering and remove `nan_ordering` feature flag [\#2614](https://github.com/apache/arrow-rs/pull/2614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Gate dyn comparison of dictionary arrays behind `dyn_cmp_dict` [\#2597](https://github.com/apache/arrow-rs/pull/2597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Move JsonSerializable to json module \(\#2300\) [\#2595](https://github.com/apache/arrow-rs/pull/2595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Decimal precision scale datatype change [\#2532](https://github.com/apache/arrow-rs/pull/2532) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Refactor PrimitiveBuilder Constructors [\#2518](https://github.com/apache/arrow-rs/pull/2518) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Refactoring DecimalBuilder constructors [\#2517](https://github.com/apache/arrow-rs/pull/2517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Refactor FixedSizeBinaryBuilder Constructors [\#2516](https://github.com/apache/arrow-rs/pull/2516) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Refactor BooleanBuilder Constructors [\#2515](https://github.com/apache/arrow-rs/pull/2515) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Refactor UnionBuilder Constructors [\#2488](https://github.com/apache/arrow-rs/pull/2488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Move JSON Test Format To integration-testing [\#2724](https://github.com/apache/arrow-rs/pull/2724) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Split out arrow-buffer crate \(\#2594\) [\#2693](https://github.com/apache/arrow-rs/pull/2693) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Simplify DictionaryBuilder constructors \(\#2684\) \(\#2054\) [\#2685](https://github.com/apache/arrow-rs/pull/2685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Deprecate RecordBatch::concat replace with concat\_batches \(\#2594\) [\#2683](https://github.com/apache/arrow-rs/pull/2683) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add overflow-checking variant for primitive arithmetic kernels and explicitly define overflow behavior [\#2643](https://github.com/apache/arrow-rs/pull/2643) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Update thrift v0.16 and vendor parquet-format \(\#2502\) [\#2626](https://github.com/apache/arrow-rs/pull/2626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Update flight definitions including backwards-incompatible change to GetSchema [\#2586](https://github.com/apache/arrow-rs/pull/2586) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([liukun4515](https://github.com/liukun4515)) **Implemented enhancements:** -- Add Macros to assist with static dispatch [\#2635](https://github.com/apache/arrow-rs/issues/2635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support comparison between DictionaryArray and BooleanArray [\#2617](https://github.com/apache/arrow-rs/issues/2617) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Use `total_cmp` for floating value ordering and remove `nan_ordering` feature flag [\#2613](https://github.com/apache/arrow-rs/issues/2613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support empty projection in CSV, JSON readers [\#2603](https://github.com/apache/arrow-rs/issues/2603) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support SQL-compliant NaN ordering between for DictionaryArray and non-DictionaryArray [\#2599](https://github.com/apache/arrow-rs/issues/2599) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `dyn_cmp_dict` feature flag to gate dyn comparison of dictionary arrays [\#2596](https://github.com/apache/arrow-rs/issues/2596) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add max\_dyn and min\_dyn for max/min for dictionary array [\#2584](https://github.com/apache/arrow-rs/issues/2584) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Allow FlightSQL implementers to extend `do_get()` [\#2581](https://github.com/apache/arrow-rs/issues/2581) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] -- Support SQL-compliant behavior on `eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#2569](https://github.com/apache/arrow-rs/issues/2569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add sql-compliant feature for enabling sql-compliant kernel behavior [\#2568](https://github.com/apache/arrow-rs/issues/2568) -- Calculate `sum` for dictionary array [\#2565](https://github.com/apache/arrow-rs/issues/2565) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add test for float nan comparison [\#2556](https://github.com/apache/arrow-rs/issues/2556) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Compare dictionary with string array [\#2548](https://github.com/apache/arrow-rs/issues/2548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Compare dictionary with primitive array in `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#2538](https://github.com/apache/arrow-rs/issues/2538) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Compare dictionary with primitive array in `eq_dyn` and `neq_dyn` [\#2535](https://github.com/apache/arrow-rs/issues/2535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- UnionBuilder Create Children With Capacity [\#2523](https://github.com/apache/arrow-rs/issues/2523) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Speed up `like_utf8_scalar` for `%pat%` [\#2519](https://github.com/apache/arrow-rs/issues/2519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Replace macro with TypedDictionaryArray in comparison kernels [\#2513](https://github.com/apache/arrow-rs/issues/2513) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Use same codebase for boolean kernels [\#2507](https://github.com/apache/arrow-rs/issues/2507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Use u8 for Decimal Precision and Scale [\#2496](https://github.com/apache/arrow-rs/issues/2496) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Integrate skip row without pageIndex in SerializedPageReader in Fuzz Test [\#2475](https://github.com/apache/arrow-rs/issues/2475) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Avoid unecessary copies in Arrow IPC reader [\#2437](https://github.com/apache/arrow-rs/issues/2437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add GenericColumnReader::skip\_records Missing OffsetIndex Fallback [\#2433](https://github.com/apache/arrow-rs/issues/2433) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Support Reading PageIndex with ParquetRecordBatchStream [\#2430](https://github.com/apache/arrow-rs/issues/2430) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Specialize FixedLenByteArrayReader for Parquet [\#2318](https://github.com/apache/arrow-rs/issues/2318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Make JSON support Optional via Feature Flag [\#2300](https://github.com/apache/arrow-rs/issues/2300) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Cleanup like and nlike utf8 kernels [\#2744](https://github.com/apache/arrow-rs/issues/2744) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Speedup eq and neq kernels for utf8 arrays [\#2742](https://github.com/apache/arrow-rs/issues/2742) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- API for more ergonomic construction of `RecordBatchOptions` [\#2728](https://github.com/apache/arrow-rs/issues/2728) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Automate updates to `CHANGELOG-old.md` [\#2726](https://github.com/apache/arrow-rs/issues/2726) +- Don't check the `DivideByZero` error for float modulus [\#2720](https://github.com/apache/arrow-rs/issues/2720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `try_binary` should not panic on unequal array lengths.
[\#2715](https://github.com/apache/arrow-rs/issues/2715) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add benchmark for bitwise operation [\#2714](https://github.com/apache/arrow-rs/issues/2714) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add overflow-checking variants of arithmetic scalar dyn kernels [\#2712](https://github.com/apache/arrow-rs/issues/2712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add divide\_opt kernel which produces null values on division by zero error [\#2709](https://github.com/apache/arrow-rs/issues/2709) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `DataType` function to detect nested types [\#2704](https://github.com/apache/arrow-rs/issues/2704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support of sorting dictionary of other primitive types [\#2700](https://github.com/apache/arrow-rs/issues/2700) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Sort indices of dictionary string values [\#2697](https://github.com/apache/arrow-rs/issues/2697) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support empty projection in `RecordBatch::project` [\#2690](https://github.com/apache/arrow-rs/issues/2690) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support sorting dictionary encoded primitive integer arrays [\#2679](https://github.com/apache/arrow-rs/issues/2679) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use BitIndexIterator in min\_max\_helper [\#2674](https://github.com/apache/arrow-rs/issues/2674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support building comparator for dictionaries of primitive integer values [\#2672](https://github.com/apache/arrow-rs/issues/2672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Change max/min string macro to generic helper function `min_max_helper` [\#2657](https://github.com/apache/arrow-rs/issues/2657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add overflow-checking variant of arithmetic scalar kernels [\#2651](https://github.com/apache/arrow-rs/issues/2651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Compare dictionary with binary array [\#2644](https://github.com/apache/arrow-rs/issues/2644) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add overflow-checking variant for primitive arithmetic kernels [\#2642](https://github.com/apache/arrow-rs/issues/2642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use `downcast_primitive_array` in arithmetic kernels [\#2639](https://github.com/apache/arrow-rs/issues/2639) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support DictionaryArray in temporal kernels [\#2622](https://github.com/apache/arrow-rs/issues/2622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Inline Generated Thrift Code Into Parquet Crate [\#2502](https://github.com/apache/arrow-rs/issues/2502) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] **Fixed bugs:** -- Casting timestamp array to string should not ignore timezone [\#2607](https://github.com/apache/arrow-rs/issues/2607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Ilike\_ut8\_scalar kernals have incorrect logic [\#2544](https://github.com/apache/arrow-rs/issues/2544) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Always validate the array data when creating array in IPC reader [\#2541](https://github.com/apache/arrow-rs/issues/2541)
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Int96Converter Truncates Timestamps [\#2480](https://github.com/apache/arrow-rs/issues/2480) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Error Reading Page Index When Not Available [\#2434](https://github.com/apache/arrow-rs/issues/2434) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- `ParquetFileArrowReader::get_record_reader[_by_colum]` `batch_size` overallocates [\#2321](https://github.com/apache/arrow-rs/issues/2321) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Documentation updates:** - -- Document All Arrow Features in docs.rs [\#2633](https://github.com/apache/arrow-rs/issues/2633) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Escape contains patterns for utf8 like kernels [\#2745](https://github.com/apache/arrow-rs/issues/2745) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Float Array should not panic on `DivideByZero` in the `Divide` kernel [\#2719](https://github.com/apache/arrow-rs/issues/2719) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- DictionaryBuilders can Create Invalid DictionaryArrays [\#2684](https://github.com/apache/arrow-rs/issues/2684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `arrow` crate does not build with `features = ["ffi"]` and `default_features = false`. [\#2670](https://github.com/apache/arrow-rs/issues/2670) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Invalid results with `RowSelector` having `row_count` of 0 [\#2669](https://github.com/apache/arrow-rs/issues/2669) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- clippy error: unresolved import `crate::array::layout` [\#2659](https://github.com/apache/arrow-rs/issues/2659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Cast the numeric without the `CastOptions` [\#2648](https://github.com/apache/arrow-rs/issues/2648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Explicitly define overflow behavior for primitive arithmetic kernels [\#2641](https://github.com/apache/arrow-rs/issues/2641) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- update the `flight.proto` and fix schema to SchemaResult [\#2571](https://github.com/apache/arrow-rs/issues/2571) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Panic when first data page is skipped using ColumnChunkData::Sparse [\#2543](https://github.com/apache/arrow-rs/issues/2543) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `SchemaResult` in IPC deviates from other implementations [\#2445](https://github.com/apache/arrow-rs/issues/2445) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] **Closed issues:** -- Add support for CAST from `Interval(DayTime)` to `Timestamp(Nanosecond, None)` [\#2606](https://github.com/apache/arrow-rs/issues/2606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Why do we check for null in TypedDictionaryArray value function [\#2564](https://github.com/apache/arrow-rs/issues/2564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add the `length` field for `Buffer` [\#2524](https://github.com/apache/arrow-rs/issues/2524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Avoid large over 
allocate buffer in async reader [\#2512](https://github.com/apache/arrow-rs/issues/2512) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Rewriting Decimal Builders using `const_generic`. [\#2390](https://github.com/apache/arrow-rs/issues/2390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Rewrite Decimal Array using `const_generic` [\#2384](https://github.com/apache/arrow-rs/issues/2384) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement collect for int values [\#2696](https://github.com/apache/arrow-rs/issues/2696) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Merged pull requests:** -- Add downcast macros \(\#2635\) [\#2636](https://github.com/apache/arrow-rs/pull/2636) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Document all arrow features in docs.rs \(\#2633\) [\#2634](https://github.com/apache/arrow-rs/pull/2634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Document dyn\_cmp\_dict [\#2624](https://github.com/apache/arrow-rs/pull/2624) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Support comparison between DictionaryArray and BooleanArray [\#2618](https://github.com/apache/arrow-rs/pull/2618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Cast timestamp array to string array with timezone [\#2608](https://github.com/apache/arrow-rs/pull/2608) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Support empty projection in CSV and JSON readers [\#2604](https://github.com/apache/arrow-rs/pull/2604) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Make JSON support optional via a feature flag \(\#2300\) [\#2601](https://github.com/apache/arrow-rs/pull/2601) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Support SQL-compliant NaN ordering for DictionaryArray and non-DictionaryArray [\#2600](https://github.com/apache/arrow-rs/pull/2600) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Split out integration test plumbing \(\#2594\) \(\#2300\) [\#2598](https://github.com/apache/arrow-rs/pull/2598) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Refactor Binary Builder and String Builder Constructors [\#2592](https://github.com/apache/arrow-rs/pull/2592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Dictionary like scalar kernels [\#2591](https://github.com/apache/arrow-rs/pull/2591) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Validate dictionary key in TypedDictionaryArray \(\#2578\) [\#2589](https://github.com/apache/arrow-rs/pull/2589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add max\_dyn and min\_dyn for max/min for dictionary array [\#2585](https://github.com/apache/arrow-rs/pull/2585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Code cleanup of array value functions 
[\#2583](https://github.com/apache/arrow-rs/pull/2583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Allow overriding of do\_get & export useful macro [\#2582](https://github.com/apache/arrow-rs/pull/2582) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([avantgardnerio](https://github.com/avantgardnerio)) -- MINOR: Upgrade to pyo3 0.17 [\#2576](https://github.com/apache/arrow-rs/pull/2576) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) -- Support SQL-compliant NaN behavior on eq\_dyn, neq\_dyn, lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn [\#2570](https://github.com/apache/arrow-rs/pull/2570) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Add sum\_dyn to calculate sum for dictionary array [\#2566](https://github.com/apache/arrow-rs/pull/2566) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- struct UnionBuilder will create child buffers with capacity [\#2560](https://github.com/apache/arrow-rs/pull/2560) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kastolars](https://github.com/kastolars)) -- Don't panic on RleValueEncoder::flush\_buffer if empty \(\#2558\) [\#2559](https://github.com/apache/arrow-rs/pull/2559) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add the `length` field for Buffer and use more `Buffer` in IPC reader to avoid memory copy. [\#2557](https://github.com/apache/arrow-rs/pull/2557) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([HaoYang670](https://github.com/HaoYang670)) -- Add test for float nan comparison [\#2555](https://github.com/apache/arrow-rs/pull/2555) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Compare dictionary array with string array [\#2549](https://github.com/apache/arrow-rs/pull/2549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Always validate the array data \(except the `Decimal`\) when creating array in IPC reader [\#2547](https://github.com/apache/arrow-rs/pull/2547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: Fix test\_row\_type\_validation test [\#2546](https://github.com/apache/arrow-rs/pull/2546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix ilike\_utf8\_scalar kernals [\#2545](https://github.com/apache/arrow-rs/pull/2545) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- fix typo [\#2540](https://github.com/apache/arrow-rs/pull/2540) ([00Masato](https://github.com/00Masato)) -- Compare dictionary array and primitive array in lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn kernels [\#2539](https://github.com/apache/arrow-rs/pull/2539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- \[MINOR\]Avoid large over allocate buffer in async reader [\#2537](https://github.com/apache/arrow-rs/pull/2537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Compare dictionary with primitive array in `eq_dyn` and `neq_dyn` [\#2533](https://github.com/apache/arrow-rs/pull/2533) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Add iterator for FixedSizeBinaryArray [\#2531](https://github.com/apache/arrow-rs/pull/2531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- add bench: decimal with byte array and fixed length byte array [\#2529](https://github.com/apache/arrow-rs/pull/2529) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([liukun4515](https://github.com/liukun4515)) -- Add FixedLengthByteArrayReader Remove ComplexObjectArrayReader [\#2528](https://github.com/apache/arrow-rs/pull/2528) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Split out byte array decoders \(\#2318\) [\#2527](https://github.com/apache/arrow-rs/pull/2527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Use offset index in ParquetRecordBatchStream [\#2526](https://github.com/apache/arrow-rs/pull/2526) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([thinkharderdev](https://github.com/thinkharderdev)) -- Clean the `create_array` in IPC reader. [\#2525](https://github.com/apache/arrow-rs/pull/2525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove DecimalByteArrayConvert \(\#2480\) [\#2522](https://github.com/apache/arrow-rs/pull/2522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Improve performance of `%pat%` \(\>3x speedup\) [\#2521](https://github.com/apache/arrow-rs/pull/2521) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- remove len field from MapBuilder [\#2520](https://github.com/apache/arrow-rs/pull/2520) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) -- Replace macro with TypedDictionaryArray in comparison kernels [\#2514](https://github.com/apache/arrow-rs/pull/2514) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Avoid large over allocate buffer in sync reader [\#2511](https://github.com/apache/arrow-rs/pull/2511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Avoid useless memory copies in IPC reader. 
[\#2510](https://github.com/apache/arrow-rs/pull/2510) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Refactor boolean kernels to use same codebase [\#2508](https://github.com/apache/arrow-rs/pull/2508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Remove Int96Converter \(\#2480\) [\#2481](https://github.com/apache/arrow-rs/pull/2481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Speedup string equal/not equal to empty string, cleanup like/ilike kernels, fix escape bug [\#2743](https://github.com/apache/arrow-rs/pull/2743) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Partially flatten arrow-buffer [\#2737](https://github.com/apache/arrow-rs/pull/2737) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Automate updates to `CHANGELOG-old.md` [\#2732](https://github.com/apache/arrow-rs/pull/2732) ([iajoiner](https://github.com/iajoiner)) +- Update read parquet example in parquet/arrow home [\#2730](https://github.com/apache/arrow-rs/pull/2730) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([datapythonista](https://github.com/datapythonista)) +- Better construction of RecordBatchOptions [\#2729](https://github.com/apache/arrow-rs/pull/2729) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([askoa](https://github.com/askoa)) +- benchmark: bitwise operation [\#2718](https://github.com/apache/arrow-rs/pull/2718) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Update `try_binary` and `checked_ops`, and remove `math_checked_op` [\#2717](https://github.com/apache/arrow-rs/pull/2717) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Support bitwise op in kernel: or,xor,not [\#2716](https://github.com/apache/arrow-rs/pull/2716) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Add overflow-checking variants of arithmetic scalar dyn kernels [\#2713](https://github.com/apache/arrow-rs/pull/2713) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add divide\_opt kernel which produces null values on division by zero error [\#2710](https://github.com/apache/arrow-rs/pull/2710) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add DataType::is\_nested\(\) [\#2707](https://github.com/apache/arrow-rs/pull/2707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kfastov](https://github.com/kfastov)) +- Update criterion requirement from 0.3 to 0.4 [\#2706](https://github.com/apache/arrow-rs/pull/2706) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Support bitwise and operation in the kernel [\#2703](https://github.com/apache/arrow-rs/pull/2703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Add support of sorting dictionary of other primitive arrays [\#2701](https://github.com/apache/arrow-rs/pull/2701) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Clarify docs of binary and
string builders [\#2699](https://github.com/apache/arrow-rs/pull/2699) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([datapythonista](https://github.com/datapythonista)) +- Sort indices of dictionary string values [\#2698](https://github.com/apache/arrow-rs/pull/2698) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add support for empty projection in RecordBatch::project [\#2691](https://github.com/apache/arrow-rs/pull/2691) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Temporarily disable Golang integration tests re-enable JS [\#2689](https://github.com/apache/arrow-rs/pull/2689) ([tustvold](https://github.com/tustvold)) +- Verify valid UTF-8 when converting byte array \(\#2205\) [\#2686](https://github.com/apache/arrow-rs/pull/2686) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support sorting dictionary encoded primitive integer arrays [\#2680](https://github.com/apache/arrow-rs/pull/2680) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Skip RowSelectors with zero rows [\#2678](https://github.com/apache/arrow-rs/pull/2678) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([askoa](https://github.com/askoa)) +- Faster Null Path Selection in ArrayData Equality [\#2676](https://github.com/apache/arrow-rs/pull/2676) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dhruv9vats](https://github.com/dhruv9vats)) +- Use BitIndexIterator in min\_max\_helper [\#2675](https://github.com/apache/arrow-rs/pull/2675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Support building comparator for dictionaries of primitive integer values [\#2673](https://github.com/apache/arrow-rs/pull/2673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- json feature always requires base64 feature [\#2668](https://github.com/apache/arrow-rs/pull/2668) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([eagletmt](https://github.com/eagletmt)) +- Add try\_unary, binary, try\_binary kernels ~90% faster [\#2666](https://github.com/apache/arrow-rs/pull/2666) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use downcast\_dictionary\_array in unary\_dyn [\#2663](https://github.com/apache/arrow-rs/pull/2663) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- optimize the `numeric_cast_with_error` [\#2661](https://github.com/apache/arrow-rs/pull/2661) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- ffi feature also requires layout [\#2660](https://github.com/apache/arrow-rs/pull/2660) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Change max/min string macro to generic helper function min\_max\_helper [\#2658](https://github.com/apache/arrow-rs/pull/2658) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix flaky test `test_fuzz_async_reader_selection` [\#2656](https://github.com/apache/arrow-rs/pull/2656) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([thinkharderdev](https://github.com/thinkharderdev)) +- MINOR: Ignore flaky test test\_fuzz\_async\_reader\_selection 
[\#2655](https://github.com/apache/arrow-rs/pull/2655) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) +- MutableBuffer::typed\_data - shared ref access to the typed slice [\#2652](https://github.com/apache/arrow-rs/pull/2652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([medwards](https://github.com/medwards)) +- Overflow-checking variant of arithmetic scalar kernels [\#2650](https://github.com/apache/arrow-rs/pull/2650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- support `CastOption` for casting numeric [\#2649](https://github.com/apache/arrow-rs/pull/2649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Help LLVM vectorize comparison kernel ~50-80% faster [\#2646](https://github.com/apache/arrow-rs/pull/2646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support comparison between dictionary array and binary array [\#2645](https://github.com/apache/arrow-rs/pull/2645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Use `downcast_primitive_array` in arithmetic kernels [\#2640](https://github.com/apache/arrow-rs/pull/2640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fully qualifying parquet items [\#2638](https://github.com/apache/arrow-rs/pull/2638) ([dingxiangfei2009](https://github.com/dingxiangfei2009)) +- Support DictionaryArray in temporal kernels [\#2623](https://github.com/apache/arrow-rs/pull/2623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Comparable Row Format [\#2593](https://github.com/apache/arrow-rs/pull/2593) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix bug in page skipping [\#2552](https://github.com/apache/arrow-rs/pull/2552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([thinkharderdev](https://github.com/thinkharderdev)) diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index ecf02625c9d..a6fb8751c2d 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-flight" description = "Apache Arrow Flight" -version = "22.0.0" +version = "23.0.0" edition = "2021" rust-version = "1.62" authors = ["Apache Arrow "] @@ -27,7 +27,7 @@ repository = "https://github.com/apache/arrow-rs" license = "Apache-2.0" [dependencies] -arrow = { path = "../arrow", version = "22.0.0", default-features = false, features = ["ipc"] } +arrow = { path = "../arrow", version = "23.0.0", default-features = false, features = ["ipc"] } base64 = { version = "0.13", default-features = false } tonic = { version = "0.8", default-features = false, features = ["transport", "codegen", "prost"] } bytes = { version = "1", default-features = false } diff --git a/arrow-flight/README.md b/arrow-flight/README.md index 9e9a18ad478..e01809f3813 100644 --- a/arrow-flight/README.md +++ b/arrow-flight/README.md @@ -27,7 +27,7 @@ Add this to your Cargo.toml: ```toml [dependencies] -arrow-flight = "22.0.0" +arrow-flight = "23.0.0" ``` Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. 
See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information. diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 9aef5a0570a..38bbcf9e8bc 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-pyarrow-integration-testing" description = "" -version = "22.0.0" +version = "23.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -32,7 +32,7 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow = { path = "../arrow", version = "22.0.0", features = ["pyarrow"] } +arrow = { path = "../arrow", version = "23.0.0", features = ["pyarrow"] } pyo3 = { version = "0.17", features = ["extension-module"] } [package.metadata.maturin] diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 1580856dfc0..f1918fccd1f 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "arrow" -version = "22.0.0" +version = "23.0.0" description = "Rust implementation of Apache Arrow" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" diff --git a/arrow/README.md b/arrow/README.md index 7a95df0f225..a1c0e6279a5 100644 --- a/arrow/README.md +++ b/arrow/README.md @@ -35,7 +35,7 @@ This crate is tested with the latest stable version of Rust. We do not currently The arrow crate follows the [SemVer standard](https://doc.rust-lang.org/cargo/reference/semver.html) defined by Cargo and works well within the Rust crate ecosystem. -However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `22.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. +However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `23.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. ## Feature Flags @@ -61,7 +61,7 @@ The [Apache Arrow Status](https://arrow.apache.org/docs/status.html) page lists ## Safety -Arrow seeks to uphold the Rust Soundness Pledge as articulated eloquently [here](https://raphlinus.github.io/rust/22.0.01/18/soundness-pledge.html). Specifically: +Arrow seeks to uphold the Rust Soundness Pledge as articulated eloquently [here](https://raphlinus.github.io/rust/23.0.01/18/soundness-pledge.html). Specifically: > The intent of this crate is to be free of soundness bugs. The developers will do their best to avoid them, and welcome help in analyzing and fixing them diff --git a/dev/release/README.md b/dev/release/README.md index 3783301e9be..48748eccbe8 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -78,7 +78,7 @@ CHANGELOG_GITHUB_TOKEN= ./dev/release/update_change_log.sh git commit -a -m 'Create changelog' # update versions -sed -i '' -e 's/14.0.0/22.0.0/g' `find . -name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG.md` +sed -i '' -e 's/14.0.0/23.0.0/g' `find . 
-name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG.md` git commit -a -m 'Update version' ``` diff --git a/dev/release/label_issues.py b/dev/release/label_issues.py new file mode 100755 index 00000000000..b004b7fa7f8 --- /dev/null +++ b/dev/release/label_issues.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python + +############################################################################## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +############################################################################## + +# Python script to add labels to github issues from the PRs that closed them +# +# Required setup: +# $ pip install PyGithub +# +# ARROW_GITHUB_API_TOKEN needs to be set to your github token +from github import Github +import os +import re + + + +# get all cross referenced issues from the named issue +# (aka linked PRs) +# issue = arrow_repo.get_issue(issue_number) +def get_cross_referenced_issues(issue): + all_issues = set() + for timeline_item in issue.get_timeline(): + if timeline_item.event == 'cross-referenced' and timeline_item.source.type == 'issue': + all_issues.add(timeline_item.source.issue) + + # convert to list + return [i for i in all_issues] + + +# labels not to transfer +BLACKLIST_LABELS = {'development-process', 'api-change'} + +# Adds labels to the specified issue with the labels from linked pull requests +def relabel_issue(arrow_repo, issue_number): + #print(issue_number, 'fetching issue') + issue = arrow_repo.get_issue(issue_number) + print('considering issue', issue.html_url) + linked_issues = get_cross_referenced_issues(issue) + #print(' ', 'cross referenced issues:', linked_issues) + + # Figure out what labels need to be added, if any + existing_labels = set() + for label in issue.labels: + existing_labels.add(label.name) + + # find all labels to add + for linked_issue in linked_issues: + if linked_issue.pull_request is None: + print(' ', 'not pull request, skipping', linked_issue.html_url) + continue + + if linked_issue.repository.name != 'arrow-rs': + print(' ', 'not in arrow-rs, skipping', linked_issue.html_url) + continue + + print(' ', 'finding labels for linked pr', linked_issue.html_url) + linked_labels = set() + for label in linked_issue.labels: + linked_labels.add(label.name) + #print(' ', 'existing labels:', existing_labels) + + labels_to_add = linked_labels.difference(existing_labels) + + # remove any blacklist labels, if any + for l in BLACKLIST_LABELS: + labels_to_add.discard(l) + + if len(labels_to_add) > 0: + print(' ', 'adding labels: ', labels_to_add, 'to', issue.number) + for label in labels_to_add: + issue.add_to_labels(label) + print(' ', 'added', label) + existing_labels.add(label) + + # leave a note about what updated these labels + issue.create_comment('`label_issue.py` automatically added labels {} 
from #{}'.format(labels_to_add, linked_issue.number)) + + +# what section headings in the CHANGELOG.md file contain closed issues that may need relabeling +ISSUE_SECTION_NAMES = ['Closed issues:', 'Fixed bugs:', 'Implemented enhancements:'] + +# find all possible issues / bugs by scraping CHANGELOG.md +# +# TODO: Find all tickets merged since this tag +# The compare api can find all commits since that tag +# I could not find a good way in the github API to find the PRs connected to a commit +#since_tag = '22.0.0' + +def find_issues_from_changelog(): + script_dir = os.path.dirname(os.path.realpath(__file__)) + path = os.path.join(script_dir, '..', '..', 'CHANGELOG.md') + + issues = set() + + # Flag tracking whether the current line is inside one of the issue sections + in_issue_section = False + + with open(path, 'r') as f: + for line in f: + #print('line: ', line) + line = line.strip() + if line.startswith('**'): + section_name = line.replace('**', '') + if section_name in ISSUE_SECTION_NAMES: + #print(' ', 'is issue section', section_name) + in_issue_section = True + else: + #print(' ', 'is not issue section', section_name) + in_issue_section = False + + if in_issue_section: + match = re.search('#([\d]+)', line) + if match is not None: + #print(' ', 'reference', match.group(1)) + issues.add(match.group(1)) + + # Convert to a sorted list of issue numbers + return sorted([int(i) for i in issues]) + + +if __name__ == '__main__': + print('Attempting to label github issues from their corresponding PRs') + + issues = find_issues_from_changelog() + print('Issues found in CHANGELOG: ', issues) + + github_token = os.environ.get("ARROW_GITHUB_API_TOKEN") + + print('logging into GITHUB...') + github = Github(github_token) + + print('getting github repo...') + arrow_repo = github.get_repo('apache/arrow-rs') + + for issue in issues: + relabel_issue(arrow_repo, issue) diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index b9f6cf81855..e45b812dd6a 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-integration-testing" description = "Binaries used in the Arrow integration tests" -version = "22.0.0" +version = "23.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index a2d11eb5862..9b95868f3fc 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet" -version = "22.0.0" +version = "23.0.0" license = "Apache-2.0" description = "Apache Parquet implementation in Rust" homepage = "https://github.com/apache/arrow-rs" @@ -41,7 +41,7 @@ zstd = { version = "0.11.1", optional = true, default-features = false } chrono = { version = "0.4", default-features = false, features = ["alloc"] } num = { version = "0.4", default-features = false } num-bigint = { version = "0.4", default-features = false } -arrow = { path = "../arrow", version = "22.0.0", optional = true, default-features = false, features = ["ipc"] } +arrow = { path = "../arrow", version = "23.0.0", optional = true, default-features = false, features = ["ipc"] } base64 = { version = "0.13", default-features = false, features = ["std"], optional = true } clap = { version = "3", default-features = false, features = ["std", "derive", "env"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } @@ -61,7 +61,7 @@ flate2 = { version = "1.0", default-features = false, features = ["rust_backend" lz4 = { version =
"1.23", default-features = false } zstd = { version = "0.11", default-features = false } serde_json = { version = "1.0", features = ["std"], default-features = false } -arrow = { path = "../arrow", version = "22.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint", "json"] } +arrow = { path = "../arrow", version = "23.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint", "json"] } [package.metadata.docs.rs] all-features = true diff --git a/parquet_derive/Cargo.toml b/parquet_derive/Cargo.toml index e32ee1ace5b..54aa6d52f1e 100644 --- a/parquet_derive/Cargo.toml +++ b/parquet_derive/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive" -version = "22.0.0" +version = "23.0.0" license = "Apache-2.0" description = "Derive macros for the Rust implementation of Apache Parquet" homepage = "https://github.com/apache/arrow-rs" @@ -35,4 +35,4 @@ proc-macro = true proc-macro2 = { version = "1.0", default-features = false } quote = { version = "1.0", default-features = false } syn = { version = "1.0", default-features = false } -parquet = { path = "../parquet", version = "22.0.0" } +parquet = { path = "../parquet", version = "23.0.0" } diff --git a/parquet_derive/README.md b/parquet_derive/README.md index d3d7f56ebf6..4aae73dfc2e 100644 --- a/parquet_derive/README.md +++ b/parquet_derive/README.md @@ -32,8 +32,8 @@ Add this to your Cargo.toml: ```toml [dependencies] -parquet = "22.0.0" -parquet_derive = "22.0.0" +parquet = "23.0.0" +parquet_derive = "23.0.0" ``` and this to your crate root: diff --git a/parquet_derive_test/Cargo.toml b/parquet_derive_test/Cargo.toml index 4b814c4c088..dd8486da2ca 100644 --- a/parquet_derive_test/Cargo.toml +++ b/parquet_derive_test/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive_test" -version = "22.0.0" +version = "23.0.0" license = "Apache-2.0" description = "Integration test package for parquet-derive" homepage = "https://github.com/apache/arrow-rs" @@ -29,6 +29,6 @@ publish = false rust-version = "1.62" [dependencies] -parquet = { path = "../parquet", version = "22.0.0", default-features = false } -parquet_derive = { path = "../parquet_derive", version = "22.0.0", default-features = false } +parquet = { path = "../parquet", version = "23.0.0", default-features = false } +parquet_derive = { path = "../parquet_derive", version = "23.0.0", default-features = false } chrono = { version="0.4.19", default-features = false, features = [ "clock" ] } From 5a55406cf24171600a143a83a95046c7513fd92c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Sep 2022 12:40:37 -0400 Subject: [PATCH 10/16] update new `arrow-buffer` crate to 23.0.0 (#2748) * update new `arrow-buffer` crate to 23.0.0 * Update dependency --- arrow-buffer/Cargo.toml | 2 +- arrow/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml index 87019111efc..c1bcd9f6306 100644 --- a/arrow-buffer/Cargo.toml +++ b/arrow-buffer/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "arrow-buffer" -version = "22.0.0" +version = "23.0.0" description = "Buffer abstractions for Apache Arrow" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index f1918fccd1f..7391ffcf827 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,7 +44,7 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = 
["runtime-rng"] } [dependencies] -arrow-buffer = { path = "../arrow-buffer", version = "22.0.0" } +arrow-buffer = { path = "../arrow-buffer", version = "23.0.0" } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } From ca00b671500b693f8c5e07ac4ea600269adfa2b6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Sep 2022 19:42:20 -0400 Subject: [PATCH 11/16] Fix `verify_release_candidate.sh` for new arrow subcrates (#2752) --- dev/release/verify-release-candidate.sh | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index cf8050c1c9f..98c582c2e17 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -116,21 +116,16 @@ test_source_distribution() { export ARROW_TEST_DATA=$PWD/arrow-testing-data/data export PARQUET_TEST_DATA=$PWD/parquet-testing-data/data - # use local modules because we don't publish modules to crates.io yet - sed \ - -i.bak \ - -E \ - -e 's/^arrow = "([^"]*)"/arrow = { version = "\1", path = "..\/arrow" }/g' \ - -e 's/^parquet = "([^"]*)"/parquet = { version = "\1", path = "..\/parquet" }/g' \ - */Cargo.toml - (cd arrow && cargo build && cargo test) (cd arrow-flight && cargo build && cargo test) (cd parquet && cargo build && cargo test) (cd parquet_derive && cargo build && cargo test) - # verify that the crates can be published to crates.io - pushd arrow + # verify that the leaf crates can be published to crates.io + # we can't verify crates that depend on others + # (because the others haven't yet been published to crates.io) + + pushd arrow-buffer cargo publish --dry-run popd From 46fcb0c93c7b6e2067ff6a5b5bc0b0108ca3c2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 17 Sep 2022 05:58:09 +0200 Subject: [PATCH 12/16] Speed up checked kernels for non-null data (~1.4-5x faster) (#2749) * Speed up checked kernels * Fast path for non-null * Move some code --- arrow/src/compute/kernels/arity.rs | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index 5060234c71b..216e3bfcac3 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -106,15 +106,26 @@ where let len = array.len(); let null_count = array.null_count(); - let mut buffer = BufferBuilder::::new(len); - buffer.append_n_zeroed(array.len()); - let slice = buffer.as_slice_mut(); + if null_count == 0 { + let values = array.values().iter().map(|v| op(*v)); + // JUSTIFICATION + // Benefit + // ~60% speedup + // Soundness + // `values` is an iterator with a known size because arrays are sized. + let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? }; + return Ok(unsafe { build_primitive_array(len, buffer, 0, None) }); + } let null_buffer = array .data_ref() .null_buffer() .map(|b| b.bit_slice(array.offset(), array.len())); + let mut buffer = BufferBuilder::::new(len); + buffer.append_n_zeroed(array.len()); + let slice = buffer.as_slice_mut(); + try_for_each_valid_idx(array.len(), 0, null_count, null_buffer.as_deref(), |idx| { unsafe { *slice.get_unchecked_mut(idx) = op(array.value_unchecked(idx))? 
        };
        Ok::<_, ArrowError>(())
@@ -284,9 +295,21 @@ where
     if a.is_empty() {
         return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
     }
-
     let len = a.len();
+
+    if a.null_count() == 0 && b.null_count() == 0 {
+        let values = a.values().iter().zip(b.values()).map(|(l, r)| op(*l, *r));
+        let buffer = unsafe { Buffer::try_from_trusted_len_iter(values) }?;
+        // JUSTIFICATION
+        //  Benefit
+        //      ~75% speedup
+        //  Soundness
+        //      `values` is an iterator with a known size from a PrimitiveArray
+        return Ok(unsafe { build_primitive_array(len, buffer, 0, None) });
+    }
+
     let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+
     let null_count = null_buffer
         .as_ref()
         .map(|x| len - x.count_set_bits())

From 5e83ef9cc7e426171f4cb9451fa004c55c7c95be Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Sat, 17 Sep 2022 01:04:02 -0700
Subject: [PATCH 13/16] Add value type check in try_unary_dict (#2755)

---
 arrow/src/compute/kernels/arity.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index 216e3bfcac3..12cf9721f97 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -156,6 +156,13 @@ where
     T: ArrowPrimitiveType,
     F: Fn(T::Native) -> Result<T::Native>,
 {
+    if array.value_type() != T::DATA_TYPE {
+        return Err(ArrowError::CastError(format!(
+            "Cannot perform the unary operation on dictionary array of value type {}",
+            array.value_type()
+        )));
+    }
+
     let dict_values = array.values().as_any().downcast_ref().unwrap();
     let values = try_unary::<T, F, T>(dict_values, op)?.into_data();
     let data = array.data().clone().into_builder().child_data(vec![values]);

From 3bf6eb98ceb3962e1d9419da6dc93e609f7893e6 Mon Sep 17 00:00:00 2001
From: aksharau
Date: Mon, 19 Sep 2022 11:18:23 +0530
Subject: [PATCH 14/16] Fix: Issue 2721 : binary function should not panic but
 return error when array lengths are unequal (#2750)

---
 arrow/src/compute/kernels/arithmetic.rs | 14 +++-------
 arrow/src/compute/kernels/arity.rs      | 36 +++++++++++++++----------
 arrow/src/compute/kernels/bitwise.rs    |  9 ++-----
 3 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs
index 7b91a261c7e..b1a62ccfd6a 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -69,13 +69,7 @@ where
     RT: ArrowNumericType,
     F: Fn(LT::Native, RT::Native) -> LT::Native,
 {
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different length".to_string(),
-        ));
-    }
-
-    Ok(binary(left, right, op))
+    binary(left, right, op)
 }

 /// Helper function for operations where a valid `0` on the right array should
@@ -1128,13 +1122,13 @@ where
     T: ArrowNumericType,
     T::Native: ArrowNativeTypeOp + Zero + One,
 {
-    Ok(binary_opt(left, right, |a, b| {
+    binary_opt(left, right, |a, b| {
         if b.is_zero() {
             None
         } else {
             Some(a.div_wrapping(b))
         }
-    }))
+    })
 }

 /// Perform `left / right` operation on two arrays. If either left or right value is null
 /// then the result is also null. If any right hand value is zero then the result of this
 /// operation will be `Err(ArrowError::DivideByZero)`.
@@ -1670,7 +1664,7 @@ mod tests {
         let b = Int32Array::from(vec![6, 7, 8]);
         let e = add(&a, &b).expect_err("should have failed due to different lengths");
         assert_eq!(
-            "ComputeError(\"Cannot perform math operation on arrays of different length\")",
+            "ComputeError(\"Cannot perform binary operation on arrays of different length\")",
             format!("{:?}", e)
         );
     }

diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index 12cf9721f97..2347502f96e 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -235,25 +235,29 @@ where
 /// especially when the operation can be vectorised, however, requires `op` to be infallible
 /// for all possible values of its inputs
 ///
-/// # Panic
 ///
-/// Panics if the arrays have different lengths
+/// # Error
+///
+/// This function returns an error if the arrays have different lengths
 pub fn binary<A, B, F, O>(
     a: &PrimitiveArray<A>,
     b: &PrimitiveArray<B>,
     op: F,
-) -> PrimitiveArray<O>
+) -> Result<PrimitiveArray<O>>
 where
     A: ArrowPrimitiveType,
     B: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
     F: Fn(A::Native, B::Native) -> O::Native,
 {
-    assert_eq!(a.len(), b.len());
+    if a.len() != b.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform binary operation on arrays of different length".to_string(),
+        ));
+    }
     let len = a.len();

     if a.is_empty() {
-        return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
+        return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
     }

     let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
@@ -270,7 +274,7 @@ where
     //     `values` is an iterator with a known size from a PrimitiveArray
     let buffer = unsafe { Buffer::from_trusted_len_iter(values) };

-    unsafe { build_primitive_array(len, buffer, null_count, null_buffer) }
+    Ok(unsafe { build_primitive_array(len, buffer, null_count, null_buffer) })
 }

 /// Applies the provided fallible binary operation across `a` and `b`, returning any error,
@@ -344,32 +348,36 @@ where
 ///
 /// The function is only evaluated for non-null indices
 ///
-/// # Panic
 ///
-/// Panics if the arrays have different lengths
+/// # Error
+///
+/// This function returns an error if the arrays have different lengths
 pub(crate) fn binary_opt<A, B, F, O>(
     a: &PrimitiveArray<A>,
     b: &PrimitiveArray<B>,
     op: F,
-) -> PrimitiveArray<O>
+) -> Result<PrimitiveArray<O>>
 where
     A: ArrowPrimitiveType,
     B: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
     F: Fn(A::Native, B::Native) -> Option<O::Native>,
 {
-    assert_eq!(a.len(), b.len());
+    if a.len() != b.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform binary operation on arrays of different length".to_string(),
+        ));
+    }

     if a.is_empty() {
-        return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
+        return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
     }

     if a.null_count() == 0 && b.null_count() == 0 {
-        a.values()
+        Ok(a.values()
             .iter()
             .zip(b.values().iter())
             .map(|(a, b)| op(*a, *b))
-            .collect()
+            .collect())
     } else {
         let iter_a = ArrayIter::new(a);
         let iter_b = ArrayIter::new(b);
@@ -386,7 +394,7 @@ where
             }
         });

-        values.collect()
+        Ok(values.collect())
     }
 }

diff --git a/arrow/src/compute/kernels/bitwise.rs b/arrow/src/compute/kernels/bitwise.rs
index 2f3c9e490f4..0b877b32648 100644
--- a/arrow/src/compute/kernels/bitwise.rs
+++ b/arrow/src/compute/kernels/bitwise.rs
@@ -18,7 +18,7 @@
 use crate::array::PrimitiveArray;
 use crate::compute::{binary, unary};
 use crate::datatypes::ArrowNumericType;
-use crate::error::{ArrowError, Result};
+use crate::error::Result;
 use std::ops::{BitAnd, BitOr, BitXor, Not};

 // Helper function for bitwise operations on two arrays
@@ -31,12 +31,7 @@ where
     T: ArrowNumericType,
     F: Fn(T::Native, T::Native) -> T::Native,
 {
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform bitwise operation on arrays of different length".to_string(),
-        ));
-    }
-    Ok(binary(left, right, op))
+    binary(left, right, op)
 }

 /// Perform `left & right` operation on two arrays. If either left or right value is null

From 9599178c953a7980ec1841d06e2232a671b5cbb3 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Tue, 20 Sep 2022 03:30:37 -0700
Subject: [PATCH 15/16] Add overflow-checking variants of arithmetic dyn kernels (#2740)

* Init

* More

* More

* Add tests

* Fix clippy

* Remove macro

* Update doc

* Fix clippy

* Remove length check

* Tweak try_binary to coordinate latest optimization

* Fix clippy

* Use for loop

* Split non-null variant into never inline function

* Add value type check

* Multiply by get_byte_width of output type.
---
 arrow/src/compute/kernels/arithmetic.rs | 470 +++++++++++++++++++++---
 arrow/src/compute/kernels/arity.rs      |  82 +++--
 2 files changed, 466 insertions(+), 86 deletions(-)

diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs
index b1a62ccfd6a..aa6c8cd6694 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -68,10 +68,30 @@ where
     LT: ArrowNumericType,
     RT: ArrowNumericType,
     F: Fn(LT::Native, RT::Native) -> LT::Native,
+    LT::Native: ArrowNativeTypeOp,
+    RT::Native: ArrowNativeTypeOp,
 {
     binary(left, right, op)
 }

+/// This is similar to `math_op` as it performs the given operation between two input primitive
+/// arrays. But the given operation can return `Err` if overflow is detected. In that case, this
+/// function returns an `Err`.
+fn math_checked_op<LT, RT, F>(
+    left: &PrimitiveArray<LT>,
+    right: &PrimitiveArray<RT>,
+    op: F,
+) -> Result<PrimitiveArray<LT>>
+where
+    LT: ArrowNumericType,
+    RT: ArrowNumericType,
+    F: Fn(LT::Native, RT::Native) -> Result<LT::Native>,
+    LT::Native: ArrowNativeTypeOp,
+    RT::Native: ArrowNativeTypeOp,
+{
+    try_binary(left, right, op)
+}
+
 /// Helper function for operations where a valid `0` on the right array should
 /// result in an [ArrowError::DivideByZero], namely the division and modulo operations
@@ -516,57 +536,64 @@ macro_rules! typed_dict_math_op {
     }};
 }

-/// Helper function to perform math lambda function on values from two dictionary arrays, this
-/// version does not attempt to use SIMD explicitly (though the compiler may auto vectorize)
-macro_rules! math_dict_op {
-    ($left: expr, $right:expr, $op:expr, $value_ty:ty) => {{
-        if $left.len() != $right.len() {
-            return Err(ArrowError::ComputeError(format!(
-                "Cannot perform operation on arrays of different length ({}, {})",
-                $left.len(),
-                $right.len()
-            )));
-        }
+/// Perform the given operation on two `DictionaryArray`s.
+/// Returns an error if the two arrays have different value types.
+fn math_op_dict<K, T, F>(
+    left: &DictionaryArray<K>,
+    right: &DictionaryArray<K>,
+    op: F,
+) -> Result<PrimitiveArray<T>>
+where
+    K: ArrowNumericType,
+    T: ArrowNumericType,
+    F: Fn(T::Native, T::Native) -> T::Native,
+    T::Native: ArrowNativeTypeOp,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Cannot perform operation on arrays of different length ({}, {})",
+            left.len(),
+            right.len()
+        )));
+    }

-        // Safety justification: Since the inputs are valid Arrow arrays, all values are
-        // valid indexes into the dictionary (which is verified during construction)
-
-        let left_iter = unsafe {
-            $left
-                .values()
-                .as_any()
-                .downcast_ref::<$value_ty>()
-                .unwrap()
-                .take_iter_unchecked($left.keys_iter())
-        };
-
-        let right_iter = unsafe {
-            $right
-                .values()
-                .as_any()
-                .downcast_ref::<$value_ty>()
-                .unwrap()
-                .take_iter_unchecked($right.keys_iter())
-        };
-
-        let result = left_iter
-            .zip(right_iter)
-            .map(|(left_value, right_value)| {
-                if let (Some(left), Some(right)) = (left_value, right_value) {
-                    Some($op(left, right))
-                } else {
-                    None
-                }
-            })
-            .collect();
+    // Safety justification: Since the inputs are valid Arrow arrays, all values are
+    // valid indexes into the dictionary (which is verified during construction)

-        Ok(result)
-    }};
+    let left_iter = unsafe {
+        left.values()
+            .as_any()
+            .downcast_ref::<PrimitiveArray<T>>()
+            .unwrap()
+            .take_iter_unchecked(left.keys_iter())
+    };
+
+    let right_iter = unsafe {
+        right
+            .values()
+            .as_any()
+            .downcast_ref::<PrimitiveArray<T>>()
+            .unwrap()
+            .take_iter_unchecked(right.keys_iter())
+    };
+
+    let result = left_iter
+        .zip(right_iter)
+        .map(|(left_value, right_value)| {
+            if let (Some(left), Some(right)) = (left_value, right_value) {
+                Some(op(left, right))
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    Ok(result)
 }

 /// Perform the given operation on two `DictionaryArray`s.
 /// Returns an error if the two arrays have different value types.
-fn math_op_dict<K, T, F>(
+fn math_checked_op_dict<K, T, F>(
     left: &DictionaryArray<K>,
     right: &DictionaryArray<K>,
     op: F,
@@ -574,9 +601,21 @@ where
     K: ArrowNumericType,
     T: ArrowNumericType,
-    F: Fn(T::Native, T::Native) -> T::Native,
+    F: Fn(T::Native, T::Native) -> Result<T::Native>,
+    T::Native: ArrowNativeTypeOp,
 {
-    math_dict_op!(left, right, op, PrimitiveArray<T>)
+    // The left and right value types are guaranteed to be the same by the caller macro.
+    if left.value_type() != T::DATA_TYPE {
+        return Err(ArrowError::NotYetImplemented(format!(
+            "Cannot perform provided operation on dictionary array of value type {}",
+            left.value_type()
+        )));
+    }
+
+    let left = left.downcast_dict::<PrimitiveArray<T>>().unwrap();
+    let right = right.downcast_dict::<PrimitiveArray<T>>().unwrap();
+
+    try_binary(left, right, op)
 }

 /// Helper function for operations where a valid `0` on the right array should
@@ -672,10 +711,13 @@ where

 /// Perform `left + right` operation on two arrays. If either left or right value is null
 /// then the result is also null.
+///
+/// This doesn't detect overflow. If the operation overflows, the result wraps around.
+/// For an overflow-checking variant, use `add_dyn_checked` instead.
 pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
     match left.data_type() {
         DataType::Dictionary(_, _) => {
-            typed_dict_math_op!(left, right, |a, b| a + b, math_op_dict)
+            typed_dict_math_op!(left, right, |a, b| a.add_wrapping(b), math_op_dict)
         }
         DataType::Date32 => {
             let l = as_primitive_array::<Date32Type>(left);
             match right.data_type() {
                 DataType::Interval(IntervalUnit::YearMonth) => {
                     let r = as_primitive_array::<IntervalYearMonthType>(right);
                     let res = math_op(l, r, Date32Type::add_year_months)?;
                     Ok(Arc::new(res))
                 }
@@ -728,7 +770,84 @@ pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
         _ => {
             downcast_primitive_array!(
                 (left, right) => {
-                    math_op(left, right, |a, b| a + b).map(|a| Arc::new(a) as ArrayRef)
+                    math_op(left, right, |a, b| a.add_wrapping(b)).map(|a| Arc::new(a) as ArrayRef)
                 }
                 _ => Err(ArrowError::CastError(format!(
                     "Unsupported data type {}, {}",
                     left.data_type(), right.data_type()
                 )))
             )
         }
     }
 }
+
+/// Perform `left + right` operation on two arrays. If either left or right value is null
+/// then the result is also null.
+///
+/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
+/// use `add_dyn` instead.
+pub fn add_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
+    match left.data_type() {
+        DataType::Dictionary(_, _) => {
+            typed_dict_math_op!(
+                left,
+                right,
+                |a, b| a.add_checked(b),
+                math_checked_op_dict
+            )
+        }
+        DataType::Date32 => {
+            let l = as_primitive_array::<Date32Type>(left);
+            match right.data_type() {
+                DataType::Interval(IntervalUnit::YearMonth) => {
+                    let r = as_primitive_array::<IntervalYearMonthType>(right);
+                    let res = math_op(l, r, Date32Type::add_year_months)?;
+                    Ok(Arc::new(res))
+                }
+                DataType::Interval(IntervalUnit::DayTime) => {
+                    let r = as_primitive_array::<IntervalDayTimeType>(right);
+                    let res = math_op(l, r, Date32Type::add_day_time)?;
+                    Ok(Arc::new(res))
+                }
+                DataType::Interval(IntervalUnit::MonthDayNano) => {
+                    let r = as_primitive_array::<IntervalMonthDayNanoType>(right);
+                    let res = math_op(l, r, Date32Type::add_month_day_nano)?;
+                    Ok(Arc::new(res))
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Cannot perform arithmetic operation between array of type {} and array of type {}",
+                    left.data_type(), right.data_type()
+                ))),
+            }
+        }
+        DataType::Date64 => {
+            let l = as_primitive_array::<Date64Type>(left);
+            match right.data_type() {
+                DataType::Interval(IntervalUnit::YearMonth) => {
+                    let r = as_primitive_array::<IntervalYearMonthType>(right);
+                    let res = math_op(l, r, Date64Type::add_year_months)?;
+                    Ok(Arc::new(res))
+                }
+                DataType::Interval(IntervalUnit::DayTime) => {
+                    let r = as_primitive_array::<IntervalDayTimeType>(right);
+                    let res = math_op(l, r, Date64Type::add_day_time)?;
+                    Ok(Arc::new(res))
+                }
+                DataType::Interval(IntervalUnit::MonthDayNano) => {
+                    let r = as_primitive_array::<IntervalMonthDayNanoType>(right);
+                    let res = math_op(l, r, Date64Type::add_month_day_nano)?;
+                    Ok(Arc::new(res))
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Cannot perform arithmetic operation between array of type {} and array of type {}",
+                    left.data_type(), right.data_type()
+                ))),
+            }
+        }
+        _ => {
+            downcast_primitive_array!(
+                (left, right) => {
+                    math_checked_op(left, right, |a, b| a.add_checked(b)).map(|a| Arc::new(a) as ArrayRef)
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Unsupported data type {}, {}",
+                    left.data_type(), right.data_type()
+                )))
+            )
+        }
+    }
+}
@@ -839,15 +958,47 @@ where

 /// Perform `left - right` operation on two arrays. If either left or right value is null
 /// then the result is also null.
+///
+/// This doesn't detect overflow. If the operation overflows, the result wraps around.
+/// For an overflow-checking variant, use `subtract_dyn_checked` instead.
 pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
     match left.data_type() {
         DataType::Dictionary(_, _) => {
-            typed_dict_math_op!(left, right, |a, b| a - b, math_op_dict)
+            typed_dict_math_op!(left, right, |a, b| a.sub_wrapping(b), math_op_dict)
         }
         _ => {
             downcast_primitive_array!(
                 (left, right) => {
-                    math_op(left, right, |a, b| a - b).map(|a| Arc::new(a) as ArrayRef)
+                    math_op(left, right, |a, b| a.sub_wrapping(b)).map(|a| Arc::new(a) as ArrayRef)
                 }
                 _ => Err(ArrowError::CastError(format!(
                     "Unsupported data type {}, {}",
                     left.data_type(), right.data_type()
                 )))
             )
         }
     }
 }
+
+/// Perform `left - right` operation on two arrays. If either left or right value is null
+/// then the result is also null.
+///
+/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
+/// use `subtract_dyn` instead.
+pub fn subtract_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
+    match left.data_type() {
+        DataType::Dictionary(_, _) => {
+            typed_dict_math_op!(
+                left,
+                right,
+                |a, b| a.sub_checked(b),
+                math_checked_op_dict
+            )
+        }
+        _ => {
+            downcast_primitive_array!(
+                (left, right) => {
+                    math_checked_op(left, right, |a, b| a.sub_checked(b)).map(|a| Arc::new(a) as ArrayRef)
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Unsupported data type {}, {}",
+                    left.data_type(), right.data_type()
+                )))
+            )
+        }
+    }
+}
@@ -977,15 +1128,47 @@ where

 /// Perform `left * right` operation on two arrays. If either left or right value is null
 /// then the result is also null.
+///
+/// This doesn't detect overflow. If the operation overflows, the result wraps around.
+/// For an overflow-checking variant, use `multiply_dyn_checked` instead.
 pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
     match left.data_type() {
         DataType::Dictionary(_, _) => {
-            typed_dict_math_op!(left, right, |a, b| a * b, math_op_dict)
+            typed_dict_math_op!(left, right, |a, b| a.mul_wrapping(b), math_op_dict)
         }
         _ => {
             downcast_primitive_array!(
                 (left, right) => {
-                    math_op(left, right, |a, b| a * b).map(|a| Arc::new(a) as ArrayRef)
+                    math_op(left, right, |a, b| a.mul_wrapping(b)).map(|a| Arc::new(a) as ArrayRef)
                 }
                 _ => Err(ArrowError::CastError(format!(
                     "Unsupported data type {}, {}",
                     left.data_type(), right.data_type()
                 )))
             )
         }
     }
 }
+
+/// Perform `left * right` operation on two arrays. If either left or right value is null
+/// then the result is also null.
+///
+/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
+/// use `multiply_dyn` instead.
+pub fn multiply_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
+    match left.data_type() {
+        DataType::Dictionary(_, _) => {
+            typed_dict_math_op!(
+                left,
+                right,
+                |a, b| a.mul_checked(b),
+                math_checked_op_dict
+            )
+        }
+        _ => {
+            downcast_primitive_array!(
+                (left, right) => {
+                    math_checked_op(left, right, |a, b| a.mul_checked(b)).map(|a| Arc::new(a) as ArrayRef)
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Unsupported data type {}, {}",
+                    left.data_type(), right.data_type()
+                )))
+            )
+        }
+    }
+}
@@ -1134,7 +1317,52 @@ where

 /// Perform `left / right` operation on two arrays. If either left or right value is null
 /// then the result is also null. If any right hand value is zero then the result of this
 /// operation will be `Err(ArrowError::DivideByZero)`.
+///
+/// This doesn't detect overflow. If the operation overflows, the result wraps around.
+/// For an overflow-checking variant, use `divide_dyn_checked` instead.
 pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
+    match left.data_type() {
+        DataType::Dictionary(_, _) => {
+            typed_dict_math_op!(
+                left,
+                right,
+                |a, b| {
+                    if b.is_zero() {
+                        Err(ArrowError::DivideByZero)
+                    } else {
+                        Ok(a.div_wrapping(b))
+                    }
+                },
+                math_divide_checked_op_dict
+            )
+        }
+        _ => {
+            downcast_primitive_array!(
+                (left, right) => {
+                    math_checked_divide_op(left, right, |a, b| {
+                        if b.is_zero() {
+                            Err(ArrowError::DivideByZero)
+                        } else {
+                            Ok(a.div_wrapping(b))
+                        }
+                    }).map(|a| Arc::new(a) as ArrayRef)
+                }
+                _ => Err(ArrowError::CastError(format!(
+                    "Unsupported data type {}, {}",
+                    left.data_type(), right.data_type()
+                )))
+            )
+        }
+    }
+}
+
+/// Perform `left / right` operation on two arrays. If either left or right value is null
+/// then the result is also null. If any right hand value is zero then the result of this
+/// operation will be `Err(ArrowError::DivideByZero)`.
+///
+/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
+/// use `divide_dyn` instead.
+pub fn divide_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
     match left.data_type() {
         DataType::Dictionary(_, _) => {
             typed_dict_math_op!(
@@ -2357,4 +2585,140 @@ mod tests {
         let expected = Int32Array::from(vec![None]);
         assert_eq!(expected, overflow.unwrap());
     }
+
+    #[test]
+    fn test_primitive_add_dyn_wrapping_overflow() {
+        let a = Int32Array::from(vec![i32::MAX, i32::MIN]);
+        let b = Int32Array::from(vec![1, 1]);
+
+        let wrapped = add_dyn(&a, &b).unwrap();
+        let expected =
+            Arc::new(Int32Array::from(vec![-2147483648, -2147483647])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = add_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_dictionary_add_dyn_wrapping_overflow() {
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(2, 2);
+        builder.append(i32::MAX).unwrap();
+        builder.append(i32::MIN).unwrap();
+        let a = builder.finish();
+
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(2, 2);
+        builder.append(1).unwrap();
+        builder.append(1).unwrap();
+        let b = builder.finish();
+
+        let wrapped = add_dyn(&a, &b).unwrap();
+        let expected =
+            Arc::new(Int32Array::from(vec![-2147483648, -2147483647])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = add_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_primitive_subtract_dyn_wrapping_overflow() {
+        let a = Int32Array::from(vec![-2]);
+        let b = Int32Array::from(vec![i32::MAX]);
+
+        let wrapped = subtract_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![i32::MAX])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = subtract_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_dictionary_subtract_dyn_wrapping_overflow() {
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(-2).unwrap();
+        let a = builder.finish();
+
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(i32::MAX).unwrap();
+        let b = builder.finish();
+
+        let wrapped = subtract_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![i32::MAX])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = subtract_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_primitive_mul_dyn_wrapping_overflow() {
+        let a = Int32Array::from(vec![10]);
+        let b = Int32Array::from(vec![i32::MAX]);
+
+        let wrapped = multiply_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![-10])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = multiply_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_dictionary_mul_dyn_wrapping_overflow() {
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(10).unwrap();
+        let a = builder.finish();
+
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(i32::MAX).unwrap();
+        let b = builder.finish();
+
+        let wrapped = multiply_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![-10])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = multiply_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_primitive_div_dyn_wrapping_overflow() {
+        let a = Int32Array::from(vec![i32::MIN]);
+        let b = Int32Array::from(vec![-1]);
+
+        let wrapped = divide_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![-2147483648])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = divide_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
+
+    #[test]
+    fn test_dictionary_div_dyn_wrapping_overflow() {
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(i32::MIN).unwrap();
+        let a = builder.finish();
+
+        let mut builder =
+            PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1);
+        builder.append(-1).unwrap();
+        let b = builder.finish();
+
+        let wrapped = divide_dyn(&a, &b).unwrap();
+        let expected = Arc::new(Int32Array::from(vec![-2147483648])) as ArrayRef;
+        assert_eq!(&expected, &wrapped);
+
+        let overflow = divide_dyn_checked(&a, &b);
+        overflow.expect_err("overflow should be detected");
+    }
 }

diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index 2347502f96e..bf10289683f 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -18,7 +18,8 @@

 //! Defines kernels suitable to perform operations to primitive arrays.
 use crate::array::{
-    Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
+    Array, ArrayAccessor, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray,
+    PrimitiveArray,
 };
 use crate::buffer::Buffer;
 use crate::compute::util::combine_option_bitmap;
@@ -26,6 +27,7 @@ use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType};
 use crate::downcast_dictionary_array;
 use crate::error::{ArrowError, Result};
 use crate::util::bit_iterator::try_for_each_valid_idx;
+use arrow_buffer::MutableBuffer;
 use std::sync::Arc;

 #[inline]
@@ -287,16 +289,14 @@ where
 ///
 /// Return an error if the arrays have different lengths or
 /// the operation fails
-pub fn try_binary<A, B, F, O>(
-    a: &PrimitiveArray<A>,
-    b: &PrimitiveArray<B>,
+pub fn try_binary<A: ArrayAccessor, B: ArrayAccessor, F, O>(
+    a: A,
+    b: B,
     op: F,
 ) -> Result<PrimitiveArray<O>>
 where
-    A: ArrowPrimitiveType,
-    B: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
-    F: Fn(A::Native, B::Native) -> Result<O::Native>,
+    F: Fn(A::Item, B::Item) -> Result<O::Native>,
 {
     if a.len() != b.len() {
         return Err(ArrowError::ComputeError(
@@ -309,36 +309,52 @@ where
     let len = a.len();

     if a.null_count() == 0 && b.null_count() == 0 {
-        let values = a.values().iter().zip(b.values()).map(|(l, r)| op(*l, *r));
-        let buffer = unsafe { Buffer::try_from_trusted_len_iter(values) }?;
-        // JUSTIFICATION
-        //  Benefit
-        //      ~75% speedup
-        //  Soundness
-        //      `values` is an iterator with a known size from a PrimitiveArray
-        return Ok(unsafe { build_primitive_array(len, buffer, 0, None) });
+        try_binary_no_nulls(len, a, b, op)
+    } else {
+        let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+
+        let null_count = null_buffer
+            .as_ref()
+            .map(|x| len - x.count_set_bits())
+            .unwrap_or_default();
+
+        let mut buffer = BufferBuilder::<O::Native>::new(len);
+        buffer.append_n_zeroed(len);
+        let slice = buffer.as_slice_mut();
+
+        try_for_each_valid_idx(len, 0, null_count, null_buffer.as_deref(), |idx| {
+            unsafe {
+                *slice.get_unchecked_mut(idx) =
+                    op(a.value_unchecked(idx), b.value_unchecked(idx))?
+            };
+            Ok::<_, ArrowError>(())
+        })?;
+
+        Ok(unsafe {
+            build_primitive_array(len, buffer.finish(), null_count, null_buffer)
+        })
     }
+}

-    let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
-
-    let null_count = null_buffer
-        .as_ref()
-        .map(|x| len - x.count_set_bits())
-        .unwrap_or_default();
-
-    let mut buffer = BufferBuilder::<O::Native>::new(len);
-    buffer.append_n_zeroed(len);
-    let slice = buffer.as_slice_mut();
-
-    try_for_each_valid_idx(len, 0, null_count, null_buffer.as_deref(), |idx| {
+/// This intentional inline(never) attribute helps LLVM optimize the loop.
+#[inline(never)]
+fn try_binary_no_nulls<A: ArrayAccessor, B: ArrayAccessor, F, O>(
+    len: usize,
+    a: A,
+    b: B,
+    op: F,
+) -> Result<PrimitiveArray<O>>
+where
+    O: ArrowPrimitiveType,
+    F: Fn(A::Item, B::Item) -> Result<O::Native>,
+{
+    let mut buffer = MutableBuffer::new(len * O::get_byte_width());
+    for idx in 0..len {
         unsafe {
-            *slice.get_unchecked_mut(idx) =
-                op(a.value_unchecked(idx), b.value_unchecked(idx))?
+            buffer.push_unchecked(op(a.value_unchecked(idx), b.value_unchecked(idx))?);
         };
-        Ok::<_, ArrowError>(())
-    })?;
-
-    Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count, null_buffer) })
+    }
+    Ok(unsafe { build_primitive_array(len, buffer.into(), 0, None) })
 }

 /// Applies the provided binary operation across `a` and `b`, collecting the optional results

From 5b601b3065d1c239feef6badf3ff68b6d72916a3 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 20 Sep 2022 06:30:55 -0400
Subject: [PATCH 16/16] MINOR: tweak arrow release instructions (#2758)

---
 dev/release/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dev/release/README.md b/dev/release/README.md
index 48748eccbe8..d418a09d070 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -74,9 +74,12 @@ git checkout -b make-release
 # manually edit ./dev/release/update_change_log.sh to reflect the release version
 # create the changelog
 CHANGELOG_GITHUB_TOKEN= ./dev/release/update_change_log.sh
+# run automated script to copy labels to issues based on referenced PRs
+python dev/release/label_issues.py
 # review change log / edit issues and labels if needed, rerun
 git commit -a -m 'Create changelog'
+
 # update versions
 sed -i '' -e 's/14.0.0/23.0.0/g' `find . -name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG.md`
 git commit -a -m 'Update version'
@@ -228,6 +231,7 @@ following commands

 Rust Arrow Crates:

 ```shell
+(cd arrow-buffer && cargo publish)
 (cd arrow && cargo publish)
 (cd arrow-flight && cargo publish)
 (cd parquet && cargo publish)
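For reviewers trying out [PATCH 15/16], a minimal usage sketch of the new overflow-checking kernels, mirroring the `test_primitive_add_dyn_wrapping_overflow` test in that patch. The kernels are defined in `arrow/src/compute/kernels/arithmetic.rs`; the `use arrow::compute::{...}` re-export path is an assumption (the existing arithmetic kernels are re-exported there), and it assumes a build of the `arrow` crate that includes these patches:

```rust
use arrow::array::{ArrayRef, Int32Array};
// Assumed re-export path; the kernels live in compute::kernels::arithmetic.
use arrow::compute::{add_dyn, add_dyn_checked};

fn main() {
    let a = Int32Array::from(vec![i32::MAX, i32::MIN]);
    let b = Int32Array::from(vec![1, 1]);

    // Wrapping variant: i32::MAX + 1 silently wraps around to i32::MIN.
    let wrapped: ArrayRef = add_dyn(&a, &b).unwrap();
    println!("wrapped result: {:?}", wrapped);

    // Checked variant: the same inputs surface the overflow as an Err instead.
    assert!(add_dyn_checked(&a, &b).is_err());
}
```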