From fb016566ea4f46d461230e1586f7bb95c29d5934 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Thu, 15 Sep 2022 15:30:54 +0100 Subject: [PATCH] Split out arrow-buffer crate (#2594) (#2693) * Split out arrow-buffer crate (#2594) * Fix doc * Review feedback * Review feedback * Use 64-bit wide collect_bool --- Cargo.toml | 3 +- arrow-buffer/Cargo.toml | 47 +++ .../src/alloc/alignment.rs | 0 {arrow => arrow-buffer}/src/alloc/mod.rs | 32 +- .../src/buffer/immutable.rs | 4 +- arrow-buffer/src/buffer/mod.rs | 29 ++ {arrow => arrow-buffer}/src/buffer/mutable.rs | 45 +-- {arrow => arrow-buffer}/src/buffer/ops.rs | 21 +- {arrow => arrow-buffer}/src/buffer/scalar.rs | 2 +- {arrow => arrow-buffer}/src/bytes.rs | 2 +- arrow-buffer/src/lib.rs | 24 ++ arrow-buffer/src/native.rs | 303 ++++++++++++++++++ .../src/util/bit_chunk_iterator.rs | 4 +- {arrow => arrow-buffer}/src/util/bit_util.rs | 0 arrow-buffer/src/util/mod.rs | 19 ++ arrow/Cargo.toml | 2 + arrow/benches/buffer_bit_ops.rs | 6 +- arrow/src/alloc/types.rs | 73 ----- arrow/src/array/array_list.rs | 4 +- arrow/src/bitmap.rs | 29 +- arrow/src/buffer/mod.rs | 72 ----- arrow/src/compute/kernels/boolean.rs | 73 ++--- arrow/src/datatypes/native.rs | 286 +---------------- arrow/src/lib.rs | 5 +- arrow/src/util/mod.rs | 4 +- 25 files changed, 542 insertions(+), 547 deletions(-) create mode 100644 arrow-buffer/Cargo.toml rename {arrow => arrow-buffer}/src/alloc/alignment.rs (100%) rename {arrow => arrow-buffer}/src/alloc/mod.rs (84%) rename {arrow => arrow-buffer}/src/buffer/immutable.rs (99%) create mode 100644 arrow-buffer/src/buffer/mod.rs rename {arrow => arrow-buffer}/src/buffer/mutable.rs (95%) rename {arrow => arrow-buffer}/src/buffer/ops.rs (89%) rename {arrow => arrow-buffer}/src/buffer/scalar.rs (99%) rename {arrow => arrow-buffer}/src/bytes.rs (98%) create mode 100644 arrow-buffer/src/lib.rs create mode 100644 arrow-buffer/src/native.rs rename {arrow => arrow-buffer}/src/util/bit_chunk_iterator.rs (99%) rename {arrow => arrow-buffer}/src/util/bit_util.rs (100%) create mode 100644 arrow-buffer/src/util/mod.rs delete mode 100644 arrow/src/alloc/types.rs delete mode 100644 arrow/src/buffer/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 9bf55c0f236..d0233ccb376 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,10 +18,11 @@ [workspace] members = [ "arrow", + "arrow-buffer", + "arrow-flight", "parquet", "parquet_derive", "parquet_derive_test", - "arrow-flight", "integration-testing", "object_store", ] diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml new file mode 100644 index 00000000000..87019111efc --- /dev/null +++ b/arrow-buffer/Cargo.toml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-buffer" +version = "22.0.0" +description = "Buffer abstractions for Apache Arrow" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_buffer" +path = "src/lib.rs" +bench = false + +[dependencies] +num = { version = "0.4", default-features = false, features = ["std"] } +half = { version = "2.0", default-features = false } + +[dev-dependencies] +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } + +[build-dependencies] diff --git a/arrow/src/alloc/alignment.rs b/arrow-buffer/src/alloc/alignment.rs similarity index 100% rename from arrow/src/alloc/alignment.rs rename to arrow-buffer/src/alloc/alignment.rs diff --git a/arrow/src/alloc/mod.rs b/arrow-buffer/src/alloc/mod.rs similarity index 84% rename from arrow/src/alloc/mod.rs rename to arrow-buffer/src/alloc/mod.rs index 526850685c4..6b09c4b31b9 100644 --- a/arrow/src/alloc/mod.rs +++ b/arrow-buffer/src/alloc/mod.rs @@ -20,34 +20,29 @@ use std::alloc::{handle_alloc_error, Layout}; use std::fmt::{Debug, Formatter}; -use std::mem::size_of; use std::panic::RefUnwindSafe; use std::ptr::NonNull; use std::sync::Arc; mod alignment; -mod types; pub use alignment::ALIGNMENT; -pub use types::NativeType; #[inline] -unsafe fn null_pointer() -> NonNull { - NonNull::new_unchecked(ALIGNMENT as *mut T) +unsafe fn null_pointer() -> NonNull { + NonNull::new_unchecked(ALIGNMENT as *mut u8) } /// Allocates a cache-aligned memory region of `size` bytes with uninitialized values. /// This is more performant than using [allocate_aligned_zeroed] when all bytes will have /// an unknown or non-zero value and is semantically similar to `malloc`. -pub fn allocate_aligned(size: usize) -> NonNull { +pub fn allocate_aligned(size: usize) -> NonNull { unsafe { if size == 0 { null_pointer() } else { - let size = size * size_of::(); - let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); - let raw_ptr = std::alloc::alloc(layout) as *mut T; + let raw_ptr = std::alloc::alloc(layout); NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) } } @@ -56,15 +51,13 @@ pub fn allocate_aligned(size: usize) -> NonNull { /// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them. /// This is more performant than using [allocate_aligned] and setting all bytes to zero /// and is semantically similar to `calloc`. -pub fn allocate_aligned_zeroed(size: usize) -> NonNull { +pub fn allocate_aligned_zeroed(size: usize) -> NonNull { unsafe { if size == 0 { null_pointer() } else { - let size = size * size_of::(); - let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); - let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T; + let raw_ptr = std::alloc::alloc_zeroed(layout); NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) } } @@ -78,9 +71,8 @@ pub fn allocate_aligned_zeroed(size: usize) -> NonNull { /// * ptr must denote a block of memory currently allocated via this allocator, /// /// * size must be the same size that was used to allocate that block of memory, -pub unsafe fn free_aligned(ptr: NonNull, size: usize) { +pub unsafe fn free_aligned(ptr: NonNull, size: usize) { if ptr != null_pointer() { - let size = size * size_of::(); std::alloc::dealloc( ptr.as_ptr() as *mut u8, Layout::from_size_align_unchecked(size, ALIGNMENT), @@ -99,13 +91,11 @@ pub unsafe fn free_aligned(ptr: NonNull, size: usize) { /// /// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e., /// the rounded value must be less than usize::MAX). -pub unsafe fn reallocate( - ptr: NonNull, +pub unsafe fn reallocate( + ptr: NonNull, old_size: usize, new_size: usize, -) -> NonNull { - let old_size = old_size * size_of::(); - let new_size = new_size * size_of::(); +) -> NonNull { if ptr == null_pointer() { return allocate_aligned(new_size); } @@ -119,7 +109,7 @@ pub unsafe fn reallocate( ptr.as_ptr() as *mut u8, Layout::from_size_align_unchecked(old_size, ALIGNMENT), new_size, - ) as *mut T; + ); NonNull::new(raw_ptr).unwrap_or_else(|| { handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT)) }) diff --git a/arrow/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs similarity index 99% rename from arrow/src/buffer/immutable.rs rename to arrow-buffer/src/buffer/immutable.rs index 6d4d624efc1..c60d28afc78 100644 --- a/arrow/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -23,7 +23,7 @@ use std::{convert::AsRef, usize}; use crate::alloc::{Allocation, Deallocation}; use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; -use crate::{bytes::Bytes, datatypes::ArrowNativeType}; +use crate::{bytes::Bytes, native::ArrowNativeType}; use super::ops::bitwise_unary_op_helper; use super::MutableBuffer; @@ -271,7 +271,7 @@ impl Buffer { /// Prefer this to `collect` whenever possible, as it is ~60% faster. /// # Example /// ``` - /// # use arrow::buffer::Buffer; + /// # use arrow_buffer::buffer::Buffer; /// let v = vec![1u32]; /// let iter = v.iter().map(|x| x * 2); /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) }; diff --git a/arrow-buffer/src/buffer/mod.rs b/arrow-buffer/src/buffer/mod.rs new file mode 100644 index 00000000000..b9201f774fe --- /dev/null +++ b/arrow-buffer/src/buffer/mod.rs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents +//! a contiguous memory region that can be shared via `offsets`. + +mod immutable; +pub use immutable::*; +mod mutable; +pub use mutable::*; +mod ops; +mod scalar; +pub use scalar::*; + +pub use ops::*; diff --git a/arrow/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs similarity index 95% rename from arrow/src/buffer/mutable.rs rename to arrow-buffer/src/buffer/mutable.rs index d1e63399393..80644b63d11 100644 --- a/arrow/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -20,7 +20,7 @@ use crate::alloc::Deallocation; use crate::{ alloc, bytes::Bytes, - datatypes::{ArrowNativeType, ToByteSlice}, + native::{ArrowNativeType, ToByteSlice}, util::bit_util, }; use std::ptr::NonNull; @@ -31,12 +31,12 @@ use std::ptr::NonNull; /// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice] /// to insert many items, and `into` to convert it to [`Buffer`]. /// -/// For a safe, strongly typed API consider using [`crate::array::BufferBuilder`] +/// For a safe, strongly typed API consider using `arrow::array::BufferBuilder` /// /// # Example /// /// ``` -/// # use arrow::buffer::{Buffer, MutableBuffer}; +/// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let mut buffer = MutableBuffer::new(0); /// buffer.push(256u32); /// buffer.extend_from_slice(&[1u32]); @@ -75,7 +75,7 @@ impl MutableBuffer { /// all bytes are guaranteed to be `0u8`. /// # Example /// ``` - /// # use arrow::buffer::{Buffer, MutableBuffer}; + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let mut buffer = MutableBuffer::from_len_zeroed(127); /// assert_eq!(buffer.len(), 127); /// assert!(buffer.capacity() >= 127); @@ -131,7 +131,7 @@ impl MutableBuffer { /// `self.len + additional > capacity`. /// # Example /// ``` - /// # use arrow::buffer::{Buffer, MutableBuffer}; + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let mut buffer = MutableBuffer::new(0); /// buffer.reserve(253); // allocates for the first time /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation @@ -171,7 +171,7 @@ impl MutableBuffer { /// growing it (potentially reallocating it) and writing `value` in the newly available bytes. /// # Example /// ``` - /// # use arrow::buffer::{Buffer, MutableBuffer}; + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let mut buffer = MutableBuffer::new(0); /// buffer.resize(253, 2); // allocates for the first time /// assert_eq!(buffer.as_slice()[252], 2u8); @@ -195,7 +195,7 @@ impl MutableBuffer { /// /// # Example /// ``` - /// # use arrow::buffer::{Buffer, MutableBuffer}; + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// // 2 cache lines /// let mut buffer = MutableBuffer::new(128); /// assert_eq!(buffer.capacity(), 128); @@ -322,7 +322,7 @@ impl MutableBuffer { /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed. /// # Example /// ``` - /// # use arrow::buffer::MutableBuffer; + /// # use arrow_buffer::buffer::MutableBuffer; /// let mut buffer = MutableBuffer::new(0); /// buffer.extend_from_slice(&[2u32, 0]); /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes @@ -346,7 +346,7 @@ impl MutableBuffer { /// Extends the buffer with a new item, increasing its capacity if needed. /// # Example /// ``` - /// # use arrow::buffer::MutableBuffer; + /// # use arrow_buffer::buffer::MutableBuffer; /// let mut buffer = MutableBuffer::new(0); /// buffer.push(256u32); /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes @@ -384,7 +384,7 @@ impl MutableBuffer { /// # Safety /// The caller must ensure that the buffer was properly initialized up to `len`. #[inline] - pub(crate) unsafe fn set_len(&mut self, len: usize) { + pub unsafe fn set_len(&mut self, len: usize) { assert!(len <= self.capacity()); self.len = len; } @@ -394,16 +394,16 @@ impl MutableBuffer { /// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster /// as it eliminates the conditional `Iterator::next` #[inline] - pub(crate) fn collect_bool bool>(len: usize, mut f: F) -> Self { - let mut buffer = Self::new(bit_util::ceil(len, 8)); + pub fn collect_bool bool>(len: usize, mut f: F) -> Self { + let mut buffer = Self::new(bit_util::ceil(len, 64) * 8); - let chunks = len / 8; - let remainder = len % 8; + let chunks = len / 64; + let remainder = len % 64; for chunk in 0..chunks { let mut packed = 0; - for bit_idx in 0..8 { - let i = bit_idx + chunk * 8; - packed |= (f(i) as u8) << bit_idx; + for bit_idx in 0..64 { + let i = bit_idx + chunk * 64; + packed |= (f(i) as u64) << bit_idx; } // SAFETY: Already allocated sufficient capacity @@ -413,14 +413,15 @@ impl MutableBuffer { if remainder != 0 { let mut packed = 0; for bit_idx in 0..remainder { - let i = bit_idx + chunks * 8; - packed |= (f(i) as u8) << bit_idx; + let i = bit_idx + chunks * 64; + packed |= (f(i) as u64) << bit_idx; } // SAFETY: Already allocated sufficient capacity unsafe { buffer.push_unchecked(packed) } } + buffer.truncate(bit_util::ceil(len, 8)); buffer } } @@ -484,7 +485,7 @@ impl MutableBuffer { /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. /// # Example /// ``` - /// # use arrow::buffer::MutableBuffer; + /// # use arrow_buffer::buffer::MutableBuffer; /// let v = vec![1u32]; /// let iter = v.iter().map(|x| x * 2); /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) }; @@ -525,10 +526,10 @@ impl MutableBuffer { } /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length. - /// # use arrow::buffer::MutableBuffer; + /// # use arrow_buffer::buffer::MutableBuffer; /// # Example /// ``` - /// # use arrow::buffer::MutableBuffer; + /// # use arrow_buffer::buffer::MutableBuffer; /// let v = vec![false, true, false]; /// let iter = v.iter().map(|x| *x || true); /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) }; diff --git a/arrow/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs similarity index 89% rename from arrow/src/buffer/ops.rs rename to arrow-buffer/src/buffer/ops.rs index 7000f39767c..c1295ad9ab7 100644 --- a/arrow/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -20,26 +20,19 @@ use crate::util::bit_util::ceil; /// Apply a bitwise operation `op` to four inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. -#[allow(clippy::too_many_arguments)] -pub(crate) fn bitwise_quaternary_op_helper( - first: &Buffer, - first_offset_in_bits: usize, - second: &Buffer, - second_offset_in_bits: usize, - third: &Buffer, - third_offset_in_bits: usize, - fourth: &Buffer, - fourth_offset_in_bits: usize, +pub fn bitwise_quaternary_op_helper( + buffers: [&Buffer; 4], + offsets: [usize; 4], len_in_bits: usize, op: F, ) -> Buffer where F: Fn(u64, u64, u64, u64) -> u64, { - let first_chunks = first.bit_chunks(first_offset_in_bits, len_in_bits); - let second_chunks = second.bit_chunks(second_offset_in_bits, len_in_bits); - let third_chunks = third.bit_chunks(third_offset_in_bits, len_in_bits); - let fourth_chunks = fourth.bit_chunks(fourth_offset_in_bits, len_in_bits); + let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits); + let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits); + let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits); + let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits); let chunks = first_chunks .iter() diff --git a/arrow/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs similarity index 99% rename from arrow/src/buffer/scalar.rs rename to arrow-buffer/src/buffer/scalar.rs index 7d663cd2bf9..a9f2df3d9ff 100644 --- a/arrow/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -16,7 +16,7 @@ // under the License. use crate::buffer::Buffer; -use crate::datatypes::ArrowNativeType; +use crate::native::ArrowNativeType; use std::ops::Deref; /// Provides a safe API for interpreting a [`Buffer`] as a slice of [`ArrowNativeType`] diff --git a/arrow/src/bytes.rs b/arrow-buffer/src/bytes.rs similarity index 98% rename from arrow/src/bytes.rs rename to arrow-buffer/src/bytes.rs index 75137a55295..20bf5a474b4 100644 --- a/arrow/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -111,7 +111,7 @@ impl Drop for Bytes { fn drop(&mut self) { match &self.deallocation { Deallocation::Arrow(capacity) => { - unsafe { alloc::free_aligned::(self.ptr, *capacity) }; + unsafe { alloc::free_aligned(self.ptr, *capacity) }; } // The automatic drop implementation will free the memory once the reference count reaches zero Deallocation::Custom(_allocation) => (), diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs new file mode 100644 index 00000000000..a8aca7c3dad --- /dev/null +++ b/arrow-buffer/src/lib.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Buffer abstractions for [Apache Arrow](https://docs.rs/arrow) + +pub mod alloc; +pub mod buffer; +mod bytes; +pub mod native; +pub mod util; diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs new file mode 100644 index 00000000000..d8431953c43 --- /dev/null +++ b/arrow-buffer/src/native.rs @@ -0,0 +1,303 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use half::f16; + +mod private { + pub trait Sealed {} +} + +/// Trait expressing a Rust type that has the same in-memory representation +/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits). +/// +/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers +/// as is. +/// +/// # Transmute Safety +/// +/// A type T implementing this trait means that any arbitrary slice of bytes of length and +/// alignment `size_of::()` can be safely interpreted as a value of that type without +/// being unsound, i.e. potentially resulting in undefined behaviour. +/// +/// Note: in the case of floating point numbers this transmutation can result in a signalling +/// NaN, which, whilst sound, can be unwieldy. In general, whilst it is perfectly sound to +/// reinterpret bytes as different types using this trait, it is likely unwise. For more information +/// see [f32::from_bits] and [f64::from_bits]. +/// +/// Note: `bool` is restricted to `0` or `1`, and so `bool: !ArrowNativeType` +/// +/// # Sealed +/// +/// Due to the above restrictions, this trait is sealed to prevent accidental misuse +pub trait ArrowNativeType: + std::fmt::Debug + + Send + + Sync + + Copy + + PartialOrd + + std::str::FromStr + + Default + + private::Sealed + + 'static +{ + /// Convert native type from usize. + #[inline] + fn from_usize(_: usize) -> Option { + None + } + + /// Convert native type to usize. + #[inline] + fn to_usize(&self) -> Option { + None + } + + /// Convert native type to isize. + #[inline] + fn to_isize(&self) -> Option { + None + } + + /// Convert native type from i32. + #[inline] + fn from_i32(_: i32) -> Option { + None + } + + /// Convert native type from i64. + #[inline] + fn from_i64(_: i64) -> Option { + None + } + + /// Convert native type from i128. + #[inline] + fn from_i128(_: i128) -> Option { + None + } +} + +impl private::Sealed for i8 {} +impl ArrowNativeType for i8 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl private::Sealed for i16 {} +impl ArrowNativeType for i16 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl private::Sealed for i32 {} +impl ArrowNativeType for i32 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } + + /// Convert native type from i32. + #[inline] + fn from_i32(val: i32) -> Option { + Some(val) + } +} + +impl private::Sealed for i64 {} +impl ArrowNativeType for i64 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } + + /// Convert native type from i64. + #[inline] + fn from_i64(val: i64) -> Option { + Some(val) + } +} + +impl private::Sealed for i128 {} +impl ArrowNativeType for i128 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } + + /// Convert native type from i128. + #[inline] + fn from_i128(val: i128) -> Option { + Some(val) + } +} + +impl private::Sealed for u8 {} +impl ArrowNativeType for u8 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl private::Sealed for u16 {} +impl ArrowNativeType for u16 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl private::Sealed for u32 {} +impl ArrowNativeType for u32 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl private::Sealed for u64 {} +impl ArrowNativeType for u64 { + #[inline] + fn from_usize(v: usize) -> Option { + num::FromPrimitive::from_usize(v) + } + + #[inline] + fn to_usize(&self) -> Option { + num::ToPrimitive::to_usize(self) + } + + #[inline] + fn to_isize(&self) -> Option { + num::ToPrimitive::to_isize(self) + } +} + +impl ArrowNativeType for f16 {} +impl private::Sealed for f16 {} +impl ArrowNativeType for f32 {} +impl private::Sealed for f32 {} +impl ArrowNativeType for f64 {} +impl private::Sealed for f64 {} + +/// Allows conversion from supported Arrow types to a byte slice. +pub trait ToByteSlice { + /// Converts this instance into a byte slice + fn to_byte_slice(&self) -> &[u8]; +} + +impl ToByteSlice for [T] { + #[inline] + fn to_byte_slice(&self) -> &[u8] { + let raw_ptr = self.as_ptr() as *const T as *const u8; + unsafe { + std::slice::from_raw_parts(raw_ptr, self.len() * std::mem::size_of::()) + } + } +} + +impl ToByteSlice for T { + #[inline] + fn to_byte_slice(&self) -> &[u8] { + let raw_ptr = self as *const T as *const u8; + unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of::()) } + } +} diff --git a/arrow/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs similarity index 99% rename from arrow/src/util/bit_chunk_iterator.rs rename to arrow-buffer/src/util/bit_chunk_iterator.rs index f0127ed2267..ba028204da1 100644 --- a/arrow/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -153,7 +153,7 @@ impl<'a> UnalignedBitChunk<'a> { self.chunks } - pub(crate) fn iter(&self) -> UnalignedBitChunkIterator<'a> { + pub fn iter(&self) -> UnalignedBitChunkIterator<'a> { self.prefix .into_iter() .chain(self.chunks.iter().cloned()) @@ -166,7 +166,7 @@ impl<'a> UnalignedBitChunk<'a> { } } -pub(crate) type UnalignedBitChunkIterator<'a> = std::iter::Chain< +pub type UnalignedBitChunkIterator<'a> = std::iter::Chain< std::iter::Chain< std::option::IntoIter, std::iter::Cloned>, diff --git a/arrow/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs similarity index 100% rename from arrow/src/util/bit_util.rs rename to arrow-buffer/src/util/bit_util.rs diff --git a/arrow-buffer/src/util/mod.rs b/arrow-buffer/src/util/mod.rs new file mode 100644 index 00000000000..c1cb284dcc1 --- /dev/null +++ b/arrow-buffer/src/util/mod.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod bit_chunk_iterator; +pub mod bit_util; diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 2de4db64276..c66cef61202 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,6 +44,8 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] +arrow-buffer = { path = "../arrow-buffer", version = "22.0.0" } + serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } indexmap = { version = "1.9", default-features = false, features = ["std"] } diff --git a/arrow/benches/buffer_bit_ops.rs b/arrow/benches/buffer_bit_ops.rs index 6c6bb0463b2..68b22df4b13 100644 --- a/arrow/benches/buffer_bit_ops.rs +++ b/arrow/benches/buffer_bit_ops.rs @@ -38,15 +38,15 @@ fn create_buffer(size: usize) -> Buffer { } fn bench_buffer_and(left: &Buffer, right: &Buffer) { - criterion::black_box((left & right).unwrap()); + criterion::black_box(buffer_bin_and(left, 0, right, 0, left.len() * 8)); } fn bench_buffer_or(left: &Buffer, right: &Buffer) { - criterion::black_box((left | right).unwrap()); + criterion::black_box(buffer_bin_or(left, 0, right, 0, left.len() * 8)); } fn bench_buffer_not(buffer: &Buffer) { - criterion::black_box(!buffer); + criterion::black_box(buffer_unary_not(buffer, 0, buffer.len() * 8)); } fn bench_buffer_and_with_offsets( diff --git a/arrow/src/alloc/types.rs b/arrow/src/alloc/types.rs deleted file mode 100644 index 026e1241f46..00000000000 --- a/arrow/src/alloc/types.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::datatypes::DataType; -use half::f16; - -/// A type that Rust's custom allocator knows how to allocate and deallocate. -/// This is implemented for all Arrow's physical types whose in-memory representation -/// matches Rust's physical types. Consider this trait sealed. -/// # Safety -/// Do not implement this trait. -pub unsafe trait NativeType: - Sized + Copy + std::fmt::Debug + std::fmt::Display + PartialEq + Default + Sized + 'static -{ - type Bytes: AsRef<[u8]>; - - /// Whether a DataType is a valid type for this physical representation. - fn is_valid(data_type: &DataType) -> bool; - - /// How this type represents itself as bytes in little endianess. - /// This is used for IPC, where data is communicated with a specific endianess. - fn to_le_bytes(&self) -> Self::Bytes; -} - -macro_rules! create_native { - ($native_ty:ty,$($impl_pattern:pat_param)|+) => { - unsafe impl NativeType for $native_ty { - type Bytes = [u8; std::mem::size_of::()]; - - #[inline] - fn to_le_bytes(&self) -> Self::Bytes { - Self::to_le_bytes(*self) - } - - #[inline] - fn is_valid(data_type: &DataType) -> bool { - matches!(data_type, $($impl_pattern)|+) - } - } - }; -} - -create_native!(u8, DataType::UInt8); -create_native!(u16, DataType::UInt16); -create_native!(u32, DataType::UInt32); -create_native!(u64, DataType::UInt64); -create_native!(i8, DataType::Int8); -create_native!(i16, DataType::Int16); -create_native!( - i32, - DataType::Int32 | DataType::Date32 | DataType::Time32(_) -); -create_native!( - i64, - DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) -); -create_native!(f16, DataType::Float16); -create_native!(f32, DataType::Float32); -create_native!(f64, DataType::Float64); diff --git a/arrow/src/array/array_list.rs b/arrow/src/array/array_list.rs index b9c05014c3f..e830acdc2b9 100644 --- a/arrow/src/array/array_list.rs +++ b/arrow/src/array/array_list.rs @@ -844,7 +844,7 @@ mod tests { #[test] #[should_panic(expected = "memory is not aligned")] fn test_primitive_array_alignment() { - let ptr = alloc::allocate_aligned::(8); + let ptr = alloc::allocate_aligned(8); let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32) @@ -860,7 +860,7 @@ mod tests { // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] fn test_list_array_alignment() { - let ptr = alloc::allocate_aligned::(8); + let ptr = alloc::allocate_aligned(8); let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); diff --git a/arrow/src/bitmap.rs b/arrow/src/bitmap.rs index 4ba1bb9f888..4491da4632b 100644 --- a/arrow/src/bitmap.rs +++ b/arrow/src/bitmap.rs @@ -18,10 +18,11 @@ //! Defines [Bitmap] for tracking validity bitmaps use crate::buffer::Buffer; -use crate::error::Result; +use crate::error::{ArrowError, Result}; use crate::util::bit_util; use std::mem; +use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or}; use std::ops::{BitAnd, BitOr}; #[derive(Debug, Clone)] @@ -79,7 +80,18 @@ impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap { type Output = Result; fn bitand(self, rhs: &'b Bitmap) -> Result { - Ok(Bitmap::from((&self.bits & &rhs.bits)?)) + if self.bits.len() != rhs.bits.len() { + return Err(ArrowError::ComputeError( + "Buffers must be the same size to apply Bitwise AND.".to_string(), + )); + } + Ok(Bitmap::from(buffer_bin_and( + &self.bits, + 0, + &rhs.bits, + 0, + self.bit_len(), + ))) } } @@ -87,7 +99,18 @@ impl<'a, 'b> BitOr<&'b Bitmap> for &'a Bitmap { type Output = Result; fn bitor(self, rhs: &'b Bitmap) -> Result { - Ok(Bitmap::from((&self.bits | &rhs.bits)?)) + if self.bits.len() != rhs.bits.len() { + return Err(ArrowError::ComputeError( + "Buffers must be the same size to apply Bitwise OR.".to_string(), + )); + } + Ok(Bitmap::from(buffer_bin_or( + &self.bits, + 0, + &rhs.bits, + 0, + self.bit_len(), + ))) } } diff --git a/arrow/src/buffer/mod.rs b/arrow/src/buffer/mod.rs deleted file mode 100644 index b392b0583d6..00000000000 --- a/arrow/src/buffer/mod.rs +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents -//! a contiguous memory region that can be shared via `offsets`. - -mod immutable; -pub use immutable::*; -mod mutable; -pub use mutable::*; -mod ops; -mod scalar; -pub use scalar::*; - -pub use ops::*; - -use crate::error::{ArrowError, Result}; -use std::ops::{BitAnd, BitOr, Not}; - -impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { - type Output = Result; - - fn bitand(self, rhs: &'b Buffer) -> Result { - if self.len() != rhs.len() { - return Err(ArrowError::ComputeError( - "Buffers must be the same size to apply Bitwise AND.".to_string(), - )); - } - - let len_in_bits = self.len() * 8; - Ok(buffer_bin_and(self, 0, rhs, 0, len_in_bits)) - } -} - -impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { - type Output = Result; - - fn bitor(self, rhs: &'b Buffer) -> Result { - if self.len() != rhs.len() { - return Err(ArrowError::ComputeError( - "Buffers must be the same size to apply Bitwise OR.".to_string(), - )); - } - - let len_in_bits = self.len() * 8; - - Ok(buffer_bin_or(self, 0, rhs, 0, len_in_bits)) - } -} - -impl Not for &Buffer { - type Output = Buffer; - - fn not(self) -> Buffer { - let len_in_bits = self.len() * 8; - buffer_unary_not(self, 0, len_in_bits) - } -} diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs index c51953a7540..b8719ad2d6c 100644 --- a/arrow/src/compute/kernels/boolean.rs +++ b/arrow/src/compute/kernels/boolean.rs @@ -22,8 +22,6 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. -use std::ops::Not; - use crate::array::{Array, ArrayData, BooleanArray, PrimitiveArray}; use crate::buffer::{ bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or, @@ -85,14 +83,13 @@ pub(crate) fn build_null_buffer_for_and_kleene( // The final null bits are: // (a | (c & !d)) & (c | (a & !b)) Some(bitwise_quaternary_op_helper( - left_null_buffer, - left_offset, - left_buffer, - left_offset, - right_null_buffer, - right_offset, - right_buffer, - right_offset, + [ + left_null_buffer, + left_buffer, + right_null_buffer, + right_buffer, + ], + [left_offset, left_offset, right_offset, right_offset], len_in_bits, |a, b, c, d| (a | (c & !d)) & (c | (a & !b)), )) @@ -163,14 +160,13 @@ pub(crate) fn build_null_buffer_for_or_kleene( // The final null bits are: // (a | (c & d)) & (c | (a & b)) Some(bitwise_quaternary_op_helper( - left_null_buffer, - left_offset, - left_buffer, - left_offset, - right_null_buffer, - right_offset, - right_buffer, - right_offset, + [ + left_null_buffer, + left_buffer, + right_null_buffer, + right_buffer, + ], + [left_offset, left_offset, right_offset, right_offset], len_in_bits, |a, b, c, d| (a | (c & d)) & (c | (a & b)), )) @@ -493,7 +489,6 @@ where )); } let left_data = left.data(); - let right_data = right.data(); // If left has no bitmap, create a new one with all values set for nullity op later // left=0 (null) right=null output bitmap=null @@ -507,33 +502,31 @@ where // // Do the right expression !(right_values & right_bitmap) first since there are two steps // TRICK: convert BooleanArray buffer as a bitmap for faster operation - let right_combo_buffer = match right.data().null_bitmap() { + let rcb = match right.data().null_bitmap() { Some(right_bitmap) => { - // NOTE: right values and bitmaps are combined and stay at bit offset right.offset() - (right.values() & &right_bitmap.bits).ok().map(|b| b.not()) + let and = buffer_bin_and( + right.values(), + right.offset(), + &right_bitmap.bits, + right.offset(), + right.len(), + ); + buffer_unary_not(&and, 0, right.len()) } - None => Some(!right.values()), + None => buffer_unary_not(right.values(), right.offset(), right.len()), }; // AND of original left null bitmap with right expression // Here we take care of the possible offsets of the left and right arrays all at once. let modified_null_buffer = match left_data.null_bitmap() { - Some(left_null_bitmap) => match right_combo_buffer { - Some(rcb) => Some(buffer_bin_and( - &left_null_bitmap.bits, - left_data.offset(), - &rcb, - right_data.offset(), - left_data.len(), - )), - None => Some( - left_null_bitmap - .bits - .bit_slice(left_data.offset(), left.len()), - ), - }, - None => right_combo_buffer - .map(|rcb| rcb.bit_slice(right_data.offset(), right_data.len())), + Some(left_null_bitmap) => buffer_bin_and( + &left_null_bitmap.bits, + left_data.offset(), + &rcb, + 0, + left_data.len(), + ), + None => rcb, }; // Align/shift left data on offset as needed, since new bitmaps are shifted and aligned to 0 already @@ -556,7 +549,7 @@ where T::DATA_TYPE, left.len(), None, // force new to compute the number of null bits - modified_null_buffer, + Some(modified_null_buffer), 0, // No need for offset since left data has been shifted data_buffers, left_data.child_data().to_vec(), diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 444f2b27dce..8c329a066e5 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -16,82 +16,9 @@ // under the License. use super::DataType; +pub use arrow_buffer::native::{ArrowNativeType, ToByteSlice}; use half::f16; -mod private { - pub trait Sealed {} -} - -/// Trait expressing a Rust type that has the same in-memory representation -/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits). -/// -/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers -/// as is. -/// -/// # Transmute Safety -/// -/// A type T implementing this trait means that any arbitrary slice of bytes of length and -/// alignment `size_of::()` can be safely interpreted as a value of that type without -/// being unsound, i.e. potentially resulting in undefined behaviour. -/// -/// Note: in the case of floating point numbers this transmutation can result in a signalling -/// NaN, which, whilst sound, can be unwieldy. In general, whilst it is perfectly sound to -/// reinterpret bytes as different types using this trait, it is likely unwise. For more information -/// see [f32::from_bits] and [f64::from_bits]. -/// -/// Note: `bool` is restricted to `0` or `1`, and so `bool: !ArrowNativeType` -/// -/// # Sealed -/// -/// Due to the above restrictions, this trait is sealed to prevent accidental misuse -pub trait ArrowNativeType: - std::fmt::Debug - + Send - + Sync - + Copy - + PartialOrd - + std::str::FromStr - + Default - + private::Sealed - + 'static -{ - /// Convert native type from usize. - #[inline] - fn from_usize(_: usize) -> Option { - None - } - - /// Convert native type to usize. - #[inline] - fn to_usize(&self) -> Option { - None - } - - /// Convert native type to isize. - #[inline] - fn to_isize(&self) -> Option { - None - } - - /// Convert native type from i32. - #[inline] - fn from_i32(_: i32) -> Option { - None - } - - /// Convert native type from i64. - #[inline] - fn from_i64(_: i64) -> Option { - None - } - - /// Convert native type from i128. - #[inline] - fn from_i128(_: i128) -> Option { - None - } -} - /// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the /// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`]. pub trait ArrowPrimitiveType: 'static { @@ -219,214 +146,3 @@ native_type_op!(u64); impl native_op::ArrowNativeTypeOp for f16 {} impl native_op::ArrowNativeTypeOp for f32 {} impl native_op::ArrowNativeTypeOp for f64 {} - -impl private::Sealed for i8 {} -impl ArrowNativeType for i8 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl private::Sealed for i16 {} -impl ArrowNativeType for i16 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl private::Sealed for i32 {} -impl ArrowNativeType for i32 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } - - /// Convert native type from i32. - #[inline] - fn from_i32(val: i32) -> Option { - Some(val) - } -} - -impl private::Sealed for i64 {} -impl ArrowNativeType for i64 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } - - /// Convert native type from i64. - #[inline] - fn from_i64(val: i64) -> Option { - Some(val) - } -} - -impl private::Sealed for i128 {} -impl ArrowNativeType for i128 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } - - /// Convert native type from i128. - #[inline] - fn from_i128(val: i128) -> Option { - Some(val) - } -} - -impl private::Sealed for u8 {} -impl ArrowNativeType for u8 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl private::Sealed for u16 {} -impl ArrowNativeType for u16 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl private::Sealed for u32 {} -impl ArrowNativeType for u32 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl private::Sealed for u64 {} -impl ArrowNativeType for u64 { - #[inline] - fn from_usize(v: usize) -> Option { - num::FromPrimitive::from_usize(v) - } - - #[inline] - fn to_usize(&self) -> Option { - num::ToPrimitive::to_usize(self) - } - - #[inline] - fn to_isize(&self) -> Option { - num::ToPrimitive::to_isize(self) - } -} - -impl ArrowNativeType for f16 {} -impl private::Sealed for f16 {} -impl ArrowNativeType for f32 {} -impl private::Sealed for f32 {} -impl ArrowNativeType for f64 {} -impl private::Sealed for f64 {} - -/// Allows conversion from supported Arrow types to a byte slice. -pub trait ToByteSlice { - /// Converts this instance into a byte slice - fn to_byte_slice(&self) -> &[u8]; -} - -impl ToByteSlice for [T] { - #[inline] - fn to_byte_slice(&self) -> &[u8] { - let raw_ptr = self.as_ptr() as *const T as *const u8; - unsafe { - std::slice::from_raw_parts(raw_ptr, self.len() * std::mem::size_of::()) - } - } -} - -impl ToByteSlice for T { - #[inline] - fn to_byte_slice(&self) -> &[u8] { - let raw_ptr = self as *const T as *const u8; - unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of::()) } - } -} diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 87a4799e3e2..a4d864754cd 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -248,11 +248,10 @@ #![deny(clippy::redundant_clone)] #![warn(missing_debug_implementations)] -pub mod alloc; +pub use arrow_buffer::{alloc, buffer}; + pub mod array; pub mod bitmap; -pub mod buffer; -mod bytes; pub mod compute; #[cfg(feature = "csv")] pub mod csv; diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index 6f68398e770..5453c11ab8a 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. +pub use arrow_buffer::util::{bit_chunk_iterator, bit_util}; + #[cfg(feature = "test_utils")] pub mod bench_util; -pub mod bit_chunk_iterator; pub mod bit_iterator; pub(crate) mod bit_mask; -pub mod bit_util; #[cfg(feature = "test_utils")] pub mod data_gen; pub mod display;