diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index f8456c5368f..d1f501f540e 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -51,9 +51,9 @@ jobs: - name: Test run: | cargo test -p arrow - - name: Test --features=force_validate,prettyprint + - name: Test --features=force_validate,prettyprint,ffi run: | - cargo test -p arrow --features=force_validate,prettyprint + cargo test -p arrow --features=force_validate,prettyprint,ffi - name: Run examples run: | # Test arrow examples @@ -153,8 +153,8 @@ jobs: - name: Build run: | cd arrow - cargo build --no-default-features --features=csv,ipc,simd --target wasm32-unknown-unknown - cargo build --no-default-features --features=csv,ipc,simd --target wasm32-wasi + cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-unknown-unknown + cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-wasi clippy: name: Clippy @@ -172,4 +172,4 @@ jobs: rustup component add clippy - name: Run clippy run: | - cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils --all-targets -- -D warnings + cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi --all-targets -- -D warnings diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index d0a7c73ae3d..e2006a99b72 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -77,6 +77,8 @@ pyarrow = ["pyo3"] # this is not enabled by default as it is too computationally expensive # but is run as part of our CI checks force_validate = [] +# Enable ffi support +ffi = [] [dev-dependencies] rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index 422916996cd..844c68d13b4 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -16,15 +16,13 @@ // under the License. use std::any::Any; -use std::convert::{From, TryFrom}; +use std::convert::From; use std::fmt; use std::sync::Arc; use super::*; use crate::array::equal_json::JsonEqual; use crate::buffer::{Buffer, MutableBuffer}; -use crate::error::Result; -use crate::ffi; /// Trait for dealing with different types of array at runtime when the type of the /// array is not known in advance. @@ -216,15 +214,6 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual { self.data_ref().get_array_memory_size() + std::mem::size_of_val(self) - std::mem::size_of::() } - - /// returns two pointers that represent this array in the C Data Interface (FFI) - fn to_raw( - &self, - ) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { - let data = self.data().clone(); - let array = ffi::ArrowArray::try_from(data)?; - Ok(ffi::ArrowArray::into_raw(array)) - } } /// A reference-counted reference to a generic `Array`. @@ -287,14 +276,6 @@ impl Array for ArrayRef { fn get_array_memory_size(&self) -> usize { self.as_ref().get_array_memory_size() } - - fn to_raw( - &self, - ) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { - let data = self.data().clone(); - let array = ffi::ArrowArray::try_from(data)?; - Ok(ffi::ArrowArray::into_raw(array)) - } } impl<'a, T: Array> Array for &'a T { @@ -353,12 +334,6 @@ impl<'a, T: Array> Array for &'a T { fn get_array_memory_size(&self) -> usize { T::get_array_memory_size(self) } - - fn to_raw( - &self, - ) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { - T::to_raw(self) - } } /// A generic trait for accessing the values of an [`Array`] @@ -733,42 +708,6 @@ fn new_null_sized_decimal( }) } -/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface -/// # Safety -/// Assumes that these pointers represent valid C Data Interfaces, both in memory -/// representation and lifetime via the `release` mechanism. -pub unsafe fn make_array_from_raw( - array: *const ffi::FFI_ArrowArray, - schema: *const ffi::FFI_ArrowSchema, -) -> Result { - let array = ffi::ArrowArray::try_from_raw(array, schema)?; - let data = ArrayData::try_from(array)?; - Ok(make_array(data)) -} - -/// Exports an array to raw pointers of the C Data Interface provided by the consumer. -/// # Safety -/// Assumes that these pointers represent valid C Data Interfaces, both in memory -/// representation and lifetime via the `release` mechanism. -/// -/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and -/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. -/// Usually the raw pointers are provided by the array data consumer. -pub unsafe fn export_array_into_raw( - src: ArrayRef, - out_array: *mut ffi::FFI_ArrowArray, - out_schema: *mut ffi::FFI_ArrowSchema, -) -> Result<()> { - let data = src.data(); - let array = ffi::FFI_ArrowArray::new(data); - let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?; - - std::ptr::write_unaligned(out_array, array); - std::ptr::write_unaligned(out_schema, schema); - - Ok(()) -} - // Helper function for printing potentially long arrays. pub(super) fn print_long_array( array: &A, diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs index 12d6f440b78..72030f900a4 100644 --- a/arrow/src/array/ffi.rs +++ b/arrow/src/array/ffi.rs @@ -25,7 +25,7 @@ use crate::{ ffi::ArrowArrayRef, }; -use super::ArrayData; +use super::{make_array, ArrayData, ArrayRef}; impl TryFrom for ArrayData { type Error = ArrowError; @@ -39,10 +39,46 @@ impl TryFrom for ffi::ArrowArray { type Error = ArrowError; fn try_from(value: ArrayData) -> Result { - unsafe { ffi::ArrowArray::try_new(value) } + ffi::ArrowArray::try_new(value) } } +/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface +/// # Safety +/// Assumes that these pointers represent valid C Data Interfaces, both in memory +/// representation and lifetime via the `release` mechanism. +pub unsafe fn make_array_from_raw( + array: *const ffi::FFI_ArrowArray, + schema: *const ffi::FFI_ArrowSchema, +) -> Result { + let array = ffi::ArrowArray::try_from_raw(array, schema)?; + let data = ArrayData::try_from(array)?; + Ok(make_array(data)) +} + +/// Exports an array to raw pointers of the C Data Interface provided by the consumer. +/// # Safety +/// Assumes that these pointers represent valid C Data Interfaces, both in memory +/// representation and lifetime via the `release` mechanism. +/// +/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and +/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. +/// Usually the raw pointers are provided by the array data consumer. +pub unsafe fn export_array_into_raw( + src: ArrayRef, + out_array: *mut ffi::FFI_ArrowArray, + out_schema: *mut ffi::FFI_ArrowSchema, +) -> Result<()> { + let data = src.data(); + let array = ffi::FFI_ArrowArray::new(data); + let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?; + + std::ptr::write_unaligned(out_array, array); + std::ptr::write_unaligned(out_schema, schema); + + Ok(()) +} + #[cfg(test)] mod tests { use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array, StringArray}; diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 2050bf95990..c49a3a5272e 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -176,6 +176,7 @@ mod cast; mod data; mod equal; mod equal_json; +#[cfg(feature = "ffi")] mod ffi; mod iterator; mod null; @@ -615,7 +616,8 @@ pub use self::cast::{ // ------------------------------ C Data Interface --------------------------- -pub use self::array::{export_array_into_raw, make_array_from_raw}; +#[cfg(feature = "ffi")] +pub use self::ffi::{export_array_into_raw, make_array_from_raw}; #[cfg(test)] mod tests { diff --git a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs index cb686bd8441..8ec5a455420 100644 --- a/arrow/src/buffer/immutable.rs +++ b/arrow/src/buffer/immutable.rs @@ -22,7 +22,6 @@ use std::sync::Arc; use std::{convert::AsRef, usize}; use crate::alloc::{Allocation, Deallocation}; -use crate::ffi::FFI_ArrowArray; use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; use crate::{bytes::Bytes, datatypes::ArrowNativeType}; @@ -77,30 +76,6 @@ impl Buffer { Buffer::build_with_arguments(ptr, len, Deallocation::Arrow(capacity)) } - /// Creates a buffer from an existing memory region (must already be byte-aligned), this - /// `Buffer` **does not** free this piece of memory when dropped. - /// - /// # Arguments - /// - /// * `ptr` - Pointer to raw parts - /// * `len` - Length of raw parts in **bytes** - /// * `data` - An [crate::ffi::FFI_ArrowArray] with the data - /// - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` - /// bytes and that the foreign deallocator frees the region. - #[deprecated( - note = "use from_custom_allocation instead which makes it clearer that the allocation is in fact owned" - )] - pub unsafe fn from_unowned( - ptr: NonNull, - len: usize, - data: Arc, - ) -> Self { - Self::from_custom_allocation(ptr, len, data) - } - /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. /// diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index c082bc64c66..8b5c0873695 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -37,8 +37,10 @@ pub use types::*; mod datatype; pub use datatype::*; mod delta; -mod ffi; +#[cfg(feature = "ffi")] +mod ffi; +#[cfg(feature = "ffi")] pub use ffi::*; /// A reference-counted reference to a [`Schema`](crate::datatypes::Schema). diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 2d529317801..d2c0b485df5 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -29,14 +29,16 @@ //! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw}; //! # use arrow::error::{Result, ArrowError}; //! # use arrow::compute::kernels::arithmetic; -//! # use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; +//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema}; //! # use std::convert::TryFrom; //! # fn main() -> Result<()> { //! // create an array natively //! let array = Int32Array::from(vec![Some(1), None, Some(3)]); //! //! // export it -//! let (array_ptr, schema_ptr) = array.to_raw()?; +//! +//! let ffi_array = ArrowArray::try_new(array.data().clone())?; +//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array); //! //! // consumed and used by something else... //! @@ -456,7 +458,7 @@ struct ArrayPrivateData { impl FFI_ArrowArray { /// creates a new `FFI_ArrowArray` from existing data. - /// # Safety + /// # Memory Leaks /// This method releases `buffers`. Consumers of this struct *must* call `release` before /// releasing this struct, or contents in `buffers` leak. pub fn new(data: &ArrayData) -> Self { @@ -836,10 +838,11 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { impl ArrowArray { /// creates a new `ArrowArray`. This is used to export to the C Data Interface. - /// # Safety - /// See safety of [ArrowArray] - #[allow(clippy::too_many_arguments)] - pub unsafe fn try_new(data: ArrayData) -> Result { + /// + /// # Memory Leaks + /// This method releases `buffers`. Consumers of this struct *must* call `release` before + /// releasing this struct, or contents in `buffers` leak. + pub fn try_new(data: ArrayData) -> Result { let array = Arc::new(FFI_ArrowArray::new(&data)); let schema = Arc::new(FFI_ArrowSchema::try_from(data.data_type())?); Ok(ArrowArray { array, schema }) diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 95c69ca0be6..d214af50243 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -238,7 +238,9 @@ pub mod compute; pub mod csv; pub mod datatypes; pub mod error; +#[cfg(feature = "ffi")] pub mod ffi; +#[cfg(feature = "ffi")] pub mod ffi_stream; #[cfg(feature = "ipc")] pub mod ipc;