New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make FFI support optional, change APIs to be safe
(#2302)
#2303
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,15 +16,13 @@ | |
// under the License. | ||
|
||
use std::any::Any; | ||
use std::convert::{From, TryFrom}; | ||
use std::convert::From; | ||
use std::fmt; | ||
use std::sync::Arc; | ||
|
||
use super::*; | ||
use crate::array::equal_json::JsonEqual; | ||
use crate::buffer::{Buffer, MutableBuffer}; | ||
use crate::error::Result; | ||
use crate::ffi; | ||
|
||
/// Trait for dealing with different types of array at runtime when the type of the | ||
/// array is not known in advance. | ||
|
@@ -216,15 +214,6 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual { | |
self.data_ref().get_array_memory_size() + std::mem::size_of_val(self) | ||
- std::mem::size_of::<ArrayData>() | ||
} | ||
|
||
/// returns two pointers that represent this array in the C Data Interface (FFI) | ||
fn to_raw( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't see a compelling reason for this API to exist, ultimately the use of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think for this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We only use |
||
&self, | ||
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { | ||
let data = self.data().clone(); | ||
let array = ffi::ArrowArray::try_from(data)?; | ||
Ok(ffi::ArrowArray::into_raw(array)) | ||
} | ||
} | ||
|
||
/// A reference-counted reference to a generic `Array`. | ||
|
@@ -287,14 +276,6 @@ impl Array for ArrayRef { | |
fn get_array_memory_size(&self) -> usize { | ||
self.as_ref().get_array_memory_size() | ||
} | ||
|
||
fn to_raw( | ||
&self, | ||
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { | ||
let data = self.data().clone(); | ||
let array = ffi::ArrowArray::try_from(data)?; | ||
Ok(ffi::ArrowArray::into_raw(array)) | ||
} | ||
} | ||
|
||
impl<'a, T: Array> Array for &'a T { | ||
|
@@ -353,12 +334,6 @@ impl<'a, T: Array> Array for &'a T { | |
fn get_array_memory_size(&self) -> usize { | ||
T::get_array_memory_size(self) | ||
} | ||
|
||
fn to_raw( | ||
&self, | ||
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { | ||
T::to_raw(self) | ||
} | ||
} | ||
|
||
/// A generic trait for accessing the values of an [`Array`] | ||
|
@@ -733,42 +708,6 @@ fn new_null_sized_decimal( | |
}) | ||
} | ||
|
||
/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface | ||
/// # Safety | ||
/// Assumes that these pointers represent valid C Data Interfaces, both in memory | ||
/// representation and lifetime via the `release` mechanism. | ||
pub unsafe fn make_array_from_raw( | ||
array: *const ffi::FFI_ArrowArray, | ||
schema: *const ffi::FFI_ArrowSchema, | ||
) -> Result<ArrayRef> { | ||
let array = ffi::ArrowArray::try_from_raw(array, schema)?; | ||
let data = ArrayData::try_from(array)?; | ||
Ok(make_array(data)) | ||
} | ||
|
||
/// Exports an array to raw pointers of the C Data Interface provided by the consumer. | ||
/// # Safety | ||
/// Assumes that these pointers represent valid C Data Interfaces, both in memory | ||
/// representation and lifetime via the `release` mechanism. | ||
/// | ||
/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and | ||
/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. | ||
/// Usually the raw pointers are provided by the array data consumer. | ||
pub unsafe fn export_array_into_raw( | ||
src: ArrayRef, | ||
out_array: *mut ffi::FFI_ArrowArray, | ||
out_schema: *mut ffi::FFI_ArrowSchema, | ||
) -> Result<()> { | ||
let data = src.data(); | ||
let array = ffi::FFI_ArrowArray::new(data); | ||
let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?; | ||
|
||
std::ptr::write_unaligned(out_array, array); | ||
std::ptr::write_unaligned(out_schema, schema); | ||
|
||
Ok(()) | ||
} | ||
|
||
Comment on lines
-736
to
-771
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are moved to the ffi module to make the feature flags easier |
||
// Helper function for printing potentially long arrays. | ||
pub(super) fn print_long_array<A, F>( | ||
array: &A, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,6 @@ use std::sync::Arc; | |
use std::{convert::AsRef, usize}; | ||
|
||
use crate::alloc::{Allocation, Deallocation}; | ||
use crate::ffi::FFI_ArrowArray; | ||
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; | ||
use crate::{bytes::Bytes, datatypes::ArrowNativeType}; | ||
|
||
|
@@ -77,30 +76,6 @@ impl Buffer { | |
Buffer::build_with_arguments(ptr, len, Deallocation::Arrow(capacity)) | ||
} | ||
|
||
/// Creates a buffer from an existing memory region (must already be byte-aligned), this | ||
/// `Buffer` **does not** free this piece of memory when dropped. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `ptr` - Pointer to raw parts | ||
/// * `len` - Length of raw parts in **bytes** | ||
/// * `data` - An [crate::ffi::FFI_ArrowArray] with the data | ||
/// | ||
/// # Safety | ||
/// | ||
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len` | ||
/// bytes and that the foreign deallocator frees the region. | ||
#[deprecated( | ||
note = "use from_custom_allocation instead which makes it clearer that the allocation is in fact owned" | ||
)] | ||
pub unsafe fn from_unowned( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method was already deprecated, so I opted to just remove it |
||
ptr: NonNull<u8>, | ||
len: usize, | ||
data: Arc<FFI_ArrowArray>, | ||
) -> Self { | ||
Self::from_custom_allocation(ptr, len, data) | ||
} | ||
|
||
/// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting | ||
/// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. | ||
/// | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,14 +29,16 @@ | |
//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw}; | ||
//! # use arrow::error::{Result, ArrowError}; | ||
//! # use arrow::compute::kernels::arithmetic; | ||
//! # use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; | ||
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema}; | ||
//! # use std::convert::TryFrom; | ||
//! # fn main() -> Result<()> { | ||
//! // create an array natively | ||
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]); | ||
//! | ||
//! // export it | ||
//! let (array_ptr, schema_ptr) = array.to_raw()?; | ||
//! | ||
//! let ffi_array = ArrowArray::try_new(array.data().clone())?; | ||
//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array); | ||
//! | ||
//! // consumed and used by something else... | ||
//! | ||
|
@@ -456,7 +458,7 @@ struct ArrayPrivateData { | |
|
||
impl FFI_ArrowArray { | ||
/// creates a new `FFI_ArrowArray` from existing data. | ||
/// # Safety | ||
/// # Memory Leaks | ||
/// This method releases `buffers`. Consumers of this struct *must* call `release` before | ||
/// releasing this struct, or contents in `buffers` leak. | ||
pub fn new(data: &ArrayData) -> Self { | ||
|
@@ -836,10 +838,11 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { | |
|
||
impl ArrowArray { | ||
/// creates a new `ArrowArray`. This is used to export to the C Data Interface. | ||
/// # Safety | ||
/// See safety of [ArrowArray] | ||
#[allow(clippy::too_many_arguments)] | ||
pub unsafe fn try_new(data: ArrayData) -> Result<Self> { | ||
/// | ||
/// # Memory Leaks | ||
/// This method releases `buffers`. Consumers of this struct *must* call `release` before | ||
/// releasing this struct, or contents in `buffers` leak. | ||
pub fn try_new(data: ArrayData) -> Result<Self> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method wasn't actually unsafe, and was being called by safe methods previously without any additional checks #2301 |
||
let array = Arc::new(FFI_ArrowArray::new(&data)); | ||
let schema = Arc::new(FFI_ArrowSchema::try_from(data.data_type())?); | ||
Ok(ArrowArray { array, schema }) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤔 I was going to recommend adding this to the "list of features of this crate" but then it turns out we don't seem to have any docs for them 🤦
https://docs.rs/arrow/19.0.0/arrow/