diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 95c980bab6b..52920827d24 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -1221,7 +1221,7 @@ impl ArrayData { /// Return the expected [`DataTypeLayout`] Arrays of this data /// type are expected to have -fn layout(data_type: &DataType) -> DataTypeLayout { +pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout { // based on C/C++ implementation in // https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc) use std::mem::size_of; @@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout { /// Layout specification for a data type #[derive(Debug, PartialEq)] // Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91 -struct DataTypeLayout { +pub(crate) struct DataTypeLayout { /// A vector of buffer layout specifications, one for each expected buffer pub buffers: Vec, @@ -1359,7 +1359,7 @@ impl DataTypeLayout { /// Layout specification for a single data type buffer #[derive(Debug, PartialEq)] -enum BufferSpec { +pub(crate) enum BufferSpec { /// each element has a fixed width FixedWidth { byte_width: usize }, /// Variable width, such as string data for utf8 data diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index bbe62cf6a1f..b8009cf9be7 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -171,6 +171,7 @@ use crate::datatypes::*; pub use self::array::Array; pub use self::array::ArrayRef; +pub(crate) use self::data::layout; pub use self::data::ArrayData; pub use self::data::ArrayDataBuilder; pub use self::data::ArrayDataRef; diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 9be20ccd696..ad2062b4c07 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -120,7 +120,7 @@ use std::{ use bitflags::bitflags; -use crate::array::ArrayData; +use crate::array::{layout, ArrayData}; use crate::buffer::Buffer; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; @@ -450,14 +450,30 @@ impl FFI_ArrowArray { let buffers = iter::once(data.null_buffer().cloned()) .chain(data.buffers().iter().map(|b| Some(b.clone()))) .collect::>(); - let n_buffers = buffers.len() as i64; + let data_layout = layout(data.data_type()); + // `n_buffers` is the number of buffers by the spec. + let n_buffers = { + data_layout.buffers.len() + { + // If the layout has a null buffer by Arrow spec. + // Note that even the array doesn't have a null buffer because it has + // no null value, we still need to count 1 here to follow the spec. + if data_layout.can_contain_null_mask { + 1 + } else { + 0 + } + } + } as i64; let buffers_ptr = buffers .iter() - .map(|maybe_buffer| match maybe_buffer { + .flat_map(|maybe_buffer| match maybe_buffer { // note that `raw_data` takes into account the buffer's offset - Some(b) => b.as_ptr() as *const c_void, - None => std::ptr::null(), + Some(b) => Some(b.as_ptr() as *const c_void), + // This is for null buffer. We only put a null pointer for + // null buffer if by spec it can contain null mask. + None if data_layout.can_contain_null_mask => Some(std::ptr::null()), + None => None, }) .collect::>(); @@ -881,7 +897,7 @@ mod tests { use crate::array::{ export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray, DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, + GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray, OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray, }; use crate::compute::kernels; @@ -1402,4 +1418,22 @@ mod tests { // (drop/release) Ok(()) } + + #[test] + fn null_array_n_buffers() -> Result<()> { + let array = NullArray::new(10); + let data = array.data(); + + let ffi_array = FFI_ArrowArray::new(data); + assert_eq!(0, ffi_array.n_buffers); + + let private_data = + unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) }; + + assert_eq!(0, private_data.buffers_ptr.len()); + + Box::into_raw(private_data); + + Ok(()) + } }