From fc572e2c2a1ec22e1429adfec1516d920ccb373c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 28 Jun 2022 16:58:47 -0700 Subject: [PATCH 1/5] Fix n_buffers --- arrow/src/array/data.rs | 6 +++--- arrow/src/array/mod.rs | 1 + arrow/src/ffi.rs | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 95c980bab6b..52920827d24 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -1221,7 +1221,7 @@ impl ArrayData { /// Return the expected [`DataTypeLayout`] Arrays of this data /// type are expected to have -fn layout(data_type: &DataType) -> DataTypeLayout { +pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout { // based on C/C++ implementation in // https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc) use std::mem::size_of; @@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout { /// Layout specification for a data type #[derive(Debug, PartialEq)] // Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91 -struct DataTypeLayout { +pub(crate) struct DataTypeLayout { /// A vector of buffer layout specifications, one for each expected buffer pub buffers: Vec, @@ -1359,7 +1359,7 @@ impl DataTypeLayout { /// Layout specification for a single data type buffer #[derive(Debug, PartialEq)] -enum BufferSpec { +pub(crate) enum BufferSpec { /// each element has a fixed width FixedWidth { byte_width: usize }, /// Variable width, such as string data for utf8 data diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index bbe62cf6a1f..b8009cf9be7 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -171,6 +171,7 @@ use crate::datatypes::*; pub use self::array::Array; pub use self::array::ArrayRef; +pub(crate) use self::data::layout; pub use self::data::ArrayData; pub use self::data::ArrayDataBuilder; pub use self::data::ArrayDataRef; diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 9be20ccd696..a4ea27a0a08 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -120,7 +120,7 @@ use std::{ use bitflags::bitflags; -use crate::array::ArrayData; +use crate::array::{layout, ArrayData}; use crate::buffer::Buffer; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; @@ -450,7 +450,17 @@ impl FFI_ArrowArray { let buffers = iter::once(data.null_buffer().cloned()) .chain(data.buffers().iter().map(|b| Some(b.clone()))) .collect::>(); - let n_buffers = buffers.len() as i64; + let data_layout = layout(data.data_type()); + // `n_buffers` is the number of buffers by the spec. + let n_buffers = { + data_layout.buffers.len() + { + if data_layout.can_contain_null_mask { + 1 + } else { + 0 + } + } + } as i64; let buffers_ptr = buffers .iter() From 0148c41ea2c5aaf864a2e9b49260238e59790c06 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 28 Jun 2022 17:05:15 -0700 Subject: [PATCH 2/5] Add test --- arrow/src/ffi.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index a4ea27a0a08..3aedc002950 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -891,7 +891,7 @@ mod tests { use crate::array::{ export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray, DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, + GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray, OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray, }; use crate::compute::kernels; @@ -1412,4 +1412,15 @@ mod tests { // (drop/release) Ok(()) } + + #[test] + fn null_array_n_buffers() -> Result<()> { + let array = NullArray::new(10); + let data = array.data(); + + let ffi_array = FFI_ArrowArray::new(data); + assert_eq!(0, ffi_array.n_buffers); + + Ok(()) + } } From 448233e8a3bc6994fffa864d505d0d2be140e9cd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 28 Jun 2022 23:39:21 -0700 Subject: [PATCH 3/5] Add code comment --- arrow/src/ffi.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 3aedc002950..c344d284b23 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -454,6 +454,9 @@ impl FFI_ArrowArray { // `n_buffers` is the number of buffers by the spec. let n_buffers = { data_layout.buffers.len() + { + // If the layout has a null buffer by Arrow spec. + // Note that even the array doesn't have a null buffer because it has + // no null value, we still need to count 1 here to follow the spec. if data_layout.can_contain_null_mask { 1 } else { From 544b97f2070a7892cefaa61c73cd6f6dbfc7555b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 29 Jun 2022 00:04:02 -0700 Subject: [PATCH 4/5] Don't put null pointer if no null buffer by spec --- arrow/src/ffi.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index c344d284b23..ad2062b4c07 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -467,10 +467,13 @@ impl FFI_ArrowArray { let buffers_ptr = buffers .iter() - .map(|maybe_buffer| match maybe_buffer { + .flat_map(|maybe_buffer| match maybe_buffer { // note that `raw_data` takes into account the buffer's offset - Some(b) => b.as_ptr() as *const c_void, - None => std::ptr::null(), + Some(b) => Some(b.as_ptr() as *const c_void), + // This is for null buffer. We only put a null pointer for + // null buffer if by spec it can contain null mask. + None if data_layout.can_contain_null_mask => Some(std::ptr::null()), + None => None, }) .collect::>(); @@ -1424,6 +1427,13 @@ mod tests { let ffi_array = FFI_ArrowArray::new(data); assert_eq!(0, ffi_array.n_buffers); + let private_data = + unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) }; + + assert_eq!(0, private_data.buffers_ptr.len()); + + Box::into_raw(private_data); + Ok(()) } } From bfa679aa9ee57c5fc021afafcebc26d871d377d0 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 29 Jun 2022 09:23:29 -0700 Subject: [PATCH 5/5] Trigger Build