Skip to content

Commit

Permalink
Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
Browse files Browse the repository at this point in the history
* Fix n_buffers

* Add test

* Add code comment

* Don't put null pointer if no null buffer by spec

* Trigger Build
  • Loading branch information
viirya committed Jun 29, 2022
1 parent e81f93b commit 1a5dcb7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
6 changes: 3 additions & 3 deletions arrow/src/array/data.rs
Expand Up @@ -1221,7 +1221,7 @@ impl ArrayData {

/// Return the expected [`DataTypeLayout`] Arrays of this data
/// type are expected to have
fn layout(data_type: &DataType) -> DataTypeLayout {
pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout {
// based on C/C++ implementation in
// https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc)
use std::mem::size_of;
Expand Down Expand Up @@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
/// Layout specification for a data type
#[derive(Debug, PartialEq)]
// Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91
struct DataTypeLayout {
pub(crate) struct DataTypeLayout {
/// A vector of buffer layout specifications, one for each expected buffer
pub buffers: Vec<BufferSpec>,

Expand Down Expand Up @@ -1359,7 +1359,7 @@ impl DataTypeLayout {

/// Layout specification for a single data type buffer
#[derive(Debug, PartialEq)]
enum BufferSpec {
pub(crate) enum BufferSpec {
/// each element has a fixed width
FixedWidth { byte_width: usize },
/// Variable width, such as string data for utf8 data
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/mod.rs
Expand Up @@ -171,6 +171,7 @@ use crate::datatypes::*;

pub use self::array::Array;
pub use self::array::ArrayRef;
pub(crate) use self::data::layout;
pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;
Expand Down
46 changes: 40 additions & 6 deletions arrow/src/ffi.rs
Expand Up @@ -120,7 +120,7 @@ use std::{

use bitflags::bitflags;

use crate::array::ArrayData;
use crate::array::{layout, ArrayData};
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
Expand Down Expand Up @@ -450,14 +450,30 @@ impl FFI_ArrowArray {
let buffers = iter::once(data.null_buffer().cloned())
.chain(data.buffers().iter().map(|b| Some(b.clone())))
.collect::<Vec<_>>();
let n_buffers = buffers.len() as i64;
let data_layout = layout(data.data_type());
// `n_buffers` is the number of buffers by the spec.
let n_buffers = {
data_layout.buffers.len() + {
// If the layout has a null buffer by Arrow spec.
// Note that even the array doesn't have a null buffer because it has
// no null value, we still need to count 1 here to follow the spec.
if data_layout.can_contain_null_mask {
1
} else {
0
}
}
} as i64;

let buffers_ptr = buffers
.iter()
.map(|maybe_buffer| match maybe_buffer {
.flat_map(|maybe_buffer| match maybe_buffer {
// note that `raw_data` takes into account the buffer's offset
Some(b) => b.as_ptr() as *const c_void,
None => std::ptr::null(),
Some(b) => Some(b.as_ptr() as *const c_void),
// This is for null buffer. We only put a null pointer for
// null buffer if by spec it can contain null mask.
None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
None => None,
})
.collect::<Box<[_]>>();

Expand Down Expand Up @@ -881,7 +897,7 @@ mod tests {
use crate::array::{
export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray,
DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray,
OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray,
};
use crate::compute::kernels;
Expand Down Expand Up @@ -1402,4 +1418,22 @@ mod tests {
// (drop/release)
Ok(())
}

#[test]
fn null_array_n_buffers() -> Result<()> {
let array = NullArray::new(10);
let data = array.data();

let ffi_array = FFI_ArrowArray::new(data);
assert_eq!(0, ffi_array.n_buffers);

let private_data =
unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) };

assert_eq!(0, private_data.buffers_ptr.len());

Box::into_raw(private_data);

Ok(())
}
}

0 comments on commit 1a5dcb7

Please sign in to comment.