Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate n_buffers in FFI_ArrowArray by data layout #1960

Merged
merged 5 commits into from Jun 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions arrow/src/array/data.rs
Expand Up @@ -1221,7 +1221,7 @@ impl ArrayData {

/// Return the expected [`DataTypeLayout`] Arrays of this data
/// type are expected to have
fn layout(data_type: &DataType) -> DataTypeLayout {
pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout {
// based on C/C++ implementation in
// https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc)
use std::mem::size_of;
Expand Down Expand Up @@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
/// Layout specification for a data type
#[derive(Debug, PartialEq)]
// Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91
struct DataTypeLayout {
pub(crate) struct DataTypeLayout {
/// A vector of buffer layout specifications, one for each expected buffer
pub buffers: Vec<BufferSpec>,

Expand Down Expand Up @@ -1359,7 +1359,7 @@ impl DataTypeLayout {

/// Layout specification for a single data type buffer
#[derive(Debug, PartialEq)]
enum BufferSpec {
pub(crate) enum BufferSpec {
/// each element has a fixed width
FixedWidth { byte_width: usize },
/// Variable width, such as string data for utf8 data
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/mod.rs
Expand Up @@ -171,6 +171,7 @@ use crate::datatypes::*;

pub use self::array::Array;
pub use self::array::ArrayRef;
pub(crate) use self::data::layout;
pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;
Expand Down
46 changes: 40 additions & 6 deletions arrow/src/ffi.rs
Expand Up @@ -120,7 +120,7 @@ use std::{

use bitflags::bitflags;

use crate::array::ArrayData;
use crate::array::{layout, ArrayData};
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
Expand Down Expand Up @@ -450,14 +450,30 @@ impl FFI_ArrowArray {
let buffers = iter::once(data.null_buffer().cloned())
.chain(data.buffers().iter().map(|b| Some(b.clone())))
.collect::<Vec<_>>();
let n_buffers = buffers.len() as i64;
viirya marked this conversation as resolved.
Show resolved Hide resolved
let data_layout = layout(data.data_type());
// `n_buffers` is the number of buffers by the spec.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let n_buffers = {
data_layout.buffers.len() + {
// If the layout has a null buffer by Arrow spec.
// Note that even the array doesn't have a null buffer because it has
// no null value, we still need to count 1 here to follow the spec.
if data_layout.can_contain_null_mask {
1
viirya marked this conversation as resolved.
Show resolved Hide resolved
} else {
0
}
}
} as i64;

let buffers_ptr = buffers
.iter()
.map(|maybe_buffer| match maybe_buffer {
.flat_map(|maybe_buffer| match maybe_buffer {
// note that `raw_data` takes into account the buffer's offset
Some(b) => b.as_ptr() as *const c_void,
None => std::ptr::null(),
Some(b) => Some(b.as_ptr() as *const c_void),
// This is for null buffer. We only put a null pointer for
// null buffer if by spec it can contain null mask.
None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
None => None,
})
.collect::<Box<[_]>>();

Expand Down Expand Up @@ -881,7 +897,7 @@ mod tests {
use crate::array::{
export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray,
DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray,
OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray,
};
use crate::compute::kernels;
Expand Down Expand Up @@ -1402,4 +1418,22 @@ mod tests {
// (drop/release)
Ok(())
}

#[test]
fn null_array_n_buffers() -> Result<()> {
let array = NullArray::new(10);
let data = array.data();

let ffi_array = FFI_ArrowArray::new(data);
assert_eq!(0, ffi_array.n_buffers);

let private_data =
unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) };

assert_eq!(0, private_data.buffers_ptr.len());

Box::into_raw(private_data);

Ok(())
}
}