Skip to content

Commit

Permalink
Handle empty offsets buffer (#1824) (#2836)
Browse files Browse the repository at this point in the history
* Handle empty offsets buffer (#1824)

* Review feedback
  • Loading branch information
tustvold committed Oct 13, 2022
1 parent f8254e7 commit f4ee8b9
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 7 deletions.
30 changes: 28 additions & 2 deletions arrow-array/src/array/binary_array.rs
Expand Up @@ -17,7 +17,10 @@

use crate::iterator::GenericBinaryIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor, GenericListArray, OffsetSizeTrait};
use crate::{
empty_offsets, print_long_array, Array, ArrayAccessor, GenericListArray,
OffsetSizeTrait,
};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
Expand Down Expand Up @@ -286,7 +289,11 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericBinaryArray<OffsetS
2,
"BinaryArray data should contain 2 buffers only (offsets and values)"
);
let offsets = data.buffers()[0].as_ptr();
// Handle case of empty offsets
let offsets = match data.is_empty() && data.buffers()[0].is_empty() {
true => empty_offsets::<OffsetSize>().as_ptr() as *const _,
false => data.buffers()[0].as_ptr(),
};
let values = data.buffers()[1].as_ptr();
Self {
data,
Expand Down Expand Up @@ -845,4 +852,23 @@ mod tests {
.validate_full()
.expect("All null array has valid array data");
}

#[test]
fn test_empty_offsets() {
let string = BinaryArray::from(
ArrayData::builder(DataType::Binary)
.buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
.build()
.unwrap(),
);
assert_eq!(string.value_offsets(), &[0]);
let string = LargeBinaryArray::from(
ArrayData::builder(DataType::LargeBinary)
.buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
.build()
.unwrap(),
);
assert_eq!(string.len(), 0);
assert_eq!(string.value_offsets(), &[0]);
}
}
42 changes: 40 additions & 2 deletions arrow-array/src/array/list_array.rs
Expand Up @@ -43,6 +43,17 @@ impl OffsetSizeTrait for i64 {
const PREFIX: &'static str = "Large";
}

/// Returns a slice of `OffsetSize` consisting of a single zero value
#[inline]
pub(crate) fn empty_offsets<OffsetSize: OffsetSizeTrait>() -> &'static [OffsetSize] {
static OFFSET: &[i64] = &[0];
// SAFETY:
// OffsetSize is ArrowNativeType and is therefore trivially transmutable
let (prefix, val, suffix) = unsafe { OFFSET.align_to::<OffsetSize>() };
assert!(prefix.is_empty() && suffix.is_empty());
val
}

/// Generic struct for a variable-size list array.
///
/// Columnar format in Apache Arrow:
Expand Down Expand Up @@ -240,8 +251,13 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
}

let values = make_array(values);
let value_offsets = data.buffers()[0].as_ptr();
let value_offsets = unsafe { RawPtrBox::<OffsetSize>::new(value_offsets) };
// Handle case of empty offsets
let offsets = match data.is_empty() && data.buffers()[0].is_empty() {
true => empty_offsets::<OffsetSize>().as_ptr() as *const _,
false => data.buffers()[0].as_ptr(),
};

let value_offsets = unsafe { RawPtrBox::new(offsets) };
Ok(Self {
data,
values,
Expand Down Expand Up @@ -941,4 +957,26 @@ mod tests {
false,
);
}

#[test]
fn test_empty_offsets() {
let f = Box::new(Field::new("element", DataType::Int32, true));
let string = ListArray::from(
ArrayData::builder(DataType::List(f.clone()))
.buffers(vec![Buffer::from(&[])])
.add_child_data(ArrayData::new_empty(&DataType::Int32))
.build()
.unwrap(),
);
assert_eq!(string.value_offsets(), &[0]);
let string = LargeListArray::from(
ArrayData::builder(DataType::LargeList(f))
.buffers(vec![Buffer::from(&[])])
.add_child_data(ArrayData::new_empty(&DataType::Int32))
.build()
.unwrap(),
);
assert_eq!(string.len(), 0);
assert_eq!(string.value_offsets(), &[0]);
}
}
31 changes: 28 additions & 3 deletions arrow-array/src/array/string_array.rs
Expand Up @@ -18,8 +18,8 @@
use crate::iterator::GenericStringIter;
use crate::raw_pointer::RawPtrBox;
use crate::{
print_long_array, Array, ArrayAccessor, GenericBinaryArray, GenericListArray,
OffsetSizeTrait,
empty_offsets, print_long_array, Array, ArrayAccessor, GenericBinaryArray,
GenericListArray, OffsetSizeTrait,
};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
Expand Down Expand Up @@ -370,7 +370,11 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericStringArray<OffsetS
2,
"StringArray data should contain 2 buffers only (offsets and values)"
);
let offsets = data.buffers()[0].as_ptr();
// Handle case of empty offsets
let offsets = match data.is_empty() && data.buffers()[0].is_empty() {
true => empty_offsets::<OffsetSize>().as_ptr() as *const _,
false => data.buffers()[0].as_ptr(),
};
let values = data.buffers()[1].as_ptr();
Self {
data,
Expand Down Expand Up @@ -823,4 +827,25 @@ mod tests {
fn test_large_string_array_from_list_array_wrong_type() {
_test_generic_string_array_from_list_array_wrong_type::<i32>();
}

#[test]
fn test_empty_offsets() {
let string = StringArray::from(
ArrayData::builder(DataType::Utf8)
.buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
.build()
.unwrap(),
);
assert_eq!(string.len(), 0);
assert_eq!(string.value_offsets(), &[0]);

let string = LargeStringArray::from(
ArrayData::builder(DataType::LargeUtf8)
.buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
.build()
.unwrap(),
);
assert_eq!(string.len(), 0);
assert_eq!(string.value_offsets(), &[0]);
}
}

0 comments on commit f4ee8b9

Please sign in to comment.