Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added Offsets and OffsetsBuffer
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Dec 4, 2022
1 parent 9ea25f0 commit 0c69455
Show file tree
Hide file tree
Showing 46 changed files with 923 additions and 1,028 deletions.
5 changes: 2 additions & 3 deletions benches/iter_list.rs
Expand Up @@ -16,8 +16,7 @@ fn add_benchmark(c: &mut Criterion) {
let values = Buffer::from_iter(0..size as i32);
let values = PrimitiveArray::<i32>::from_data(DataType::Int32, values, None);

let mut offsets = (0..size as i32).step_by(2).collect::<Vec<_>>();
offsets.push(size as i32);
let mut offsets = (0..=size as i32).step_by(2).collect::<Vec<_>>();

let validity = (0..(offsets.len() - 1))
.map(|i| i % 4 == 0)
Expand All @@ -26,7 +25,7 @@ fn add_benchmark(c: &mut Criterion) {
let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let array = ListArray::<i32>::from_data(
data_type,
offsets.into(),
offsets.try_into().unwrap(),
Box::new(values),
Some(validity),
);
Expand Down
13 changes: 7 additions & 6 deletions src/array/binary/ffi.rs
Expand Up @@ -2,7 +2,7 @@ use crate::{
array::{FromFfi, ToFfi},
bitmap::align,
ffi,
offset::Offset,
offset::{Offset, OffsetsBuffer},
};

use crate::error::Result;
Expand All @@ -19,7 +19,7 @@ unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
}

fn offset(&self) -> Option<usize> {
let offset = self.offsets.offset();
let offset = self.offsets.buffer().offset();
if let Some(bitmap) = self.validity.as_ref() {
if bitmap.offset() == offset {
Some(offset)
Expand All @@ -32,7 +32,7 @@ unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
}

fn to_ffi_aligned(&self) -> Self {
let offset = self.offsets.offset();
let offset = self.offsets.buffer().offset();

let validity = self.validity.as_ref().map(|bitmap| {
if bitmap.offset() == offset {
Expand All @@ -59,8 +59,9 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2) }?;

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
))
// Safety: we assume that the data received over FFI is well constructed
let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) };

Ok(Self::new(data_type, offsets, values, validity))
}
}
221 changes: 68 additions & 153 deletions src/array/binary/mod.rs
Expand Up @@ -6,16 +6,13 @@ use crate::{
buffer::Buffer,
datatypes::DataType,
error::Error,
offset::Offset,
offset::{Offset, OffsetsBuffer},
trusted_len::TrustedLen,
};

use either::Either;

use super::{
specification::{try_check_offsets, try_check_offsets_bounds},
Array, GenericBinaryArray,
};
use super::{specification::try_check_offsets_bounds, Array, GenericBinaryArray};

mod ffi;
pub(super) mod fmt;
Expand Down Expand Up @@ -60,7 +57,7 @@ pub use mutable::*;
#[derive(Clone)]
pub struct BinaryArray<O: Offset> {
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
}
Expand All @@ -70,19 +67,18 @@ impl<O: Offset> BinaryArray<O> {
///
/// # Errors
/// This function returns an error iff:
/// * the offsets are not monotonically increasing
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
/// # Implementation
/// This function is `O(N)` - checking monotonicity is `O(N)`
/// This function is `O(1)`
pub fn try_new(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Result<Self, Error> {
try_check_offsets(&offsets, values.len())?;
try_check_offsets_bounds(&offsets, values.len())?;

if validity
.as_ref()
Expand Down Expand Up @@ -131,7 +127,7 @@ impl<O: Offset> BinaryArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len() - 1
self.offsets.len()
}

/// Returns the element at index `i`
Expand Down Expand Up @@ -170,7 +166,7 @@ impl<O: Offset> BinaryArray<O> {

/// Returns the offsets of this [`BinaryArray`].
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
pub fn offsets(&self) -> &OffsetsBuffer<O> {
&self.offsets
}

Expand Down Expand Up @@ -250,98 +246,86 @@ impl<O: Offset> BinaryArray<O> {
if let Some(bitmap) = self.validity {
match bitmap.into_mut() {
// Safety: invariants are preserved
Left(bitmap) => Left(unsafe {
BinaryArray::new_unchecked(
self.data_type,
self.offsets,
self.values,
Some(bitmap),
)
}),
Left(bitmap) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
Some(bitmap),
)),
Right(mutable_bitmap) => match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
(None, None) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
self.data_type,
self.offsets,
self.values,
Some(mutable_bitmap.into()),
)
})
}
(None, Some(offsets)) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
self.data_type,
offsets.into(),
self.values,
Some(mutable_bitmap.into()),
)
})
}
(Some(mutable_values), None) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
self.data_type,
self.offsets,
mutable_values.into(),
Some(mutable_bitmap.into()),
)
})
}
(Some(values), Some(offsets)) => Right(unsafe {
MutableBinaryArray::from_data(
self.data_type,
offsets,
values,
Some(mutable_bitmap),
)
}),
(None, None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
Some(mutable_bitmap.into()),
)),
(None, Some(offsets)) => Left(BinaryArray::new(
self.data_type,
offsets.into(),
self.values,
Some(mutable_bitmap.into()),
)),
(Some(mutable_values), None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
mutable_values.into(),
Some(mutable_bitmap.into()),
)),
(Some(values), Some(offsets)) => Right(MutableBinaryArray::from_data(
self.data_type,
offsets,
values,
Some(mutable_bitmap),
)),
},
}
} else {
match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
(None, None) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, self.offsets, self.values, None)
}),
(None, Some(offsets)) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, offsets.into(), self.values, None)
}),
(Some(values), None) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, self.offsets, values.into(), None)
}),
(Some(values), Some(offsets)) => Right(unsafe {
MutableBinaryArray::from_data(self.data_type, offsets, values, None)
}),
(None, None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
None,
)),
(None, Some(offsets)) => Left(BinaryArray::new(
self.data_type,
offsets.into(),
self.values,
None,
)),
(Some(values), None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
values.into(),
None,
)),
(Some(values), Some(offsets)) => Right(MutableBinaryArray::from_data(
self.data_type,
offsets,
values,
None,
)),
}
}
}

/// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero.
pub fn new_empty(data_type: DataType) -> Self {
Self::new(
data_type,
Buffer::from(vec![O::zero()]),
Buffer::new(),
None,
)
Self::new(data_type, OffsetsBuffer::new(), Buffer::new(), None)
}

/// Creates a null [`BinaryArray`], i.e. whose `.null_count() == .len()`.
#[inline]
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
vec![O::default(); 1 + length].into(),
vec![O::default(); 1 + length].try_into().unwrap(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
Expand All @@ -356,72 +340,16 @@ impl<O: Offset> BinaryArray<O> {
}
}

/// Creates a new [`BinaryArray`] without checking for offsets monotonicity.
///
/// # Errors
/// This function returns an error iff:
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
/// # Implementation
/// This function is `O(1)`
pub unsafe fn try_new_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Result<Self, Error> {
try_check_offsets_bounds(&offsets, values.len())?;

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len() - 1)
{
return Err(Error::oos(
"validity mask length must match the number of values",
));
}

if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
return Err(Error::oos(
"BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
));
}

Ok(Self {
data_type,
offsets,
values,
validity,
})
}

/// Alias for unwrapping [`Self::try_new`]
pub fn new(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new(data_type, offsets, values, validity).unwrap()
}

/// Alias for unwrapping [`Self::try_new_unchecked`]
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
pub unsafe fn new_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new_unchecked(data_type, offsets, values, validity).unwrap()
}

/// Returns a [`BinaryArray`] from an iterator of trusted length.
///
/// The [`BinaryArray`] is guaranteed to not have a validity
Expand Down Expand Up @@ -487,23 +415,10 @@ impl<O: Offset> BinaryArray<O> {
unsafe { Self::try_from_trusted_len_iter_unchecked(iter) }
}

/// Alias for [`Self::new_unchecked`]
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
pub unsafe fn from_data_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::new_unchecked(data_type, offsets, values, validity)
}

/// Alias for `new`
pub fn from_data(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Expand Down

0 comments on commit 0c69455

Please sign in to comment.