Skip to content

Commit

Permalink
make slice work for nested types
Browse files Browse the repository at this point in the history
  • Loading branch information
nevi-me committed Jun 2, 2021
1 parent f41cb17 commit 0a2649f
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 17 deletions.
7 changes: 1 addition & 6 deletions arrow/src/array/array_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,7 @@ impl From<ArrayData> for StructArray {
/// Builds a `StructArray` from `data`: every entry of `data.child_data()` is
/// turned into an array with `make_array` and collected into `boxed_fields`,
/// which is stored alongside the original `data`.
fn from(data: ArrayData) -> Self {
let mut boxed_fields = vec![];
for cd in data.child_data() {
// If the parent carries a non-zero offset, or its length differs from the
// child's, narrow the child to the parent's window; otherwise reuse the
// child unchanged.
let child_data = if data.offset() != 0 || data.len() != cd.len() {
cd.slice(data.offset(), data.len())
} else {
cd.clone()
};
boxed_fields.push(make_array(child_data));
// NOTE(review): this pushes the unsliced child in addition to the push above,
// so as written each child is added twice. Presumably one of the two pushes
// supersedes the other (child slicing moving into `ArrayData::slice`) —
// confirm which one is meant to remain.
boxed_fields.push(make_array(cd.clone()));
}
Self { data, boxed_fields }
}
Expand Down
81 changes: 70 additions & 11 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@
use std::mem;
use std::sync::Arc;

use crate::datatypes::{DataType, IntervalUnit};
use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
buffer::{Buffer, MutableBuffer},
util::bit_util,
};

use super::equal::equal;
use super::{OffsetSizeTrait, equal::equal};

#[inline]
pub(crate) fn count_nulls(
Expand Down Expand Up @@ -385,15 +385,55 @@ impl ArrayData {
/// Returns a new `ArrayData` covering `length` elements starting at `offset`,
/// where `offset` is relative to this array's current window.
///
/// # Panics
///
/// Panics if `offset + length` exceeds `self.len()`.
pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
assert!((offset + length) <= self.len());

// NOTE(review): the statements from here down to the bare `new_data` repeat the
// first branch of the `if` that follows; this looks like the earlier body shown
// next to its replacement — confirm which version is current.
let mut new_data = self.clone();

new_data.len = length;
new_data.offset = offset + self.offset;

new_data.null_count =
count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);

new_data
// No child data (e.g. primitive arrays): clone the array and only adjust the
// length, offset and null count.
if self.child_data().is_empty() {
let mut new_data = self.clone();

new_data.len = length;
// The new offset is absolute: the requested offset on top of the existing one.
new_data.offset = offset + self.offset;

// Recount nulls over the new window only.
new_data.null_count =
count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);

new_data
} else {
// Nested data: build the parent explicitly and slice every child as well.
let new_offset = self.offset + offset;
let new_data = ArrayData {
data_type: self.data_type().clone(),
len: length,
null_count: count_nulls(self.null_buffer(), new_offset, length),
offset: new_offset,
// The parent's buffers are cloned as-is; only `offset`/`len` describe the window.
buffers: self.buffers.clone(),
child_data: self.child_data().iter().map(|data| {
match self.data_type() {
DataType::List(_) => {
// List children are addressed through the parent's first buffer, read
// here as `i32` offsets, so the child range is looked up rather than
// taken from `offset`/`length` directly.
// NOTE(review): `offset` is relative to the current window while
// `self.buffers` is not sliced and the parent keeps `new_offset` —
// confirm whether this lookup should use `new_offset` instead.
let (start, end) = get_list_child_slice::<i32>(
self.buffers.get(0).unwrap(),
offset,
length
);
data.slice(start, end - start)
}
DataType::LargeList(_) => {
// Same as `List`, but the first buffer is read as `i64` offsets.
let (start, end) = get_list_child_slice::<i64>(
self.buffers.get(0).unwrap(),
offset,
length
);
data.slice(start, end - start)
}
_ => {
// All other types don't require computing offsets
data.slice(offset, length)
}
}
}).collect(),
null_bitmap: self.null_bitmap().clone(),
};

new_data
}
}

/// Returns the `buffer` as a slice of type `T` starting at self.offset
Expand Down Expand Up @@ -467,6 +507,25 @@ impl ArrayData {
}
}

#[inline]
fn get_list_child_slice<OffsetSize: OffsetSizeTrait>(
buffer: &Buffer,
offset: usize,
length: usize
) -> (usize, usize) {
let raw_buffer = buffer.as_ptr();
let value_offsets: &[OffsetSize] = unsafe {
let value_offsets = RawPtrBox::<OffsetSize>::new(raw_buffer);
std::slice::from_raw_parts(
value_offsets.as_ptr().add(offset),
length + 1,
)
};
let start = value_offsets[0];
let end = value_offsets[length - 1];
(start.to_usize().unwrap(), end.to_usize().unwrap())
}

impl PartialEq for ArrayData {
fn eq(&self, other: &Self) -> bool {
equal(self, other)
Expand Down

0 comments on commit 0a2649f

Please sign in to comment.