Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved perf of take
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Dec 7, 2022
1 parent 3c9c081 commit 8e843ed
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Expand Up @@ -40,7 +40,7 @@ pub use mutable::*;
/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![[1, 2].as_ref(), &[], &[3]]);
/// // the underlying representation:
/// assert_eq!(array.values(), &Buffer::from(vec![1, 2, 3]));
/// assert_eq!(array.offsets(), &Buffer::from(vec![0, 2, 2, 3]));
/// assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 3]));
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
/// ```
///
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mod.rs
Expand Up @@ -51,7 +51,7 @@ impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {
/// // the underlying representation
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
/// assert_eq!(array.values(), &Buffer::from(b"hithere".to_vec()));
/// assert_eq!(array.offsets(), &Buffer::from(vec![0, 2, 2, 2 + 5]));
/// assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 2 + 5]));
/// # }
/// ```
///
Expand Down
33 changes: 11 additions & 22 deletions src/compute/take/generic_binary.rs
Expand Up @@ -17,10 +17,10 @@ pub fn take_values<O: Offset>(
let mut buffer = Vec::with_capacity(new_len);
starts
.iter()
.zip(offsets.buffer().windows(2))
.for_each(|(start_, window)| {
let start = start_.to_usize();
let end = (*start_ + (window[1] - window[0])).to_usize();
.map(|start| start.to_usize())
.zip(offsets.lengths())
.for_each(|(start, length)| {
let end = start + length;
buffer.extend_from_slice(&values[start..end]);
});
buffer.into()
Expand All @@ -32,27 +32,16 @@ pub fn take_no_validity<O: Offset, I: Index>(
values: &[u8],
indices: &[I],
) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
let mut length = O::zero();
let mut buffer = Vec::<u8>::new();
let offsets = offsets.buffer();
let offsets = indices.iter().map(|index| {
let index = index.to_usize();
let start = offsets[index];
let length_h = offsets[index + 1] - start;
length += length_h;

let _start = start.to_usize();
let end = (start + length_h).to_usize();
buffer.extend_from_slice(&values[_start..end]);
length
let lengths = indices.iter().map(|index| index.to_usize()).map(|index| {
let (start, end) = offsets.start_end(index);
// todo: remove this bound check
buffer.extend_from_slice(&values[start..end]);
end - start
});
let offsets = std::iter::once(O::zero())
.chain(offsets)
.collect::<Vec<_>>();
// Safety: offsets _are_ monotonically increasing
let offsets = unsafe { Offsets::new_unchecked(offsets) }.into();
let offsets = Offsets::try_from_lengths(lengths).expect("");

(offsets, buffer.into(), None)
(offsets.into(), buffer.into(), None)
}

// take implementation when only values contain nulls
Expand Down
6 changes: 6 additions & 0 deletions src/offset.rs
Expand Up @@ -420,6 +420,12 @@ impl<O: Offset> OffsetsBuffer<O> {
Self(self.0.slice_unchecked(offset, length))
}

/// Returns an iterator over the length of every slot described by these offsets.
///
/// Each item is the difference between two consecutive offsets, converted to
/// `usize`, so the iterator yields one item per adjacent pair of offsets.
#[inline]
pub fn lengths(&self) -> impl Iterator<Item = usize> + '_ {
    self.0.windows(2).map(|pair| {
        // `pair` holds two adjacent offsets: the slot's start and its end.
        let (start, end) = (pair[0], pair[1]);
        (end - start).to_usize()
    })
}

/// Returns the inner [`Buffer`].
#[inline]
pub fn into_inner(self) -> Buffer<O> {
Expand Down

0 comments on commit 8e843ed

Please sign in to comment.