Skip to content

Commit

Permalink
Check overflow in MutableArrayData extend offsets (#3123) (#3157)
Browse files Browse the repository at this point in the history
* Check overflow in MutableArrayData extend offsets (#3123)

* Update arrow-data/src/transform/utils.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
tustvold and alamb committed Nov 22, 2022
1 parent e214ccc commit f091cbb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
6 changes: 4 additions & 2 deletions arrow-data/src/transform/list.rs
Expand Up @@ -21,9 +21,11 @@ use super::{
};
use crate::ArrayData;
use arrow_buffer::ArrowNativeType;
use num::Integer;
use num::{CheckedAdd, Integer};

pub(super) fn build_extend<T: ArrowNativeType + Integer>(array: &ArrayData) -> Extend {
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);
if array.null_count() == 0 {
// fast case where we can copy regions without nullability checks
Expand Down
22 changes: 19 additions & 3 deletions arrow-data/src/transform/utils.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use arrow_buffer::{bit_util, ArrowNativeType, MutableBuffer};
use num::Integer;
use num::{CheckedAdd, Integer};

/// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero.
#[inline]
Expand All @@ -27,7 +27,7 @@ pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) {
}
}

pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>(
buffer: &mut MutableBuffer,
mut last_offset: T,
offsets: &[T],
Expand All @@ -36,7 +36,10 @@ pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
offsets.windows(2).for_each(|offsets| {
// compute the new offset
let length = offsets[1] - offsets[0];
last_offset = last_offset + length;
// if you hit this appending to a StringArray / BinaryArray it is because you
// are trying to add more data than can fit into that type. Try breaking your data into
// smaller batches or using LargeStringArray / LargeBinaryArray
last_offset = last_offset.checked_add(&length).expect("offset overflow");
buffer.push(last_offset);
});
}
Expand All @@ -55,3 +58,16 @@ pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(
debug_assert!(prefix.is_empty() && suffix.is_empty());
*offsets.get_unchecked(offsets.len() - 1)
}

#[cfg(test)]
mod tests {
use crate::transform::utils::extend_offsets;
use arrow_buffer::MutableBuffer;

#[test]
#[should_panic(expected = "offset overflow")]
fn test_overflow() {
let mut buffer = MutableBuffer::new(10);
extend_offsets(&mut buffer, i32::MAX - 4, &[0, 5]);
}
}
6 changes: 4 additions & 2 deletions arrow-data/src/transform/variable_size.rs
Expand Up @@ -18,7 +18,7 @@
use crate::ArrayData;
use arrow_buffer::{ArrowNativeType, MutableBuffer};
use num::traits::AsPrimitive;
use num::Integer;
use num::{CheckedAdd, Integer};

use super::{
Extend, _MutableArrayData,
Expand All @@ -39,7 +39,9 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
buffer.extend_from_slice(new_values);
}

pub(super) fn build_extend<T: ArrowNativeType + Integer + AsPrimitive<usize>>(
pub(super) fn build_extend<
T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>,
>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);
Expand Down

0 comments on commit f091cbb

Please sign in to comment.