Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
fix invalid parquet read (#1330)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 13, 2022
1 parent 1fcfd7c commit a4173af
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/array/binary/mutable_values.rs
Expand Up @@ -128,7 +128,7 @@ impl<O: Offset> MutableBinaryValuesArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
- self.offsets.len()
+ self.offsets.len_proxy()
}

/// Pushes a new item to the array.
Expand Down
4 changes: 2 additions & 2 deletions src/array/list/mutable.rs
Expand Up @@ -191,7 +191,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
- self.offsets.len()
+ self.offsets.len_proxy()
}

/// The values
Expand All @@ -210,7 +210,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}

fn init_validity(&mut self) {
- let len = self.offsets.len();
+ let len = self.offsets.len_proxy();

let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
validity.extend_constant(len, true);
Expand Down
4 changes: 2 additions & 2 deletions src/array/physical_binary.rs
Expand Up @@ -142,14 +142,14 @@ where

offsets.reserve(size_hint);

- let start_index = offsets.len();
+ let start_index = offsets.len_proxy();

for item in iterator {
let bytes = item.as_ref();
values.extend_from_slice(bytes);
offsets.try_push_usize(bytes.len()).unwrap();
}
- offsets.len() - start_index
+ offsets.len_proxy() - start_index
}

// Populates `offsets`, `values`, and `validity` [`Vec`]s with
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mutable_values.rs
Expand Up @@ -169,7 +169,7 @@ impl<O: Offset> MutableUtf8ValuesArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
- self.offsets.len()
+ self.offsets.len_proxy()
}

/// Pushes a new item to the array.
Expand Down
4 changes: 2 additions & 2 deletions src/io/avro/read/nested.rs
Expand Up @@ -53,7 +53,7 @@ impl<O: Offset> DynMutableListArray<O> {
}

fn init_validity(&mut self) {
- let len = self.offsets.len();
+ let len = self.offsets.len_proxy();

let mut validity = MutableBitmap::new();
validity.extend_constant(len, true);
Expand All @@ -64,7 +64,7 @@ impl<O: Offset> DynMutableListArray<O> {

impl<O: Offset> MutableArray for DynMutableListArray<O> {
fn len(&self) -> usize {
- self.offsets.len()
+ self.offsets.len_proxy()
}

fn validity(&self) -> Option<&MutableBitmap> {
Expand Down
6 changes: 3 additions & 3 deletions src/io/parquet/read/deserialize/binary/utils.rs
Expand Up @@ -15,7 +15,7 @@ impl<O: Offset> Pushable<usize> for Offsets<O> {
}
#[inline]
fn len(&self) -> usize {
- self.len()
+ self.len_proxy()
}

#[inline]
Expand Down Expand Up @@ -45,7 +45,7 @@ impl<O: Offset> Binary<O> {

#[inline]
pub fn push(&mut self, v: &[u8]) {
- if self.offsets.len() == 100 && self.offsets.capacity() > 100 {
+ if self.offsets.len_proxy() == 100 && self.offsets.capacity() > 100 {
let bytes_per_row = self.values.len() / 100 + 1;
let bytes_estimate = bytes_per_row * self.offsets.capacity();
if bytes_estimate > self.values.capacity() {
Expand All @@ -64,7 +64,7 @@ impl<O: Offset> Binary<O> {

#[inline]
pub fn len(&self) -> usize {
- self.offsets.len()
+ self.offsets.len_proxy()
}

#[inline]
Expand Down
14 changes: 10 additions & 4 deletions src/offset.rs
Expand Up @@ -150,7 +150,7 @@ impl<O: Offset> Offsets<O> {
#[inline]
pub fn start_end(&self, index: usize) -> (usize, usize) {
// soundness: the invariant of the function
- assert!(index < self.len());
+ assert!(index < self.len_proxy());
unsafe { self.start_end_unchecked(index) }
}

Expand All @@ -165,12 +165,18 @@ impl<O: Offset> Offsets<O> {
(start, end)
}

- /// Returns the length of this container
+ /// Returns the length an array with these offsets would be.
#[inline]
- pub fn len(&self) -> usize {
+ pub fn len_proxy(&self) -> usize {
self.0.len() - 1
}

+ #[inline]
+ /// Returns the number of offsets in this container.
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }

/// Returns the byte slice stored in this buffer
#[inline]
pub fn as_slice(&self) -> &[O] {
Expand All @@ -180,7 +186,7 @@ impl<O: Offset> Offsets<O> {
/// Pops the last element
#[inline]
pub fn pop(&mut self) -> Option<O> {
- if self.len() == 0 {
+ if self.len_proxy() == 0 {
None
} else {
self.0.pop()
Expand Down
2 changes: 1 addition & 1 deletion tests/it/array/list/mutable.rs
Expand Up @@ -32,7 +32,7 @@ fn basics() {
fn with_capacity() {
let array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::with_capacity(10);
assert!(array.offsets().capacity() >= 10);
- assert_eq!(array.offsets().len(), 0);
+ assert_eq!(array.offsets().len_proxy(), 0);
assert_eq!(array.values().values().capacity(), 0);
assert_eq!(array.validity(), None);
}
Expand Down

0 comments on commit a4173af

Please sign in to comment.