Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

type_id and value_offset are incorrect for sliced UnionArray #2087

Merged
merged 2 commits into from Jul 16, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 83 additions & 7 deletions arrow/src/array/array_union.rs
Expand Up @@ -243,8 +243,8 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn type_id(&self, index: usize) -> i8 {
assert!(index - self.offset() < self.len());
self.data().buffers()[0].as_slice()[index] as i8
assert!(index < self.len());
self.data().buffers()[0].as_slice()[self.offset() + index] as i8
}

/// Returns the offset into the underlying values array for the array slot at `index`.
Expand All @@ -253,11 +253,11 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn value_offset(&self, index: usize) -> i32 {
assert!(index - self.offset() < self.len());
assert!(index < self.len());
if self.is_dense() {
self.data().buffers()[1].typed_data::<i32>()[index]
self.data().buffers()[1].typed_data::<i32>()[self.offset() + index]
} else {
index as i32
(self.offset() + index) as i32
}
}

Expand All @@ -267,8 +267,8 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn value(&self, index: usize) -> ArrayRef {
let type_id = self.type_id(self.offset() + index);
let value_offset = self.value_offset(self.offset() + index) as usize;
let type_id = self.type_id(index);
let value_offset = self.value_offset(index) as usize;
let child_data = self.boxed_fields[type_id as usize].clone();
child_data.slice(value_offset, 1)
}
Expand Down Expand Up @@ -383,6 +383,7 @@ mod tests {
use crate::array::*;
use crate::buffer::Buffer;
use crate::datatypes::{DataType, Field};
use crate::record_batch::RecordBatch;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm? I need use RecordBatch, otherwise:

error[E0412]: cannot find type `RecordBatch` in this scope
   --> arrow/src/array/array_union.rs:994:49
    |
994 |         fn test_slice_union(record_batch_slice: RecordBatch) {
    |                                                 ^^^^^^^^^^^ not found in this scope

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, the diff rendered strangely on my phone - made it look like there were no other changes to this file


#[test]
fn test_dense_i32() {
Expand Down Expand Up @@ -956,4 +957,79 @@ mod tests {
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);
}

#[test]
fn slice_union_array() {
// [1, null, 3.0, null, 4]
fn create_sparse_union() -> UnionArray {
let mut builder = UnionBuilder::new_sparse(5);
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
builder.append_null::<Float64Type>("c").unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
builder.build().unwrap()
}

// [1, null, 3.0, null, 4]
fn create_dense_union() -> UnionArray {
let mut builder = UnionBuilder::new_dense(5);
builder.append::<Int32Type>("a", 1).unwrap();
viirya marked this conversation as resolved.
Show resolved Hide resolved
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
builder.append_null::<Float64Type>("c").unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
builder.build().unwrap()
}

fn create_batch(union: UnionArray) -> RecordBatch {
let schema = Schema::new(vec![Field::new(
"struct_array",
union.data_type().clone(),
true,
)]);

RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap()
}

fn test_slice_union(record_batch_slice: RecordBatch) {
let union_slice = record_batch_slice
.column(0)
.as_any()
.downcast_ref::<UnionArray>()
.unwrap();

assert_eq!(union_slice.type_id(0), 0);
assert_eq!(union_slice.type_id(1), 1);
assert_eq!(union_slice.type_id(2), 1);

let slot = union_slice.value(0);
let array = slot.as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(array.len(), 1);
assert!(array.is_null(0));

let slot = union_slice.value(1);
let array = slot.as_any().downcast_ref::<Float64Array>().unwrap();
assert_eq!(array.len(), 1);
assert!(array.is_valid(0));
assert_eq!(array.value(0), 3.0);

let slot = union_slice.value(2);
let array = slot.as_any().downcast_ref::<Float64Array>().unwrap();
assert_eq!(array.len(), 1);
assert!(array.is_null(0));
}

// Sparse Union
let record_batch = create_batch(create_sparse_union());
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
test_slice_union(record_batch_slice);

// Dense Union
let record_batch = create_batch(create_dense_union());
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
test_slice_union(record_batch_slice);
}
}