Skip to content

Commit

Permalink
Always validate the array data (except the Decimal) when creating a…
Browse files Browse the repository at this point in the history
…rray in IPC reader (#2547)

* validate array data in ipc reader

Signed-off-by: remzi <13716567376yh@gmail.com>

* not validate decimal

Signed-off-by: remzi <13716567376yh@gmail.com>

Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 committed Aug 23, 2022
1 parent 7670c5f commit 3430537
Showing 1 changed file with 31 additions and 29 deletions.
60 changes: 31 additions & 29 deletions arrow/src/ipc/reader.rs
Expand Up @@ -411,12 +411,12 @@ fn create_primitive_array(
}
FixedSizeBinary(_) => {
// read 2 buffers: null buffer (optional) and data buffer
let builder = ArrayData::builder(data_type.clone())
ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

unsafe { builder.build_unchecked() }
.null_bit_buffer(null_buffer)
.build()
.unwrap()
}
Int8
| Int16
Expand All @@ -429,45 +429,45 @@ fn create_primitive_array(
| Interval(IntervalUnit::YearMonth) => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a signed i64, and cast appropriately
let builder = ArrayData::builder(DataType::Int64)
let data = ArrayData::builder(DataType::Int64)
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

let data = unsafe { builder.build_unchecked() };
.null_bit_buffer(null_buffer)
.build()
.unwrap();
let values = Arc::new(Int64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.into_data()
} else {
let builder = ArrayData::builder(data_type.clone())
ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

unsafe { builder.build_unchecked() }
.null_bit_buffer(null_buffer)
.build()
.unwrap()
}
}
Float32 => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a f64, and cast appropriately
let builder = ArrayData::builder(DataType::Float64)
let data = ArrayData::builder(DataType::Float64)
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

let data = unsafe { builder.build_unchecked() };
.null_bit_buffer(null_buffer)
.build()
.unwrap();
let values = Arc::new(Float64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.into_data()
} else {
let builder = ArrayData::builder(data_type.clone())
ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

unsafe { builder.build_unchecked() }
.null_bit_buffer(null_buffer)
.build()
.unwrap()
}
}
Boolean
Expand All @@ -479,21 +479,23 @@ fn create_primitive_array(
| Date64
| Duration(_)
| Interval(IntervalUnit::DayTime)
| Interval(IntervalUnit::MonthDayNano) => {
let builder = ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

unsafe { builder.build_unchecked() }
}
| Interval(IntervalUnit::MonthDayNano) => ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer)
.build()
.unwrap(),
Decimal128(_, _) | Decimal256(_, _) => {
// read 2 buffers: null buffer (optional) and data buffer
let builder = ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);

// Don't validate the decimal array for now,
// because validating decimals is somewhat complicated
// and there is no conclusion yet on whether we should do it.
// For more information, see https://github.com/apache/arrow-rs/issues/2387
unsafe { builder.build_unchecked() }
}
t => unreachable!("Data type {:?} either unsupported or not primitive", t),
Expand Down Expand Up @@ -527,7 +529,7 @@ fn create_list_array(

_ => unreachable!("Cannot create list or map array from {:?}", data_type),
};
make_array(unsafe { builder.build_unchecked() })
make_array(builder.build().unwrap())
}

/// Reads the correct number of buffers based on list type and null_count, and creates a
Expand Down

0 comments on commit 3430537

Please sign in to comment.