diff --git a/arrow/benches/lexsort.rs b/arrow/benches/lexsort.rs index aebb588cf9c..5c161ec8df0 100644 --- a/arrow/benches/lexsort.rs +++ b/arrow/benches/lexsort.rs @@ -105,7 +105,7 @@ fn do_bench(c: &mut Criterion, columns: &[Column], len: usize) { .iter() .map(|a| SortField::new(a.data_type().clone())) .collect(); - let mut converter = RowConverter::new(fields); + let mut converter = RowConverter::new(fields).unwrap(); let rows = converter.convert_columns(&arrays).unwrap(); let mut sort: Vec<_> = rows.iter().enumerate().collect(); sort.sort_unstable_by(|(_, a), (_, b)| a.cmp(b)); diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index 48bb013116b..ac9f3106f7e 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -38,12 +38,12 @@ fn do_bench(c: &mut Criterion, name: &str, cols: Vec) { c.bench_function(&format!("convert_columns {}", name), |b| { b.iter(|| { - let mut converter = RowConverter::new(fields.clone()); + let mut converter = RowConverter::new(fields.clone()).unwrap(); black_box(converter.convert_columns(&cols).unwrap()) }); }); - let mut converter = RowConverter::new(fields); + let mut converter = RowConverter::new(fields).unwrap(); let rows = converter.convert_columns(&cols).unwrap(); // using a pre-prepared row converter should be faster than the first time c.bench_function(&format!("convert_columns_prepared {}", name), |b| { diff --git a/arrow/src/row/dictionary.rs b/arrow/src/row/dictionary.rs index 950a7d8972f..d8426ad0c3e 100644 --- a/arrow/src/row/dictionary.rs +++ b/arrow/src/row/dictionary.rs @@ -33,8 +33,8 @@ use std::collections::HashMap; pub fn compute_dictionary_mapping( interner: &mut OrderPreservingInterner, values: &ArrayRef, -) -> Result>, ArrowError> { - Ok(downcast_primitive_array! { +) -> Vec> { + downcast_primitive_array! { values => interner .intern(values.iter().map(|x| x.map(|x| x.encode()))), DataType::Binary => { @@ -53,8 +53,8 @@ pub fn compute_dictionary_mapping( let iter = as_largestring_array(values).iter().map(|x| x.map(|x| x.as_bytes())); interner.intern(iter) } - t => return Err(ArrowError::NotYetImplemented(format!("dictionary value {} is not supported", t))), - }) + _ => unreachable!(), + } } /// Dictionary types are encoded as @@ -173,18 +173,11 @@ pub unsafe fn decode_dictionary( value_type => (decode_primitive_helper, values, value_type), DataType::Null => NullArray::new(values.len()).into_data(), DataType::Boolean => decode_bool(&values), - DataType::Decimal128(_, _) => decode_primitive_helper!(Decimal128Type, values, value_type), - DataType::Decimal256(_, _) => decode_primitive_helper!(Decimal256Type, values, value_type), DataType::Utf8 => decode_string::(&values), DataType::LargeUtf8 => decode_string::(&values), DataType::Binary => decode_binary::(&values), DataType::LargeBinary => decode_binary::(&values), - _ => { - return Err(ArrowError::NotYetImplemented(format!( - "decoding dictionary values of {}", - value_type - ))) - } + _ => unreachable!(), }; let data_type = diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs index e0312be1f8d..4fbaa3931b0 100644 --- a/arrow/src/row/mod.rs +++ b/arrow/src/row/mod.rs @@ -40,7 +40,7 @@ //! let mut converter = RowConverter::new(vec![ //! SortField::new(DataType::Int32), //! SortField::new(DataType::Utf8), -//! ]); +//! ]).unwrap(); //! let rows = converter.convert_columns(&arrays).unwrap(); //! //! // Compare rows @@ -83,7 +83,7 @@ //! .iter() //! .map(|a| SortField::new(a.data_type().clone())) //! .collect(); -//! let mut converter = RowConverter::new(fields); +//! let mut converter = RowConverter::new(fields).unwrap(); //! let rows = converter.convert_columns(&arrays).unwrap(); //! let mut sort: Vec<_> = rows.iter().enumerate().collect(); //! sort.sort_unstable_by(|(_, a), (_, b)| a.cmp(b)); @@ -231,12 +231,24 @@ impl SortField { impl RowConverter { /// Create a new [`RowConverter`] with the provided schema - pub fn new(fields: Vec) -> Self { + pub fn new(fields: Vec) -> Result { + if !Self::supports_fields(&fields) { + return Err(ArrowError::NotYetImplemented(format!( + "not yet implemented: {:?}", + fields + ))); + } + let interners = (0..fields.len()).map(|_| None).collect(); - Self { + Ok(Self { fields: fields.into(), interners, - } + }) + } + + /// Check if the given fields are supported by the row format. + pub fn supports_fields(fields: &[SortField]) -> bool { + fields.iter().all(|x| !DataType::is_nested(&x.data_type)) } /// Convert [`ArrayRef`] columns into [`Rows`] @@ -275,7 +287,7 @@ impl RowConverter { let interner = interner.get_or_insert_with(Default::default); - let mapping: Vec<_> = compute_dictionary_mapping(interner, values)? + let mapping: Vec<_> = compute_dictionary_mapping(interner, values) .into_iter() .map(|maybe_interned| { maybe_interned.map(|interned| interner.normalized_key(interned)) @@ -286,7 +298,7 @@ impl RowConverter { }) .collect::>>()?; - let mut rows = new_empty_rows(columns, &dictionaries, Arc::clone(&self.fields))?; + let mut rows = new_empty_rows(columns, &dictionaries, Arc::clone(&self.fields)); for ((column, field), dictionary) in columns.iter().zip(self.fields.iter()).zip(dictionaries) @@ -492,7 +504,7 @@ fn new_empty_rows( cols: &[ArrayRef], dictionaries: &[Option>>], fields: Arc<[SortField]>, -) -> Result { +) -> Rows { use fixed::FixedLengthEncoding; let num_rows = cols.first().map(|x| x.len()).unwrap_or(0); @@ -535,7 +547,7 @@ fn new_empty_rows( } _ => unreachable!(), } - t => return Err(ArrowError::NotYetImplemented(format!("not yet implemented: {}", t))) + _ => unreachable!(), } } @@ -565,11 +577,11 @@ fn new_empty_rows( let buffer = vec![0_u8; cur_offset]; - Ok(Rows { + Rows { buffer: buffer.into(), offsets: offsets.into(), fields, - }) + } } /// Encodes a column to the provided [`Rows`] incrementing the offsets as it progresses @@ -605,7 +617,7 @@ fn encode_column( column => encode_dictionary(out, column, dictionary.unwrap(), opts), _ => unreachable!() } - t => unimplemented!("not yet implemented: {}", t) + _ => unreachable!(), } } @@ -747,7 +759,8 @@ mod tests { let mut converter = RowConverter::new(vec![ SortField::new(DataType::Int16), SortField::new(DataType::Float32), - ]); + ]) + .unwrap(); let rows = converter.convert_columns(&cols).unwrap(); assert_eq!(rows.offsets.as_ref(), &[0, 8, 16, 24, 32, 40, 48, 56]); @@ -787,7 +800,8 @@ mod tests { fn test_decimal128() { let mut converter = RowConverter::new(vec![SortField::new( DataType::Decimal128(DECIMAL128_MAX_PRECISION, 7), - )]); + )]) + .unwrap(); let col = Arc::new( Decimal128Array::from_iter([ None, @@ -815,7 +829,8 @@ mod tests { fn test_decimal256() { let mut converter = RowConverter::new(vec![SortField::new( DataType::Decimal256(DECIMAL256_MAX_PRECISION, 7), - )]); + )]) + .unwrap(); let col = Arc::new( Decimal256Array::from_iter([ None, @@ -843,7 +858,8 @@ mod tests { #[test] fn test_bool() { - let mut converter = RowConverter::new(vec![SortField::new(DataType::Boolean)]); + let mut converter = + RowConverter::new(vec![SortField::new(DataType::Boolean)]).unwrap(); let col = Arc::new(BooleanArray::from_iter([None, Some(false), Some(true)])) as ArrayRef; @@ -862,7 +878,8 @@ mod tests { descending: true, nulls_first: false, }, - )]); + )]) + .unwrap(); let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); assert!(rows.row(2) < rows.row(1)); @@ -879,7 +896,7 @@ mod tests { let d = a.data_type().clone(); let mut converter = - RowConverter::new(vec![SortField::new(a.data_type().clone())]); + RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap(); let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap(); let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); @@ -905,7 +922,7 @@ mod tests { ); assert_eq!(dict_with_tz.data_type(), &d); - let mut converter = RowConverter::new(vec![SortField::new(d.clone())]); + let mut converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap(); let rows = converter .convert_columns(&[Arc::new(dict_with_tz) as _]) .unwrap(); @@ -917,7 +934,8 @@ mod tests { #[test] fn test_null_encoding() { let col = Arc::new(NullArray::new(10)); - let mut converter = RowConverter::new(vec![SortField::new(DataType::Null)]); + let mut converter = + RowConverter::new(vec![SortField::new(DataType::Null)]).unwrap(); let rows = converter.convert_columns(&[col]).unwrap(); assert_eq!(rows.num_rows(), 10); assert_eq!(rows.row(1).data.len(), 0); @@ -933,7 +951,8 @@ mod tests { Some(""), ])) as ArrayRef; - let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]); + let mut converter = + RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap(); let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); assert!(rows.row(1) < rows.row(0)); @@ -958,7 +977,8 @@ mod tests { Some(vec![0xFF_u8; variable::BLOCK_SIZE + 1]), ])) as ArrayRef; - let mut converter = RowConverter::new(vec![SortField::new(DataType::Binary)]); + let mut converter = + RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap(); let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); for i in 0..rows.num_rows() { @@ -983,7 +1003,8 @@ mod tests { descending: true, nulls_first: false, }, - )]); + )]) + .unwrap(); let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); for i in 0..rows.num_rows() { @@ -1017,7 +1038,7 @@ mod tests { ])) as ArrayRef; let mut converter = - RowConverter::new(vec![SortField::new(a.data_type().clone())]); + RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap(); let rows_a = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); assert!(rows_a.row(3) < rows_a.row(5)); @@ -1052,7 +1073,8 @@ mod tests { descending: true, nulls_first: false, }, - )]); + )]) + .unwrap(); let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); assert!(rows_c.row(3) > rows_c.row(5)); @@ -1078,7 +1100,7 @@ mod tests { let a = builder.finish(); let mut converter = - RowConverter::new(vec![SortField::new(a.data_type().clone())]); + RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap(); let rows = converter.convert_columns(&[Arc::new(a)]).unwrap(); assert!(rows.row(0) < rows.row(1)); assert!(rows.row(2) < rows.row(0)); @@ -1104,7 +1126,7 @@ mod tests { .build() .unwrap(); - let mut converter = RowConverter::new(vec![SortField::new(data_type)]); + let mut converter = RowConverter::new(vec![SortField::new(data_type)]).unwrap(); let rows = converter .convert_columns(&[Arc::new(DictionaryArray::::from(data))]) .unwrap(); @@ -1119,10 +1141,11 @@ mod tests { #[should_panic(expected = "rows were not produced by this RowConverter")] fn test_different_converter() { let values = Arc::new(Int32Array::from_iter([Some(1), Some(-1)])); - let mut converter = RowConverter::new(vec![SortField::new(DataType::Int32)]); + let mut converter = + RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap(); let rows = converter.convert_columns(&[values]).unwrap(); - let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]); + let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap(); let _ = converter.convert_rows(&rows); } @@ -1266,7 +1289,7 @@ mod tests { .map(|(o, a)| SortField::new_with_options(a.data_type().clone(), o)) .collect(); - let mut converter = RowConverter::new(columns); + let mut converter = RowConverter::new(columns).unwrap(); let rows = converter.convert_columns(&arrays).unwrap(); for i in 0..len {