From 620a04cc8976d4b234dd92e92a9c389de2c8a2f2 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Sun, 6 Nov 2022 21:49:40 +1300 Subject: [PATCH] Update parquet to depend on arrow subcrates (#3044) --- parquet/Cargo.toml | 14 ++++-- parquet/src/arrow/array_reader/builder.rs | 2 +- parquet/src/arrow/array_reader/byte_array.rs | 8 ++-- .../array_reader/byte_array_dictionary.rs | 25 +++------- parquet/src/arrow/array_reader/empty_array.rs | 5 +- .../array_reader/fixed_len_byte_array.rs | 15 +++--- parquet/src/arrow/array_reader/list_array.rs | 16 ++++--- parquet/src/arrow/array_reader/map_array.rs | 16 +++---- parquet/src/arrow/array_reader/mod.rs | 4 +- parquet/src/arrow/array_reader/null_array.rs | 8 ++-- .../src/arrow/array_reader/primitive_array.rs | 21 +++++---- .../src/arrow/array_reader/struct_array.rs | 9 ++-- parquet/src/arrow/array_reader/test_util.rs | 4 +- parquet/src/arrow/arrow_reader/filter.rs | 13 +++-- parquet/src/arrow/arrow_reader/mod.rs | 43 +++++++++-------- parquet/src/arrow/arrow_reader/selection.rs | 4 +- parquet/src/arrow/arrow_writer/byte_array.rs | 6 +-- parquet/src/arrow/arrow_writer/levels.rs | 47 +++++++++---------- parquet/src/arrow/arrow_writer/mod.rs | 21 ++++----- parquet/src/arrow/async_reader.rs | 8 ++-- parquet/src/arrow/buffer/bit_util.rs | 4 +- parquet/src/arrow/buffer/dictionary_buffer.rs | 11 +++-- parquet/src/arrow/buffer/offset_buffer.rs | 9 ++-- parquet/src/arrow/mod.rs | 12 ++--- parquet/src/arrow/record_reader/buffer.rs | 3 +- .../arrow/record_reader/definition_levels.rs | 8 ++-- parquet/src/arrow/record_reader/mod.rs | 6 +-- parquet/src/arrow/schema.rs | 10 ++-- parquet/src/arrow/schema/complex.rs | 2 +- parquet/src/arrow/schema/primitive.rs | 2 +- parquet/src/bin/parquet-fromcsv.rs | 3 +- parquet/src/column/writer/encoder.rs | 4 +- parquet/src/errors.rs | 4 +- 33 files changed, 182 insertions(+), 185 deletions(-) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 70320ba6590..b4c69aa984e 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -30,6 +30,15 @@ edition = "2021" rust-version = "1.62" [dependencies] +arrow-array = { version = "26.0.0", path = "../arrow-array", default-features = false, optional = true } +arrow-buffer = { version = "26.0.0", path = "../arrow-buffer", default-features = false, optional = true } +arrow-cast = { version = "26.0.0", path = "../arrow-cast", default-features = false, optional = true } +arrow-csv = { version = "26.0.0", path = "../arrow-csv", default-features = false, optional = true } +arrow-data = { version = "26.0.0", path = "../arrow-data", default-features = false, optional = true } +arrow-schema = { version = "26.0.0", path = "../arrow-schema", default-features = false, optional = true } +arrow-select = { version = "26.0.0", path = "../arrow-select", default-features = false, optional = true } +arrow-ipc = { version = "26.0.0", path = "../arrow-ipc", default-features = false, optional = true } + ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] } bytes = { version = "1.1", default-features = false, features = ["std"] } thrift = { version = "0.16", default-features = false } @@ -41,7 +50,6 @@ zstd = { version = "0.11.1", optional = true, default-features = false } chrono = { version = "0.4", default-features = false, features = ["alloc"] } num = { version = "0.4", default-features = false } num-bigint = { version = "0.4", default-features = false } -arrow = { path = "../arrow", version = "26.0.0", optional = true, default-features = false, features = 
["ipc"] } base64 = { version = "0.13", default-features = false, features = ["std"], optional = true } clap = { version = "4", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } @@ -70,9 +78,9 @@ all-features = true [features] default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] # Enable arrow reader/writer APIs -arrow = ["dep:arrow", "base64"] +arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"] # Enable CLI tools -cli = ["json", "base64", "clap", "arrow/csv"] +cli = ["json", "base64", "clap", "arrow-csv"] # Enable JSON APIs json = ["serde_json", "base64"] # Enable internal testing APIs diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index c0216466d48..246bccfece4 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow::datatypes::DataType; +use arrow_schema::DataType; use crate::arrow::array_reader::empty_array::make_empty_array_reader; use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader; diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs index 4bf4dee0d0b..22fa0ab45a2 100644 --- a/parquet/src/arrow/array_reader/byte_array.rs +++ b/parquet/src/arrow/array_reader/byte_array.rs @@ -30,9 +30,9 @@ use crate::encodings::decoding::{Decoder, DeltaBitPackDecoder}; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; use crate::util::memory::ByteBufferPtr; -use arrow::array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait}; -use arrow::buffer::Buffer; -use arrow::datatypes::DataType as ArrowType; +use arrow_array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait}; +use arrow_buffer::Buffer; +use arrow_schema::DataType as ArrowType; use std::any::Any; use std::ops::Range; use std::sync::Arc; @@ -587,7 +587,7 @@ mod tests { use super::*; use crate::arrow::array_reader::test_util::{byte_array_all_encodings, utf8_column}; use crate::arrow::record_reader::buffer::ValuesBuffer; - use arrow::array::{Array, StringArray}; + use arrow_array::{Array, StringArray}; #[test] fn test_byte_array_decoder() { diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs index 0a5d94fa6ae..c4ed7e9070c 100644 --- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs +++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs @@ -20,9 +20,9 @@ use std::marker::PhantomData; use std::ops::Range; use std::sync::Arc; -use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; -use arrow::buffer::Buffer; -use arrow::datatypes::{ArrowNativeType, DataType as ArrowType}; +use arrow_array::{Array, ArrayRef, OffsetSizeTrait}; +use arrow_buffer::{ArrowNativeType, Buffer}; +use arrow_schema::DataType as ArrowType; use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain}; use crate::arrow::array_reader::{read_records, skip_records, ArrayReader}; @@ -188,15 +188,11 @@ where } fn get_def_levels(&self) -> Option<&[i16]> { - self.def_levels_buffer - .as_ref() - .map(|buf| buf.typed_data()) + self.def_levels_buffer.as_ref().map(|buf| buf.typed_data()) } fn get_rep_levels(&self) -> Option<&[i16]> { - 
self.rep_levels_buffer
-            .as_ref()
-            .map(|buf| buf.typed_data())
+        self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data())
     }
 }

@@ -395,7 +391,7 @@ where

 #[cfg(test)]
 mod tests {
-    use arrow::array::{Array, StringArray};
+    use arrow_array::{Array, StringArray};
     use arrow::compute::cast;

     use crate::arrow::array_reader::test_util::{
@@ -528,13 +524,7 @@ mod tests {

         assert_eq!(
             strings.iter().collect::<Vec<_>>(),
-            vec![
-                Some("0"),
-                Some("1"),
-                Some("1"),
-                Some("2"),
-                Some("2"),
-            ]
+            vec![Some("0"), Some("1"), Some("1"), Some("2"), Some("2"),]
         )
     }

@@ -625,7 +615,6 @@ mod tests {
         }
     }

-
     #[test]
     fn test_too_large_dictionary() {
         let data: Vec<_> = (0..128)
diff --git a/parquet/src/arrow/array_reader/empty_array.rs b/parquet/src/arrow/array_reader/empty_array.rs
index abe839b9dc2..2a3711fa030 100644
--- a/parquet/src/arrow/array_reader/empty_array.rs
+++ b/parquet/src/arrow/array_reader/empty_array.rs
@@ -17,8 +17,9 @@

 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::Result;
-use arrow::array::{ArrayDataBuilder, ArrayRef, StructArray};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_schema::DataType as ArrowType;
+use arrow_array::{ArrayRef, StructArray};
+use arrow_data::ArrayDataBuilder;
 use std::any::Any;
 use std::sync::Arc;

diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
index ba3a02c4f6b..e8d426d3a85 100644
--- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
@@ -27,12 +27,13 @@ use crate::column::reader::decoder::{ColumnValueDecoder, ValuesBufferSlice};
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
 use crate::util::memory::ByteBufferPtr;
-use arrow::array::{
-    ArrayDataBuilder, ArrayRef, Decimal128Array, FixedSizeBinaryArray,
-    IntervalDayTimeArray, IntervalYearMonthArray,
+use arrow_array::{
+    ArrayRef, Decimal128Array, FixedSizeBinaryArray, IntervalDayTimeArray,
+    IntervalYearMonthArray,
 };
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType as ArrowType, IntervalUnit};
+use arrow_buffer::Buffer;
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::{DataType as ArrowType, IntervalUnit};
 use std::any::Any;
 use std::ops::Range;
 use std::sync::Arc;
@@ -427,10 +428,10 @@ mod tests {
     use super::*;
     use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use crate::arrow::ArrowWriter;
-    use arrow::array::{Array, Decimal128Array, ListArray};
+    use arrow_array::{Array, Decimal128Array, ListArray};
     use arrow::datatypes::Field;
     use arrow::error::Result as ArrowResult;
-    use arrow::record_batch::RecordBatch;
+    use arrow_array::RecordBatch;
     use bytes::Bytes;
     use std::sync::Arc;

diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index f0b5092e1ad..965142f3840 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -18,13 +18,14 @@
 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::ParquetError;
 use crate::errors::Result;
-use arrow::array::{
-    new_empty_array, Array, ArrayData, ArrayRef, BooleanBufferBuilder, GenericListArray,
-    MutableArrayData, OffsetSizeTrait,
+use arrow_array::{
+    builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef, GenericListArray,
+    OffsetSizeTrait,
 };
-use arrow::buffer::Buffer;
-use arrow::datatypes::DataType as ArrowType;
-use arrow::datatypes::ToByteSlice;
+use arrow_buffer::Buffer;
+use arrow_buffer::ToByteSlice;
+use arrow_data::{transform::MutableArrayData, ArrayData};
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::cmp::Ordering;
 use std::marker::PhantomData;
@@ -257,8 +258,9 @@ mod tests {
     use crate::file::reader::{FileReader, SerializedFileReader};
     use crate::schema::parser::parse_message_type;
     use crate::schema::types::SchemaDescriptor;
-    use arrow::array::{Array, ArrayDataBuilder, PrimitiveArray};
     use arrow::datatypes::{Field, Int32Type as ArrowInt32, Int32Type};
+    use arrow_array::{Array, PrimitiveArray};
+    use arrow_data::ArrayDataBuilder;
     use std::sync::Arc;

     fn list_type<OffsetSize: OffsetSizeTrait>(
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index bb80fdbdc5f..cd1a76e8638 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -17,8 +17,8 @@

 use crate::arrow::array_reader::{ArrayReader, ListArrayReader, StructArrayReader};
 use crate::errors::Result;
-use arrow::array::{Array, ArrayRef, MapArray};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef, MapArray};
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;

@@ -125,10 +125,10 @@ mod tests {
     use super::*;
     use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use crate::arrow::ArrowWriter;
-    use arrow::array;
-    use arrow::array::{MapBuilder, PrimitiveBuilder, StringBuilder};
     use arrow::datatypes::{Field, Int32Type, Schema};
-    use arrow::record_batch::RecordBatch;
+    use arrow_array::builder::{MapBuilder, PrimitiveBuilder, StringBuilder};
+    use arrow_array::cast::*;
+    use arrow_array::RecordBatch;
     use bytes::Bytes;

     #[test]
@@ -203,9 +203,9 @@ mod tests {
         let col = record_batch.column(0);
         assert!(col.is_null(0));
         assert!(col.is_null(1));
-        let map_entry = array::as_map_array(col).value(2);
-        let struct_col = array::as_struct_array(&map_entry);
-        let key_col = array::as_string_array(struct_col.column(0)); // Key column
+        let map_entry = as_map_array(col).value(2);
+        let struct_col = as_struct_array(&map_entry);
+        let key_col = as_string_array(struct_col.column(0)); // Key column
         assert_eq!(key_col.value(0), "three");
         assert_eq!(key_col.value(1), "four");
         assert_eq!(key_col.value(2), "five");
diff --git a/parquet/src/arrow/array_reader/mod.rs b/parquet/src/arrow/array_reader/mod.rs
index 3740f0faea6..aede5e86c69 100644
--- a/parquet/src/arrow/array_reader/mod.rs
+++ b/parquet/src/arrow/array_reader/mod.rs
@@ -18,8 +18,8 @@
 //! Logic for reading into arrow arrays

 use crate::errors::Result;
-use arrow::array::ArrayRef;
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::ArrayRef;
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;

diff --git a/parquet/src/arrow/array_reader/null_array.rs b/parquet/src/arrow/array_reader/null_array.rs
index 405633f0a82..4ad6c97e2f6 100644
--- a/parquet/src/arrow/array_reader/null_array.rs
+++ b/parquet/src/arrow/array_reader/null_array.rs
@@ -22,9 +22,9 @@ use crate::column::page::PageIterator;
 use crate::data_type::DataType;
 use crate::errors::Result;
 use crate::schema::types::ColumnDescPtr;
-use arrow::array::ArrayRef;
-use arrow::buffer::Buffer;
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::ArrayRef;
+use arrow_buffer::Buffer;
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;

@@ -82,7 +82,7 @@ where

     fn consume_batch(&mut self) -> Result<ArrayRef> {
         // convert to arrays
-        let array = arrow::array::NullArray::new(self.record_reader.num_values());
+        let array = arrow_array::NullArray::new(self.record_reader.num_values());

         // save definition and repetition buffers
         self.def_levels_buffer = self.record_reader.consume_def_levels();
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 5fc5e639de9..012cad5c4c6 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -24,13 +24,14 @@ use crate::column::page::PageIterator;
 use crate::data_type::{DataType, Int96};
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
-use arrow::array::{
-    ArrayDataBuilder, ArrayRef, BooleanArray, BooleanBufferBuilder, Decimal128Array,
-    Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray,
-    TimestampNanosecondBufferBuilder, UInt32Array, UInt64Array,
+use arrow_array::{
+    builder::{BooleanBufferBuilder, TimestampNanosecondBufferBuilder},
+    ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array,
+    Int64Array, TimestampNanosecondArray, UInt32Array, UInt64Array,
 };
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType as ArrowType, TimeUnit};
+use arrow_buffer::Buffer;
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::{DataType as ArrowType, TimeUnit};
 use std::any::Any;
 use std::sync::Arc;

@@ -205,8 +206,8 @@ where
         let array = match target_type {
             ArrowType::Date64 => {
                 // this is cheap as it internally reinterprets the data
-                let a = arrow::compute::cast(&array, &ArrowType::Date32)?;
-                arrow::compute::cast(&a, target_type)?
+                let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
+                arrow_cast::cast(&a, target_type)?
             }
             ArrowType::Decimal128(p, s) => {
                 let array = match array.data_type() {
@@ -236,7 +237,7 @@ where
                     Arc::new(array) as ArrayRef
                 }
-            _ => arrow::compute::cast(&array, target_type)?,
+            _ => arrow_cast::cast(&array, target_type)?,
         };

         // save definition and repetition buffers
@@ -270,8 +271,8 @@ mod tests {
     use crate::schema::types::SchemaDescriptor;
     use crate::util::test_common::rand_gen::make_pages;
     use crate::util::InMemoryPageIterator;
-    use arrow::array::{Array, PrimitiveArray};
     use arrow::datatypes::ArrowPrimitiveType;
+    use arrow_array::{Array, PrimitiveArray};

     use arrow::datatypes::DataType::Decimal128;
     use rand::distributions::uniform::SampleUniform;
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index f682f146c72..b470be5ad40 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,10 +17,9 @@

 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::{ParquetError, Result};
-use arrow::array::{
-    ArrayData, ArrayDataBuilder, ArrayRef, BooleanBufferBuilder, StructArray,
-};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray};
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;

@@ -216,9 +215,9 @@ mod tests {
     use super::*;
     use crate::arrow::array_reader::test_util::InMemoryArrayReader;
     use crate::arrow::array_reader::ListArrayReader;
-    use arrow::array::{Array, Int32Array, ListArray};
     use arrow::buffer::Buffer;
     use arrow::datatypes::Field;
+    use arrow_array::{Array, Int32Array, ListArray};

     #[test]
     fn test_struct_array_reader() {
diff --git a/parquet/src/arrow/array_reader/test_util.rs b/parquet/src/arrow/array_reader/test_util.rs
index ca1aabfd4aa..6585d46146e 100644
--- a/parquet/src/arrow/array_reader/test_util.rs
+++ b/parquet/src/arrow/array_reader/test_util.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.

-use arrow::array::{Array, ArrayRef};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef};
+use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;

diff --git a/parquet/src/arrow/arrow_reader/filter.rs b/parquet/src/arrow/arrow_reader/filter.rs
index 8945ccde424..cbded9a6f42 100644
--- a/parquet/src/arrow/arrow_reader/filter.rs
+++ b/parquet/src/arrow/arrow_reader/filter.rs
@@ -16,9 +16,8 @@
 // under the License.

 use crate::arrow::ProjectionMask;
-use arrow::array::BooleanArray;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
+use arrow_array::{BooleanArray, RecordBatch};
+use arrow_schema::ArrowError;

 /// A predicate operating on [`RecordBatch`]
 pub trait ArrowPredicate: Send + 'static {
@@ -32,7 +31,7 @@ pub trait ArrowPredicate: Send + 'static {
     ///
     /// Rows that are `true` in the returned [`BooleanArray`] will be returned by the
     /// parquet reader, whereas rows that are `false` or `Null` will not be
-    fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult<BooleanArray>;
+    fn evaluate(&mut self, batch: RecordBatch) -> Result<BooleanArray, ArrowError>;
 }

 /// An [`ArrowPredicate`] created from an [`FnMut`]
 pub struct ArrowPredicateFn<F> {
     f: F,
     projection: ProjectionMask,
 }

 impl<F> ArrowPredicateFn<F>
 where
-    F: FnMut(RecordBatch) -> ArrowResult<BooleanArray> + Send + 'static,
+    F: FnMut(RecordBatch) -> Result<BooleanArray, ArrowError> + Send + 'static,
 {
     /// Create a new [`ArrowPredicateFn`]. `f` will be passed batches
     /// that contains the columns specified in `projection`
@@ -56,13 +55,13 @@ where

 impl<F> ArrowPredicate for ArrowPredicateFn<F>
 where
-    F: FnMut(RecordBatch) -> ArrowResult<BooleanArray> + Send + 'static,
+    F: FnMut(RecordBatch) -> Result<BooleanArray, ArrowError> + Send + 'static,
 {
     fn projection(&self) -> &ProjectionMask {
         &self.projection
     }

-    fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult<BooleanArray> {
+    fn evaluate(&mut self, batch: RecordBatch) -> Result<BooleanArray, ArrowError> {
         (self.f)(batch)
     }
 }
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 19c877dffc2..35b70a0485c 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -20,12 +20,10 @@
 use std::collections::VecDeque;
 use std::sync::Arc;

-use arrow::array::Array;
-use arrow::compute::prep_null_mask_filter;
-use arrow::datatypes::{DataType as ArrowType, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::{RecordBatch, RecordBatchReader};
-use arrow::{array::StructArray, error::ArrowError};
+use arrow_array::{Array, StructArray};
+use arrow_array::{RecordBatch, RecordBatchReader};
+use arrow_schema::{ArrowError, DataType as ArrowType, Schema, SchemaRef};
+use arrow_select::filter::prep_null_mask_filter;

 use crate::arrow::array_reader::{
     build_array_reader, ArrayReader, FileReaderRowGroupCollection, RowGroupCollection,
@@ -473,7 +471,7 @@ pub struct ParquetRecordBatchReader {
 }

 impl Iterator for ParquetRecordBatchReader {
-    type Item = ArrowResult<RecordBatch>;
+    type Item = Result<RecordBatch, ArrowError>;

     fn next(&mut self) -> Option<Self::Item> {
         let mut read_records = 0;
@@ -638,11 +636,12 @@ mod tests {
     use rand::{thread_rng, Rng, RngCore};
     use tempfile::tempfile;

-    use arrow::array::*;
-    use arrow::buffer::Buffer;
-    use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
-    use arrow::error::Result as ArrowResult;
-    use arrow::record_batch::{RecordBatch, RecordBatchReader};
+    use arrow_array::builder::*;
+    use arrow_array::*;
+    use arrow_array::{RecordBatch, RecordBatchReader};
+    use arrow_buffer::Buffer;
+    use arrow_data::ArrayDataBuilder;
+    use arrow_schema::{DataType as ArrowDataType, Field, Schema};

     use crate::arrow::arrow_reader::{
         ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReader,
@@ -714,7 +713,7 @@ mod tests {
         file.rewind().unwrap();

         let record_reader = ParquetRecordBatchReader::try_new(file, 2).unwrap();
-        let batches = record_reader.collect::<ArrowResult<Vec<_>>>().unwrap();
+        let batches = record_reader.collect::<Result<Vec<_>, _>>().unwrap();

         assert_eq!(batches.len(), 4);
         for batch in &batches[0..3] {
@@ -1067,7 +1066,7 @@ mod tests {

         let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();

         assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1103,7 +1102,7 @@ mod tests {

         let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();

         assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1143,7 +1142,7 @@ mod tests {

         let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();

         assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1153,7 +1152,7 @@ mod tests {

     #[test]
     fn test_read_decimal_file() {
-        use arrow::array::Decimal128Array;
+        use arrow_array::Decimal128Array;
         let testdata = arrow::util::test_util::parquet_test_data();
         let file_variants = vec![
             ("byte_array", 4),
@@ -1936,7 +1935,7 @@ mod tests {

         let record_reader = ParquetRecordBatchReader::try_new(file, 3).unwrap();

         let batches = record_reader
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();

         assert_eq!(batches.len(), 6);
@@ -2271,7 +2270,7 @@ mod tests {
             let expected = get_expected_batches(&data, &selections, batch_size);
             let skip_reader = create_skip_reader(&test_file, batch_size, selections);
             assert_eq!(
-                skip_reader.collect::<ArrowResult<Vec<_>>>().unwrap(),
+                skip_reader.collect::<Result<Vec<_>, _>>().unwrap(),
                 expected,
                 "batch_size: {}, selection_len: {}, skip_first: {}",
                 batch_size,
@@ -2399,7 +2398,7 @@ mod tests {

         let batches = ParquetRecordBatchReader::try_new(file, 1024)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();
         assert_eq!(batches.len(), 1);
         let batch = &batches[0];
@@ -2444,7 +2443,7 @@ mod tests {

         let batches = ParquetRecordBatchReader::try_new(file, expected_rows)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();
         assert_eq!(batches.len(), 1);
         let batch = &batches[0];
@@ -2476,7 +2475,7 @@ mod tests {

         let batches = ParquetRecordBatchReader::try_new(file, expected_rows)
             .unwrap()
-            .collect::<ArrowResult<Vec<_>>>()
+            .collect::<Result<Vec<_>, _>>()
             .unwrap();
         assert_eq!(batches.len(), 1);
         let batch = &batches[0];
diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs
index 2328c450159..357960906c8 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.

-use arrow::array::{Array, BooleanArray};
-use arrow::compute::SlicesIterator;
+use arrow_array::{Array, BooleanArray};
+use arrow_select::filter::SlicesIterator;
 use std::cmp::Ordering;
 use std::collections::VecDeque;
 use std::ops::Range;
diff --git a/parquet/src/arrow/arrow_writer/byte_array.rs b/parquet/src/arrow/arrow_writer/byte_array.rs
index 7070cecacf2..d5231785280 100644
--- a/parquet/src/arrow/arrow_writer/byte_array.rs
+++ b/parquet/src/arrow/arrow_writer/byte_array.rs
@@ -31,17 +31,17 @@ use crate::file::writer::OnCloseColumnChunk;
 use crate::schema::types::ColumnDescPtr;
 use crate::util::bit_util::num_required_bits;
 use crate::util::interner::{Interner, Storage};
-use arrow::array::{
+use arrow_array::{
     Array, ArrayAccessor, ArrayRef, BinaryArray, DictionaryArray, LargeBinaryArray,
     LargeStringArray, StringArray,
 };
-use arrow::datatypes::DataType;
+use arrow_schema::DataType;

 macro_rules! downcast_dict_impl {
     ($array:ident, $key:ident, $val:ident, $op:expr $(, $arg:expr)*) => {{
         $op($array
             .as_any()
-            .downcast_ref::<DictionaryArray<arrow::datatypes::$key>>()
+            .downcast_ref::<DictionaryArray<arrow_array::types::$key>>()
             .unwrap()
             .downcast_dict::<$val>()
             .unwrap()$(, $arg)*)
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 5736f05fdcf..e2a8a8c50e9 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -41,11 +41,11 @@
 //! 
\[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding) use crate::errors::{ParquetError, Result}; -use arrow::array::{ - make_array, Array, ArrayData, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, - StructArray, +use arrow_array::{ + make_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, StructArray, }; -use arrow::datatypes::{DataType, Field}; +use arrow_data::ArrayData; +use arrow_schema::{DataType, Field}; use std::ops::Range; /// Performs a depth-first scan of the children of `array`, constructing [`LevelInfo`] @@ -482,11 +482,13 @@ mod tests { use std::sync::Arc; - use arrow::array::*; - use arrow::buffer::Buffer; - use arrow::datatypes::{Int32Type, Schema, ToByteSlice}; - use arrow::record_batch::RecordBatch; - use arrow::util::pretty::pretty_format_columns; + use arrow_array::builder::*; + use arrow_array::types::Int32Type; + use arrow_array::*; + use arrow_buffer::{Buffer, ToByteSlice}; + use arrow_cast::display::array_value_to_string; + use arrow_data::ArrayDataBuilder; + use arrow_schema::Schema; #[test] fn test_calculate_array_levels_twitter_example() { @@ -1355,21 +1357,18 @@ mod tests { let list_field = Field::new("col", list_type, true); let expected = vec![ - r#"+-------------------------------------+"#, - r#"| col |"#, - r#"+-------------------------------------+"#, - r#"| |"#, - r#"| |"#, - r#"| [] |"#, - r#"| [{"list": [3, ], "integers": null}] |"#, - r#"| [, {"list": null, "integers": 5}] |"#, - r#"| [] |"#, - r#"+-------------------------------------+"#, - ] - .join("\n"); - - let pretty = pretty_format_columns(list_field.name(), &[list.clone()]).unwrap(); - assert_eq!(pretty.to_string(), expected); + r#""#.to_string(), + r#""#.to_string(), + r#"[]"#.to_string(), + r#"[{"list": [3, ], "integers": null}]"#.to_string(), + r#"[, {"list": null, "integers": 5}]"#.to_string(), + r#"[]"#.to_string(), + ]; + + let actual: Vec<_> = (0..6) + .map(|x| array_value_to_string(&list, x).unwrap()) + .collect(); + assert_eq!(actual, expected); let levels = calculate_array_levels(&list, &list_field).unwrap(); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 52f55a91baa..ecb59e93e2f 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -21,11 +21,8 @@ use std::collections::VecDeque; use std::io::Write; use std::sync::Arc; -use arrow::array as arrow_array; -use arrow::array::ArrayRef; -use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, SchemaRef}; -use arrow::record_batch::RecordBatch; -use arrow_array::Array; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use arrow_schema::{DataType as ArrowDataType, IntervalUnit, SchemaRef}; use super::schema::{ add_encoded_arrow_schema_to_metadata, arrow_to_parquet_schema, @@ -54,8 +51,8 @@ mod levels; /// ``` /// # use std::sync::Arc; /// # use bytes::Bytes; -/// # use arrow::array::{ArrayRef, Int64Array}; -/// # use arrow::record_batch::RecordBatch; +/// # use arrow_array::{ArrayRef, Int64Array}; +/// # use arrow_array::RecordBatch; /// # use parquet::arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader}; /// let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef; /// let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap(); @@ -376,8 +373,8 @@ fn write_leaf( match column.data_type() { ArrowDataType::Date64 => { // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32 - let array = arrow::compute::cast(column, 
&ArrowDataType::Date32)?;
-            let array = arrow::compute::cast(&array, &ArrowDataType::Int32)?;
+            let array = arrow_cast::cast(column, &ArrowDataType::Date32)?;
+            let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?;

             let array = array
                 .as_any()
                 .downcast_ref::<Int32Array>()
                 .unwrap()
@@ -394,7 +391,7 @@
             write_primitive(typed, &array[offset..offset + data.len()], levels)?
         }
         _ => {
-            let array = arrow::compute::cast(column, &ArrowDataType::Int32)?;
+            let array = arrow_cast::cast(column, &ArrowDataType::Int32)?;
             let array = array
                 .as_any()
                 .downcast_ref::<Int32Array>()
                 .unwrap()
@@ -432,7 +429,7 @@
             write_primitive(typed, &array[offset..offset + data.len()], levels)?
         }
         _ => {
-            let array = arrow::compute::cast(column, &ArrowDataType::Int64)?;
+            let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
             let array = array
                 .as_any()
                 .downcast_ref::<Int64Array>()
                 .unwrap()
@@ -618,9 +615,9 @@ mod tests {
     use arrow::datatypes::ToByteSlice;
     use arrow::datatypes::{DataType, Field, Schema, UInt32Type, UInt8Type};
     use arrow::error::Result as ArrowResult;
-    use arrow::record_batch::RecordBatch;
     use arrow::util::pretty::pretty_format_batches;
     use arrow::{array::*, buffer::Buffer};
+    use arrow_array::RecordBatch;

     use crate::basic::Encoding;
     use crate::file::metadata::ParquetMetaData;
diff --git a/parquet/src/arrow/async_reader.rs b/parquet/src/arrow/async_reader.rs
index b6b5d7ff7de..d52fa0406bf 100644
--- a/parquet/src/arrow/async_reader.rs
+++ b/parquet/src/arrow/async_reader.rs
@@ -22,7 +22,7 @@
 //! # #[tokio::main(flavor="current_thread")]
 //! # async fn main() {
 //! #
-//! use arrow::record_batch::RecordBatch;
+//! use arrow_array::RecordBatch;
 //! use arrow::util::pretty::pretty_format_batches;
 //! use futures::TryStreamExt;
 //! use tokio::fs::File;
@@ -93,8 +93,8 @@
 use thrift::protocol::TCompactInputProtocol;
 use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};

-use arrow::datatypes::SchemaRef;
-use arrow::record_batch::RecordBatch;
+use arrow_array::RecordBatch;
+use arrow_schema::SchemaRef;

 use crate::arrow::array_reader::{build_array_reader, RowGroupCollection};
 use crate::arrow::arrow_reader::{
@@ -797,8 +797,8 @@ mod tests {
     use crate::arrow::ArrowWriter;
     use crate::file::footer::parse_metadata;
     use crate::file::page_index::index_reader;
-    use arrow::array::{Array, ArrayRef, Int32Array, StringArray};
     use arrow::error::Result as ArrowResult;
+    use arrow_array::{Array, ArrayRef, Int32Array, StringArray};
     use futures::TryStreamExt;
     use rand::{thread_rng, Rng};
     use std::sync::Mutex;
diff --git a/parquet/src/arrow/buffer/bit_util.rs b/parquet/src/arrow/buffer/bit_util.rs
index 04704237c45..34a0a4b83e8 100644
--- a/parquet/src/arrow/buffer/bit_util.rs
+++ b/parquet/src/arrow/buffer/bit_util.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.

-use arrow::util::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
 use std::ops::Range;

 /// Counts the number of set bits in the provided range
@@ -65,7 +65,7 @@ pub fn sign_extend_be<const N: usize>(b: &[u8]) -> [u8; N] {

 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow::array::BooleanBufferBuilder;
+    use arrow_array::builder::BooleanBufferBuilder;
     use rand::prelude::*;

     #[test]
diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index ae9e3590de3..23ebea57b5b 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -21,9 +21,10 @@ use crate::arrow::record_reader::buffer::{
 };
 use crate::column::reader::decoder::ValuesBufferSlice;
 use crate::errors::{ParquetError, Result};
-use arrow::array::{make_array, Array, ArrayDataBuilder, ArrayRef, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
+use arrow_array::{make_array, Array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::DataType as ArrowType;
 use std::sync::Arc;

 /// An array of variable length byte arrays that are potentially dictionary encoded
@@ -179,7 +180,7 @@ impl
         };

         // This will compute a new dictionary
-        let array = arrow::compute::cast(
+        let array = arrow_cast::cast(
             &values.into_array(null_buffer, value_type),
             data_type,
         )
@@ -252,8 +253,8 @@ impl BufferQueue

 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow::array::{Array, StringArray};
     use arrow::compute::cast;
+    use arrow_array::{Array, StringArray};

     #[test]
     fn test_dictionary_buffer() {
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 48eb7013739..df96996e3cb 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -21,9 +21,10 @@ use crate::arrow::record_reader::buffer::{
 };
 use crate::column::reader::decoder::ValuesBufferSlice;
 use crate::errors::{ParquetError, Result};
-use arrow::array::{make_array, ArrayDataBuilder, ArrayRef, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
+use arrow_array::{make_array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::DataType as ArrowType;

 /// A buffer of variable-sized byte arrays that can be converted into
 /// a corresponding [`ArrayRef`]
@@ -238,7 +239,7 @@ impl<I: OffsetSizeTrait> ValuesBufferSlice for OffsetBuffer<I> {

 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow::array::{Array, LargeStringArray, StringArray};
+    use arrow_array::{Array, LargeStringArray, StringArray};

     #[test]
     fn test_offset_buffer_empty() {
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index c5fe0fa2a62..97d0c25e2b4 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -16,8 +16,8 @@
 // under the License.

 //! Provides API for reading/writing Arrow
-//! [RecordBatch](arrow::record_batch::RecordBatch)es and
-//! [Array](arrow::array::Array)s to/from Parquet Files.
+//! [RecordBatch](arrow_array::RecordBatch)es and
+//! [Array](arrow_array::Array)s to/from Parquet Files.
 //!
 //! [Apache Arrow](http://arrow.apache.org/) is a cross-language development platform for
 //! in-memory data.
@@ -25,8 +25,8 @@
 //!# Example of writing Arrow record batch to Parquet file
 //!
 //!```rust
-//! use arrow::array::{Int32Array, ArrayRef};
-//! use arrow::record_batch::RecordBatch;
+//! use arrow_array::{Int32Array, ArrayRef};
+//! use arrow_array::RecordBatch;
 //! use parquet::arrow::arrow_writer::ArrowWriter;
 //! use parquet::file::properties::WriterProperties;
 //! use std::fs::File;
@@ -70,9 +70,9 @@
 //! use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 //!
 //! # use std::sync::Arc;
-//! # use arrow::array::Int32Array;
+//! # use arrow_array::Int32Array;
 //! # use arrow::datatypes::{DataType, Field, Schema};
-//! # use arrow::record_batch::RecordBatch;
+//! # use arrow_array::RecordBatch;
 //! # use parquet::arrow::arrow_writer::ArrowWriter;
 //! #
 //! # let ids = Int32Array::from(vec![1, 2, 3, 4]);
diff --git a/parquet/src/arrow/record_reader/buffer.rs b/parquet/src/arrow/record_reader/buffer.rs
index 64ea38f801d..40498949388 100644
--- a/parquet/src/arrow/record_reader/buffer.rs
+++ b/parquet/src/arrow/record_reader/buffer.rs
@@ -19,8 +19,7 @@ use std::marker::PhantomData;

 use crate::arrow::buffer::bit_util::iter_set_bits_rev;
 use crate::data_type::Int96;
-use arrow::buffer::{Buffer, MutableBuffer};
-use arrow::datatypes::ArrowNativeType;
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};

 /// A buffer that supports writing new data to the end, and removing data from the front
 ///
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index 2d65db77fa6..84b7ab94ceb 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -17,10 +17,10 @@

 use std::ops::Range;

-use arrow::array::BooleanBufferBuilder;
-use arrow::bitmap::Bitmap;
-use arrow::buffer::Buffer;
-use arrow::util::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_array::builder::BooleanBufferBuilder;
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_buffer::Buffer;
+use arrow_data::Bitmap;

 use crate::arrow::buffer::bit_util::count_set_bits;
 use crate::arrow::record_reader::buffer::BufferQueue;
diff --git a/parquet/src/arrow/record_reader/mod.rs b/parquet/src/arrow/record_reader/mod.rs
index b7318af9e85..ef17b8d0e6f 100644
--- a/parquet/src/arrow/record_reader/mod.rs
+++ b/parquet/src/arrow/record_reader/mod.rs
@@ -17,8 +17,8 @@

 use std::cmp::{max, min};

-use arrow::bitmap::Bitmap;
-use arrow::buffer::Buffer;
+use arrow_buffer::Buffer;
+use arrow_data::Bitmap;

 use crate::arrow::record_reader::{
     buffer::{BufferQueue, ScalarBuffer, ValuesBuffer},
@@ -409,9 +409,9 @@ fn packed_null_mask(descr: &ColumnDescPtr) -> bool {
 mod tests {
     use std::sync::Arc;

-    use arrow::array::{Int16BufferBuilder, Int32BufferBuilder};
     use arrow::bitmap::Bitmap;
     use arrow::buffer::Buffer;
+    use arrow_array::builder::{Int16BufferBuilder, Int32BufferBuilder};

     use crate::basic::Encoding;
     use crate::data_type::Int32Type;
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs
index 7803385e7f0..395c4aac150 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema.rs
@@ -26,8 +26,8 @@
 use std::collections::HashMap;
 use std::sync::Arc;

-use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-use arrow::ipc::writer;
+use arrow_schema::{DataType, Field, Schema, TimeUnit};
+use arrow_ipc::writer;

 use crate::basic::{
     ConvertedType, LogicalType, Repetition, TimeUnit as ParquetTimeUnit,
@@ -108,10 +108,10 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
     } else {
         bytes.as_slice()
     };
-    match arrow::ipc::root_as_message(slice) {
+    match arrow_ipc::root_as_message(slice) {
         Ok(message) => message
             .header_as_schema()
-            .map(arrow::ipc::convert::fb_to_schema)
+            .map(arrow_ipc::convert::fb_to_schema)
             .ok_or_else(|| arrow_err!("the message is not Arrow Schema")),
         Err(err) => {
             // The flatbuffers implementation returns an error on verification error.
@@ -137,7 +137,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
 /// Encodes the Arrow schema into the IPC format, and base64 encodes it
 fn encode_arrow_schema(schema: &Schema) -> String {
     let options = writer::IpcWriteOptions::default();
-    let data_gen = arrow::ipc::writer::IpcDataGenerator::default();
+    let data_gen = writer::IpcDataGenerator::default();
     let mut serialized_schema = data_gen.schema_to_bytes(schema, &options);

     // manually prepending the length to the schema as arrow uses the legacy IPC format
diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs
index d63ab5606b0..2334a5601b4 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -21,7 +21,7 @@ use crate::basic::{ConvertedType, Repetition};
 use crate::errors::ParquetError;
 use crate::errors::Result;
 use crate::schema::types::{SchemaDescriptor, Type, TypePtr};
-use arrow::datatypes::{DataType, Field, Schema};
+use arrow_schema::{DataType, Field, Schema};

 fn get_repetition(t: &Type) -> Repetition {
     let info = t.get_basic_info();
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index 87edd75b0b8..e5bab9ac96c 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -20,7 +20,7 @@ use crate::basic::{
 };
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::{BasicTypeInfo, Type};
-use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
+use arrow_schema::{DataType, IntervalUnit, TimeUnit};

 /// Converts [`Type`] to [`DataType`] with an optional `arrow_type_hint`
 /// provided by the arrow schema
diff --git a/parquet/src/bin/parquet-fromcsv.rs b/parquet/src/bin/parquet-fromcsv.rs
index 8c62241e34f..5fdece7cc8a 100644
--- a/parquet/src/bin/parquet-fromcsv.rs
+++ b/parquet/src/bin/parquet-fromcsv.rs
@@ -71,7 +71,8 @@ use std::{
     sync::Arc,
 };

-use arrow::{csv::ReaderBuilder, datatypes::Schema, error::ArrowError};
+use arrow_csv::ReaderBuilder;
+use arrow_schema::{ArrowError, Schema};
 use clap::{Parser, ValueEnum};
 use parquet::{
     arrow::{parquet_to_arrow_schema, ArrowWriter},
diff --git a/parquet/src/column/writer/encoder.rs b/parquet/src/column/writer/encoder.rs
index 9227c4ba1ce..74e6216b0a0 100644
--- a/parquet/src/column/writer/encoder.rs
+++ b/parquet/src/column/writer/encoder.rs
@@ -35,9 +35,9 @@ pub trait ColumnValues {
 }

 #[cfg(any(feature = "arrow", test))]
-impl<T: arrow::array::Array> ColumnValues for T {
+impl<T: arrow_array::Array> ColumnValues for T {
     fn len(&self) -> usize {
-        arrow::array::Array::len(self)
+        arrow_array::Array::len(self)
     }
 }

diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs
index c4f5faaaaca..f9e089787ca 100644
--- a/parquet/src/errors.rs
+++ b/parquet/src/errors.rs
@@ -20,7 +20,7 @@
 use std::{cell, io, result, str};

 #[cfg(any(feature = "arrow", test))]
-use arrow::error::ArrowError;
+use arrow_schema::ArrowError;

 #[derive(Debug, PartialEq, Clone, Eq)]
 pub enum ParquetError {
@@ -103,7 +103,7 @@ impl From for ParquetError {
 }

 /// A specialized `Result` for Parquet errors.
-pub type Result<T> = result::Result<T, ParquetError>;
+pub type Result<T, E = ParquetError> = result::Result<T, E>;

 // ----------------------------------------------------------------------
 // Conversion from `ParquetError` to other types of `Error`s
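
The change above is mechanical: every `arrow::` facade path used by the parquet crate has a one-to-one subcrate equivalent, and the optional `arrow` feature now pulls in the subcrates instead of the facade. Below is a minimal standalone sketch of what the new import style looks like, assuming the 26.0.0 subcrate APIs declared in the Cargo.toml hunk at the top of this patch; the `demo` function and its names are illustrative only and are not part of the patch.

```rust
// Illustrative sketch (not part of the patch): the path mapping the hunks
// above apply, from `arrow::` facade paths to the subcrates.
//   arrow::array::*       -> arrow_array   (arrays, builders, RecordBatch)
//   arrow::buffer::*      -> arrow_buffer  (Buffer, bit utilities)
//   arrow::datatypes::*   -> arrow_schema  (DataType, Field, Schema, ArrowError)
//   arrow::compute::cast  -> arrow_cast::cast
use arrow_array::{Array, ArrayRef, Int32Array, RecordBatch};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType, Field, Schema};
use std::sync::Arc;

fn demo() -> Result<(), ArrowError> {
    // Building a RecordBatch works exactly as before; only crate paths changed.
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int32, false)]));
    let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_new(schema, vec![col.clone()])?;

    // arrow_cast::cast replaces arrow::compute::cast, as in write_leaf above.
    let as_i64 = cast(&col, &DataType::Int64)?;
    assert_eq!(as_i64.len(), batch.num_rows());
    Ok(())
}
```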