From 8c4aa31d411a2a0b591034c784494bb5fa68ed45 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies
Date: Thu, 1 Sep 2022 17:21:15 +0100
Subject: [PATCH] Update thrift v0.16 vendor parquet-format (#2502)
---
 parquet/CONTRIBUTING.md | 17 +-
 parquet/Cargo.toml | 3 +-
 parquet/src/arrow/arrow_reader/mod.rs | 2 +-
 parquet/src/arrow/arrow_reader/selection.rs | 4 +-
 parquet/src/arrow/arrow_writer/mod.rs | 2 +-
 parquet/src/arrow/async_reader.rs | 2 +-
 parquet/src/basic.rs | 534 +-
 parquet/src/column/page.rs | 8 +-
 parquet/src/column/writer/mod.rs | 6 +-
 parquet/src/file/footer.rs | 4 +-
 parquet/src/file/metadata.rs | 21 +-
 parquet/src/file/page_encoding_stats.rs | 17 +-
 parquet/src/file/page_index/index.rs | 2 +-
 parquet/src/file/page_index/index_reader.rs | 2 +-
 parquet/src/file/page_index/range.rs | 6 +-
 parquet/src/file/serialized_reader.rs | 46 +-
 parquet/src/file/statistics.rs | 2 +-
 parquet/src/file/writer.rs | 6 +-
 parquet/src/format.rs | 5200 +++++++++++++++++++
 parquet/src/lib.rs | 3 +
 parquet/src/schema/types.rs | 15 +-
 21 files changed, 5585 insertions(+), 317 deletions(-)
 create mode 100644 parquet/src/format.rs

diff --git a/parquet/CONTRIBUTING.md b/parquet/CONTRIBUTING.md
index 77e9f417e49..15a67ce3c9d 100644
--- a/parquet/CONTRIBUTING.md
+++ b/parquet/CONTRIBUTING.md
@@ -60,7 +60,18 @@ Run `cargo bench` for benchmarks.
 To build documentation, run `cargo doc --no-deps`. To compile and view in the browser, run `cargo doc --no-deps --open`.
-## Update Supported Parquet Version
+## Update Parquet Format
-To update Parquet format to a newer version, check if [parquet-format](https://github.com/sunchao/parquet-format-rs)
-version is available. Then simply update version of `parquet-format` crate in Cargo.toml.
+To generate the parquet format code, run:
+
+```
+$ git clone https://github.com/apache/thrift
+$ cd thrift
+$ git checkout v0.16.0
+# docker build just builds a docker image with the thrift build dependencies
+$ docker build -t thrift build/docker/ubuntu-bionic
+# build/docker/scripts/cmake.sh actually compiles thrift
+$ docker run -v $(pwd):/thrift/src -it thrift build/docker/scripts/cmake.sh && wget https://raw.githubusercontent.com/apache/parquet-format/apache-parquet-format-2.9.0/src/main/thrift/parquet.thrift && ./cmake_build/compiler/cpp/bin/thrift --gen rs parquet.thrift
+```
+
+Then copy the generated `parquet.rs` into `src/format.rs` and commit the changes.
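For orientation when reviewing the regenerated file: thrift 0.16 emits each Parquet enum as an open newtype over `i32` with associated constants, rather than a closed Rust `enum`, so discriminants written by a newer writer round-trip instead of failing to parse. A minimal sketch of the pattern, using `PageType` for brevity (the vendored `src/format.rs` later in this patch is the authoritative output):

```rust
// Sketch of the thrift 0.16 "open enum" shape used throughout src/format.rs.
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct PageType(pub i32);

impl PageType {
    pub const DATA_PAGE: PageType = PageType(0);
    pub const INDEX_PAGE: PageType = PageType(1);
    pub const DICTIONARY_PAGE: PageType = PageType(2);
    pub const DATA_PAGE_V2: PageType = PageType(3);
}

impl From<i32> for PageType {
    fn from(i: i32) -> Self {
        match i {
            0 => PageType::DATA_PAGE,
            1 => PageType::INDEX_PAGE,
            2 => PageType::DICTIONARY_PAGE,
            3 => PageType::DATA_PAGE_V2,
            // Unknown discriminants are preserved rather than rejected.
            _ => PageType(i),
        }
    }
}
```

Because the generated types now accept arbitrary discriminants, this patch changes the crate's conversions out of them from `From` to `TryFrom` in `src/basic.rs` and elsewhere: an unknown value must surface as a `ParquetError` instead of being unrepresentable.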
\ No newline at end of file diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index d9b8495559c..80f672f055a 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -31,9 +31,8 @@ rust-version = "1.62" [dependencies] ahash = "0.8" -parquet-format = { version = "4.0.0", default-features = false } bytes = { version = "1.1", default-features = false, features = ["std"] } -thrift = { version = "0.13", default-features = false } +thrift = { version = "0.16", default-features = false } snap = { version = "1.0", default-features = false, optional = true } brotli = { version = "3.3", default-features = false, features = ["std"], optional = true } flate2 = { version = "1.0", default-features = false, features = ["rust_backend"], optional = true } diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 76e247ae1f1..333efe946ae 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1662,7 +1662,7 @@ mod tests { schema: TypePtr, field: Option, opts: &TestOptions, - ) -> Result { + ) -> Result { let mut writer_props = opts.writer_props(); if let Some(field) = field { let arrow_schema = Schema::new(vec![field]); diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index b6ee273ab56..8604ac0dc5f 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -121,7 +121,7 @@ impl RowSelection { #[cfg(any(test, feature = "async"))] pub(crate) fn scan_ranges( &self, - page_locations: &[parquet_format::PageLocation], + page_locations: &[crate::format::PageLocation], ) -> Vec> { let mut ranges = vec![]; let mut row_offset = 0; @@ -302,7 +302,7 @@ impl From for VecDeque { #[cfg(test)] mod tests { use super::*; - use parquet_format::PageLocation; + use crate::format::PageLocation; use rand::{thread_rng, Rng}; #[test] diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 6f9d5b3aff8..dc014e5ec46 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -230,7 +230,7 @@ impl ArrowWriter { } /// Close and finalize the underlying Parquet writer - pub fn close(mut self) -> Result { + pub fn close(mut self) -> Result { self.flush()?; self.writer.close() } diff --git a/parquet/src/arrow/async_reader.rs b/parquet/src/arrow/async_reader.rs index 201f2afcf0e..aca05fb9ca3 100644 --- a/parquet/src/arrow/async_reader.rs +++ b/parquet/src/arrow/async_reader.rs @@ -88,7 +88,7 @@ use bytes::{Buf, Bytes}; use futures::future::{BoxFuture, FutureExt}; use futures::ready; use futures::stream::Stream; -use parquet_format::OffsetIndex; +use crate::format::OffsetIndex; use thrift::protocol::TCompactInputProtocol; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 7adbc8c1b6d..268612a6e77 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -18,14 +18,14 @@ //! Contains Rust mappings for Thrift definition. //! Refer to `parquet.thrift` file to see raw definitions. 
-use std::{fmt, result, str}; +use std::{fmt, str}; -use parquet_format as parquet; +use crate::format as parquet; -use crate::errors::ParquetError; +use crate::errors::{ParquetError, Result}; -// Re-export parquet_format types used in this module -pub use parquet_format::{ +// Re-export crate::format types used in this module +pub use crate::format::{ BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType, StringType, TimeType, TimeUnit, TimestampType, UUIDType, }; @@ -496,32 +496,35 @@ impl fmt::Display for ColumnOrder { // ---------------------------------------------------------------------- // parquet::Type <=> Type conversion -impl From for Type { - fn from(value: parquet::Type) -> Self { - match value { - parquet::Type::Boolean => Type::BOOLEAN, - parquet::Type::Int32 => Type::INT32, - parquet::Type::Int64 => Type::INT64, - parquet::Type::Int96 => Type::INT96, - parquet::Type::Float => Type::FLOAT, - parquet::Type::Double => Type::DOUBLE, - parquet::Type::ByteArray => Type::BYTE_ARRAY, - parquet::Type::FixedLenByteArray => Type::FIXED_LEN_BYTE_ARRAY, - } +impl TryFrom for Type { + type Error = ParquetError; + + fn try_from(value: parquet::Type) -> Result { + Ok(match value { + parquet::Type::BOOLEAN => Type::BOOLEAN, + parquet::Type::INT32 => Type::INT32, + parquet::Type::INT64 => Type::INT64, + parquet::Type::INT96 => Type::INT96, + parquet::Type::FLOAT => Type::FLOAT, + parquet::Type::DOUBLE => Type::DOUBLE, + parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY, + parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY, + _ => return Err(general_err!("unexpected type: {}", value.0)), + }) } } impl From for parquet::Type { fn from(value: Type) -> Self { match value { - Type::BOOLEAN => parquet::Type::Boolean, - Type::INT32 => parquet::Type::Int32, - Type::INT64 => parquet::Type::Int64, - Type::INT96 => parquet::Type::Int96, - Type::FLOAT => parquet::Type::Float, - Type::DOUBLE => parquet::Type::Double, - Type::BYTE_ARRAY => parquet::Type::ByteArray, - Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FixedLenByteArray, + Type::BOOLEAN => parquet::Type::BOOLEAN, + Type::INT32 => parquet::Type::INT32, + Type::INT64 => parquet::Type::INT64, + Type::INT96 => parquet::Type::INT96, + Type::FLOAT => parquet::Type::FLOAT, + Type::DOUBLE => parquet::Type::DOUBLE, + Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY, + Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY, } } } @@ -529,39 +532,42 @@ impl From for parquet::Type { // ---------------------------------------------------------------------- // parquet::ConvertedType <=> ConvertedType conversion -impl From> for ConvertedType { - fn from(option: Option) -> Self { - match option { +impl TryFrom> for ConvertedType { + type Error = ParquetError; + + fn try_from(option: Option) -> Result { + Ok(match option { None => ConvertedType::NONE, Some(value) => match value { - parquet::ConvertedType::Utf8 => ConvertedType::UTF8, - parquet::ConvertedType::Map => ConvertedType::MAP, - parquet::ConvertedType::MapKeyValue => ConvertedType::MAP_KEY_VALUE, - parquet::ConvertedType::List => ConvertedType::LIST, - parquet::ConvertedType::Enum => ConvertedType::ENUM, - parquet::ConvertedType::Decimal => ConvertedType::DECIMAL, - parquet::ConvertedType::Date => ConvertedType::DATE, - parquet::ConvertedType::TimeMillis => ConvertedType::TIME_MILLIS, - parquet::ConvertedType::TimeMicros => ConvertedType::TIME_MICROS, - parquet::ConvertedType::TimestampMillis => { + parquet::ConvertedType::UTF8 => 
ConvertedType::UTF8, + parquet::ConvertedType::MAP => ConvertedType::MAP, + parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE, + parquet::ConvertedType::LIST => ConvertedType::LIST, + parquet::ConvertedType::ENUM => ConvertedType::ENUM, + parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL, + parquet::ConvertedType::DATE => ConvertedType::DATE, + parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS, + parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS, + parquet::ConvertedType::TIMESTAMP_MILLIS => { ConvertedType::TIMESTAMP_MILLIS } - parquet::ConvertedType::TimestampMicros => { + parquet::ConvertedType::TIMESTAMP_MICROS => { ConvertedType::TIMESTAMP_MICROS } - parquet::ConvertedType::Uint8 => ConvertedType::UINT_8, - parquet::ConvertedType::Uint16 => ConvertedType::UINT_16, - parquet::ConvertedType::Uint32 => ConvertedType::UINT_32, - parquet::ConvertedType::Uint64 => ConvertedType::UINT_64, - parquet::ConvertedType::Int8 => ConvertedType::INT_8, - parquet::ConvertedType::Int16 => ConvertedType::INT_16, - parquet::ConvertedType::Int32 => ConvertedType::INT_32, - parquet::ConvertedType::Int64 => ConvertedType::INT_64, - parquet::ConvertedType::Json => ConvertedType::JSON, - parquet::ConvertedType::Bson => ConvertedType::BSON, - parquet::ConvertedType::Interval => ConvertedType::INTERVAL, + parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8, + parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16, + parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32, + parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64, + parquet::ConvertedType::INT_8 => ConvertedType::INT_8, + parquet::ConvertedType::INT_16 => ConvertedType::INT_16, + parquet::ConvertedType::INT_32 => ConvertedType::INT_32, + parquet::ConvertedType::INT_64 => ConvertedType::INT_64, + parquet::ConvertedType::JSON => ConvertedType::JSON, + parquet::ConvertedType::BSON => ConvertedType::BSON, + parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL, + _ => return Err(general_err!("unexpected converted type: {}", value.0)), }, - } + }) } } @@ -569,32 +575,32 @@ impl From for Option { fn from(value: ConvertedType) -> Self { match value { ConvertedType::NONE => None, - ConvertedType::UTF8 => Some(parquet::ConvertedType::Utf8), - ConvertedType::MAP => Some(parquet::ConvertedType::Map), - ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MapKeyValue), - ConvertedType::LIST => Some(parquet::ConvertedType::List), - ConvertedType::ENUM => Some(parquet::ConvertedType::Enum), - ConvertedType::DECIMAL => Some(parquet::ConvertedType::Decimal), - ConvertedType::DATE => Some(parquet::ConvertedType::Date), - ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TimeMillis), - ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TimeMicros), + ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8), + ConvertedType::MAP => Some(parquet::ConvertedType::MAP), + ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE), + ConvertedType::LIST => Some(parquet::ConvertedType::LIST), + ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM), + ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL), + ConvertedType::DATE => Some(parquet::ConvertedType::DATE), + ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS), + ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS), ConvertedType::TIMESTAMP_MILLIS => { - Some(parquet::ConvertedType::TimestampMillis) + 
Some(parquet::ConvertedType::TIMESTAMP_MILLIS) } ConvertedType::TIMESTAMP_MICROS => { - Some(parquet::ConvertedType::TimestampMicros) + Some(parquet::ConvertedType::TIMESTAMP_MICROS) } - ConvertedType::UINT_8 => Some(parquet::ConvertedType::Uint8), - ConvertedType::UINT_16 => Some(parquet::ConvertedType::Uint16), - ConvertedType::UINT_32 => Some(parquet::ConvertedType::Uint32), - ConvertedType::UINT_64 => Some(parquet::ConvertedType::Uint64), - ConvertedType::INT_8 => Some(parquet::ConvertedType::Int8), - ConvertedType::INT_16 => Some(parquet::ConvertedType::Int16), - ConvertedType::INT_32 => Some(parquet::ConvertedType::Int32), - ConvertedType::INT_64 => Some(parquet::ConvertedType::Int64), - ConvertedType::JSON => Some(parquet::ConvertedType::Json), - ConvertedType::BSON => Some(parquet::ConvertedType::Bson), - ConvertedType::INTERVAL => Some(parquet::ConvertedType::Interval), + ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8), + ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16), + ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32), + ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64), + ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8), + ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16), + ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32), + ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64), + ConvertedType::JSON => Some(parquet::ConvertedType::JSON), + ConvertedType::BSON => Some(parquet::ConvertedType::BSON), + ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL), } } } @@ -730,22 +736,25 @@ impl From> for ConvertedType { // ---------------------------------------------------------------------- // parquet::FieldRepetitionType <=> Repetition conversion -impl From for Repetition { - fn from(value: parquet::FieldRepetitionType) -> Self { - match value { - parquet::FieldRepetitionType::Required => Repetition::REQUIRED, - parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL, - parquet::FieldRepetitionType::Repeated => Repetition::REPEATED, - } +impl TryFrom for Repetition { + type Error = ParquetError; + + fn try_from(value: parquet::FieldRepetitionType) -> Result { + Ok(match value { + parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED, + parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL, + parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED, + _ => return Err(general_err!("unexpected repetition type: {}", value.0)), + }) } } impl From for parquet::FieldRepetitionType { fn from(value: Repetition) -> Self { match value { - Repetition::REQUIRED => parquet::FieldRepetitionType::Required, - Repetition::OPTIONAL => parquet::FieldRepetitionType::Optional, - Repetition::REPEATED => parquet::FieldRepetitionType::Repeated, + Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED, + Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL, + Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED, } } } @@ -753,34 +762,41 @@ impl From for parquet::FieldRepetitionType { // ---------------------------------------------------------------------- // parquet::Encoding <=> Encoding conversion -impl From for Encoding { - fn from(value: parquet::Encoding) -> Self { - match value { - parquet::Encoding::Plain => Encoding::PLAIN, - parquet::Encoding::PlainDictionary => Encoding::PLAIN_DICTIONARY, - parquet::Encoding::Rle => Encoding::RLE, - parquet::Encoding::BitPacked => Encoding::BIT_PACKED, - parquet::Encoding::DeltaBinaryPacked 
=> Encoding::DELTA_BINARY_PACKED, - parquet::Encoding::DeltaLengthByteArray => Encoding::DELTA_LENGTH_BYTE_ARRAY, - parquet::Encoding::DeltaByteArray => Encoding::DELTA_BYTE_ARRAY, - parquet::Encoding::RleDictionary => Encoding::RLE_DICTIONARY, - parquet::Encoding::ByteStreamSplit => Encoding::BYTE_STREAM_SPLIT, - } +impl TryFrom for Encoding { + type Error = ParquetError; + + fn try_from(value: parquet::Encoding) -> Result { + Ok(match value { + parquet::Encoding::PLAIN => Encoding::PLAIN, + parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY, + parquet::Encoding::RLE => Encoding::RLE, + parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED, + parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED, + parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => { + Encoding::DELTA_LENGTH_BYTE_ARRAY + } + parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY, + parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY, + parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT, + _ => return Err(general_err!("unexpected encoding: {}", value.0)), + }) } } impl From for parquet::Encoding { fn from(value: Encoding) -> Self { match value { - Encoding::PLAIN => parquet::Encoding::Plain, - Encoding::PLAIN_DICTIONARY => parquet::Encoding::PlainDictionary, - Encoding::RLE => parquet::Encoding::Rle, - Encoding::BIT_PACKED => parquet::Encoding::BitPacked, - Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DeltaBinaryPacked, - Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DeltaLengthByteArray, - Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DeltaByteArray, - Encoding::RLE_DICTIONARY => parquet::Encoding::RleDictionary, - Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::ByteStreamSplit, + Encoding::PLAIN => parquet::Encoding::PLAIN, + Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY, + Encoding::RLE => parquet::Encoding::RLE, + Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED, + Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED, + Encoding::DELTA_LENGTH_BYTE_ARRAY => { + parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY + } + Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY, + Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY, + Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT, } } } @@ -788,30 +804,33 @@ impl From for parquet::Encoding { // ---------------------------------------------------------------------- // parquet::CompressionCodec <=> Compression conversion -impl From for Compression { - fn from(value: parquet::CompressionCodec) -> Self { - match value { - parquet::CompressionCodec::Uncompressed => Compression::UNCOMPRESSED, - parquet::CompressionCodec::Snappy => Compression::SNAPPY, - parquet::CompressionCodec::Gzip => Compression::GZIP, - parquet::CompressionCodec::Lzo => Compression::LZO, - parquet::CompressionCodec::Brotli => Compression::BROTLI, - parquet::CompressionCodec::Lz4 => Compression::LZ4, - parquet::CompressionCodec::Zstd => Compression::ZSTD, - } +impl TryFrom for Compression { + type Error = ParquetError; + + fn try_from(value: parquet::CompressionCodec) -> Result { + Ok(match value { + parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED, + parquet::CompressionCodec::SNAPPY => Compression::SNAPPY, + parquet::CompressionCodec::GZIP => Compression::GZIP, + parquet::CompressionCodec::LZO => Compression::LZO, + parquet::CompressionCodec::BROTLI => Compression::BROTLI, + parquet::CompressionCodec::LZ4 => Compression::LZ4, 
+ parquet::CompressionCodec::ZSTD => Compression::ZSTD, + _ => return Err(general_err!("unexpected compression codec: {}", value.0)), + }) } } impl From for parquet::CompressionCodec { fn from(value: Compression) -> Self { match value { - Compression::UNCOMPRESSED => parquet::CompressionCodec::Uncompressed, - Compression::SNAPPY => parquet::CompressionCodec::Snappy, - Compression::GZIP => parquet::CompressionCodec::Gzip, - Compression::LZO => parquet::CompressionCodec::Lzo, - Compression::BROTLI => parquet::CompressionCodec::Brotli, - Compression::LZ4 => parquet::CompressionCodec::Lz4, - Compression::ZSTD => parquet::CompressionCodec::Zstd, + Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED, + Compression::SNAPPY => parquet::CompressionCodec::SNAPPY, + Compression::GZIP => parquet::CompressionCodec::GZIP, + Compression::LZO => parquet::CompressionCodec::LZO, + Compression::BROTLI => parquet::CompressionCodec::BROTLI, + Compression::LZ4 => parquet::CompressionCodec::LZ4, + Compression::ZSTD => parquet::CompressionCodec::ZSTD, } } } @@ -819,24 +838,27 @@ impl From for parquet::CompressionCodec { // ---------------------------------------------------------------------- // parquet::PageType <=> PageType conversion -impl From for PageType { - fn from(value: parquet::PageType) -> Self { - match value { - parquet::PageType::DataPage => PageType::DATA_PAGE, - parquet::PageType::IndexPage => PageType::INDEX_PAGE, - parquet::PageType::DictionaryPage => PageType::DICTIONARY_PAGE, - parquet::PageType::DataPageV2 => PageType::DATA_PAGE_V2, - } +impl TryFrom for PageType { + type Error = ParquetError; + + fn try_from(value: parquet::PageType) -> Result { + Ok(match value { + parquet::PageType::DATA_PAGE => PageType::DATA_PAGE, + parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE, + parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE, + parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2, + _ => return Err(general_err!("unexpected page type: {}", value.0)), + }) } } impl From for parquet::PageType { fn from(value: PageType) -> Self { match value { - PageType::DATA_PAGE => parquet::PageType::DataPage, - PageType::INDEX_PAGE => parquet::PageType::IndexPage, - PageType::DICTIONARY_PAGE => parquet::PageType::DictionaryPage, - PageType::DATA_PAGE_V2 => parquet::PageType::DataPageV2, + PageType::DATA_PAGE => parquet::PageType::DATA_PAGE, + PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE, + PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE, + PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2, } } } @@ -847,7 +869,7 @@ impl From for parquet::PageType { impl str::FromStr for Repetition { type Err = ParquetError; - fn from_str(s: &str) -> result::Result { + fn from_str(s: &str) -> Result { match s { "REQUIRED" => Ok(Repetition::REQUIRED), "OPTIONAL" => Ok(Repetition::OPTIONAL), @@ -860,7 +882,7 @@ impl str::FromStr for Repetition { impl str::FromStr for Type { type Err = ParquetError; - fn from_str(s: &str) -> result::Result { + fn from_str(s: &str) -> Result { match s { "BOOLEAN" => Ok(Type::BOOLEAN), "INT32" => Ok(Type::INT32), @@ -878,7 +900,7 @@ impl str::FromStr for Type { impl str::FromStr for ConvertedType { type Err = ParquetError; - fn from_str(s: &str) -> result::Result { + fn from_str(s: &str) -> Result { match s { "NONE" => Ok(ConvertedType::NONE), "UTF8" => Ok(ConvertedType::UTF8), @@ -911,7 +933,7 @@ impl str::FromStr for ConvertedType { impl str::FromStr for LogicalType { type Err = ParquetError; - fn from_str(s: &str) -> 
result::Result { + fn from_str(s: &str) -> Result { match s { // The type is a placeholder that gets updated elsewhere "INTEGER" => Ok(LogicalType::Integer { @@ -966,30 +988,36 @@ mod tests { #[test] fn test_from_type() { - assert_eq!(Type::from(parquet::Type::Boolean), Type::BOOLEAN); - assert_eq!(Type::from(parquet::Type::Int32), Type::INT32); - assert_eq!(Type::from(parquet::Type::Int64), Type::INT64); - assert_eq!(Type::from(parquet::Type::Int96), Type::INT96); - assert_eq!(Type::from(parquet::Type::Float), Type::FLOAT); - assert_eq!(Type::from(parquet::Type::Double), Type::DOUBLE); - assert_eq!(Type::from(parquet::Type::ByteArray), Type::BYTE_ARRAY); - assert_eq!( - Type::from(parquet::Type::FixedLenByteArray), + assert_eq!( + Type::try_from(parquet::Type::BOOLEAN).unwrap(), + Type::BOOLEAN + ); + assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32); + assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64); + assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96); + assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT); + assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE); + assert_eq!( + Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(), + Type::BYTE_ARRAY + ); + assert_eq!( + Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(), Type::FIXED_LEN_BYTE_ARRAY ); } #[test] fn test_into_type() { - assert_eq!(parquet::Type::Boolean, Type::BOOLEAN.into()); - assert_eq!(parquet::Type::Int32, Type::INT32.into()); - assert_eq!(parquet::Type::Int64, Type::INT64.into()); - assert_eq!(parquet::Type::Int96, Type::INT96.into()); - assert_eq!(parquet::Type::Float, Type::FLOAT.into()); - assert_eq!(parquet::Type::Double, Type::DOUBLE.into()); - assert_eq!(parquet::Type::ByteArray, Type::BYTE_ARRAY.into()); - assert_eq!( - parquet::Type::FixedLenByteArray, + assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into()); + assert_eq!(parquet::Type::INT32, Type::INT32.into()); + assert_eq!(parquet::Type::INT64, Type::INT64.into()); + assert_eq!(parquet::Type::INT96, Type::INT96.into()); + assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into()); + assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into()); + assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into()); + assert_eq!( + parquet::Type::FIXED_LEN_BYTE_ARRAY, Type::FIXED_LEN_BYTE_ARRAY.into() ); } @@ -1072,97 +1100,102 @@ mod tests { #[test] fn test_from_converted_type() { let parquet_conv_none: Option = None; - assert_eq!(ConvertedType::from(parquet_conv_none), ConvertedType::NONE); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Utf8)), + ConvertedType::try_from(parquet_conv_none).unwrap(), + ConvertedType::NONE + ); + assert_eq!( + ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(), ConvertedType::UTF8 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Map)), + ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(), ConvertedType::MAP ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::MapKeyValue)), + ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(), ConvertedType::MAP_KEY_VALUE ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::List)), + ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(), ConvertedType::LIST ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Enum)), + ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(), ConvertedType::ENUM ); assert_eq!( - 
ConvertedType::from(Some(parquet::ConvertedType::Decimal)), + ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(), ConvertedType::DECIMAL ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Date)), + ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(), ConvertedType::DATE ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::TimeMillis)), + ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(), ConvertedType::TIME_MILLIS ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::TimeMicros)), + ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(), ConvertedType::TIME_MICROS ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::TimestampMillis)), + ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)) + .unwrap(), ConvertedType::TIMESTAMP_MILLIS ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::TimestampMicros)), + ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)) + .unwrap(), ConvertedType::TIMESTAMP_MICROS ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Uint8)), + ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(), ConvertedType::UINT_8 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Uint16)), + ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(), ConvertedType::UINT_16 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Uint32)), + ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(), ConvertedType::UINT_32 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Uint64)), + ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(), ConvertedType::UINT_64 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Int8)), + ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(), ConvertedType::INT_8 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Int16)), + ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(), ConvertedType::INT_16 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Int32)), + ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(), ConvertedType::INT_32 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Int64)), + ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(), ConvertedType::INT_64 ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Json)), + ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(), ConvertedType::JSON ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Bson)), + ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(), ConvertedType::BSON ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Interval)), + ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(), ConvertedType::INTERVAL ); assert_eq!( - ConvertedType::from(Some(parquet::ConvertedType::Decimal)), + ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(), ConvertedType::DECIMAL ) } @@ -1172,92 +1205,92 @@ mod tests { let converted_type: Option = None; assert_eq!(converted_type, ConvertedType::NONE.into()); assert_eq!( - Some(parquet::ConvertedType::Utf8), + Some(parquet::ConvertedType::UTF8), ConvertedType::UTF8.into() ); - assert_eq!(Some(parquet::ConvertedType::Map), ConvertedType::MAP.into()); + assert_eq!(Some(parquet::ConvertedType::MAP), 
ConvertedType::MAP.into()); assert_eq!( - Some(parquet::ConvertedType::MapKeyValue), + Some(parquet::ConvertedType::MAP_KEY_VALUE), ConvertedType::MAP_KEY_VALUE.into() ); assert_eq!( - Some(parquet::ConvertedType::List), + Some(parquet::ConvertedType::LIST), ConvertedType::LIST.into() ); assert_eq!( - Some(parquet::ConvertedType::Enum), + Some(parquet::ConvertedType::ENUM), ConvertedType::ENUM.into() ); assert_eq!( - Some(parquet::ConvertedType::Decimal), + Some(parquet::ConvertedType::DECIMAL), ConvertedType::DECIMAL.into() ); assert_eq!( - Some(parquet::ConvertedType::Date), + Some(parquet::ConvertedType::DATE), ConvertedType::DATE.into() ); assert_eq!( - Some(parquet::ConvertedType::TimeMillis), + Some(parquet::ConvertedType::TIME_MILLIS), ConvertedType::TIME_MILLIS.into() ); assert_eq!( - Some(parquet::ConvertedType::TimeMicros), + Some(parquet::ConvertedType::TIME_MICROS), ConvertedType::TIME_MICROS.into() ); assert_eq!( - Some(parquet::ConvertedType::TimestampMillis), + Some(parquet::ConvertedType::TIMESTAMP_MILLIS), ConvertedType::TIMESTAMP_MILLIS.into() ); assert_eq!( - Some(parquet::ConvertedType::TimestampMicros), + Some(parquet::ConvertedType::TIMESTAMP_MICROS), ConvertedType::TIMESTAMP_MICROS.into() ); assert_eq!( - Some(parquet::ConvertedType::Uint8), + Some(parquet::ConvertedType::UINT_8), ConvertedType::UINT_8.into() ); assert_eq!( - Some(parquet::ConvertedType::Uint16), + Some(parquet::ConvertedType::UINT_16), ConvertedType::UINT_16.into() ); assert_eq!( - Some(parquet::ConvertedType::Uint32), + Some(parquet::ConvertedType::UINT_32), ConvertedType::UINT_32.into() ); assert_eq!( - Some(parquet::ConvertedType::Uint64), + Some(parquet::ConvertedType::UINT_64), ConvertedType::UINT_64.into() ); assert_eq!( - Some(parquet::ConvertedType::Int8), + Some(parquet::ConvertedType::INT_8), ConvertedType::INT_8.into() ); assert_eq!( - Some(parquet::ConvertedType::Int16), + Some(parquet::ConvertedType::INT_16), ConvertedType::INT_16.into() ); assert_eq!( - Some(parquet::ConvertedType::Int32), + Some(parquet::ConvertedType::INT_32), ConvertedType::INT_32.into() ); assert_eq!( - Some(parquet::ConvertedType::Int64), + Some(parquet::ConvertedType::INT_64), ConvertedType::INT_64.into() ); assert_eq!( - Some(parquet::ConvertedType::Json), + Some(parquet::ConvertedType::JSON), ConvertedType::JSON.into() ); assert_eq!( - Some(parquet::ConvertedType::Bson), + Some(parquet::ConvertedType::BSON), ConvertedType::BSON.into() ); assert_eq!( - Some(parquet::ConvertedType::Interval), + Some(parquet::ConvertedType::INTERVAL), ConvertedType::INTERVAL.into() ); assert_eq!( - Some(parquet::ConvertedType::Decimal), + Some(parquet::ConvertedType::DECIMAL), ConvertedType::DECIMAL.into() ) } @@ -1591,15 +1624,15 @@ mod tests { #[test] fn test_from_repetition() { assert_eq!( - Repetition::from(parquet::FieldRepetitionType::Required), + Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(), Repetition::REQUIRED ); assert_eq!( - Repetition::from(parquet::FieldRepetitionType::Optional), + Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(), Repetition::OPTIONAL ); assert_eq!( - Repetition::from(parquet::FieldRepetitionType::Repeated), + Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(), Repetition::REPEATED ); } @@ -1607,15 +1640,15 @@ mod tests { #[test] fn test_into_repetition() { assert_eq!( - parquet::FieldRepetitionType::Required, + parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED.into() ); assert_eq!( - parquet::FieldRepetitionType::Optional, + 
parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL.into() ); assert_eq!( - parquet::FieldRepetitionType::Repeated, + parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED.into() ); } @@ -1665,49 +1698,55 @@ mod tests { #[test] fn test_from_encoding() { - assert_eq!(Encoding::from(parquet::Encoding::Plain), Encoding::PLAIN); assert_eq!( - Encoding::from(parquet::Encoding::PlainDictionary), + Encoding::try_from(parquet::Encoding::PLAIN).unwrap(), + Encoding::PLAIN + ); + assert_eq!( + Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(), Encoding::PLAIN_DICTIONARY ); - assert_eq!(Encoding::from(parquet::Encoding::Rle), Encoding::RLE); assert_eq!( - Encoding::from(parquet::Encoding::BitPacked), + Encoding::try_from(parquet::Encoding::RLE).unwrap(), + Encoding::RLE + ); + assert_eq!( + Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(), Encoding::BIT_PACKED ); assert_eq!( - Encoding::from(parquet::Encoding::DeltaBinaryPacked), + Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(), Encoding::DELTA_BINARY_PACKED ); assert_eq!( - Encoding::from(parquet::Encoding::DeltaLengthByteArray), + Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(), Encoding::DELTA_LENGTH_BYTE_ARRAY ); assert_eq!( - Encoding::from(parquet::Encoding::DeltaByteArray), + Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(), Encoding::DELTA_BYTE_ARRAY ); } #[test] fn test_into_encoding() { - assert_eq!(parquet::Encoding::Plain, Encoding::PLAIN.into()); + assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into()); assert_eq!( - parquet::Encoding::PlainDictionary, + parquet::Encoding::PLAIN_DICTIONARY, Encoding::PLAIN_DICTIONARY.into() ); - assert_eq!(parquet::Encoding::Rle, Encoding::RLE.into()); - assert_eq!(parquet::Encoding::BitPacked, Encoding::BIT_PACKED.into()); + assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into()); + assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into()); assert_eq!( - parquet::Encoding::DeltaBinaryPacked, + parquet::Encoding::DELTA_BINARY_PACKED, Encoding::DELTA_BINARY_PACKED.into() ); assert_eq!( - parquet::Encoding::DeltaLengthByteArray, + parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY, Encoding::DELTA_LENGTH_BYTE_ARRAY.into() ); assert_eq!( - parquet::Encoding::DeltaByteArray, + parquet::Encoding::DELTA_BYTE_ARRAY, Encoding::DELTA_BYTE_ARRAY.into() ); } @@ -1726,31 +1765,31 @@ mod tests { #[test] fn test_from_compression() { assert_eq!( - Compression::from(parquet::CompressionCodec::Uncompressed), + Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(), Compression::UNCOMPRESSED ); assert_eq!( - Compression::from(parquet::CompressionCodec::Snappy), + Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(), Compression::SNAPPY ); assert_eq!( - Compression::from(parquet::CompressionCodec::Gzip), + Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(), Compression::GZIP ); assert_eq!( - Compression::from(parquet::CompressionCodec::Lzo), + Compression::try_from(parquet::CompressionCodec::LZO).unwrap(), Compression::LZO ); assert_eq!( - Compression::from(parquet::CompressionCodec::Brotli), + Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(), Compression::BROTLI ); assert_eq!( - Compression::from(parquet::CompressionCodec::Lz4), + Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(), Compression::LZ4 ); assert_eq!( - Compression::from(parquet::CompressionCodec::Zstd), + Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(), 
Compression::ZSTD ); } @@ -1758,21 +1797,21 @@ mod tests { #[test] fn test_into_compression() { assert_eq!( - parquet::CompressionCodec::Uncompressed, + parquet::CompressionCodec::UNCOMPRESSED, Compression::UNCOMPRESSED.into() ); assert_eq!( - parquet::CompressionCodec::Snappy, + parquet::CompressionCodec::SNAPPY, Compression::SNAPPY.into() ); - assert_eq!(parquet::CompressionCodec::Gzip, Compression::GZIP.into()); - assert_eq!(parquet::CompressionCodec::Lzo, Compression::LZO.into()); + assert_eq!(parquet::CompressionCodec::GZIP, Compression::GZIP.into()); + assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into()); assert_eq!( - parquet::CompressionCodec::Brotli, + parquet::CompressionCodec::BROTLI, Compression::BROTLI.into() ); - assert_eq!(parquet::CompressionCodec::Lz4, Compression::LZ4.into()); - assert_eq!(parquet::CompressionCodec::Zstd, Compression::ZSTD.into()); + assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into()); + assert_eq!(parquet::CompressionCodec::ZSTD, Compression::ZSTD.into()); } #[test] @@ -1786,32 +1825,35 @@ mod tests { #[test] fn test_from_page_type() { assert_eq!( - PageType::from(parquet::PageType::DataPage), + PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(), PageType::DATA_PAGE ); assert_eq!( - PageType::from(parquet::PageType::IndexPage), + PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(), PageType::INDEX_PAGE ); assert_eq!( - PageType::from(parquet::PageType::DictionaryPage), + PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(), PageType::DICTIONARY_PAGE ); assert_eq!( - PageType::from(parquet::PageType::DataPageV2), + PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(), PageType::DATA_PAGE_V2 ); } #[test] fn test_into_page_type() { - assert_eq!(parquet::PageType::DataPage, PageType::DATA_PAGE.into()); - assert_eq!(parquet::PageType::IndexPage, PageType::INDEX_PAGE.into()); + assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into()); + assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into()); assert_eq!( - parquet::PageType::DictionaryPage, + parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE.into() ); - assert_eq!(parquet::PageType::DataPageV2, PageType::DATA_PAGE_V2.into()); + assert_eq!( + parquet::PageType::DATA_PAGE_V2, + PageType::DATA_PAGE_V2.into() + ); } #[test] diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index ab2d885a23f..ddb6d243ebd 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -20,9 +20,9 @@ use crate::basic::{Encoding, PageType}; use crate::errors::{ParquetError, Result}; use crate::file::{metadata::ColumnChunkMetaData, statistics::Statistics}; +use crate::format::PageHeader; use crate::schema::types::{ColumnDescPtr, SchemaDescPtr}; use crate::util::memory::ByteBufferPtr; -use parquet_format::PageHeader; /// Parquet Page definition. 
/// @@ -209,15 +209,15 @@ impl TryFrom<&PageHeader> for PageMetadata { fn try_from(value: &PageHeader) -> std::result::Result { match value.type_ { - parquet_format::PageType::DataPage => Ok(PageMetadata { + crate::format::PageType::DATA_PAGE => Ok(PageMetadata { num_rows: value.data_page_header.as_ref().unwrap().num_values as usize, is_dict: false, }), - parquet_format::PageType::DictionaryPage => Ok(PageMetadata { + crate::format::PageType::DICTIONARY_PAGE => Ok(PageMetadata { num_rows: usize::MIN, is_dict: true, }), - parquet_format::PageType::DataPageV2 => Ok(PageMetadata { + crate::format::PageType::DATA_PAGE_V2 => Ok(PageMetadata { num_rows: value.data_page_header_v2.as_ref().unwrap().num_rows as usize, is_dict: false, }), diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 05e32f7e48a..f9dd2d8d39b 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -16,7 +16,7 @@ // under the License. //! Contains column writer API. -use parquet_format::{ColumnIndex, OffsetIndex}; +use crate::format::{ColumnIndex, OffsetIndex}; use std::collections::{BTreeSet, VecDeque}; use crate::basic::{Compression, ConvertedType, Encoding, LogicalType, PageType, Type}; @@ -1089,8 +1089,8 @@ fn compare_greater_byte_array_decimals(a: &[u8], b: &[u8]) -> bool { #[cfg(test)] mod tests { + use crate::format::BoundaryOrder; use bytes::Bytes; - use parquet_format::BoundaryOrder; use rand::distributions::uniform::SampleUniform; use std::sync::Arc; @@ -2086,7 +2086,7 @@ mod tests { // column index assert_eq!(2, column_index.null_pages.len()); assert_eq!(2, offset_index.page_locations.len()); - assert_eq!(BoundaryOrder::Unordered, column_index.boundary_order); + assert_eq!(BoundaryOrder::UNORDERED, column_index.boundary_order); for idx in 0..2 { assert!(!column_index.null_pages[idx]); assert_eq!(0, column_index.null_counts.as_ref().unwrap()[idx]); diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs index 30afec55eb3..ad965455f93 100644 --- a/parquet/src/file/footer.rs +++ b/parquet/src/file/footer.rs @@ -17,7 +17,7 @@ use std::{io::Read, sync::Arc}; -use parquet_format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData}; +use crate::format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData}; use thrift::protocol::TCompactInputProtocol; use crate::basic::ColumnOrder; @@ -150,7 +150,7 @@ mod tests { use crate::basic::SortOrder; use crate::basic::Type; use crate::schema::types::Type as SchemaType; - use parquet_format::TypeDefinedOrder; + use crate::format::TypeDefinedOrder; #[test] fn test_parse_metadata_size_smaller_than_footer() { diff --git a/parquet/src/file/metadata.rs b/parquet/src/file/metadata.rs index 018dd95d9f3..0804890c22a 100644 --- a/parquet/src/file/metadata.rs +++ b/parquet/src/file/metadata.rs @@ -35,7 +35,7 @@ use std::sync::Arc; -use parquet_format::{ +use crate::format::{ BoundaryOrder, ColumnChunk, ColumnIndex, ColumnMetaData, OffsetIndex, PageLocation, RowGroup, }; @@ -122,7 +122,7 @@ impl ParquetMetaData { } } -pub type KeyValue = parquet_format::KeyValue; +pub type KeyValue = crate::format::KeyValue; /// Reference counted pointer for [`FileMetaData`]. 
pub type FileMetaDataPtr = Arc; @@ -553,14 +553,14 @@ impl ColumnChunkMetaData { return Err(general_err!("Expected to have column metadata")); } let mut col_metadata: ColumnMetaData = cc.meta_data.unwrap(); - let column_type = Type::from(col_metadata.type_); + let column_type = Type::try_from(col_metadata.type_)?; let column_path = ColumnPath::new(col_metadata.path_in_schema); let encodings = col_metadata .encodings .drain(0..) - .map(Encoding::from) - .collect(); - let compression = Compression::from(col_metadata.codec); + .map(Encoding::try_from) + .collect::>()?; + let compression = Compression::try_from(col_metadata.codec)?; let file_path = cc.file_path; let file_offset = cc.file_offset; let num_values = col_metadata.num_values; @@ -573,7 +573,12 @@ impl ColumnChunkMetaData { let encoding_stats = col_metadata .encoding_stats .as_ref() - .map(|vec| vec.iter().map(page_encoding_stats::from_thrift).collect()); + .map(|vec| { + vec.iter() + .map(page_encoding_stats::try_from_thrift) + .collect::>() + }) + .transpose()?; let bloom_filter_offset = col_metadata.bloom_filter_offset; let offset_index_offset = cc.offset_index_offset; let offset_index_length = cc.offset_index_length; @@ -846,7 +851,7 @@ impl ColumnIndexBuilder { null_pages: Vec::new(), min_values: Vec::new(), max_values: Vec::new(), - boundary_order: BoundaryOrder::Unordered, + boundary_order: BoundaryOrder::UNORDERED, null_counts: Vec::new(), valid: true, } diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index e499a094ae0..eb26804784a 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -16,7 +16,8 @@ // under the License. use crate::basic::{Encoding, PageType}; -use parquet_format::{ +use crate::errors::Result; +use crate::format::{ Encoding as TEncoding, PageEncodingStats as TPageEncodingStats, PageType as TPageType, }; @@ -32,16 +33,18 @@ pub struct PageEncodingStats { } /// Converts Thrift definition into `PageEncodingStats`. -pub fn from_thrift(thrift_encoding_stats: &TPageEncodingStats) -> PageEncodingStats { - let page_type = PageType::from(thrift_encoding_stats.page_type); - let encoding = Encoding::from(thrift_encoding_stats.encoding); +pub fn try_from_thrift( + thrift_encoding_stats: &TPageEncodingStats, +) -> Result { + let page_type = PageType::try_from(thrift_encoding_stats.page_type)?; + let encoding = Encoding::try_from(thrift_encoding_stats.encoding)?; let count = thrift_encoding_stats.count; - PageEncodingStats { + Ok(PageEncodingStats { page_type, encoding, count, - } + }) } /// Converts `PageEncodingStats` into Thrift definition. 
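An aside on the conversion plumbing above: once `Encoding::try_from` and `try_from_thrift` are fallible, vectors of generated values are decoded by mapping the conversion and collecting into `Result<Vec<_>>`, so a single unknown discriminant fails the whole decode, as in the `encodings` and `encoding_stats` handling in `metadata.rs`. A self-contained sketch of the idiom, with toy stand-ins (`Codec`, `Compression`) rather than the crate's real types:

```rust
use std::convert::TryFrom;

// Open enum stand-in: any i32 is representable.
#[derive(Debug, Clone, Copy, PartialEq)]
struct Codec(i32);

// Closed crate-side enum stand-in.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Compression { Uncompressed, Snappy }

impl TryFrom<Codec> for Compression {
    type Error = String;
    fn try_from(c: Codec) -> Result<Self, Self::Error> {
        match c.0 {
            0 => Ok(Compression::Uncompressed),
            1 => Ok(Compression::Snappy),
            other => Err(format!("unexpected compression codec: {other}")),
        }
    }
}

fn main() {
    // Collecting into Result<Vec<_>, _> stops at the first unknown value.
    let ok: Result<Vec<Compression>, _> = vec![Codec(0), Codec(1)]
        .into_iter()
        .map(Compression::try_from)
        .collect();
    assert_eq!(ok, Ok(vec![Compression::Uncompressed, Compression::Snappy]));

    let bad: Result<Vec<Compression>, _> = vec![Codec(42)]
        .into_iter()
        .map(Compression::try_from)
        .collect();
    assert!(bad.is_err());
}
```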
@@ -70,6 +73,6 @@ mod tests { count: 1, }; - assert_eq!(from_thrift(&to_thrift(&stats)), stats); + assert_eq!(try_from_thrift(&to_thrift(&stats)).unwrap(), stats); } } diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs index f29b80accae..ac81157e899 100644 --- a/parquet/src/file/page_index/index.rs +++ b/parquet/src/file/page_index/index.rs @@ -20,7 +20,7 @@ use crate::data_type::private::ParquetValueType; use crate::data_type::Int96; use crate::errors::ParquetError; use crate::util::bit_util::from_le_slice; -use parquet_format::{BoundaryOrder, ColumnIndex}; +use crate::format::{BoundaryOrder, ColumnIndex}; use std::fmt::Debug; /// The statistics in one page diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index e6a4e598102..99877a92105 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -21,7 +21,7 @@ use crate::errors::ParquetError; use crate::file::metadata::ColumnChunkMetaData; use crate::file::page_index::index::{BooleanIndex, ByteArrayIndex, Index, NativeIndex}; use crate::file::reader::ChunkReader; -use parquet_format::{ColumnIndex, OffsetIndex, PageLocation}; +use crate::format::{ColumnIndex, OffsetIndex, PageLocation}; use std::io::{Cursor, Read}; use thrift::protocol::TCompactInputProtocol; diff --git a/parquet/src/file/page_index/range.rs b/parquet/src/file/page_index/range.rs index e9741ec8e7f..816ea4025f2 100644 --- a/parquet/src/file/page_index/range.rs +++ b/parquet/src/file/page_index/range.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. use crate::errors::ParquetError; -use parquet_format::PageLocation; +use crate::format::PageLocation; use std::cmp::Ordering; use std::collections::VecDeque; use std::ops::RangeInclusive; @@ -284,7 +284,7 @@ mod tests { use crate::basic::Type::INT32; use crate::file::page_index::index::{NativeIndex, PageIndex}; use crate::file::page_index::range::{compute_row_ranges, Range, RowRanges}; - use parquet_format::{BoundaryOrder, PageLocation}; + use crate::format::{BoundaryOrder, PageLocation}; #[test] fn test_binary_search_overlap() { @@ -445,7 +445,7 @@ mod tests { null_count: Some(0), }, ], - boundary_order: BoundaryOrder::Ascending, + boundary_order: BoundaryOrder::ASCENDING, }; let locations = &[ PageLocation { diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index f3beb57c02e..cd90b0d0b67 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -22,8 +22,8 @@ use std::collections::VecDeque; use std::io::Cursor; use std::{convert::TryFrom, fs::File, io::Read, path::Path, sync::Arc}; +use crate::format::{PageHeader, PageLocation, PageType}; use bytes::{Buf, Bytes}; -use parquet_format::{PageHeader, PageLocation, PageType}; use thrift::protocol::TCompactInputProtocol; use crate::basic::{Encoding, Type}; @@ -436,37 +436,37 @@ pub(crate) fn decode_page( }; let result = match page_header.type_ { - PageType::DictionaryPage => { + PageType::DICTIONARY_PAGE => { assert!(page_header.dictionary_page_header.is_some()); let dict_header = page_header.dictionary_page_header.as_ref().unwrap(); let is_sorted = dict_header.is_sorted.unwrap_or(false); Page::DictionaryPage { buf: buffer, num_values: dict_header.num_values as u32, - encoding: Encoding::from(dict_header.encoding), + encoding: Encoding::try_from(dict_header.encoding)?, is_sorted, } } - PageType::DataPage => { + 
PageType::DATA_PAGE => { assert!(page_header.data_page_header.is_some()); let header = page_header.data_page_header.unwrap(); Page::DataPage { buf: buffer, num_values: header.num_values as u32, - encoding: Encoding::from(header.encoding), - def_level_encoding: Encoding::from(header.definition_level_encoding), - rep_level_encoding: Encoding::from(header.repetition_level_encoding), + encoding: Encoding::try_from(header.encoding)?, + def_level_encoding: Encoding::try_from(header.definition_level_encoding)?, + rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?, statistics: statistics::from_thrift(physical_type, header.statistics), } } - PageType::DataPageV2 => { + PageType::DATA_PAGE_V2 => { assert!(page_header.data_page_header_v2.is_some()); let header = page_header.data_page_header_v2.unwrap(); let is_compressed = header.is_compressed.unwrap_or(true); Page::DataPageV2 { buf: buffer, num_values: header.num_values as u32, - encoding: Encoding::from(header.encoding), + encoding: Encoding::try_from(header.encoding)?, num_nulls: header.num_nulls as u32, num_rows: header.num_rows as u32, def_levels_byte_len: header.definition_levels_byte_length as u32, @@ -600,7 +600,7 @@ impl PageReader for SerializedPageReader { *offset += data_len; *remaining -= data_len; - if header.type_ == PageType::IndexPage { + if header.type_ == PageType::INDEX_PAGE { continue; } @@ -754,7 +754,7 @@ impl PageReader for SerializedPageReader { mod tests { use std::sync::Arc; - use parquet_format::BoundaryOrder; + use crate::format::BoundaryOrder; use crate::basic::{self, ColumnOrder}; use crate::data_type::private::ParquetValueType; @@ -1281,7 +1281,7 @@ mod tests { unreachable!() }; - assert_eq!(index.boundary_order, BoundaryOrder::Ascending); + assert_eq!(index.boundary_order, BoundaryOrder::ASCENDING); let index_in_pages = &index.indexes; //only one page group @@ -1330,7 +1330,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 0), - BoundaryOrder::Unordered, + BoundaryOrder::UNORDERED, ); assert_eq!(row_group_offset_indexes[0].len(), 325); } else { @@ -1349,7 +1349,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 2), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[2].len(), 325); } else { @@ -1361,7 +1361,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 3), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[3].len(), 325); } else { @@ -1373,7 +1373,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 4), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[4].len(), 325); } else { @@ -1385,7 +1385,7 @@ mod tests { index, 528, get_row_group_min_max_bytes(row_group_metadata, 5), - BoundaryOrder::Unordered, + BoundaryOrder::UNORDERED, ); assert_eq!(row_group_offset_indexes[5].len(), 528); } else { @@ -1397,7 +1397,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 6), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[6].len(), 325); } else { @@ -1409,7 +1409,7 @@ mod tests { index, 528, get_row_group_min_max_bytes(row_group_metadata, 7), - BoundaryOrder::Unordered, + BoundaryOrder::UNORDERED, ); assert_eq!(row_group_offset_indexes[7].len(), 528); } else { @@ -1421,7 +1421,7 @@ mod tests { index, 974, get_row_group_min_max_bytes(row_group_metadata, 8), - BoundaryOrder::Unordered, + 
BoundaryOrder::UNORDERED, ); assert_eq!(row_group_offset_indexes[8].len(), 974); } else { @@ -1433,7 +1433,7 @@ mod tests { index, 352, get_row_group_min_max_bytes(row_group_metadata, 9), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[9].len(), 352); } else { @@ -1452,7 +1452,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 11), - BoundaryOrder::Ascending, + BoundaryOrder::ASCENDING, ); assert_eq!(row_group_offset_indexes[11].len(), 325); } else { @@ -1464,7 +1464,7 @@ mod tests { index, 325, get_row_group_min_max_bytes(row_group_metadata, 12), - BoundaryOrder::Unordered, + BoundaryOrder::UNORDERED, ); assert_eq!(row_group_offset_indexes[12].len(), 325); } else { diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index da2ec2e9a14..35b5179d36b 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -39,7 +39,7 @@ use std::fmt; -use parquet_format::Statistics as TStatistics; +use crate::format::Statistics as TStatistics; use crate::basic::Type; use crate::data_type::private::ParquetValueType; diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 7af4b0fa2c9..8cb6df974e4 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -20,8 +20,8 @@ use std::{io::Write, sync::Arc}; -use parquet_format as parquet; -use parquet_format::{ColumnIndex, OffsetIndex, RowGroup}; +use crate::format as parquet; +use crate::format::{ColumnIndex, OffsetIndex, RowGroup}; use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; use crate::basic::PageType; @@ -1110,7 +1110,7 @@ mod tests { fn test_file_roundtrip( file: File, data: Vec>, - ) -> parquet_format::FileMetaData { + ) -> crate::format::FileMetaData { let schema = Arc::new( types::Type::group_type_builder("schema") .with_fields(&mut vec![Arc::new( diff --git a/parquet/src/format.rs b/parquet/src/format.rs new file mode 100644 index 00000000000..a170a7b6a73 --- /dev/null +++ b/parquet/src/format.rs @@ -0,0 +1,5200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Autogenerated thrift definitions + +// Autogenerated by Thrift Compiler (0.16.0) +// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + +#![allow(unused_imports)] +#![allow(unused_extern_crates)] +#![allow(clippy::too_many_arguments, clippy::type_complexity, clippy::vec_box)] +#![cfg_attr(rustfmt, rustfmt_skip)] + +use std::cell::RefCell; +use std::collections::{BTreeMap, BTreeSet}; +use std::convert::{From, TryFrom}; +use std::default::Default; +use std::error::Error; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::rc::Rc; + +use thrift::OrderedFloat; +use thrift::{ApplicationError, ApplicationErrorKind, ProtocolError, ProtocolErrorKind, TThriftClient}; +use thrift::protocol::{TFieldIdentifier, TListIdentifier, TMapIdentifier, TMessageIdentifier, TMessageType, TInputProtocol, TOutputProtocol, TSetIdentifier, TStructIdentifier, TType}; +use thrift::protocol::field_id; +use thrift::protocol::verify_expected_message_type; +use thrift::protocol::verify_expected_sequence_number; +use thrift::protocol::verify_expected_service_call; +use thrift::protocol::verify_required_field_exists; + +/// Types supported by Parquet. These types are intended to be used in combination +/// with the encodings to control the on disk storage format. +/// For example INT16 is not included as a type since a good encoding of INT32 +/// would handle this. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Type(pub i32); + +impl Type { + pub const BOOLEAN: Type = Type(0); + pub const INT32: Type = Type(1); + pub const INT64: Type = Type(2); + pub const INT96: Type = Type(3); + pub const FLOAT: Type = Type(4); + pub const DOUBLE: Type = Type(5); + pub const BYTE_ARRAY: Type = Type(6); + pub const FIXED_LEN_BYTE_ARRAY: Type = Type(7); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::BOOLEAN, + Self::INT32, + Self::INT64, + Self::INT96, + Self::FLOAT, + Self::DOUBLE, + Self::BYTE_ARRAY, + Self::FIXED_LEN_BYTE_ARRAY, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(Type::from(enum_value)) + } +} + +impl From for Type { + fn from(i: i32) -> Self { + match i { + 0 => Type::BOOLEAN, + 1 => Type::INT32, + 2 => Type::INT64, + 3 => Type::INT96, + 4 => Type::FLOAT, + 5 => Type::DOUBLE, + 6 => Type::BYTE_ARRAY, + 7 => Type::FIXED_LEN_BYTE_ARRAY, + _ => Type(i) + } + } +} + +impl From<&i32> for Type { + fn from(i: &i32) -> Self { + Type::from(*i) + } +} + +impl From for i32 { + fn from(e: Type) -> i32 { + e.0 + } +} + +impl From<&Type> for i32 { + fn from(e: &Type) -> i32 { + e.0 + } +} + +/// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. +/// ConvertedType is superseded by LogicalType. This enum should not be extended. +/// +/// See LogicalTypes.md for conversion between ConvertedType and LogicalType. 
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct ConvertedType(pub i32); + +impl ConvertedType { + /// a BYTE_ARRAY actually contains UTF8 encoded chars + pub const UTF8: ConvertedType = ConvertedType(0); + /// a map is converted as an optional field containing a repeated key/value pair + pub const MAP: ConvertedType = ConvertedType(1); + /// a key/value pair is converted into a group of two fields + pub const MAP_KEY_VALUE: ConvertedType = ConvertedType(2); + /// a list is converted into an optional field containing a repeated field for its + /// values + pub const LIST: ConvertedType = ConvertedType(3); + /// an enum is converted into a binary field + pub const ENUM: ConvertedType = ConvertedType(4); + /// A decimal value. + /// + /// This may be used to annotate binary or fixed primitive types. The + /// underlying byte array stores the unscaled value encoded as two's + /// complement using big-endian byte order (the most significant byte is the + /// zeroth element). The value of the decimal is the value * 10^{-scale}. + /// + /// This must be accompanied by a (maximum) precision and a scale in the + /// SchemaElement. The precision specifies the number of digits in the decimal + /// and the scale stores the location of the decimal point. For example 1.23 + /// would have precision 3 (3 total digits) and scale 2 (the decimal point is + /// 2 digits over). + pub const DECIMAL: ConvertedType = ConvertedType(5); + /// A Date + /// + /// Stored as days since Unix epoch, encoded as the INT32 physical type. + /// + pub const DATE: ConvertedType = ConvertedType(6); + /// A time + /// + /// The total number of milliseconds since midnight. The value is stored + /// as an INT32 physical type. + pub const TIME_MILLIS: ConvertedType = ConvertedType(7); + /// A time. + /// + /// The total number of microseconds since midnight. The value is stored as + /// an INT64 physical type. + pub const TIME_MICROS: ConvertedType = ConvertedType(8); + /// A date/time combination + /// + /// Date and time recorded as milliseconds since the Unix epoch. Recorded as + /// a physical type of INT64. + pub const TIMESTAMP_MILLIS: ConvertedType = ConvertedType(9); + /// A date/time combination + /// + /// Date and time recorded as microseconds since the Unix epoch. The value is + /// stored as an INT64 physical type. + pub const TIMESTAMP_MICROS: ConvertedType = ConvertedType(10); + /// An unsigned integer value. + /// + /// The number describes the maximum number of meaningful data bits in + /// the stored value. 8, 16 and 32 bit values are stored using the + /// INT32 physical type. 64 bit values are stored using the INT64 + /// physical type. + /// + pub const UINT_8: ConvertedType = ConvertedType(11); + pub const UINT_16: ConvertedType = ConvertedType(12); + pub const UINT_32: ConvertedType = ConvertedType(13); + pub const UINT_64: ConvertedType = ConvertedType(14); + /// A signed integer value. + /// + /// The number describes the maximum number of meaningful data bits in + /// the stored value. 8, 16 and 32 bit values are stored using the + /// INT32 physical type. 64 bit values are stored using the INT64 + /// physical type. + /// + pub const INT_8: ConvertedType = ConvertedType(15); + pub const INT_16: ConvertedType = ConvertedType(16); + pub const INT_32: ConvertedType = ConvertedType(17); + pub const INT_64: ConvertedType = ConvertedType(18); + /// An embedded JSON document + /// + /// A JSON document embedded within a single UTF8 column. 
+ pub const JSON: ConvertedType = ConvertedType(19); + /// An embedded BSON document + /// + /// A BSON document embedded within a single BINARY column. + pub const BSON: ConvertedType = ConvertedType(20); + /// An interval of time + /// + /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 + /// This data is composed of three separate little endian unsigned + /// integers. Each stores a component of a duration of time. The first + /// integer identifies the number of months associated with the duration, + /// the second identifies the number of days associated with the duration + /// and the third identifies the number of milliseconds associated with + /// the provided duration. This duration of time is independent of any + /// particular timezone or date. + pub const INTERVAL: ConvertedType = ConvertedType(21); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::UTF8, + Self::MAP, + Self::MAP_KEY_VALUE, + Self::LIST, + Self::ENUM, + Self::DECIMAL, + Self::DATE, + Self::TIME_MILLIS, + Self::TIME_MICROS, + Self::TIMESTAMP_MILLIS, + Self::TIMESTAMP_MICROS, + Self::UINT_8, + Self::UINT_16, + Self::UINT_32, + Self::UINT_64, + Self::INT_8, + Self::INT_16, + Self::INT_32, + Self::INT_64, + Self::JSON, + Self::BSON, + Self::INTERVAL, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(ConvertedType::from(enum_value)) + } +} + +impl From for ConvertedType { + fn from(i: i32) -> Self { + match i { + 0 => ConvertedType::UTF8, + 1 => ConvertedType::MAP, + 2 => ConvertedType::MAP_KEY_VALUE, + 3 => ConvertedType::LIST, + 4 => ConvertedType::ENUM, + 5 => ConvertedType::DECIMAL, + 6 => ConvertedType::DATE, + 7 => ConvertedType::TIME_MILLIS, + 8 => ConvertedType::TIME_MICROS, + 9 => ConvertedType::TIMESTAMP_MILLIS, + 10 => ConvertedType::TIMESTAMP_MICROS, + 11 => ConvertedType::UINT_8, + 12 => ConvertedType::UINT_16, + 13 => ConvertedType::UINT_32, + 14 => ConvertedType::UINT_64, + 15 => ConvertedType::INT_8, + 16 => ConvertedType::INT_16, + 17 => ConvertedType::INT_32, + 18 => ConvertedType::INT_64, + 19 => ConvertedType::JSON, + 20 => ConvertedType::BSON, + 21 => ConvertedType::INTERVAL, + _ => ConvertedType(i) + } + } +} + +impl From<&i32> for ConvertedType { + fn from(i: &i32) -> Self { + ConvertedType::from(*i) + } +} + +impl From for i32 { + fn from(e: ConvertedType) -> i32 { + e.0 + } +} + +impl From<&ConvertedType> for i32 { + fn from(e: &ConvertedType) -> i32 { + e.0 + } +} + +/// Representation of Schemas +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct FieldRepetitionType(pub i32); + +impl FieldRepetitionType { + /// This field is required (can not be null) and each record has exactly 1 value. + pub const REQUIRED: FieldRepetitionType = FieldRepetitionType(0); + /// The field is optional (can be null) and each record has 0 or 1 values. 
+ pub const OPTIONAL: FieldRepetitionType = FieldRepetitionType(1); + /// The field is repeated and can contain 0 or more values + pub const REPEATED: FieldRepetitionType = FieldRepetitionType(2); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::REQUIRED, + Self::OPTIONAL, + Self::REPEATED, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(FieldRepetitionType::from(enum_value)) + } +} + +impl From for FieldRepetitionType { + fn from(i: i32) -> Self { + match i { + 0 => FieldRepetitionType::REQUIRED, + 1 => FieldRepetitionType::OPTIONAL, + 2 => FieldRepetitionType::REPEATED, + _ => FieldRepetitionType(i) + } + } +} + +impl From<&i32> for FieldRepetitionType { + fn from(i: &i32) -> Self { + FieldRepetitionType::from(*i) + } +} + +impl From for i32 { + fn from(e: FieldRepetitionType) -> i32 { + e.0 + } +} + +impl From<&FieldRepetitionType> for i32 { + fn from(e: &FieldRepetitionType) -> i32 { + e.0 + } +} + +/// Encodings supported by Parquet. Not all encodings are valid for all types. These +/// enums are also used to specify the encoding of definition and repetition levels. +/// See the accompanying doc for the details of the more complicated encodings. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Encoding(pub i32); + +impl Encoding { + /// Default encoding. + /// BOOLEAN - 1 bit per value. 0 is false; 1 is true. + /// INT32 - 4 bytes per value. Stored as little-endian. + /// INT64 - 8 bytes per value. Stored as little-endian. + /// FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + /// DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + /// BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + /// FIXED_LEN_BYTE_ARRAY - Just the bytes. + pub const PLAIN: Encoding = Encoding(0); + /// Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + /// plain type. + /// in a data page use RLE_DICTIONARY instead. + /// in a Dictionary page use PLAIN instead + pub const PLAIN_DICTIONARY: Encoding = Encoding(2); + /// Group packed run length encoding. Usable for definition/repetition levels + /// encoding and Booleans (on one bit: 0 is false; 1 is true.) + pub const RLE: Encoding = Encoding(3); + /// Bit packed encoding. This can only be used if the data has a known max + /// width. Usable for definition/repetition levels encoding. + pub const BIT_PACKED: Encoding = Encoding(4); + /// Delta encoding for integers. This can be used for int columns and works best + /// on sorted data + pub const DELTA_BINARY_PACKED: Encoding = Encoding(5); + /// Encoding for byte arrays to separate the length values and the data. The lengths + /// are encoded using DELTA_BINARY_PACKED + pub const DELTA_LENGTH_BYTE_ARRAY: Encoding = Encoding(6); + /// Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. + /// Suffixes are stored as delta length byte arrays. + pub const DELTA_BYTE_ARRAY: Encoding = Encoding(7); + /// Dictionary encoding: the ids are encoded using the RLE encoding + pub const RLE_DICTIONARY: Encoding = Encoding(8); + /// Encoding for floating-point data. + /// K byte-streams are created where K is the size in bytes of the data type. 
+ /// The individual bytes of an FP value are scattered to the corresponding stream and + /// the streams are concatenated. + /// This itself does not reduce the size of the data but can lead to better compression + /// afterwards. + pub const BYTE_STREAM_SPLIT: Encoding = Encoding(9); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::PLAIN, + Self::PLAIN_DICTIONARY, + Self::RLE, + Self::BIT_PACKED, + Self::DELTA_BINARY_PACKED, + Self::DELTA_LENGTH_BYTE_ARRAY, + Self::DELTA_BYTE_ARRAY, + Self::RLE_DICTIONARY, + Self::BYTE_STREAM_SPLIT, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(Encoding::from(enum_value)) + } +} + +impl From for Encoding { + fn from(i: i32) -> Self { + match i { + 0 => Encoding::PLAIN, + 2 => Encoding::PLAIN_DICTIONARY, + 3 => Encoding::RLE, + 4 => Encoding::BIT_PACKED, + 5 => Encoding::DELTA_BINARY_PACKED, + 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY, + 7 => Encoding::DELTA_BYTE_ARRAY, + 8 => Encoding::RLE_DICTIONARY, + 9 => Encoding::BYTE_STREAM_SPLIT, + _ => Encoding(i) + } + } +} + +impl From<&i32> for Encoding { + fn from(i: &i32) -> Self { + Encoding::from(*i) + } +} + +impl From for i32 { + fn from(e: Encoding) -> i32 { + e.0 + } +} + +impl From<&Encoding> for i32 { + fn from(e: &Encoding) -> i32 { + e.0 + } +} + +/// Supported compression algorithms. +/// +/// Codecs added in format version X.Y can be read by readers based on X.Y and later. +/// Codec support may vary between readers based on the format version and +/// libraries available at runtime. +/// +/// See Compression.md for a detailed specification of these algorithms. 
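+///
+/// Illustrative sketch, not generator output, of the BYTE_STREAM_SPLIT
+/// transform described under `Encoding` above: the reshuffle itself saves no
+/// space, but it groups similar bytes together so the codecs below tend to
+/// compress the buffer better afterwards.
+///
+/// ```
+/// // Scatter byte k of every f32 value into stream k (K = 4 streams),
+/// // then concatenate the streams.
+/// fn byte_stream_split(values: &[f32]) -> Vec<u8> {
+///     let n = values.len();
+///     let mut out = vec![0u8; n * 4];
+///     for (i, v) in values.iter().enumerate() {
+///         for (k, b) in v.to_le_bytes().iter().enumerate() {
+///             out[k * n + i] = *b; // stream k, position i
+///         }
+///     }
+///     out
+/// }
+/// assert_eq!(byte_stream_split(&[1.0, 2.0, 3.0]).len(), 12); // size unchanged
+/// ```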
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct CompressionCodec(pub i32); + +impl CompressionCodec { + pub const UNCOMPRESSED: CompressionCodec = CompressionCodec(0); + pub const SNAPPY: CompressionCodec = CompressionCodec(1); + pub const GZIP: CompressionCodec = CompressionCodec(2); + pub const LZO: CompressionCodec = CompressionCodec(3); + pub const BROTLI: CompressionCodec = CompressionCodec(4); + pub const LZ4: CompressionCodec = CompressionCodec(5); + pub const ZSTD: CompressionCodec = CompressionCodec(6); + pub const LZ4_RAW: CompressionCodec = CompressionCodec(7); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::UNCOMPRESSED, + Self::SNAPPY, + Self::GZIP, + Self::LZO, + Self::BROTLI, + Self::LZ4, + Self::ZSTD, + Self::LZ4_RAW, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(CompressionCodec::from(enum_value)) + } +} + +impl From for CompressionCodec { + fn from(i: i32) -> Self { + match i { + 0 => CompressionCodec::UNCOMPRESSED, + 1 => CompressionCodec::SNAPPY, + 2 => CompressionCodec::GZIP, + 3 => CompressionCodec::LZO, + 4 => CompressionCodec::BROTLI, + 5 => CompressionCodec::LZ4, + 6 => CompressionCodec::ZSTD, + 7 => CompressionCodec::LZ4_RAW, + _ => CompressionCodec(i) + } + } +} + +impl From<&i32> for CompressionCodec { + fn from(i: &i32) -> Self { + CompressionCodec::from(*i) + } +} + +impl From for i32 { + fn from(e: CompressionCodec) -> i32 { + e.0 + } +} + +impl From<&CompressionCodec> for i32 { + fn from(e: &CompressionCodec) -> i32 { + e.0 + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct PageType(pub i32); + +impl PageType { + pub const DATA_PAGE: PageType = PageType(0); + pub const INDEX_PAGE: PageType = PageType(1); + pub const DICTIONARY_PAGE: PageType = PageType(2); + pub const DATA_PAGE_V2: PageType = PageType(3); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::DATA_PAGE, + Self::INDEX_PAGE, + Self::DICTIONARY_PAGE, + Self::DATA_PAGE_V2, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(PageType::from(enum_value)) + } +} + +impl From for PageType { + fn from(i: i32) -> Self { + match i { + 0 => PageType::DATA_PAGE, + 1 => PageType::INDEX_PAGE, + 2 => PageType::DICTIONARY_PAGE, + 3 => PageType::DATA_PAGE_V2, + _ => PageType(i) + } + } +} + +impl From<&i32> for PageType { + fn from(i: &i32) -> Self { + PageType::from(*i) + } +} + +impl From for i32 { + fn from(e: PageType) -> i32 { + e.0 + } +} + +impl From<&PageType> for i32 { + fn from(e: &PageType) -> i32 { + e.0 + } +} + +/// Enum to annotate whether lists of min/max elements inside ColumnIndex +/// are ordered and if so, in which direction. 
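+///
+/// Illustrative sketch, not generator output, of why a reader cares: with
+/// ASCENDING page maxima a binary search bounds how many leading pages can
+/// be skipped for a predicate `col >= v`, while UNORDERED only allows
+/// checking pages one by one.
+///
+/// ```
+/// use parquet::format::BoundaryOrder;
+///
+/// fn skippable_prefix(page_maxes: &[i64], v: i64, order: BoundaryOrder) -> usize {
+///     if order == BoundaryOrder::ASCENDING {
+///         page_maxes.partition_point(|&max| max < v) // O(log n)
+///     } else {
+///         page_maxes.iter().take_while(|&&max| max < v).count() // O(n)
+///     }
+/// }
+///
+/// assert_eq!(skippable_prefix(&[3, 7, 9], 8, BoundaryOrder::ASCENDING), 2);
+/// ```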
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BoundaryOrder(pub i32); + +impl BoundaryOrder { + pub const UNORDERED: BoundaryOrder = BoundaryOrder(0); + pub const ASCENDING: BoundaryOrder = BoundaryOrder(1); + pub const DESCENDING: BoundaryOrder = BoundaryOrder(2); + pub const ENUM_VALUES: &'static [Self] = &[ + Self::UNORDERED, + Self::ASCENDING, + Self::DESCENDING, + ]; + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(self.0) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Ok(BoundaryOrder::from(enum_value)) + } +} + +impl From for BoundaryOrder { + fn from(i: i32) -> Self { + match i { + 0 => BoundaryOrder::UNORDERED, + 1 => BoundaryOrder::ASCENDING, + 2 => BoundaryOrder::DESCENDING, + _ => BoundaryOrder(i) + } + } +} + +impl From<&i32> for BoundaryOrder { + fn from(i: &i32) -> Self { + BoundaryOrder::from(*i) + } +} + +impl From for i32 { + fn from(e: BoundaryOrder) -> i32 { + e.0 + } +} + +impl From<&BoundaryOrder> for i32 { + fn from(e: &BoundaryOrder) -> i32 { + e.0 + } +} + +// +// Statistics +// + +/// Statistics per row group and per page +/// All fields are optional. +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Statistics { + /// DEPRECATED: min and max value of the column. Use min_value and max_value. + /// + /// Values are encoded using PLAIN encoding, except that variable-length byte + /// arrays do not include a length prefix. + /// + /// These fields encode min and max values determined by signed comparison + /// only. New files should use the correct order for a column's logical type + /// and store the values in the min_value and max_value fields. + /// + /// To support older readers, these may be set when the column order is + /// signed. + pub max: Option>, + pub min: Option>, + /// count of null value in the column + pub null_count: Option, + /// count of distinct values occurring + pub distinct_count: Option, + /// Min and max values for the column, determined by its ColumnOrder. + /// + /// Values are encoded using PLAIN encoding, except that variable-length byte + /// arrays do not include a length prefix. 
+ pub max_value: Option>, + pub min_value: Option>, +} + +impl Statistics { + pub fn new(max: F1, min: F2, null_count: F3, distinct_count: F4, max_value: F5, min_value: F6) -> Statistics where F1: Into>>, F2: Into>>, F3: Into>, F4: Into>, F5: Into>>, F6: Into>> { + Statistics { + max: max.into(), + min: min.into(), + null_count: null_count.into(), + distinct_count: distinct_count.into(), + max_value: max_value.into(), + min_value: min_value.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option> = None; + let mut f_2: Option> = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option> = None; + let mut f_6: Option> = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bytes()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_bytes()?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_i64()?; + f_3 = Some(val); + }, + 4 => { + let val = i_prot.read_i64()?; + f_4 = Some(val); + }, + 5 => { + let val = i_prot.read_bytes()?; + f_5 = Some(val); + }, + 6 => { + let val = i_prot.read_bytes()?; + f_6 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = Statistics { + max: f_1, + min: f_2, + null_count: f_3, + distinct_count: f_4, + max_value: f_5, + min_value: f_6, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("Statistics"); + o_prot.write_struct_begin(&struct_ident)?; + if let Some(ref fld_var) = self.max { + o_prot.write_field_begin(&TFieldIdentifier::new("max", TType::String, 1))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.min { + o_prot.write_field_begin(&TFieldIdentifier::new("min", TType::String, 2))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.null_count { + o_prot.write_field_begin(&TFieldIdentifier::new("null_count", TType::I64, 3))?; + o_prot.write_i64(fld_var)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.distinct_count { + o_prot.write_field_begin(&TFieldIdentifier::new("distinct_count", TType::I64, 4))?; + o_prot.write_i64(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.max_value { + o_prot.write_field_begin(&TFieldIdentifier::new("max_value", TType::String, 5))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.min_value { + o_prot.write_field_begin(&TFieldIdentifier::new("min_value", TType::String, 6))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()? 
+ } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for Statistics { + fn default() -> Self { + Statistics{ + max: Some(Vec::new()), + min: Some(Vec::new()), + null_count: Some(0), + distinct_count: Some(0), + max_value: Some(Vec::new()), + min_value: Some(Vec::new()), + } + } +} + +// +// StringType +// + +/// Empty structs to use as logical type annotations +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct StringType { +} + +impl StringType { + pub fn new() -> StringType { + StringType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = StringType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("StringType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for StringType { + fn default() -> Self { + StringType{} + } +} + +// +// UUIDType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct UUIDType { +} + +impl UUIDType { + pub fn new() -> UUIDType { + UUIDType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = UUIDType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("UUIDType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for UUIDType { + fn default() -> Self { + UUIDType{} + } +} + +// +// MapType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MapType { +} + +impl MapType { + pub fn new() -> MapType { + MapType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MapType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MapType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MapType { + fn default() -> Self { + MapType{} + } +} + +// +// ListType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct ListType { +} + +impl ListType { + pub fn new() -> ListType { + ListType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + 
i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = ListType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("ListType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for ListType { + fn default() -> Self { + ListType{} + } +} + +// +// EnumType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct EnumType { +} + +impl EnumType { + pub fn new() -> EnumType { + EnumType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = EnumType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("EnumType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for EnumType { + fn default() -> Self { + EnumType{} + } +} + +// +// DateType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DateType { +} + +impl DateType { + pub fn new() -> DateType { + DateType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = DateType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DateType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for DateType { + fn default() -> Self { + DateType{} + } +} + +// +// NullType +// + +/// Logical type to annotate a column that is always null. +/// +/// Sometimes when discovering the schema of existing data, values are always +/// null and the physical type can't be determined. This annotation signals +/// the case where the physical type was guessed from all null values. 
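+///
+/// Illustrative note, not generator output: every struct in this module
+/// round-trips through thrift's compact protocol via the same pair of
+/// methods. A minimal sketch, assuming thrift 0.16's blanket `Read`/`Write`
+/// transport impls:
+///
+/// ```
+/// use parquet::format::NullType;
+/// use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol};
+///
+/// let mut buf: Vec<u8> = Vec::new();
+/// NullType::new()
+///     .write_to_out_protocol(&mut TCompactOutputProtocol::new(&mut buf))
+///     .unwrap();
+/// let mut i_prot = TCompactInputProtocol::new(buf.as_slice());
+/// assert_eq!(NullType::read_from_in_protocol(&mut i_prot).unwrap(), NullType::new());
+/// ```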
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NullType { +} + +impl NullType { + pub fn new() -> NullType { + NullType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = NullType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("NullType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for NullType { + fn default() -> Self { + NullType{} + } +} + +// +// DecimalType +// + +/// Decimal logical type annotation +/// +/// To maintain forward-compatibility in v1, implementations using this logical +/// type must also set scale and precision on the annotated SchemaElement. +/// +/// Allowed for physical types: INT32, INT64, FIXED, and BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DecimalType { + pub scale: i32, + pub precision: i32, +} + +impl DecimalType { + pub fn new(scale: i32, precision: i32) -> DecimalType { + DecimalType { + scale, + precision, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_i32()?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DecimalType.scale", &f_1)?; + verify_required_field_exists("DecimalType.precision", &f_2)?; + let ret = DecimalType { + scale: f_1.expect("auto-generated code should have checked for presence of required fields"), + precision: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DecimalType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("scale", TType::I32, 1))?; + o_prot.write_i32(self.scale)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("precision", TType::I32, 2))?; + o_prot.write_i32(self.precision)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// MilliSeconds +// + +/// Time units for logical types +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MilliSeconds { +} + +impl MilliSeconds { + pub fn new() -> MilliSeconds { + MilliSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + 
i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MilliSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MilliSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MilliSeconds { + fn default() -> Self { + MilliSeconds{} + } +} + +// +// MicroSeconds +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MicroSeconds { +} + +impl MicroSeconds { + pub fn new() -> MicroSeconds { + MicroSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MicroSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MicroSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MicroSeconds { + fn default() -> Self { + MicroSeconds{} + } +} + +// +// NanoSeconds +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NanoSeconds { +} + +impl NanoSeconds { + pub fn new() -> NanoSeconds { + NanoSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = NanoSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("NanoSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for NanoSeconds { + fn default() -> Self { + NanoSeconds{} + } +} + +// +// TimeUnit +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum TimeUnit { + MILLIS(MilliSeconds), + MICROS(MicroSeconds), + NANOS(NanoSeconds), +} + +impl TimeUnit { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = MilliSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::MILLIS(val)); + } + received_field_count += 1; + }, + 2 => { + let val = MicroSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::MICROS(val)); + } + received_field_count += 1; + }, + 3 => { + let val = NanoSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::NANOS(val)); + } + received_field_count += 1; + }, + _ 
=> { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote TimeUnit" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote TimeUnit" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimeUnit"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + TimeUnit::MILLIS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MILLIS", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + TimeUnit::MICROS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MICROS", TType::Struct, 2))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + TimeUnit::NANOS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("NANOS", TType::Struct, 3))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// TimestampType +// + +/// Timestamp logical type annotation +/// +/// Allowed for physical types: INT64 +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TimestampType { + pub is_adjusted_to_u_t_c: bool, + pub unit: TimeUnit, +} + +impl TimestampType { + pub fn new(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> TimestampType { + TimestampType { + is_adjusted_to_u_t_c, + unit, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bool()?; + f_1 = Some(val); + }, + 2 => { + let val = TimeUnit::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("TimestampType.is_adjusted_to_u_t_c", &f_1)?; + verify_required_field_exists("TimestampType.unit", &f_2)?; + let ret = TimestampType { + is_adjusted_to_u_t_c: f_1.expect("auto-generated code should have checked for presence of required fields"), + unit: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimestampType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("isAdjustedToUTC", TType::Bool, 1))?; + o_prot.write_bool(self.is_adjusted_to_u_t_c)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("unit", TType::Struct, 2))?; + self.unit.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// TimeType +// + +/// Time logical type annotation +/// +/// Allowed for physical types: 
INT32 (millis), INT64 (micros, nanos) +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TimeType { + pub is_adjusted_to_u_t_c: bool, + pub unit: TimeUnit, +} + +impl TimeType { + pub fn new(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> TimeType { + TimeType { + is_adjusted_to_u_t_c, + unit, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bool()?; + f_1 = Some(val); + }, + 2 => { + let val = TimeUnit::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("TimeType.is_adjusted_to_u_t_c", &f_1)?; + verify_required_field_exists("TimeType.unit", &f_2)?; + let ret = TimeType { + is_adjusted_to_u_t_c: f_1.expect("auto-generated code should have checked for presence of required fields"), + unit: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimeType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("isAdjustedToUTC", TType::Bool, 1))?; + o_prot.write_bool(self.is_adjusted_to_u_t_c)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("unit", TType::Struct, 2))?; + self.unit.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// IntType +// + +/// Integer logical type annotation +/// +/// bitWidth must be 8, 16, 32, or 64. 
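+///
+/// Illustrative example, not generator output: the annotation that replaces
+/// the deprecated `ConvertedType::UINT_8`:
+///
+/// ```
+/// use parquet::format::IntType;
+///
+/// let uint8 = IntType::new(8, false); // 8-bit, unsigned
+/// assert_eq!((uint8.bit_width, uint8.is_signed), (8, false));
+/// ```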
+/// +/// Allowed for physical types: INT32, INT64 +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IntType { + pub bit_width: i8, + pub is_signed: bool, +} + +impl IntType { + pub fn new(bit_width: i8, is_signed: bool) -> IntType { + IntType { + bit_width, + is_signed, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i8()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_bool()?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("IntType.bit_width", &f_1)?; + verify_required_field_exists("IntType.is_signed", &f_2)?; + let ret = IntType { + bit_width: f_1.expect("auto-generated code should have checked for presence of required fields"), + is_signed: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("IntType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("bitWidth", TType::I08, 1))?; + o_prot.write_i8(self.bit_width)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("isSigned", TType::Bool, 2))?; + o_prot.write_bool(self.is_signed)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// JsonType +// + +/// Embedded JSON logical type annotation +/// +/// Allowed for physical types: BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct JsonType { +} + +impl JsonType { + pub fn new() -> JsonType { + JsonType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = JsonType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("JsonType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for JsonType { + fn default() -> Self { + JsonType{} + } +} + +// +// BsonType +// + +/// Embedded BSON logical type annotation +/// +/// Allowed for physical types: BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BsonType { +} + +impl BsonType { + pub fn new() -> BsonType { + BsonType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + 
i_prot.read_struct_end()?; + let ret = BsonType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("BsonType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for BsonType { + fn default() -> Self { + BsonType{} + } +} + +// +// LogicalType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum LogicalType { + STRING(StringType), + MAP(MapType), + LIST(ListType), + ENUM(EnumType), + DECIMAL(DecimalType), + DATE(DateType), + TIME(TimeType), + TIMESTAMP(TimestampType), + INTEGER(IntType), + UNKNOWN(NullType), + JSON(JsonType), + BSON(BsonType), + UUID(UUIDType), +} + +impl LogicalType { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = StringType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::STRING(val)); + } + received_field_count += 1; + }, + 2 => { + let val = MapType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::MAP(val)); + } + received_field_count += 1; + }, + 3 => { + let val = ListType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::LIST(val)); + } + received_field_count += 1; + }, + 4 => { + let val = EnumType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::ENUM(val)); + } + received_field_count += 1; + }, + 5 => { + let val = DecimalType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::DECIMAL(val)); + } + received_field_count += 1; + }, + 6 => { + let val = DateType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::DATE(val)); + } + received_field_count += 1; + }, + 7 => { + let val = TimeType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::TIME(val)); + } + received_field_count += 1; + }, + 8 => { + let val = TimestampType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::TIMESTAMP(val)); + } + received_field_count += 1; + }, + 10 => { + let val = IntType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::INTEGER(val)); + } + received_field_count += 1; + }, + 11 => { + let val = NullType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::UNKNOWN(val)); + } + received_field_count += 1; + }, + 12 => { + let val = JsonType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::JSON(val)); + } + received_field_count += 1; + }, + 13 => { + let val = BsonType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::BSON(val)); + } + received_field_count += 1; + }, + 14 => { + let val = UUIDType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::UUID(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received 
empty union from remote LogicalType" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote LogicalType" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("LogicalType"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + LogicalType::STRING(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("STRING", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::MAP(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MAP", TType::Struct, 2))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::LIST(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("LIST", TType::Struct, 3))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::ENUM(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("ENUM", TType::Struct, 4))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::DECIMAL(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("DECIMAL", TType::Struct, 5))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::DATE(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("DATE", TType::Struct, 6))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::TIME(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("TIME", TType::Struct, 7))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::TIMESTAMP(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("TIMESTAMP", TType::Struct, 8))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::INTEGER(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("INTEGER", TType::Struct, 10))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::UNKNOWN(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("UNKNOWN", TType::Struct, 11))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::JSON(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("JSON", TType::Struct, 12))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::BSON(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("BSON", TType::Struct, 13))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::UUID(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("UUID", TType::Struct, 14))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// SchemaElement +// + +/// Represents a element inside a schema definition. +/// - if it is a group (inner node) then type is undefined and num_children is defined +/// - if it is a primitive type (leaf) then type is defined and num_children is undefined +/// the nodes are listed in depth first traversal order. +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SchemaElement { + /// Data type for this field. 
Not set if the current element is a non-leaf node + pub type_: Option, + /// If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. + /// Otherwise, if specified, this is the maximum bit length to store any of the values. + /// (e.g. a low cardinality INT col could have this set to 3). Note that this is + /// in the schema, and therefore fixed for the entire file. + pub type_length: Option, + /// repetition of the field. The root of the schema does not have a repetition_type. + /// All other nodes must have one + pub repetition_type: Option, + /// Name of the field in the schema + pub name: String, + /// Nested fields. Since thrift does not support nested fields, + /// the nesting is flattened to a single list by a depth-first traversal. + /// The children count is used to construct the nested relationship. + /// This field is not set when the element is a primitive type + pub num_children: Option, + /// DEPRECATED: When the schema is the result of a conversion from another model. + /// Used to record the original type to help with cross conversion. + /// + /// This is superseded by logicalType. + pub converted_type: Option, + /// DEPRECATED: Used when this column contains decimal data. + /// See the DECIMAL converted type for more details. + /// + /// This is superseded by using the DecimalType annotation in logicalType. + pub scale: Option, + pub precision: Option, + /// When the original schema supports field ids, this will save the + /// original field id in the parquet schema + pub field_id: Option, + /// The logical type of this SchemaElement + /// + /// LogicalType replaces ConvertedType, but ConvertedType is still required + /// for some logical types to ensure forward-compatibility in format v1. + pub logical_type: Option, +} + +impl SchemaElement { + pub fn new(type_: F1, type_length: F2, repetition_type: F3, name: String, num_children: F5, converted_type: F6, scale: F7, precision: F8, field_id: F9, logical_type: F10) -> SchemaElement where F1: Into>, F2: Into>, F3: Into>, F5: Into>, F6: Into>, F7: Into>, F8: Into>, F9: Into>, F10: Into> { + SchemaElement { + type_: type_.into(), + type_length: type_length.into(), + repetition_type: repetition_type.into(), + name, + num_children: num_children.into(), + converted_type: converted_type.into(), + scale: scale.into(), + precision: precision.into(), + field_id: field_id.into(), + logical_type: logical_type.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option = None; + let mut f_6: Option = None; + let mut f_7: Option = None; + let mut f_8: Option = None; + let mut f_9: Option = None; + let mut f_10: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = Type::read_from_in_protocol(i_prot)?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_i32()?; + f_2 = Some(val); + }, + 3 => { + let val = FieldRepetitionType::read_from_in_protocol(i_prot)?; + f_3 = Some(val); + }, + 4 => { + let val = i_prot.read_string()?; + f_4 = Some(val); + }, + 5 => { + let val = i_prot.read_i32()?; + f_5 = Some(val); + }, + 6 => { + let val = ConvertedType::read_from_in_protocol(i_prot)?; + f_6 = Some(val); + }, + 7 => { + let val = i_prot.read_i32()?; + f_7 = 
Some(val); + }, + 8 => { + let val = i_prot.read_i32()?; + f_8 = Some(val); + }, + 9 => { + let val = i_prot.read_i32()?; + f_9 = Some(val); + }, + 10 => { + let val = LogicalType::read_from_in_protocol(i_prot)?; + f_10 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("SchemaElement.name", &f_4)?; + let ret = SchemaElement { + type_: f_1, + type_length: f_2, + repetition_type: f_3, + name: f_4.expect("auto-generated code should have checked for presence of required fields"), + num_children: f_5, + converted_type: f_6, + scale: f_7, + precision: f_8, + field_id: f_9, + logical_type: f_10, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("SchemaElement"); + o_prot.write_struct_begin(&struct_ident)?; + if let Some(ref fld_var) = self.type_ { + o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.type_length { + o_prot.write_field_begin(&TFieldIdentifier::new("type_length", TType::I32, 2))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.repetition_type { + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_type", TType::I32, 3))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + o_prot.write_field_begin(&TFieldIdentifier::new("name", TType::String, 4))?; + o_prot.write_string(&self.name)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.num_children { + o_prot.write_field_begin(&TFieldIdentifier::new("num_children", TType::I32, 5))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.converted_type { + o_prot.write_field_begin(&TFieldIdentifier::new("converted_type", TType::I32, 6))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.scale { + o_prot.write_field_begin(&TFieldIdentifier::new("scale", TType::I32, 7))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.precision { + o_prot.write_field_begin(&TFieldIdentifier::new("precision", TType::I32, 8))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(fld_var) = self.field_id { + o_prot.write_field_begin(&TFieldIdentifier::new("field_id", TType::I32, 9))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.logical_type { + o_prot.write_field_begin(&TFieldIdentifier::new("logicalType", TType::Struct, 10))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// DataPageHeader +// + +/// Data page header +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DataPageHeader { + /// Number of values, including NULLs, in this data page. 
* + pub num_values: i32, + /// Encoding used for this data page * + pub encoding: Encoding, + /// Encoding used for definition levels * + pub definition_level_encoding: Encoding, + /// Encoding used for repetition levels * + pub repetition_level_encoding: Encoding, + /// Optional statistics for the data in this page* + pub statistics: Option, +} + +impl DataPageHeader { + pub fn new(num_values: i32, encoding: Encoding, definition_level_encoding: Encoding, repetition_level_encoding: Encoding, statistics: F5) -> DataPageHeader where F5: Into> { + DataPageHeader { + num_values, + encoding, + definition_level_encoding, + repetition_level_encoding, + statistics: statistics.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + 3 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_3 = Some(val); + }, + 4 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_4 = Some(val); + }, + 5 => { + let val = Statistics::read_from_in_protocol(i_prot)?; + f_5 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DataPageHeader.num_values", &f_1)?; + verify_required_field_exists("DataPageHeader.encoding", &f_2)?; + verify_required_field_exists("DataPageHeader.definition_level_encoding", &f_3)?; + verify_required_field_exists("DataPageHeader.repetition_level_encoding", &f_4)?; + let ret = DataPageHeader { + num_values: f_1.expect("auto-generated code should have checked for presence of required fields"), + encoding: f_2.expect("auto-generated code should have checked for presence of required fields"), + definition_level_encoding: f_3.expect("auto-generated code should have checked for presence of required fields"), + repetition_level_encoding: f_4.expect("auto-generated code should have checked for presence of required fields"), + statistics: f_5, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DataPageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?; + o_prot.write_i32(self.num_values)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?; + self.encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("definition_level_encoding", TType::I32, 3))?; + self.definition_level_encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_level_encoding", TType::I32, 4))?; + self.repetition_level_encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + if let Some(ref fld_var) = self.statistics { + o_prot.write_field_begin(&TFieldIdentifier::new("statistics", TType::Struct, 5))?; + fld_var.write_to_out_protocol(o_prot)?; + 
+
+//
+// IndexPageHeader
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct IndexPageHeader {
+}
+
+impl IndexPageHeader {
+  pub fn new() -> IndexPageHeader {
+    IndexPageHeader {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<IndexPageHeader> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = IndexPageHeader {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("IndexPageHeader");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for IndexPageHeader {
+  fn default() -> Self {
+    IndexPageHeader{}
+  }
+}
+
+//
+// DictionaryPageHeader
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct DictionaryPageHeader {
+  /// Number of values in the dictionary *
+  pub num_values: i32,
+  /// Encoding using this dictionary page *
+  pub encoding: Encoding,
+  /// If true, the entries in the dictionary are sorted in ascending order *
+  pub is_sorted: Option<bool>,
+}
+
+impl DictionaryPageHeader {
+  pub fn new<F3>(num_values: i32, encoding: Encoding, is_sorted: F3) -> DictionaryPageHeader where F3: Into<Option<bool>> {
+    DictionaryPageHeader {
+      num_values,
+      encoding,
+      is_sorted: is_sorted.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<DictionaryPageHeader> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<Encoding> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = Encoding::read_from_in_protocol(i_prot)?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_bool()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("DictionaryPageHeader.num_values", &f_1)?;
+    verify_required_field_exists("DictionaryPageHeader.encoding", &f_2)?;
+    let ret = DictionaryPageHeader {
+      num_values: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      encoding: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      is_sorted: f_3,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("DictionaryPageHeader");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?;
+    o_prot.write_i32(self.num_values)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?;
+    self.encoding.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    if let Some(fld_var) = self.is_sorted {
+      o_prot.write_field_begin(&TFieldIdentifier::new("is_sorted", TType::Bool, 3))?;
+      o_prot.write_bool(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// DataPageHeaderV2
+//
+
+/// New page format allowing reading levels without decompressing the data
+/// Repetition and definition levels are uncompressed
+/// The remaining section containing the data is compressed if is_compressed is true
+///
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct DataPageHeaderV2 {
+  /// Number of values, including NULLs, in this data page. *
+  pub num_values: i32,
+  /// Number of NULL values, in this data page.
+  /// Number of non-null = num_values - num_nulls which is also the number of values in the data section *
+  pub num_nulls: i32,
+  /// Number of rows in this data page. which means pages change on record boundaries (r = 0) *
+  pub num_rows: i32,
+  /// Encoding used for data in this page *
+  pub encoding: Encoding,
+  /// length of the definition levels
+  pub definition_levels_byte_length: i32,
+  /// length of the repetition levels
+  pub repetition_levels_byte_length: i32,
+  /// whether the values are compressed.
+  /// Which means the section of the page between
+  /// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included)
+  /// is compressed with the compression_codec.
+  /// If missing it is considered compressed
+  pub is_compressed: Option<bool>,
+  /// optional statistics for the data in this page *
+  pub statistics: Option<Statistics>,
+}
+
+impl DataPageHeaderV2 {
+  pub fn new<F7, F8>(num_values: i32, num_nulls: i32, num_rows: i32, encoding: Encoding, definition_levels_byte_length: i32, repetition_levels_byte_length: i32, is_compressed: F7, statistics: F8) -> DataPageHeaderV2 where F7: Into<Option<bool>>, F8: Into<Option<Statistics>> {
+    DataPageHeaderV2 {
+      num_values,
+      num_nulls,
+      num_rows,
+      encoding,
+      definition_levels_byte_length,
+      repetition_levels_byte_length,
+      is_compressed: is_compressed.into(),
+      statistics: statistics.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<DataPageHeaderV2> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<i32> = None;
+    let mut f_3: Option<i32> = None;
+    let mut f_4: Option<Encoding> = None;
+    let mut f_5: Option<i32> = None;
+    let mut f_6: Option<i32> = None;
+    let mut f_7: Option<bool> = None;
+    let mut f_8: Option<Statistics> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i32()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i32()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = Encoding::read_from_in_protocol(i_prot)?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = i_prot.read_i32()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = i_prot.read_i32()?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = i_prot.read_bool()?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = Statistics::read_from_in_protocol(i_prot)?;
+          f_8 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("DataPageHeaderV2.num_values", &f_1)?;
+    verify_required_field_exists("DataPageHeaderV2.num_nulls", &f_2)?;
+    verify_required_field_exists("DataPageHeaderV2.num_rows", &f_3)?;
+    verify_required_field_exists("DataPageHeaderV2.encoding", &f_4)?;
+    verify_required_field_exists("DataPageHeaderV2.definition_levels_byte_length", &f_5)?;
verify_required_field_exists("DataPageHeaderV2.repetition_levels_byte_length", &f_6)?; + let ret = DataPageHeaderV2 { + num_values: f_1.expect("auto-generated code should have checked for presence of required fields"), + num_nulls: f_2.expect("auto-generated code should have checked for presence of required fields"), + num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"), + encoding: f_4.expect("auto-generated code should have checked for presence of required fields"), + definition_levels_byte_length: f_5.expect("auto-generated code should have checked for presence of required fields"), + repetition_levels_byte_length: f_6.expect("auto-generated code should have checked for presence of required fields"), + is_compressed: f_7, + statistics: f_8, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DataPageHeaderV2"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?; + o_prot.write_i32(self.num_values)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_nulls", TType::I32, 2))?; + o_prot.write_i32(self.num_nulls)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I32, 3))?; + o_prot.write_i32(self.num_rows)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 4))?; + self.encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("definition_levels_byte_length", TType::I32, 5))?; + o_prot.write_i32(self.definition_levels_byte_length)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_levels_byte_length", TType::I32, 6))?; + o_prot.write_i32(self.repetition_levels_byte_length)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.is_compressed { + o_prot.write_field_begin(&TFieldIdentifier::new("is_compressed", TType::Bool, 7))?; + o_prot.write_bool(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.statistics { + o_prot.write_field_begin(&TFieldIdentifier::new("statistics", TType::Struct, 8))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// SplitBlockAlgorithm +// + +/// Block-based algorithm type annotation. 
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct SplitBlockAlgorithm {
+}
+
+impl SplitBlockAlgorithm {
+  pub fn new() -> SplitBlockAlgorithm {
+    SplitBlockAlgorithm {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<SplitBlockAlgorithm> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = SplitBlockAlgorithm {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("SplitBlockAlgorithm");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for SplitBlockAlgorithm {
+  fn default() -> Self {
+    SplitBlockAlgorithm{}
+  }
+}
+
+//
+// BloomFilterAlgorithm
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum BloomFilterAlgorithm {
+  BLOCK(SplitBlockAlgorithm),
+}
+
+impl BloomFilterAlgorithm {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<BloomFilterAlgorithm> {
+    let mut ret: Option<BloomFilterAlgorithm> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = SplitBlockAlgorithm::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(BloomFilterAlgorithm::BLOCK(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote BloomFilterAlgorithm"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote BloomFilterAlgorithm"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BloomFilterAlgorithm");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      BloomFilterAlgorithm::BLOCK(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("BLOCK", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// XxHash
+//
+
+/// Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
+/// algorithm. It uses 64 bits version of xxHash.
+///
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct XxHash {
+}
+
+impl XxHash {
+  pub fn new() -> XxHash {
+    XxHash {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<XxHash> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = XxHash {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("XxHash");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for XxHash {
+  fn default() -> Self {
+    XxHash{}
+  }
+}
+
+//
+// BloomFilterHash
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum BloomFilterHash {
+  XXHASH(XxHash),
+}
+
+impl BloomFilterHash {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<BloomFilterHash> {
+    let mut ret: Option<BloomFilterHash> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = XxHash::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(BloomFilterHash::XXHASH(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote BloomFilterHash"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote BloomFilterHash"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BloomFilterHash");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      BloomFilterHash::XXHASH(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("XXHASH", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// Uncompressed
+//
+
+/// The compression used in the Bloom filter.
+///
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct Uncompressed {
+}
+
+impl Uncompressed {
+  pub fn new() -> Uncompressed {
+    Uncompressed {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<Uncompressed> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = Uncompressed {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("Uncompressed");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for Uncompressed {
+  fn default() -> Self {
+    Uncompressed{}
+  }
+}
+
+//
+// BloomFilterCompression
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum BloomFilterCompression {
+  UNCOMPRESSED(Uncompressed),
+}
+
+impl BloomFilterCompression {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<BloomFilterCompression> {
+    let mut ret: Option<BloomFilterCompression> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = Uncompressed::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(BloomFilterCompression::UNCOMPRESSED(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote BloomFilterCompression"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote BloomFilterCompression"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BloomFilterCompression");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      BloomFilterCompression::UNCOMPRESSED(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("UNCOMPRESSED", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// BloomFilterHeader
+//
+
+/// Bloom filter header is stored at beginning of Bloom filter data of each column
+/// and followed by its bitset.
+///
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct BloomFilterHeader {
+  /// The size of bitset in bytes *
+  pub num_bytes: i32,
+  /// The algorithm for setting bits. *
+  pub algorithm: BloomFilterAlgorithm,
+  /// The hash function used for Bloom filter. *
+  pub hash: BloomFilterHash,
+  /// The compression used in the Bloom filter *
+  pub compression: BloomFilterCompression,
+}
+
+impl BloomFilterHeader {
+  pub fn new(num_bytes: i32, algorithm: BloomFilterAlgorithm, hash: BloomFilterHash, compression: BloomFilterCompression) -> BloomFilterHeader {
+    BloomFilterHeader {
+      num_bytes,
+      algorithm,
+      hash,
+      compression,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<BloomFilterHeader> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<BloomFilterAlgorithm> = None;
+    let mut f_3: Option<BloomFilterHash> = None;
+    let mut f_4: Option<BloomFilterCompression> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = BloomFilterAlgorithm::read_from_in_protocol(i_prot)?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = BloomFilterHash::read_from_in_protocol(i_prot)?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = BloomFilterCompression::read_from_in_protocol(i_prot)?;
+          f_4 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("BloomFilterHeader.num_bytes", &f_1)?;
+    verify_required_field_exists("BloomFilterHeader.algorithm", &f_2)?;
+    verify_required_field_exists("BloomFilterHeader.hash", &f_3)?;
+    verify_required_field_exists("BloomFilterHeader.compression", &f_4)?;
+    let ret = BloomFilterHeader {
+      num_bytes: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      algorithm: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      hash: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      compression: f_4.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BloomFilterHeader");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("numBytes", TType::I32, 1))?;
+    o_prot.write_i32(self.num_bytes)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("algorithm", TType::Struct, 2))?;
+    self.algorithm.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("hash", TType::Struct, 3))?;
+    self.hash.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("compression", TType::Struct, 4))?;
+    self.compression.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
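+
+// Illustrative sketch of the layout the doc comment above describes: the header
+// is serialized immediately before the raw bitset bytes. Assumes `thrift = "0.16"`;
+// the function name and buffer handling are illustrative, not generator output.
+fn example_write_bloom_filter(header: &BloomFilterHeader, bitset: &[u8]) -> thrift::Result<Vec<u8>> {
+  use thrift::protocol::TCompactOutputProtocol;
+  let mut out = Vec::new();
+  {
+    // Header first, via the compact protocol...
+    let mut o_prot = TCompactOutputProtocol::new(&mut out);
+    header.write_to_out_protocol(&mut o_prot)?;
+  }
+  // ...then the bitset follows as raw bytes.
+  out.extend_from_slice(bitset);
+  Ok(out)
+}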
+
+//
+// PageHeader
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageHeader {
+  /// the type of the page: indicates which of the *_header fields is set *
+  pub type_: PageType,
+  /// Uncompressed page size in bytes (not including this header) *
+  pub uncompressed_page_size: i32,
+  /// Compressed (and potentially encrypted) page size in bytes, not including this header *
+  pub compressed_page_size: i32,
+  /// The 32bit CRC for the page, to be calculated as follows:
+  /// - Using the standard CRC32 algorithm
+  /// - On the data only, i.e. this header should not be included. 'Data'
+  /// hereby refers to the concatenation of the repetition levels, the
+  /// definition levels and the column value, in this exact order.
+  /// - On the encoded versions of the repetition levels, definition levels and
+  /// column values
+  /// - On the compressed versions of the repetition levels, definition levels
+  /// and column values where possible;
+  /// - For v1 data pages, the repetition levels, definition levels and column
+  /// values are always compressed together. If a compression scheme is
+  /// specified, the CRC shall be calculated on the compressed version of
+  /// this concatenation. If no compression scheme is specified, the CRC
+  /// shall be calculated on the uncompressed version of this concatenation.
+  /// - For v2 data pages, the repetition levels and definition levels are
+  /// handled separately from the data and are never compressed (only
+  /// encoded). If a compression scheme is specified, the CRC shall be
+  /// calculated on the concatenation of the uncompressed repetition levels,
+  /// uncompressed definition levels and the compressed column values.
+  /// If no compression scheme is specified, the CRC shall be calculated on
+  /// the uncompressed concatenation.
+  /// - In encrypted columns, CRC is calculated after page encryption; the
+  /// encryption itself is performed after page compression (if compressed)
+  /// If enabled, this allows for disabling checksumming in HDFS if only a few
+  /// pages need to be read.
+  ///
+  pub crc: Option<i32>,
+  pub data_page_header: Option<DataPageHeader>,
+  pub index_page_header: Option<IndexPageHeader>,
+  pub dictionary_page_header: Option<DictionaryPageHeader>,
+  pub data_page_header_v2: Option<DataPageHeaderV2>,
+}
+
+impl PageHeader {
+  pub fn new<F4, F5, F6, F7, F8>(type_: PageType, uncompressed_page_size: i32, compressed_page_size: i32, crc: F4, data_page_header: F5, index_page_header: F6, dictionary_page_header: F7, data_page_header_v2: F8) -> PageHeader where F4: Into<Option<i32>>, F5: Into<Option<DataPageHeader>>, F6: Into<Option<IndexPageHeader>>, F7: Into<Option<DictionaryPageHeader>>, F8: Into<Option<DataPageHeaderV2>> {
+    PageHeader {
+      type_,
+      uncompressed_page_size,
+      compressed_page_size,
+      crc: crc.into(),
+      data_page_header: data_page_header.into(),
+      index_page_header: index_page_header.into(),
+      dictionary_page_header: dictionary_page_header.into(),
+      data_page_header_v2: data_page_header_v2.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<PageHeader> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<PageType> = None;
+    let mut f_2: Option<i32> = None;
+    let mut f_3: Option<i32> = None;
+    let mut f_4: Option<i32> = None;
+    let mut f_5: Option<DataPageHeader> = None;
+    let mut f_6: Option<IndexPageHeader> = None;
+    let mut f_7: Option<DictionaryPageHeader> = None;
+    let mut f_8: Option<DataPageHeaderV2> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = PageType::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i32()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i32()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = i_prot.read_i32()?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = DataPageHeader::read_from_in_protocol(i_prot)?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = IndexPageHeader::read_from_in_protocol(i_prot)?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = DictionaryPageHeader::read_from_in_protocol(i_prot)?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = DataPageHeaderV2::read_from_in_protocol(i_prot)?;
+          f_8 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
verify_required_field_exists("PageHeader.type_", &f_1)?; + verify_required_field_exists("PageHeader.uncompressed_page_size", &f_2)?; + verify_required_field_exists("PageHeader.compressed_page_size", &f_3)?; + let ret = PageHeader { + type_: f_1.expect("auto-generated code should have checked for presence of required fields"), + uncompressed_page_size: f_2.expect("auto-generated code should have checked for presence of required fields"), + compressed_page_size: f_3.expect("auto-generated code should have checked for presence of required fields"), + crc: f_4, + data_page_header: f_5, + index_page_header: f_6, + dictionary_page_header: f_7, + data_page_header_v2: f_8, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("PageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?; + self.type_.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("uncompressed_page_size", TType::I32, 2))?; + o_prot.write_i32(self.uncompressed_page_size)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("compressed_page_size", TType::I32, 3))?; + o_prot.write_i32(self.compressed_page_size)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.crc { + o_prot.write_field_begin(&TFieldIdentifier::new("crc", TType::I32, 4))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.data_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("data_page_header", TType::Struct, 5))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.index_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("index_page_header", TType::Struct, 6))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.dictionary_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("dictionary_page_header", TType::Struct, 7))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? + } + if let Some(ref fld_var) = self.data_page_header_v2 { + o_prot.write_field_begin(&TFieldIdentifier::new("data_page_header_v2", TType::Struct, 8))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()? 
+
+//
+// KeyValue
+//
+
+/// Wrapper struct to store key values
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct KeyValue {
+  pub key: String,
+  pub value: Option<String>,
+}
+
+impl KeyValue {
+  pub fn new<F2>(key: String, value: F2) -> KeyValue where F2: Into<Option<String>> {
+    KeyValue {
+      key,
+      value: value.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<KeyValue> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<String> = None;
+    let mut f_2: Option<String> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_string()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_string()?;
+          f_2 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("KeyValue.key", &f_1)?;
+    let ret = KeyValue {
+      key: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      value: f_2,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("KeyValue");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("key", TType::String, 1))?;
+    o_prot.write_string(&self.key)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.value {
+      o_prot.write_field_begin(&TFieldIdentifier::new("value", TType::String, 2))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// SortingColumn
+//
+
+/// Wrapper struct to specify sort order
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct SortingColumn {
+  /// The column index (in this row group) *
+  pub column_idx: i32,
+  /// If true, indicates this column is sorted in descending order. *
+  pub descending: bool,
+  /// If true, nulls will come before non-null values, otherwise,
+  /// nulls go at the end.
+  pub nulls_first: bool,
+}
+
+impl SortingColumn {
+  pub fn new(column_idx: i32, descending: bool, nulls_first: bool) -> SortingColumn {
+    SortingColumn {
+      column_idx,
+      descending,
+      nulls_first,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<SortingColumn> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<bool> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_bool()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_bool()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("SortingColumn.column_idx", &f_1)?;
+    verify_required_field_exists("SortingColumn.descending", &f_2)?;
+    verify_required_field_exists("SortingColumn.nulls_first", &f_3)?;
+    let ret = SortingColumn {
+      column_idx: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      descending: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      nulls_first: f_3.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("SortingColumn");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("column_idx", TType::I32, 1))?;
+    o_prot.write_i32(self.column_idx)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("descending", TType::Bool, 2))?;
+    o_prot.write_bool(self.descending)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("nulls_first", TType::Bool, 3))?;
+    o_prot.write_bool(self.nulls_first)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// PageEncodingStats
+//
+
+/// statistics of a given page type and encoding
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageEncodingStats {
+  /// the page type (data/dic/...) *
+  pub page_type: PageType,
+  /// encoding of the page *
+  pub encoding: Encoding,
+  /// number of pages of this type with this encoding *
+  pub count: i32,
+}
+
+impl PageEncodingStats {
+  pub fn new(page_type: PageType, encoding: Encoding, count: i32) -> PageEncodingStats {
+    PageEncodingStats {
+      page_type,
+      encoding,
+      count,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<PageEncodingStats> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<PageType> = None;
+    let mut f_2: Option<Encoding> = None;
+    let mut f_3: Option<i32> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = PageType::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = Encoding::read_from_in_protocol(i_prot)?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i32()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("PageEncodingStats.page_type", &f_1)?;
+    verify_required_field_exists("PageEncodingStats.encoding", &f_2)?;
+    verify_required_field_exists("PageEncodingStats.count", &f_3)?;
+    let ret = PageEncodingStats {
+      page_type: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      encoding: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      count: f_3.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("PageEncodingStats");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("page_type", TType::I32, 1))?;
+    self.page_type.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?;
+    self.encoding.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("count", TType::I32, 3))?;
+    o_prot.write_i32(self.count)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// ColumnMetaData
+//
+
+/// Description for column metadata
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnMetaData {
+  /// Type of this column *
+  pub type_: Type,
+  /// Set of all encodings used for this column. The purpose is to validate
+  /// whether we can decode those pages. *
+  pub encodings: Vec<Encoding>,
+  /// Path in schema *
+  pub path_in_schema: Vec<String>,
+  /// Compression codec *
+  pub codec: CompressionCodec,
+  /// Number of values in this column *
+  pub num_values: i64,
+  /// total byte size of all uncompressed pages in this column chunk (including the headers) *
+  pub total_uncompressed_size: i64,
+  /// total byte size of all compressed, and potentially encrypted, pages
+  /// in this column chunk (including the headers) *
+  pub total_compressed_size: i64,
+  /// Optional key/value metadata *
+  pub key_value_metadata: Option<Vec<KeyValue>>,
+  /// Byte offset from beginning of file to first data page *
+  pub data_page_offset: i64,
+  /// Byte offset from beginning of file to root index page *
+  pub index_page_offset: Option<i64>,
+  /// Byte offset from the beginning of file to first (only) dictionary page *
+  pub dictionary_page_offset: Option<i64>,
+  /// optional statistics for this column chunk
+  pub statistics: Option<Statistics>,
+  /// Set of all encodings used for pages in this column chunk.
+  /// This information can be used to determine if all data pages are
+  /// dictionary encoded for example *
+  pub encoding_stats: Option<Vec<PageEncodingStats>>,
+  /// Byte offset from beginning of file to Bloom filter data. *
+  pub bloom_filter_offset: Option<i64>,
+}
+
+impl ColumnMetaData {
+  pub fn new<F8, F10, F11, F12, F13, F14>(type_: Type, encodings: Vec<Encoding>, path_in_schema: Vec<String>, codec: CompressionCodec, num_values: i64, total_uncompressed_size: i64, total_compressed_size: i64, key_value_metadata: F8, data_page_offset: i64, index_page_offset: F10, dictionary_page_offset: F11, statistics: F12, encoding_stats: F13, bloom_filter_offset: F14) -> ColumnMetaData where F8: Into<Option<Vec<KeyValue>>>, F10: Into<Option<i64>>, F11: Into<Option<i64>>, F12: Into<Option<Statistics>>, F13: Into<Option<Vec<PageEncodingStats>>>, F14: Into<Option<i64>> {
+    ColumnMetaData {
+      type_,
+      encodings,
+      path_in_schema,
+      codec,
+      num_values,
+      total_uncompressed_size,
+      total_compressed_size,
+      key_value_metadata: key_value_metadata.into(),
+      data_page_offset,
+      index_page_offset: index_page_offset.into(),
+      dictionary_page_offset: dictionary_page_offset.into(),
+      statistics: statistics.into(),
+      encoding_stats: encoding_stats.into(),
+      bloom_filter_offset: bloom_filter_offset.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnMetaData> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Type> = None;
+    let mut f_2: Option<Vec<Encoding>> = None;
+    let mut f_3: Option<Vec<String>> = None;
+    let mut f_4: Option<CompressionCodec> = None;
+    let mut f_5: Option<i64> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i64> = None;
+    let mut f_8: Option<Vec<KeyValue>> = None;
+    let mut f_9: Option<i64> = None;
+    let mut f_10: Option<i64> = None;
+    let mut f_11: Option<i64> = None;
+    let mut f_12: Option<Statistics> = None;
+    let mut f_13: Option<Vec<PageEncodingStats>> = None;
+    let mut f_14: Option<i64> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = Type::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Encoding> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_0 = Encoding::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_0);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_1 = i_prot.read_string()?;
+            val.push(list_elem_1);
+          }
+          i_prot.read_list_end()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = CompressionCodec::read_from_in_protocol(i_prot)?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = i_prot.read_i64()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = i_prot.read_i64()?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = i_prot.read_i64()?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_2 = KeyValue::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_2);
+          }
+          i_prot.read_list_end()?;
+          f_8 = Some(val);
+        },
+        9 => {
+          let val = i_prot.read_i64()?;
+          f_9 = Some(val);
+        },
+        10 => {
+          let val = i_prot.read_i64()?;
+          f_10 = Some(val);
+        },
+        11 => {
+          let val = i_prot.read_i64()?;
+          f_11 = Some(val);
+        },
+        12 => {
+          let val = Statistics::read_from_in_protocol(i_prot)?;
+          f_12 = Some(val);
+        },
+        13 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<PageEncodingStats> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_3 = PageEncodingStats::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_3);
+          }
+          i_prot.read_list_end()?;
+          f_13 = Some(val);
+        },
+        14 => {
+          let val = i_prot.read_i64()?;
+          f_14 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnMetaData.type_", &f_1)?;
+    verify_required_field_exists("ColumnMetaData.encodings", &f_2)?;
+    verify_required_field_exists("ColumnMetaData.path_in_schema", &f_3)?;
+    verify_required_field_exists("ColumnMetaData.codec", &f_4)?;
+    verify_required_field_exists("ColumnMetaData.num_values", &f_5)?;
+    verify_required_field_exists("ColumnMetaData.total_uncompressed_size", &f_6)?;
+    verify_required_field_exists("ColumnMetaData.total_compressed_size", &f_7)?;
+    verify_required_field_exists("ColumnMetaData.data_page_offset", &f_9)?;
+    let ret = ColumnMetaData {
+      type_: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      encodings: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      path_in_schema: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      codec: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      num_values: f_5.expect("auto-generated code should have checked for presence of required fields"),
+      total_uncompressed_size: f_6.expect("auto-generated code should have checked for presence of required fields"),
+      total_compressed_size: f_7.expect("auto-generated code should have checked for presence of required fields"),
+      key_value_metadata: f_8,
+      data_page_offset: f_9.expect("auto-generated code should have checked for presence of required fields"),
+      index_page_offset: f_10,
+      dictionary_page_offset: f_11,
+      statistics: f_12,
+      encoding_stats: f_13,
+      bloom_filter_offset: f_14,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?;
+    self.type_.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encodings", TType::List, 2))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::I32, self.encodings.len() as i32))?;
+    for e in &self.encodings {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("path_in_schema", TType::List, 3))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.path_in_schema.len() as i32))?;
+    for e in &self.path_in_schema {
+      o_prot.write_string(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("codec", TType::I32, 4))?;
+    self.codec.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I64, 5))?;
+    o_prot.write_i64(self.num_values)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_uncompressed_size", TType::I64, 6))?;
+    o_prot.write_i64(self.total_uncompressed_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_compressed_size", TType::I64, 7))?;
+    o_prot.write_i64(self.total_compressed_size)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_value_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_value_metadata", TType::List, 8))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_begin(&TFieldIdentifier::new("data_page_offset", TType::I64, 9))?;
+    o_prot.write_i64(self.data_page_offset)?;
+    o_prot.write_field_end()?;
+    if let Some(fld_var) = self.index_page_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("index_page_offset", TType::I64, 10))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.dictionary_page_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("dictionary_page_offset", TType::I64, 11))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.statistics {
+      o_prot.write_field_begin(&TFieldIdentifier::new("statistics", TType::Struct, 12))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.encoding_stats {
+      o_prot.write_field_begin(&TFieldIdentifier::new("encoding_stats", TType::List, 13))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.bloom_filter_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("bloom_filter_offset", TType::I64, 14))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
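+
+// Illustrative sketch of the check the `encoding_stats` doc comment describes:
+// a chunk is fully dictionary-encoded if every data page reported there uses a
+// dictionary encoding. Assumes stats, when present, cover all pages; the
+// function name is illustrative, not generator output.
+fn example_all_data_pages_dictionary_encoded(meta: &ColumnMetaData) -> bool {
+  meta.encoding_stats.as_ref().map_or(false, |stats| {
+    stats
+      .iter()
+      // Only data pages matter; dictionary/index pages are excluded.
+      .filter(|s| s.page_type == PageType::DATA_PAGE || s.page_type == PageType::DATA_PAGE_V2)
+      .all(|s| s.encoding == Encoding::PLAIN_DICTIONARY || s.encoding == Encoding::RLE_DICTIONARY)
+  })
+}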
+
+//
+// EncryptionWithFooterKey
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct EncryptionWithFooterKey {
+}
+
+impl EncryptionWithFooterKey {
+  pub fn new() -> EncryptionWithFooterKey {
+    EncryptionWithFooterKey {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<EncryptionWithFooterKey> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = EncryptionWithFooterKey {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("EncryptionWithFooterKey");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for EncryptionWithFooterKey {
+  fn default() -> Self {
+    EncryptionWithFooterKey{}
+  }
+}
+
+//
+// EncryptionWithColumnKey
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct EncryptionWithColumnKey {
+  /// Column path in schema *
+  pub path_in_schema: Vec<String>,
+  /// Retrieval metadata of column encryption key *
+  pub key_metadata: Option<Vec<u8>>,
+}
+
+impl EncryptionWithColumnKey {
+  pub fn new<F2>(path_in_schema: Vec<String>, key_metadata: F2) -> EncryptionWithColumnKey where F2: Into<Option<Vec<u8>>> {
+    EncryptionWithColumnKey {
+      path_in_schema,
+      key_metadata: key_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<EncryptionWithColumnKey> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<String>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_4 = i_prot.read_string()?;
+            val.push(list_elem_4);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_bytes()?;
+          f_2 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("EncryptionWithColumnKey.path_in_schema", &f_1)?;
+    let ret = EncryptionWithColumnKey {
+      path_in_schema: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      key_metadata: f_2,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("EncryptionWithColumnKey");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("path_in_schema", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.path_in_schema.len() as i32))?;
+    for e in &self.path_in_schema {
+      o_prot.write_string(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_metadata", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// ColumnCryptoMetaData
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum ColumnCryptoMetaData {
+  ENCRYPTIONWITHFOOTERKEY(EncryptionWithFooterKey),
+  ENCRYPTIONWITHCOLUMNKEY(EncryptionWithColumnKey),
+}
+
+impl ColumnCryptoMetaData {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnCryptoMetaData> {
+    let mut ret: Option<ColumnCryptoMetaData> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = EncryptionWithFooterKey::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(val));
+          }
+          received_field_count += 1;
+        },
+        2 => {
+          let val = EncryptionWithColumnKey::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote ColumnCryptoMetaData"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote ColumnCryptoMetaData"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnCryptoMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("ENCRYPTION_WITH_FOOTER_KEY", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+      ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("ENCRYPTION_WITH_COLUMN_KEY", TType::Struct, 2))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// ColumnChunk
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnChunk {
+  /// File where column data is stored. If not set, assumed to be same file as
+  /// metadata. This path is relative to the current file.
+  ///
+  pub file_path: Option<String>,
+  /// Byte offset in file_path to the ColumnMetaData *
+  pub file_offset: i64,
+  /// Column metadata for this chunk. This is the same content as what is at
+  /// file_path/file_offset. Having it here has it replicated in the file
+  /// metadata.
+  ///
+  pub meta_data: Option<ColumnMetaData>,
+  /// File offset of ColumnChunk's OffsetIndex *
+  pub offset_index_offset: Option<i64>,
+  /// Size of ColumnChunk's OffsetIndex, in bytes *
+  pub offset_index_length: Option<i32>,
+  /// File offset of ColumnChunk's ColumnIndex *
+  pub column_index_offset: Option<i64>,
+  /// Size of ColumnChunk's ColumnIndex, in bytes *
+  pub column_index_length: Option<i32>,
+  /// Crypto metadata of encrypted columns *
+  pub crypto_metadata: Option<ColumnCryptoMetaData>,
+  /// Encrypted column metadata for this chunk *
+  pub encrypted_column_metadata: Option<Vec<u8>>,
+}
+
+impl ColumnChunk {
+  pub fn new<F1, F3, F4, F5, F6, F7, F8, F9>(file_path: F1, file_offset: i64, meta_data: F3, offset_index_offset: F4, offset_index_length: F5, column_index_offset: F6, column_index_length: F7, crypto_metadata: F8, encrypted_column_metadata: F9) -> ColumnChunk where F1: Into<Option<String>>, F3: Into<Option<ColumnMetaData>>, F4: Into<Option<i64>>, F5: Into<Option<i32>>, F6: Into<Option<i64>>, F7: Into<Option<i32>>, F8: Into<Option<ColumnCryptoMetaData>>, F9: Into<Option<Vec<u8>>> {
+    ColumnChunk {
+      file_path: file_path.into(),
+      file_offset,
+      meta_data: meta_data.into(),
+      offset_index_offset: offset_index_offset.into(),
+      offset_index_length: offset_index_length.into(),
+      column_index_offset: column_index_offset.into(),
+      column_index_length: column_index_length.into(),
+      crypto_metadata: crypto_metadata.into(),
+      encrypted_column_metadata: encrypted_column_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnChunk> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<String> = None;
+    let mut f_2: Option<i64> = None;
+    let mut f_3: Option<ColumnMetaData> = None;
+    let mut f_4: Option<i64> = None;
+    let mut f_5: Option<i32> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i32> = None;
+    let mut f_8: Option<ColumnCryptoMetaData> = None;
+    let mut f_9: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_string()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i64()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = ColumnMetaData::read_from_in_protocol(i_prot)?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = i_prot.read_i64()?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = i_prot.read_i32()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = i_prot.read_i64()?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = i_prot.read_i32()?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = ColumnCryptoMetaData::read_from_in_protocol(i_prot)?;
+          f_8 = Some(val);
+        },
+        9 => {
+          let val = i_prot.read_bytes()?;
+          f_9 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnChunk.file_offset", &f_2)?;
+    let ret = ColumnChunk {
+      file_path: f_1,
+      file_offset: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      meta_data: f_3,
+      offset_index_offset: f_4,
+      offset_index_length: f_5,
+      column_index_offset: f_6,
+      column_index_length: f_7,
+      crypto_metadata: f_8,
+      encrypted_column_metadata: f_9,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnChunk");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.file_path {
+      o_prot.write_field_begin(&TFieldIdentifier::new("file_path", TType::String, 1))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_begin(&TFieldIdentifier::new("file_offset", TType::I64, 2))?;
+    o_prot.write_i64(self.file_offset)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.meta_data {
+      o_prot.write_field_begin(&TFieldIdentifier::new("meta_data", TType::Struct, 3))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.offset_index_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("offset_index_offset", TType::I64, 4))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.offset_index_length {
+      o_prot.write_field_begin(&TFieldIdentifier::new("offset_index_length", TType::I32, 5))?;
+      o_prot.write_i32(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.column_index_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("column_index_offset", TType::I64, 6))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.column_index_length {
+      o_prot.write_field_begin(&TFieldIdentifier::new("column_index_length", TType::I32, 7))?;
+      o_prot.write_i32(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.crypto_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("crypto_metadata", TType::Struct, 8))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.encrypted_column_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("encrypted_column_metadata", TType::String, 9))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
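+
+// Illustrative sketch: locating the serialized OffsetIndex for a chunk from the
+// optional offset/length pair described above, as a byte range within the file.
+// Returns None when the writer recorded no offset index; the function name is
+// illustrative, not generator output.
+fn example_offset_index_range(chunk: &ColumnChunk) -> Option<std::ops::Range<u64>> {
+  let offset = chunk.offset_index_offset? as u64;
+  let length = chunk.offset_index_length? as u64;
+  Some(offset..offset + length)
+}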
+
+//
+// RowGroup
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct RowGroup {
+  /// Metadata for each column chunk in this row group.
+  /// This list must have the same order as the SchemaElement list in FileMetaData.
+  ///
+  pub columns: Vec<ColumnChunk>,
+  /// Total byte size of all the uncompressed column data in this row group *
+  pub total_byte_size: i64,
+  /// Number of rows in this row group *
+  pub num_rows: i64,
+  /// If set, specifies a sort ordering of the rows in this RowGroup.
+  /// The sorting columns can be a subset of all the columns.
+  pub sorting_columns: Option<Vec<SortingColumn>>,
+  /// Byte offset from beginning of file to first page (data or dictionary)
+  /// in this row group *
+  pub file_offset: Option<i64>,
+  /// Total byte size of all compressed (and potentially encrypted) column data
+  /// in this row group *
+  pub total_compressed_size: Option<i64>,
+  /// Row group ordinal in the file *
+  pub ordinal: Option<i16>,
+}
+
+impl RowGroup {
+  pub fn new<F4, F5, F6, F7>(columns: Vec<ColumnChunk>, total_byte_size: i64, num_rows: i64, sorting_columns: F4, file_offset: F5, total_compressed_size: F6, ordinal: F7) -> RowGroup where F4: Into<Option<Vec<SortingColumn>>>, F5: Into<Option<i64>>, F6: Into<Option<i64>>, F7: Into<Option<i16>> {
+    RowGroup {
+      columns,
+      total_byte_size,
+      num_rows,
+      sorting_columns: sorting_columns.into(),
+      file_offset: file_offset.into(),
+      total_compressed_size: total_compressed_size.into(),
+      ordinal: ordinal.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<RowGroup> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<ColumnChunk>> = None;
+    let mut f_2: Option<i64> = None;
+    let mut f_3: Option<i64> = None;
+    let mut f_4: Option<Vec<SortingColumn>> = None;
+    let mut f_5: Option<i64> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i16> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<ColumnChunk> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_5 = ColumnChunk::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_5);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i64()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i64()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<SortingColumn> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_6 = SortingColumn::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_6);
+          }
+          i_prot.read_list_end()?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = i_prot.read_i64()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = i_prot.read_i64()?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = i_prot.read_i16()?;
+          f_7 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("RowGroup.columns", &f_1)?;
+    verify_required_field_exists("RowGroup.total_byte_size", &f_2)?;
+    verify_required_field_exists("RowGroup.num_rows", &f_3)?;
+    let ret = RowGroup {
+      columns: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      total_byte_size: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      sorting_columns: f_4,
+      file_offset: f_5,
+      total_compressed_size: f_6,
+      ordinal: f_7,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("RowGroup");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("columns", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.columns.len() as i32))?;
+    for e in &self.columns {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_byte_size", TType::I64, 2))?;
+    o_prot.write_i64(self.total_byte_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I64, 3))?;
+    o_prot.write_i64(self.num_rows)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.sorting_columns {
+      o_prot.write_field_begin(&TFieldIdentifier::new("sorting_columns", TType::List, 4))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.file_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("file_offset", TType::I64, 5))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.total_compressed_size {
+      o_prot.write_field_begin(&TFieldIdentifier::new("total_compressed_size", TType::I64, 6))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.ordinal {
+      o_prot.write_field_begin(&TFieldIdentifier::new("ordinal", TType::I16, 7))?;
+      o_prot.write_i16(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
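+
+// NOTE (editorial illustration): the generated constructor takes
+// `Into<Option<_>>` for optional fields, so callers may pass plain values or
+// `None` directly. A sketch, assuming `chunks: Vec<ColumnChunk>` is already built:
+//
+//     let rg = RowGroup::new(chunks, 4096, 100, None, 4_i64, 4096_i64, 0_i16);
+//     assert_eq!(rg.ordinal, Some(0));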
+
+//
+// TypeDefinedOrder
+//
+
+/// Empty struct to signal the order defined by the physical or logical type
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct TypeDefinedOrder {
+}
+
+impl TypeDefinedOrder {
+  pub fn new() -> TypeDefinedOrder {
+    TypeDefinedOrder {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<TypeDefinedOrder> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = TypeDefinedOrder {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("TypeDefinedOrder");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for TypeDefinedOrder {
+  fn default() -> Self {
+    TypeDefinedOrder{}
+  }
+}
+
+//
+// ColumnOrder
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum ColumnOrder {
+  TYPEORDER(TypeDefinedOrder),
+}
+
+impl ColumnOrder {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnOrder> {
+    let mut ret: Option<ColumnOrder> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = TypeDefinedOrder::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnOrder::TYPEORDER(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote ColumnOrder"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote ColumnOrder"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnOrder");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      ColumnOrder::TYPEORDER(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("TYPE_ORDER", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// PageLocation
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageLocation {
+  /// Offset of the page in the file *
+  pub offset: i64,
+  /// Size of the page, including header. Sum of compressed_page_size and header
+  /// length
+  pub compressed_page_size: i32,
+  /// Index within the RowGroup of the first row of the page; this means pages
+  /// change on record boundaries (r = 0).
+  pub first_row_index: i64,
+}
+
+impl PageLocation {
+  pub fn new(offset: i64, compressed_page_size: i32, first_row_index: i64) -> PageLocation {
+    PageLocation {
+      offset,
+      compressed_page_size,
+      first_row_index,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<PageLocation> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i64> = None;
+    let mut f_2: Option<i32> = None;
+    let mut f_3: Option<i64> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i64()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i32()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i64()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("PageLocation.offset", &f_1)?;
+    verify_required_field_exists("PageLocation.compressed_page_size", &f_2)?;
+    verify_required_field_exists("PageLocation.first_row_index", &f_3)?;
+    let ret = PageLocation {
+      offset: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      compressed_page_size: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      first_row_index: f_3.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("PageLocation");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("offset", TType::I64, 1))?;
+    o_prot.write_i64(self.offset)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("compressed_page_size", TType::I32, 2))?;
+    o_prot.write_i32(self.compressed_page_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("first_row_index", TType::I64, 3))?;
+    o_prot.write_i64(self.first_row_index)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
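+
+// NOTE (editorial illustration): row counts are not stored per page; they are
+// recovered by differencing first_row_index values. A sketch, assuming
+// `locations: &[PageLocation]` and the row group's `num_rows: i64`:
+//
+//     let rows_in_page = |i: usize| match locations.get(i + 1) {
+//         Some(next) => next.first_row_index - locations[i].first_row_index,
+//         None => num_rows - locations[i].first_row_index,
+//     };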
+
+//
+// OffsetIndex
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct OffsetIndex {
+  /// PageLocations, ordered by increasing PageLocation.offset. It is required
+  /// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
+  pub page_locations: Vec<PageLocation>,
+}
+
+impl OffsetIndex {
+  pub fn new(page_locations: Vec<PageLocation>) -> OffsetIndex {
+    OffsetIndex {
+      page_locations,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<OffsetIndex> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<PageLocation>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<PageLocation> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_7 = PageLocation::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_7);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("OffsetIndex.page_locations", &f_1)?;
+    let ret = OffsetIndex {
+      page_locations: f_1.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("OffsetIndex");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("page_locations", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.page_locations.len() as i32))?;
+    for e in &self.page_locations {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
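+
+// NOTE (editorial illustration): the ordering invariant documented on
+// page_locations can be checked on a decoded OffsetIndex like so:
+//
+//     let ordered = offset_index
+//         .page_locations
+//         .windows(2)
+//         .all(|w| w[0].first_row_index < w[1].first_row_index);
+//     assert!(ordered);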
+
+//
+// ColumnIndex
+//
+
+/// Description for ColumnIndex.
+/// Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnIndex {
+  /// A list of Boolean values to determine the validity of the corresponding
+  /// min and max values. If true, a page contains only null values, and writers
+  /// have to set the corresponding entries in min_values and max_values to
+  /// byte[0], so that all lists have the same length. If false, the
+  /// corresponding entries in min_values and max_values must be valid.
+  pub null_pages: Vec<bool>,
+  /// Two lists containing lower and upper bounds for the values of each page.
+  /// These may be the actual minimum and maximum values found on a page, but
+  /// can also be (more compact) values that do not exist on a page. For
+  /// example, instead of storing "Blart Versenwald III", a writer may set
+  /// min_values[i]="B", max_values[i]="C". Such more compact values must still
+  /// be valid values within the column's logical type. Readers must make sure
+  /// that list entries are populated before using them by inspecting null_pages.
+  pub min_values: Vec<Vec<u8>>,
+  pub max_values: Vec<Vec<u8>>,
+  /// Stores whether both min_values and max_values are ordered and if so, in
+  /// which direction. This allows readers to perform binary searches in both
+  /// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
+  /// if the lists are ordered.
+  pub boundary_order: BoundaryOrder,
+  /// A list containing the number of null values for each page *
+  pub null_counts: Option<Vec<i64>>,
+}
+
+impl ColumnIndex {
+  pub fn new<F5>(null_pages: Vec<bool>, min_values: Vec<Vec<u8>>, max_values: Vec<Vec<u8>>, boundary_order: BoundaryOrder, null_counts: F5) -> ColumnIndex where F5: Into<Option<Vec<i64>>> {
+    ColumnIndex {
+      null_pages,
+      min_values,
+      max_values,
+      boundary_order,
+      null_counts: null_counts.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnIndex> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<bool>> = None;
+    let mut f_2: Option<Vec<Vec<u8>>> = None;
+    let mut f_3: Option<Vec<Vec<u8>>> = None;
+    let mut f_4: Option<BoundaryOrder> = None;
+    let mut f_5: Option<Vec<i64>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<bool> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_8 = i_prot.read_bool()?;
+            val.push(list_elem_8);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_9 = i_prot.read_bytes()?;
+            val.push(list_elem_9);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_10 = i_prot.read_bytes()?;
+            val.push(list_elem_10);
+          }
+          i_prot.read_list_end()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = BoundaryOrder::read_from_in_protocol(i_prot)?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_11 = i_prot.read_i64()?;
+            val.push(list_elem_11);
+          }
+          i_prot.read_list_end()?;
+          f_5 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnIndex.null_pages", &f_1)?;
+    verify_required_field_exists("ColumnIndex.min_values", &f_2)?;
+    verify_required_field_exists("ColumnIndex.max_values", &f_3)?;
+    verify_required_field_exists("ColumnIndex.boundary_order", &f_4)?;
+    let ret = ColumnIndex {
+      null_pages: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      min_values: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      max_values: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      boundary_order: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      null_counts: f_5,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnIndex");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("null_pages", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Bool, self.null_pages.len() as i32))?;
+    for e in &self.null_pages {
+      o_prot.write_bool(*e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("min_values", TType::List, 2))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.min_values.len() as i32))?;
+    for e in &self.min_values {
+      o_prot.write_bytes(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("max_values", TType::List, 3))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.max_values.len() as i32))?;
+    for e in &self.max_values {
+      o_prot.write_bytes(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("boundary_order", TType::I32, 4))?;
+    self.boundary_order.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.null_counts {
+      o_prot.write_field_begin(&TFieldIdentifier::new("null_counts", TType::List, 5))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::I64, fld_var.len() as i32))?;
+      for e in fld_var {
+        o_prot.write_i64(*e)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
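+
+// NOTE (editorial illustration): per the null_pages documentation above,
+// min_values[i]/max_values[i] are only meaningful where null_pages[i] is
+// false. A sketch of extracting usable per-page bounds from `index: &ColumnIndex`:
+//
+//     let bounds: Vec<Option<(&[u8], &[u8])>> = index
+//         .null_pages
+//         .iter()
+//         .zip(index.min_values.iter().zip(&index.max_values))
+//         .map(|(null, (min, max))| (!null).then(|| (min.as_slice(), max.as_slice())))
+//         .collect();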
+
+//
+// AesGcmV1
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct AesGcmV1 {
+  /// AAD prefix *
+  pub aad_prefix: Option<Vec<u8>>,
+  /// Unique file identifier part of AAD suffix *
+  pub aad_file_unique: Option<Vec<u8>>,
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix *
+  pub supply_aad_prefix: Option<bool>,
+}
+
+impl AesGcmV1 {
+  pub fn new<F1, F2, F3>(aad_prefix: F1, aad_file_unique: F2, supply_aad_prefix: F3) -> AesGcmV1 where F1: Into<Option<Vec<u8>>>, F2: Into<Option<Vec<u8>>>, F3: Into<Option<bool>> {
+    AesGcmV1 {
+      aad_prefix: aad_prefix.into(),
+      aad_file_unique: aad_file_unique.into(),
+      supply_aad_prefix: supply_aad_prefix.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AesGcmV1> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<u8>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_bytes()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_bytes()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_bool()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = AesGcmV1 {
+      aad_prefix: f_1,
+      aad_file_unique: f_2,
+      supply_aad_prefix: f_3,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("AesGcmV1");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_prefix", TType::String, 1))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.aad_file_unique {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_file_unique", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.supply_aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("supply_aad_prefix", TType::Bool, 3))?;
+      o_prot.write_bool(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for AesGcmV1 {
+  fn default() -> Self {
+    AesGcmV1{
+      aad_prefix: Some(Vec::new()),
+      aad_file_unique: Some(Vec::new()),
+      supply_aad_prefix: Some(false),
+    }
+  }
+}
+
+//
+// AesGcmCtrV1
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct AesGcmCtrV1 {
+  /// AAD prefix *
+  pub aad_prefix: Option<Vec<u8>>,
+  /// Unique file identifier part of AAD suffix *
+  pub aad_file_unique: Option<Vec<u8>>,
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix *
+  pub supply_aad_prefix: Option<bool>,
+}
+
+impl AesGcmCtrV1 {
+  pub fn new<F1, F2, F3>(aad_prefix: F1, aad_file_unique: F2, supply_aad_prefix: F3) -> AesGcmCtrV1 where F1: Into<Option<Vec<u8>>>, F2: Into<Option<Vec<u8>>>, F3: Into<Option<bool>> {
+    AesGcmCtrV1 {
+      aad_prefix: aad_prefix.into(),
+      aad_file_unique: aad_file_unique.into(),
+      supply_aad_prefix: supply_aad_prefix.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AesGcmCtrV1> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<u8>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_bytes()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_bytes()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_bool()?;
+          f_3 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = AesGcmCtrV1 {
+      aad_prefix: f_1,
+      aad_file_unique: f_2,
+      supply_aad_prefix: f_3,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("AesGcmCtrV1");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_prefix", TType::String, 1))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.aad_file_unique {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_file_unique", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.supply_aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("supply_aad_prefix", TType::Bool, 3))?;
+      o_prot.write_bool(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for AesGcmCtrV1 {
+  fn default() -> Self {
+    AesGcmCtrV1{
+      aad_prefix: Some(Vec::new()),
+      aad_file_unique: Some(Vec::new()),
+      supply_aad_prefix: Some(false),
+    }
+  }
+}
+
+//
+// EncryptionAlgorithm
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum EncryptionAlgorithm {
+  AESGCMV1(AesGcmV1),
+  AESGCMCTRV1(AesGcmCtrV1),
+}
+
+impl EncryptionAlgorithm {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<EncryptionAlgorithm> {
+    let mut ret: Option<EncryptionAlgorithm> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = AesGcmV1::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(EncryptionAlgorithm::AESGCMV1(val));
+          }
+          received_field_count += 1;
+        },
+        2 => {
+          let val = AesGcmCtrV1::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(EncryptionAlgorithm::AESGCMCTRV1(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received empty union from remote EncryptionAlgorithm"
+          )
+        )
+      )
+    } else if received_field_count > 1 {
+      Err(
+        thrift::Error::Protocol(
+          ProtocolError::new(
+            ProtocolErrorKind::InvalidData,
+            "received multiple fields for union from remote EncryptionAlgorithm"
+          )
+        )
+      )
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("EncryptionAlgorithm");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      EncryptionAlgorithm::AESGCMV1(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("AES_GCM_V1", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+      EncryptionAlgorithm::AESGCMCTRV1(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("AES_GCM_CTR_V1", TType::Struct, 2))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
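+
+// NOTE (editorial illustration): the thrift union maps to a plain Rust enum,
+// so consumers dispatch with an ordinary match; the comments reflect the
+// Parquet encryption spec rather than anything in this generated code:
+//
+//     match algorithm {
+//         EncryptionAlgorithm::AESGCMV1(v) => { /* AES-GCM for all modules */ }
+//         EncryptionAlgorithm::AESGCMCTRV1(v) => { /* GCM metadata, CTR pages */ }
+//     }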
+
+//
+// FileMetaData
+//
+
+/// Description for file metadata
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileMetaData {
+  /// Version of this file *
+  pub version: i32,
+  /// Parquet schema for this file. This schema contains metadata for all the columns.
+  /// The schema is represented as a tree with a single root. The nodes of the tree
+  /// are flattened to a list by doing a depth-first traversal.
+  /// The column metadata contains the path in the schema for that column which can be
+  /// used to map columns to nodes in the schema.
+  /// The first element is the root *
+  pub schema: Vec<SchemaElement>,
+  /// Number of rows in this file *
+  pub num_rows: i64,
+  /// Row groups in this file *
+  pub row_groups: Vec<RowGroup>,
+  /// Optional key/value metadata *
+  pub key_value_metadata: Option<Vec<KeyValue>>,
+  /// String for application that wrote this file. This should be in the format
+  /// <Application> version <App Version> (build <App Build Hash>).
+  /// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
+  ///
+  pub created_by: Option<String>,
+  /// Sort order used for the min_value and max_value fields of each column in
+  /// this file. Sort orders are listed in the order matching the columns in the
+  /// schema. The indexes are not necessarily the same though, because only leaf
+  /// nodes of the schema are represented in the list of sort orders.
+  ///
+  /// Without column_orders, the meaning of the min_value and max_value fields is
+  /// undefined. To ensure well-defined behaviour, if min_value and max_value are
+  /// written to a Parquet file, column_orders must be written as well.
+  ///
+  /// The obsolete min and max fields are always sorted by signed comparison
+  /// regardless of column_orders.
+  pub column_orders: Option<Vec<ColumnOrder>>,
+  /// Encryption algorithm. This field is set only in encrypted files
+  /// with plaintext footer. Files with encrypted footer store algorithm id
+  /// in FileCryptoMetaData structure.
+  pub encryption_algorithm: Option<EncryptionAlgorithm>,
+  /// Retrieval metadata of key used for signing the footer.
+  /// Used only in encrypted files with plaintext footer.
+  pub footer_signing_key_metadata: Option<Vec<u8>>,
+}
+
+impl FileMetaData {
+  pub fn new<F5, F6, F7, F8, F9>(version: i32, schema: Vec<SchemaElement>, num_rows: i64, row_groups: Vec<RowGroup>, key_value_metadata: F5, created_by: F6, column_orders: F7, encryption_algorithm: F8, footer_signing_key_metadata: F9) -> FileMetaData where F5: Into<Option<Vec<KeyValue>>>, F6: Into<Option<String>>, F7: Into<Option<Vec<ColumnOrder>>>, F8: Into<Option<EncryptionAlgorithm>>, F9: Into<Option<Vec<u8>>> {
+    FileMetaData {
+      version,
+      schema,
+      num_rows,
+      row_groups,
+      key_value_metadata: key_value_metadata.into(),
+      created_by: created_by.into(),
+      column_orders: column_orders.into(),
+      encryption_algorithm: encryption_algorithm.into(),
+      footer_signing_key_metadata: footer_signing_key_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<FileMetaData> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<Vec<SchemaElement>> = None;
+    let mut f_3: Option<i64> = None;
+    let mut f_4: Option<Vec<RowGroup>> = None;
+    let mut f_5: Option<Vec<KeyValue>> = None;
+    let mut f_6: Option<String> = None;
+    let mut f_7: Option<Vec<ColumnOrder>> = None;
+    let mut f_8: Option<EncryptionAlgorithm> = None;
+    let mut f_9: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<SchemaElement> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_12 = SchemaElement::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_12);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = i_prot.read_i64()?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<RowGroup> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_13 = RowGroup::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_13);
+          }
+          i_prot.read_list_end()?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_14 = KeyValue::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_14);
+          }
+          i_prot.read_list_end()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = i_prot.read_string()?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<ColumnOrder> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_15 = ColumnOrder::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_15);
+          }
+          i_prot.read_list_end()?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = EncryptionAlgorithm::read_from_in_protocol(i_prot)?;
+          f_8 = Some(val);
+        },
+        9 => {
+          let val = i_prot.read_bytes()?;
+          f_9 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("FileMetaData.version", &f_1)?;
+    verify_required_field_exists("FileMetaData.schema", &f_2)?;
+    verify_required_field_exists("FileMetaData.num_rows", &f_3)?;
+    verify_required_field_exists("FileMetaData.row_groups", &f_4)?;
+    let ret = FileMetaData {
+      version: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      schema: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      row_groups: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      key_value_metadata: f_5,
+      created_by: f_6,
+      column_orders: f_7,
+      encryption_algorithm: f_8,
+      footer_signing_key_metadata: f_9,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("FileMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("version", TType::I32, 1))?;
+    o_prot.write_i32(self.version)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("schema", TType::List, 2))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.schema.len() as i32))?;
+    for e in &self.schema {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I64, 3))?;
+    o_prot.write_i64(self.num_rows)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("row_groups", TType::List, 4))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.row_groups.len() as i32))?;
+    for e in &self.row_groups {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_value_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_value_metadata", TType::List, 5))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.created_by {
+      o_prot.write_field_begin(&TFieldIdentifier::new("created_by", TType::String, 6))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.column_orders {
+      o_prot.write_field_begin(&TFieldIdentifier::new("column_orders", TType::List, 7))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.encryption_algorithm {
+      o_prot.write_field_begin(&TFieldIdentifier::new("encryption_algorithm", TType::Struct, 8))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.footer_signing_key_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("footer_signing_key_metadata", TType::String, 9))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
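+
+// NOTE (editorial illustration): decoding FileMetaData from the raw thrift
+// bytes of a Parquet footer; `metadata_bytes: &[u8]` is assumed to hold
+// exactly the compact-protocol payload (length and "PAR1" magic stripped):
+//
+//     use thrift::protocol::TCompactInputProtocol;
+//     let mut prot = TCompactInputProtocol::new(metadata_bytes);
+//     let meta = FileMetaData::read_from_in_protocol(&mut prot)?;
+//     println!("{} rows in {} row groups", meta.num_rows, meta.row_groups.len());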
+
+//
+// FileCryptoMetaData
+//
+
+/// Crypto metadata for files with encrypted footer *
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileCryptoMetaData {
+  /// Encryption algorithm. This field is only used for files
+  /// with encrypted footer. Files with plaintext footer store algorithm id
+  /// inside footer (FileMetaData structure).
+  pub encryption_algorithm: EncryptionAlgorithm,
+  /// Retrieval metadata of key used for encryption of footer,
+  /// and (possibly) columns *
+  pub key_metadata: Option<Vec<u8>>,
+}
+
+impl FileCryptoMetaData {
+  pub fn new<F2>(encryption_algorithm: EncryptionAlgorithm, key_metadata: F2) -> FileCryptoMetaData where F2: Into<Option<Vec<u8>>> {
+    FileCryptoMetaData {
+      encryption_algorithm,
+      key_metadata: key_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<FileCryptoMetaData> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<EncryptionAlgorithm> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = EncryptionAlgorithm::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_bytes()?;
+          f_2 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("FileCryptoMetaData.encryption_algorithm", &f_1)?;
+    let ret = FileCryptoMetaData {
+      encryption_algorithm: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      key_metadata: f_2,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("FileCryptoMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encryption_algorithm", TType::Struct, 1))?;
+    self.encryption_algorithm.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_metadata", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
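+
+// NOTE (editorial illustration): files with an encrypted footer carry
+// FileCryptoMetaData where plaintext-footer files carry FileMetaData; `bytes`
+// is assumed to be the compact-protocol payload:
+//
+//     let mut prot = TCompactInputProtocol::new(bytes);
+//     let crypto = FileCryptoMetaData::read_from_in_protocol(&mut prot)?;
+//     let key_metadata = crypto.key_metadata; // Option<Vec<u8>>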
diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs
index 90fe399e78d..cd573521042 100644
--- a/parquet/src/lib.rs
+++ b/parquet/src/lib.rs
@@ -61,6 +61,9 @@ macro_rules! experimental {
 pub mod errors;
 pub mod basic;
 
+#[allow(clippy::derivable_impls, clippy::match_single_binding)]
+pub mod format;
+
 #[macro_use]
 pub mod data_type;
 
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 823803167ca..efb0b82b323 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -19,7 +19,7 @@
 
 use std::{collections::HashMap, convert::From, fmt, sync::Arc};
 
-use parquet_format::SchemaElement;
+use crate::format::SchemaElement;
 
 use crate::basic::{
     ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType,
@@ -1042,7 +1042,7 @@ fn from_thrift_helper(
         ));
     }
     let element = &elements[index];
-    let converted_type = ConvertedType::from(element.converted_type);
+    let converted_type = ConvertedType::try_from(element.converted_type)?;
     // LogicalType is only present in v2 Parquet files. ConvertedType is always
     // populated, regardless of the version of the file (v1 or v2).
     let logical_type = element
@@ -1063,8 +1063,9 @@ fn from_thrift_helper(
                     "Repetition level must be defined for a primitive type"
                 ));
             }
-            let repetition = Repetition::from(elements[index].repetition_type.unwrap());
-            let physical_type = PhysicalType::from(elements[index].type_.unwrap());
+            let repetition =
+                Repetition::try_from(elements[index].repetition_type.unwrap())?;
+            let physical_type = PhysicalType::try_from(elements[index].type_.unwrap())?;
             let length = elements[index].type_length.unwrap_or(-1);
             let scale = elements[index].scale.unwrap_or(-1);
             let precision = elements[index].precision.unwrap_or(-1);
@@ -1082,7 +1083,11 @@ fn from_thrift_helper(
             Ok((index + 1, Arc::new(builder.build()?)))
         }
         Some(n) => {
-            let repetition = elements[index].repetition_type.map(Repetition::from);
+            let repetition = elements[index]
+                .repetition_type
+                .map(Repetition::try_from)
+                .transpose()?;
+
             let mut fields = vec![];
             let mut next_index = index + 1;
             for _ in 0..n {