From b07d24de506e5dd91eabe04baefa0dcc6b65967b Mon Sep 17 00:00:00 2001 From: psvri Date: Wed, 17 Aug 2022 20:39:36 +0530 Subject: [PATCH] remove byteorder dependency from parquet --- parquet/Cargo.toml | 1 - parquet/src/data_type.rs | 17 +++++++---------- parquet/src/file/footer.rs | 3 +-- parquet/src/file/statistics.rs | 17 ++++++++--------- parquet/src/file/writer.rs | 10 ++++------ 5 files changed, 20 insertions(+), 28 deletions(-) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 5a8e4c48532..453de092cc5 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -33,7 +33,6 @@ rust-version = "1.62" ahash = "0.8" parquet-format = { version = "4.0.0", default-features = false } bytes = { version = "1.1", default-features = false, features = ["std"] } -byteorder = { version = "1", default-features = false } thrift = { version = "0.13", default-features = false } snap = { version = "1.0", default-features = false, optional = true } brotli = { version = "3.3", default-features = false, features = ["std"], optional = true } diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 7870ca36a6d..a6a1c64e61f 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -23,8 +23,6 @@ use std::mem; use std::ops::{Deref, DerefMut}; use std::str::from_utf8; -use byteorder::{BigEndian, ByteOrder}; - use crate::basic::Type; use crate::column::reader::{ColumnReader, ColumnReaderImpl}; use crate::column::writer::{ColumnWriter, ColumnWriterImpl}; @@ -349,8 +347,7 @@ pub enum Decimal { impl Decimal { /// Creates new decimal value from `i32`. pub fn from_i32(value: i32, precision: i32, scale: i32) -> Self { - let mut bytes = [0; 4]; - BigEndian::write_i32(&mut bytes, value); + let bytes = value.to_be_bytes(); Decimal::Int32 { value: bytes, precision, @@ -360,8 +357,7 @@ impl Decimal { /// Creates new decimal value from `i64`. pub fn from_i64(value: i64, precision: i32, scale: i32) -> Self { - let mut bytes = [0; 8]; - BigEndian::write_i64(&mut bytes, value); + let bytes = value.to_be_bytes(); Decimal::Int64 { value: bytes, precision, @@ -569,7 +565,6 @@ pub(crate) mod private { use crate::util::memory::ByteBufferPtr; use crate::basic::Type; - use byteorder::ByteOrder; use std::convert::TryInto; use super::{ParquetError, Result, SliceAsBytes}; @@ -851,9 +846,11 @@ pub(crate) mod private { let mut pos = 0; // position in byte array for item in buffer.iter_mut().take(num_values) { - let elem0 = byteorder::LittleEndian::read_u32(&bytes[pos..pos + 4]); - let elem1 = byteorder::LittleEndian::read_u32(&bytes[pos + 4..pos + 8]); - let elem2 = byteorder::LittleEndian::read_u32(&bytes[pos + 8..pos + 12]); + let elem0 = u32::from_le_bytes(bytes[pos..pos + 4].try_into().unwrap()); + let elem1 = + u32::from_le_bytes(bytes[pos + 4..pos + 8].try_into().unwrap()); + let elem2 = + u32::from_le_bytes(bytes[pos + 8..pos + 12].try_into().unwrap()); item.set_data(elem0, elem1, elem2); pos += 12; diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs index dc1d66d0fa4..8148f181fcf 100644 --- a/parquet/src/file/footer.rs +++ b/parquet/src/file/footer.rs @@ -17,7 +17,6 @@ use std::{io::Read, sync::Arc}; -use byteorder::{ByteOrder, LittleEndian}; use parquet_format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData}; use thrift::protocol::TCompactInputProtocol; @@ -112,7 +111,7 @@ pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result { } // get the metadata length from the footer - let metadata_len = LittleEndian::read_i32(&slice[..4]); + let metadata_len = i32::from_le_bytes(slice[..4].try_into().unwrap()); metadata_len.try_into().map_err(|_| { general_err!( "Invalid Parquet file. Metadata length is less than zero ({})", diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 5d1a01df8a6..da2ec2e9a14 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -39,7 +39,6 @@ use std::fmt; -use byteorder::{ByteOrder, LittleEndian}; use parquet_format::Statistics as TStatistics; use crate::basic::Type; @@ -163,15 +162,15 @@ pub fn from_thrift( old_format, ), Type::INT32 => Statistics::int32( - min.map(|data| LittleEndian::read_i32(&data)), - max.map(|data| LittleEndian::read_i32(&data)), + min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), distinct_count, null_count, old_format, ), Type::INT64 => Statistics::int64( - min.map(|data| LittleEndian::read_i64(&data)), - max.map(|data| LittleEndian::read_i64(&data)), + min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), distinct_count, null_count, old_format, @@ -191,15 +190,15 @@ pub fn from_thrift( Statistics::int96(min, max, distinct_count, null_count, old_format) } Type::FLOAT => Statistics::float( - min.map(|data| LittleEndian::read_f32(&data)), - max.map(|data| LittleEndian::read_f32(&data)), + min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), distinct_count, null_count, old_format, ), Type::DOUBLE => Statistics::double( - min.map(|data| LittleEndian::read_f64(&data)), - max.map(|data| LittleEndian::read_f64(&data)), + min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), distinct_count, null_count, old_format, diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index d3d1f880926..87a9ae3e14e 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -20,7 +20,6 @@ use std::{io::Write, sync::Arc}; -use byteorder::{ByteOrder, LittleEndian}; use parquet_format as parquet; use parquet_format::{ColumnIndex, OffsetIndex, RowGroup}; use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; @@ -35,7 +34,7 @@ use crate::data_type::DataType; use crate::errors::{ParquetError, Result}; use crate::file::{ metadata::*, properties::WriterPropertiesPtr, - statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC, + statistics::to_thrift as statistics_to_thrift, PARQUET_MAGIC, }; use crate::schema::types::{ self, ColumnDescPtr, SchemaDescPtr, SchemaDescriptor, TypePtr, @@ -292,11 +291,10 @@ impl SerializedFileWriter { let end_pos = self.buf.bytes_written(); // Write footer - let mut footer_buffer: [u8; FOOTER_SIZE] = [0; FOOTER_SIZE]; let metadata_len = (end_pos - start_pos) as i32; - LittleEndian::write_i32(&mut footer_buffer, metadata_len); - (&mut footer_buffer[4..]).write_all(&PARQUET_MAGIC)?; - self.buf.write_all(&footer_buffer)?; + + self.buf.write_all(&metadata_len.to_le_bytes())?; + self.buf.write_all(&PARQUET_MAGIC)?; Ok(file_metadata) }