From 4cad205fc83040794e1808b4252550069ee3d616 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 2 Jun 2022 08:04:20 +0100 Subject: [PATCH] Access metadata of flushed row groups on write (#1691) --- parquet/src/arrow/arrow_writer.rs | 6 ++++++ parquet/src/file/writer.rs | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs index 530dfe2ad09..24e2c322a2c 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer.rs @@ -36,6 +36,7 @@ use super::schema::{ use crate::arrow::levels::calculate_array_levels; use crate::column::writer::ColumnWriter; use crate::errors::{ParquetError, Result}; +use crate::file::metadata::RowGroupMetaDataPtr; use crate::file::properties::WriterProperties; use crate::file::writer::{SerializedColumnWriter, SerializedRowGroupWriter}; use crate::{data_type::*, file::writer::SerializedFileWriter}; @@ -95,6 +96,11 @@ impl ArrowWriter { }) } + /// Returns metadata for any flushed row groups + pub fn flushed_row_groups(&self) -> &[RowGroupMetaDataPtr] { + self.writer.flushed_row_groups() + } + /// Enqueues the provided `RecordBatch` to be written /// /// If following this there are more than `max_row_group_size` rows buffered, diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 646550dcb6b..9304587bb10 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -153,6 +153,11 @@ impl SerializedFileWriter { Ok(row_group_writer) } + /// Returns metadata for any flushed row groups + pub fn flushed_row_groups(&self) -> &[RowGroupMetaDataPtr] { + &self.row_groups + } + /// Closes and finalises file writer, returning the file metadata. /// /// All row groups must be appended before this method is called.