From 88e0de5d661def7d7a45e4bc51314a366d017dda Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 12 Jul 2022 19:26:04 -0400 Subject: [PATCH] Remove null count from write_batch_with_statistics (#2047) * Remove null count from write_batch_with_statistics * Fix test --- parquet/src/column/writer.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs index 5def721353a..1fc5207f6b4 100644 --- a/parquet/src/column/writer.rs +++ b/parquet/src/column/writer.rs @@ -292,7 +292,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> { rep_levels: Option<&[i16]>, min: Option<&T::T>, max: Option<&T::T>, - null_count: Option, distinct_count: Option, ) -> Result { // We check for DataPage limits only after we have inserted the values. If a user @@ -346,10 +345,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> { self.column_distinct_count = None; } - if let Some(nulls) = null_count { - self.num_column_nulls += nulls; - } - let mut values_offset = 0; let mut levels_offset = 0; for _ in 0..num_batches { @@ -389,7 +384,7 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> { def_levels: Option<&[i16]>, rep_levels: Option<&[i16]>, ) -> Result { - self.write_batch_internal(values, def_levels, rep_levels, None, None, None, None) + self.write_batch_internal(values, def_levels, rep_levels, None, None, None) } /// Writer may optionally provide pre-calculated statistics for use when computing @@ -406,7 +401,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> { rep_levels: Option<&[i16]>, min: Option<&T::T>, max: Option<&T::T>, - nulls_count: Option, distinct_count: Option, ) -> Result { self.write_batch_internal( @@ -415,7 +409,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> { rep_levels, min, max, - nulls_count, distinct_count, ) } @@ -1726,7 +1719,6 @@ mod tests { None, Some(&-17), Some(&9000), - Some(21), Some(55), ) .unwrap(); @@ -1745,7 +1737,7 @@ 
mod tests { assert_eq!(metadata.dictionary_page_offset(), Some(0)); if let Some(stats) = metadata.statistics() { assert!(stats.has_min_max_set()); - assert_eq!(stats.null_count(), 21); + assert_eq!(stats.null_count(), 0); assert_eq!(stats.distinct_count().unwrap_or(0), 55); if let Statistics::Int32(stats) = stats { assert_eq!(stats.min(), &-17); @@ -1774,7 +1766,6 @@ mod tests { None, Some(&5), Some(&7), - Some(0), Some(3), ) .unwrap();