From a4b74e3c895f2c95d03444ff1453723b996366c4 Mon Sep 17 00:00:00 2001 From: askoa Date: Wed, 14 Sep 2022 18:19:27 -0400 Subject: [PATCH 1/7] include builder for RecordBatchOptions --- arrow/src/ipc/reader.rs | 17 +++++++------- arrow/src/record_batch.rs | 48 ++++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs index 969c8c43f02..7f271783d9f 100644 --- a/arrow/src/ipc/reader.rs +++ b/arrow/src/ipc/reader.rs @@ -31,7 +31,7 @@ use crate::compute::cast; use crate::datatypes::{DataType, Field, IntervalUnit, Schema, SchemaRef, UnionMode}; use crate::error::{ArrowError, Result}; use crate::ipc; -use crate::record_batch::{RecordBatch, RecordBatchOptions, RecordBatchReader}; +use crate::record_batch::{RecordBatch, RecordBatchOptionsBuilder, RecordBatchReader}; use crate::ipc::compression::CompressionCodec; use ipc::CONTINUATION_MARKER; @@ -578,10 +578,9 @@ pub fn read_record_batch( let mut node_index = 0; let mut arrays = vec![]; - let options = RecordBatchOptions { - row_count: Some(batch.length() as usize), - ..Default::default() - }; + let options = RecordBatchOptionsBuilder::new() + .row_count(batch.length() as usize) + .build(); if let Some(projection) = projection { // project fields @@ -1692,10 +1691,10 @@ mod tests { #[test] fn test_no_columns_batch() { let schema = Arc::new(Schema::new(vec![])); - let options = RecordBatchOptions { - match_field_names: true, - row_count: Some(10), - }; + let options = RecordBatchOptionsBuilder::new() + .match_field_names(true) + .row_count(10) + .build(); let input_batch = RecordBatch::try_new_with_options(schema, vec![], &options).unwrap(); let output_batch = roundtrip_ipc_stream(&input_batch); diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 4b0d36a43e5..dc63ca6da3e 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -80,7 +80,7 @@ impl RecordBatch { /// # } /// ``` pub fn try_new(schema: SchemaRef, columns: Vec) -> Result { - let options = RecordBatchOptions::default(); + let options = RecordBatchOptionsBuilder::new().build(); Self::try_new_impl(schema, columns, &options) } @@ -412,16 +412,42 @@ pub struct RecordBatchOptions { /// Optional row count, useful for specifying a row count for a RecordBatch with no columns pub row_count: Option, } +/// Builder to create new RecordBatchOptions object +#[derive(Debug)] +pub struct RecordBatchOptionsBuilder { + /// Match field names of structs and lists. If set to `true`, the names must match. + match_field_names: bool, + + /// Optional row count, useful for specifying a row count for a RecordBatch with no columns + row_count: Option, +} -impl Default for RecordBatchOptions { - fn default() -> Self { +impl RecordBatchOptionsBuilder { + pub fn new() -> Self { Self { match_field_names: true, row_count: None, } } + pub fn match_field_names(mut self, match_field_names: bool) -> Self { + self.match_field_names = match_field_names; + self + } + pub fn row_count(mut self, row_count: usize) -> Self { + self.row_count = Some(row_count); + self + } + pub fn build(self) -> RecordBatchOptions { + let Self { + match_field_names, + row_count, + } = self; + RecordBatchOptions { + match_field_names, + row_count, + } + } } - impl From<&StructArray> for RecordBatch { /// Create a record batch from struct array, where each field of /// the `StructArray` becomes a `Field` in the schema. @@ -901,10 +927,7 @@ mod tests { .to_string() .contains("must either specify a row count or at least one column")); - let options = RecordBatchOptions { - row_count: Some(10), - ..Default::default() - }; + let options = RecordBatchOptionsBuilder::new().row_count(10).build(); let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); @@ -929,4 +952,13 @@ mod tests { ); assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap())); } + #[test] + fn test_record_batch_options_builder() { + let options = RecordBatchOptionsBuilder::new() + .match_field_names(false) + .row_count(20) + .build(); + assert_eq!(options.match_field_names, false); + assert_eq!(options.row_count.unwrap(), 20) + } } From 5ea6f03130b21de03a7ec6a72a659560849de25a Mon Sep 17 00:00:00 2001 From: askoa Date: Wed, 14 Sep 2022 18:32:55 -0400 Subject: [PATCH 2/7] fix clippy warnings --- arrow/src/record_batch.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index dc63ca6da3e..8092e5a1625 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -421,6 +421,11 @@ pub struct RecordBatchOptionsBuilder { /// Optional row count, useful for specifying a row count for a RecordBatch with no columns row_count: Option, } +impl Default for RecordBatchOptionsBuilder { + fn default() -> Self { + Self::new() + } +} impl RecordBatchOptionsBuilder { pub fn new() -> Self { From 75300cf09f6ea55977346bb68667faf40dffd14c Mon Sep 17 00:00:00 2001 From: askoa Date: Wed, 14 Sep 2022 18:55:24 -0400 Subject: [PATCH 3/7] fix clippy warnings --- arrow/src/record_batch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 8092e5a1625..2aed26ed0d7 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -963,7 +963,7 @@ mod tests { .match_field_names(false) .row_count(20) .build(); - assert_eq!(options.match_field_names, false); + assert!(!options.match_field_names); assert_eq!(options.row_count.unwrap(), 20) } } From 2162e9ef412df65915e3bc24d64181bc1d77d7d8 Mon Sep 17 00:00:00 2001 From: askoa Date: Thu, 15 Sep 2022 09:35:26 -0400 Subject: [PATCH 4/7] remove builder struct --- arrow/src/ipc/reader.rs | 13 ++++------ arrow/src/record_batch.rs | 53 +++++++++++++-------------------------- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs index 7f271783d9f..a784f54e20c 100644 --- a/arrow/src/ipc/reader.rs +++ b/arrow/src/ipc/reader.rs @@ -31,7 +31,7 @@ use crate::compute::cast; use crate::datatypes::{DataType, Field, IntervalUnit, Schema, SchemaRef, UnionMode}; use crate::error::{ArrowError, Result}; use crate::ipc; -use crate::record_batch::{RecordBatch, RecordBatchOptionsBuilder, RecordBatchReader}; +use crate::record_batch::{RecordBatch, RecordBatchOptions, RecordBatchReader}; use crate::ipc::compression::CompressionCodec; use ipc::CONTINUATION_MARKER; @@ -578,9 +578,7 @@ pub fn read_record_batch( let mut node_index = 0; let mut arrays = vec![]; - let options = RecordBatchOptionsBuilder::new() - .row_count(batch.length() as usize) - .build(); + let options = RecordBatchOptions::new().with_row_count(Some(batch.length() as usize)); if let Some(projection) = projection { // project fields @@ -1691,10 +1689,9 @@ mod tests { #[test] fn test_no_columns_batch() { let schema = Arc::new(Schema::new(vec![])); - let options = RecordBatchOptionsBuilder::new() - .match_field_names(true) - .row_count(10) - .build(); + let options = RecordBatchOptions::new() + .with_match_field_names(true) + .with_row_count(Some(10)); let input_batch = RecordBatch::try_new_with_options(schema, vec![], &options).unwrap(); let output_batch = roundtrip_ipc_stream(&input_batch); diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 2aed26ed0d7..c58cedaca6d 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -80,7 +80,7 @@ impl RecordBatch { /// # } /// ``` pub fn try_new(schema: SchemaRef, columns: Vec) -> Result { - let options = RecordBatchOptionsBuilder::new().build(); + let options = RecordBatchOptions::new(); Self::try_new_impl(schema, columns, &options) } @@ -412,47 +412,31 @@ pub struct RecordBatchOptions { /// Optional row count, useful for specifying a row count for a RecordBatch with no columns pub row_count: Option, } -/// Builder to create new RecordBatchOptions object -#[derive(Debug)] -pub struct RecordBatchOptionsBuilder { - /// Match field names of structs and lists. If set to `true`, the names must match. - match_field_names: bool, - - /// Optional row count, useful for specifying a row count for a RecordBatch with no columns - row_count: Option, -} -impl Default for RecordBatchOptionsBuilder { - fn default() -> Self { - Self::new() - } -} -impl RecordBatchOptionsBuilder { +impl RecordBatchOptions { pub fn new() -> Self { Self { match_field_names: true, row_count: None, } } - pub fn match_field_names(mut self, match_field_names: bool) -> Self { - self.match_field_names = match_field_names; + //Sets the row_count of RecordBatchOptions and returns self + pub fn with_row_count(mut self, row_count: Option) -> Self { + self.row_count = row_count; self } - pub fn row_count(mut self, row_count: usize) -> Self { - self.row_count = Some(row_count); + //Sets the match_field_names of RecordBatchOptions and returns self + pub fn with_match_field_names(mut self, match_field_names: bool) -> Self { + self.match_field_names = match_field_names; self } - pub fn build(self) -> RecordBatchOptions { - let Self { - match_field_names, - row_count, - } = self; - RecordBatchOptions { - match_field_names, - row_count, - } +} +impl Default for RecordBatchOptions { + fn default() -> Self { + Self::new() } } +/// Builder to create new RecordBatchOptions object impl From<&StructArray> for RecordBatch { /// Create a record batch from struct array, where each field of /// the `StructArray` becomes a `Field` in the schema. @@ -932,7 +916,7 @@ mod tests { .to_string() .contains("must either specify a row count or at least one column")); - let options = RecordBatchOptionsBuilder::new().row_count(10).build(); + let options = RecordBatchOptions::new().with_row_count(Some(10)); let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); @@ -958,11 +942,10 @@ mod tests { assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap())); } #[test] - fn test_record_batch_options_builder() { - let options = RecordBatchOptionsBuilder::new() - .match_field_names(false) - .row_count(20) - .build(); + fn test_record_batch_options() { + let options = RecordBatchOptions::new() + .with_match_field_names(false) + .with_row_count(Some(20)); assert!(!options.match_field_names); assert_eq!(options.row_count.unwrap(), 20) } From c100359ef013ad5fe63fad88fadf424762ac6c75 Mon Sep 17 00:00:00 2001 From: askoa Date: Thu, 15 Sep 2022 09:43:01 -0400 Subject: [PATCH 5/7] removed a wrong comment --- arrow/src/record_batch.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index c58cedaca6d..79477757c09 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -436,7 +436,6 @@ impl Default for RecordBatchOptions { Self::new() } } -/// Builder to create new RecordBatchOptions object impl From<&StructArray> for RecordBatch { /// Create a record batch from struct array, where each field of /// the `StructArray` becomes a `Field` in the schema. From 2ad5d775762a9e0e2077ece258fa4ae2e580d027 Mon Sep 17 00:00:00 2001 From: askoa <112126368+askoa@users.noreply.github.com> Date: Thu, 15 Sep 2022 13:58:22 -0400 Subject: [PATCH 6/7] Update comment in arrow/src/record_batch.rs Co-authored-by: Andrew Lamb --- arrow/src/record_batch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 79477757c09..5388d88f16f 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -420,7 +420,7 @@ impl RecordBatchOptions { row_count: None, } } - //Sets the row_count of RecordBatchOptions and returns self + /// Sets the row_count of RecordBatchOptions and returns self pub fn with_row_count(mut self, row_count: Option) -> Self { self.row_count = row_count; self From 92aecd73c61ccfdd5293ee9ec6107d960a8c6585 Mon Sep 17 00:00:00 2001 From: askoa <112126368+askoa@users.noreply.github.com> Date: Thu, 15 Sep 2022 13:58:46 -0400 Subject: [PATCH 7/7] Update comment in arrow/src/record_batch.rs Co-authored-by: Andrew Lamb --- arrow/src/record_batch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 5388d88f16f..f71c67fe774 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -425,7 +425,7 @@ impl RecordBatchOptions { self.row_count = row_count; self } - //Sets the match_field_names of RecordBatchOptions and returns self + /// Sets the match_field_names of RecordBatchOptions and returns self pub fn with_match_field_names(mut self, match_field_names: bool) -> Self { self.match_field_names = match_field_names; self