From 8bb2917ee7c22c71cd71368cbe4dec4335e7d8f5 Mon Sep 17 00:00:00 2001 From: askoa <112126368+askoa@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:33:10 -0500 Subject: [PATCH] Remove Option from `Field::metadata` (#3091) * Remove Option from field metadata * fix test issues * fix clippy warnings * fix test issues * fix test issues * use default for BTreeMap initialization * empty commit Co-authored-by: askoa --- arrow-array/src/builder/struct_builder.rs | 2 +- arrow-integration-test/src/field.rs | 10 +- arrow-integration-test/src/lib.rs | 171 +++++++++++----------- arrow-ipc/src/convert.rs | 30 ++-- arrow-schema/src/datatype.rs | 6 +- arrow-schema/src/field.rs | 75 +++++----- arrow-schema/src/schema.rs | 62 ++++---- parquet/src/arrow/arrow_reader/mod.rs | 2 +- parquet/src/arrow/schema/complex.rs | 10 +- 9 files changed, 176 insertions(+), 192 deletions(-) diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 69c092c0368..1cb04aa6f78 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -396,7 +396,7 @@ mod tests { #[test] #[should_panic( - expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported" + expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported" )] fn test_struct_array_builder_from_schema_unsupported_type() { let mut fields = vec![Field::new("f1", DataType::Int16, false)]; diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs index 9b1a8f5f9ba..5b586355709 100644 --- a/arrow-integration-test/src/field.rs +++ b/arrow-integration-test/src/field.rs @@ -53,7 +53,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz let metadata = match map.get("metadata") { Some(&Value::Array(ref values)) => { - let mut res: BTreeMap = BTreeMap::new(); + let mut res: BTreeMap = BTreeMap::default(); for value in values { match value.as_object() { Some(map) => { @@ -87,12 +87,12 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { } } } - Some(res) + res } // We also support map format, because Schema's metadata supports this. // See https://github.com/apache/arrow/pull/5907 Some(&Value::Object(ref values)) => { - let mut res: BTreeMap = BTreeMap::new(); + let mut res: BTreeMap = BTreeMap::default(); for (k, v) in values { if let Some(str_value) = v.as_str() { res.insert(k.clone(), str_value.to_string().clone()); @@ -103,14 +103,14 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { ))); } } - Some(res) + res } Some(_) => { return Err(ArrowError::ParseError( "Field `metadata` is not json array".to_string(), )); } - _ => None, + _ => BTreeMap::default(), }; // if data_type is a struct or list, get its children diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index d0db4b4b9ec..75b76af1e6f 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -83,10 +83,10 @@ pub struct ArrowJsonField { impl From<&Field> for ArrowJsonField { fn from(field: &Field) -> Self { - let metadata_value = match field.metadata() { - Some(kv_list) => { + let metadata_value = match field.metadata().is_empty() { + false => { let mut array = Vec::new(); - for (k, v) in kv_list { + for (k, v) in field.metadata() { let mut kv_map = SJMap::new(); kv_map.insert(k.clone(), Value::String(v.clone())); array.push(Value::Object(kv_map)); @@ -1120,90 +1120,87 @@ mod tests { let micros_tz = Some("UTC".to_string()); let nanos_tz = Some("Africa/Johannesburg".to_string()); - let schema = - Schema::new(vec![ - Field::new("bools-with-metadata-map", DataType::Boolean, true) - .with_metadata(Some( - [("k".to_string(), "v".to_string())] - .iter() - .cloned() - .collect(), - )), - Field::new("bools-with-metadata-vec", DataType::Boolean, true) - .with_metadata(Some( - [("k2".to_string(), "v2".to_string())] - .iter() - .cloned() - .collect(), - )), - Field::new("bools", DataType::Boolean, true), - Field::new("int8s", DataType::Int8, true), - Field::new("int16s", DataType::Int16, true), - Field::new("int32s", DataType::Int32, true), - Field::new("int64s", DataType::Int64, true), - Field::new("uint8s", DataType::UInt8, true), - Field::new("uint16s", DataType::UInt16, true), - Field::new("uint32s", DataType::UInt32, true), - Field::new("uint64s", DataType::UInt64, true), - Field::new("float32s", DataType::Float32, true), - Field::new("float64s", DataType::Float64, true), - Field::new("date_days", DataType::Date32, true), - Field::new("date_millis", DataType::Date64, true), - Field::new("time_secs", DataType::Time32(TimeUnit::Second), true), - Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true), - Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true), - Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true), - Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true), - Field::new( - "ts_millis", - DataType::Timestamp(TimeUnit::Millisecond, None), - true, - ), - Field::new( - "ts_micros", - DataType::Timestamp(TimeUnit::Microsecond, None), - true, - ), - Field::new( - "ts_nanos", - DataType::Timestamp(TimeUnit::Nanosecond, None), - true, - ), - Field::new( - "ts_secs_tz", - DataType::Timestamp(TimeUnit::Second, secs_tz.clone()), - true, - ), - Field::new( - "ts_millis_tz", - DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()), - true, - ), - Field::new( - "ts_micros_tz", - DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()), - true, - ), - Field::new( - "ts_nanos_tz", - DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()), - true, - ), - Field::new("utf8s", DataType::Utf8, true), - Field::new( - "lists", - DataType::List(Box::new(Field::new("item", DataType::Int32, true))), - true, - ), - Field::new( - "structs", - DataType::Struct(vec![ - Field::new("int32s", DataType::Int32, true), - Field::new("utf8s", DataType::Utf8, true), - ]), - true, - ), - ]); + let schema = Schema::new(vec![ + Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata( + [("k".to_string(), "v".to_string())] + .iter() + .cloned() + .collect(), + ), + Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata( + [("k2".to_string(), "v2".to_string())] + .iter() + .cloned() + .collect(), + ), + Field::new("bools", DataType::Boolean, true), + Field::new("int8s", DataType::Int8, true), + Field::new("int16s", DataType::Int16, true), + Field::new("int32s", DataType::Int32, true), + Field::new("int64s", DataType::Int64, true), + Field::new("uint8s", DataType::UInt8, true), + Field::new("uint16s", DataType::UInt16, true), + Field::new("uint32s", DataType::UInt32, true), + Field::new("uint64s", DataType::UInt64, true), + Field::new("float32s", DataType::Float32, true), + Field::new("float64s", DataType::Float64, true), + Field::new("date_days", DataType::Date32, true), + Field::new("date_millis", DataType::Date64, true), + Field::new("time_secs", DataType::Time32(TimeUnit::Second), true), + Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true), + Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true), + Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true), + Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true), + Field::new( + "ts_millis", + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ), + Field::new( + "ts_micros", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ), + Field::new( + "ts_nanos", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ), + Field::new( + "ts_secs_tz", + DataType::Timestamp(TimeUnit::Second, secs_tz.clone()), + true, + ), + Field::new( + "ts_millis_tz", + DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()), + true, + ), + Field::new( + "ts_micros_tz", + DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()), + true, + ), + Field::new( + "ts_nanos_tz", + DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()), + true, + ), + Field::new("utf8s", DataType::Utf8, true), + Field::new( + "lists", + DataType::List(Box::new(Field::new("item", DataType::Int32, true))), + true, + ), + Field::new( + "structs", + DataType::Struct(vec![ + Field::new("int32s", DataType::Int32, true), + Field::new("utf8s", DataType::Utf8, true), + ]), + true, + ), + ]); let bools_with_metadata_map = BooleanArray::from(vec![Some(true), None, Some(false)]); diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index 8d01c58b6ae..a9dda6f2a1f 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -86,18 +86,16 @@ impl<'a> From> for Field { ) }; - let mut metadata = None; + let mut metadata_map = BTreeMap::default(); if let Some(list) = field.custom_metadata() { - let mut metadata_map = BTreeMap::default(); for kv in list { if let (Some(k), Some(v)) = (kv.key(), kv.value()) { metadata_map.insert(k.to_string(), v.to_string()); } } - metadata = Some(metadata_map); } - arrow_field.with_metadata(metadata) + arrow_field.with_metadata(metadata_map) } } @@ -424,19 +422,17 @@ pub(crate) fn build_field<'a>( ) -> WIPOffset> { // Optional custom metadata. let mut fb_metadata = None; - if let Some(metadata) = field.metadata() { - if !metadata.is_empty() { - let mut kv_vec = vec![]; - for (k, v) in metadata { - let kv_args = crate::KeyValueArgs { - key: Some(fbb.create_string(k.as_str())), - value: Some(fbb.create_string(v.as_str())), - }; - let kv_offset = crate::KeyValue::create(fbb, &kv_args); - kv_vec.push(kv_offset); - } - fb_metadata = Some(fbb.create_vector(&kv_vec)); + if !field.metadata().is_empty() { + let mut kv_vec = vec![]; + for (k, v) in field.metadata() { + let kv_args = crate::KeyValueArgs { + key: Some(fbb.create_string(k.as_str())), + value: Some(fbb.create_string(v.as_str())), + }; + let kv_offset = crate::KeyValue::create(fbb, &kv_args); + kv_vec.push(kv_offset); } + fb_metadata = Some(fbb.create_vector(&kv_vec)); }; let fb_field_name = fbb.create_string(field.name().as_str()); @@ -822,7 +818,7 @@ mod tests { .collect(); let schema = Schema::new_with_metadata( vec![ - Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)), + Field::new("uint8", DataType::UInt8, false).with_metadata(field_md), Field::new("uint16", DataType::UInt16, true), Field::new("uint32", DataType::UInt32, false), Field::new("uint64", DataType::UInt64, true), diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 759fc39646c..90ae429422c 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -387,12 +387,12 @@ mod tests { let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); // Non-empty map: should be converted as JSON obj { ... } - let first_name = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(field_metadata)); + let first_name = + Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata); // Empty map: should be omitted. let last_name = Field::new("last_name", DataType::Utf8, false) - .with_metadata(Some(BTreeMap::default())); + .with_metadata(BTreeMap::default()); let person = DataType::Struct(vec![ first_name, diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index 4d13f523fb9..ee6ece862da 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -35,8 +35,11 @@ pub struct Field { dict_id: i64, dict_is_ordered: bool, /// A map of key-value pairs containing additional custom meta data. - #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] - metadata: Option>, + #[cfg_attr( + feature = "serde", + serde(skip_serializing_if = "BTreeMap::is_empty", default) + )] + metadata: BTreeMap, } // Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered` @@ -89,7 +92,7 @@ impl Field { nullable, dict_id: 0, dict_is_ordered: false, - metadata: None, + metadata: BTreeMap::default(), } } @@ -107,33 +110,30 @@ impl Field { nullable, dict_id, dict_is_ordered, - metadata: None, + metadata: BTreeMap::default(), } } /// Sets the `Field`'s optional custom metadata. /// The metadata is set as `None` for empty map. #[inline] - pub fn set_metadata(&mut self, metadata: Option>) { - // To make serde happy, convert Some(empty_map) to None. - self.metadata = None; - if let Some(v) = metadata { - if !v.is_empty() { - self.metadata = Some(v); - } + pub fn set_metadata(&mut self, metadata: BTreeMap) { + self.metadata = BTreeMap::default(); + if !metadata.is_empty() { + self.metadata = metadata; } } /// Sets the metadata of this `Field` to be `metadata` and returns self - pub fn with_metadata(mut self, metadata: Option>) -> Self { + pub fn with_metadata(mut self, metadata: BTreeMap) -> Self { self.set_metadata(metadata); self } /// Returns the immutable reference to the `Field`'s optional custom metadata. #[inline] - pub const fn metadata(&self) -> Option<&BTreeMap> { - self.metadata.as_ref() + pub const fn metadata(&self) -> &BTreeMap { + &self.metadata } /// Returns an immutable reference to the `Field`'s name. @@ -278,11 +278,11 @@ impl Field { ))); } // merge metadata - match (self.metadata(), from.metadata()) { - (Some(self_metadata), Some(from_metadata)) => { - let mut merged = self_metadata.clone(); - for (key, from_value) in from_metadata { - if let Some(self_value) = self_metadata.get(key) { + match (self.metadata().is_empty(), from.metadata().is_empty()) { + (false, false) => { + let mut merged = self.metadata().clone(); + for (key, from_value) in from.metadata() { + if let Some(self_value) = self.metadata.get(key) { if self_value != from_value { return Err(ArrowError::SchemaError(format!( "Fail to merge field due to conflicting metadata data value for key {}. @@ -293,10 +293,10 @@ impl Field { merged.insert(key.clone(), from_value.clone()); } } - self.set_metadata(Some(merged)); + self.set_metadata(merged); } - (None, Some(from_metadata)) => { - self.set_metadata(Some(from_metadata.clone())); + (true, false) => { + self.set_metadata(from.metadata().clone()); } _ => {} } @@ -415,12 +415,12 @@ impl Field { // self need to be nullable or both of them are not nullable && (self.nullable || !other.nullable) // make sure self.metadata is a superset of other.metadata - && match (&self.metadata, &other.metadata) { - (_, None) => true, - (None, Some(_)) => false, - (Some(self_meta), Some(other_meta)) => { - other_meta.iter().all(|(k, v)| { - match self_meta.get(k) { + && match (&self.metadata.is_empty(), &other.metadata.is_empty()) { + (_, true) => true, + (true, false) => false, + (false, false) => { + other.metadata().iter().all(|(k, v)| { + match self.metadata().get(k) { Some(s) => s == v, None => false } @@ -538,10 +538,10 @@ mod test { #[test] fn test_contains_reflexivity() { let mut field = Field::new("field1", DataType::Float16, false); - field.set_metadata(Some(BTreeMap::from([ + field.set_metadata(BTreeMap::from([ (String::from("k0"), String::from("v0")), (String::from("k1"), String::from("v1")), - ]))); + ])); assert!(field.contains(&field)) } @@ -550,23 +550,14 @@ mod test { let child_field = Field::new("child1", DataType::Float16, false); let mut field1 = Field::new("field1", DataType::Struct(vec![child_field]), false); - field1.set_metadata(Some(BTreeMap::from([( - String::from("k1"), - String::from("v1"), - )]))); + field1.set_metadata(BTreeMap::from([(String::from("k1"), String::from("v1"))])); let mut field2 = Field::new("field1", DataType::Struct(vec![]), true); - field2.set_metadata(Some(BTreeMap::from([( - String::from("k2"), - String::from("v2"), - )]))); + field2.set_metadata(BTreeMap::from([(String::from("k2"), String::from("v2"))])); field2.try_merge(&field1).unwrap(); let mut field3 = Field::new("field1", DataType::Struct(vec![]), false); - field3.set_metadata(Some(BTreeMap::from([( - String::from("k3"), - String::from("v3"), - )]))); + field3.set_metadata(BTreeMap::from([(String::from("k3"), String::from("v3"))])); field3.try_merge(&field2).unwrap(); assert!(field2.contains(&field1)); diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 60fe3c6ca9a..519a8e089ae 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -419,12 +419,12 @@ mod tests { assert_ne!(schema2, schema4); assert_ne!(schema3, schema4); - let f = Field::new("c1", DataType::Utf8, false).with_metadata(Some( + let f = Field::new("c1", DataType::Utf8, false).with_metadata( [("foo".to_string(), "bar".to_string())] .iter() .cloned() .collect(), - )); + ); let schema5 = Schema::new(vec![ f, Field::new("c2", DataType::Float64, true), @@ -437,13 +437,13 @@ mod tests { fn create_schema_string() { let schema = person_schema(); assert_eq!(schema.to_string(), - "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \ - Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ + "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \ + Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \ Field { name: \"address\", data_type: Struct([\ - Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ - Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\ - ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ - Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }") + Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \ + Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\ + ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \ + Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }") } #[test] @@ -462,8 +462,8 @@ mod tests { assert_eq!(first_name.dict_is_ordered(), None); let metadata = first_name.metadata(); - assert!(metadata.is_some()); - let md = metadata.as_ref().unwrap(); + assert!(!metadata.is_empty()); + let md = &metadata; assert_eq!(md.len(), 1); let key = md.get("k"); assert!(key.is_some()); @@ -524,8 +524,8 @@ mod tests { fn person_schema() -> Schema { let kv_array = [("k".to_string(), "v".to_string())]; let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); - let first_name = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(field_metadata)); + let first_name = + Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata); Schema::new(vec![ first_name, @@ -556,16 +556,14 @@ mod tests { .iter() .cloned() .collect(); - let f1 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata1)); + let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1); let metadata2: BTreeMap = [("foo".to_string(), "baz".to_string())] .iter() .cloned() .collect(); - let f2 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata2)); + let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2); assert!( Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]) @@ -579,34 +577,30 @@ mod tests { .iter() .cloned() .collect(); - let f2 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata2)); + let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2); assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); - assert_eq!( - f1.metadata().as_ref().unwrap(), - f2.metadata().as_ref().unwrap() - ); + assert!(!f1.metadata().is_empty()); + assert_eq!(f1.metadata(), f2.metadata()); // 3. Some + Some - let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata( [("foo".to_string(), "bar".to_string())] .iter() .cloned() .collect(), - )); - let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + ); + let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata( [("foo2".to_string(), "bar2".to_string())] .iter() .cloned() .collect(), - )); + ); assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); + assert!(!f1.metadata().is_empty()); assert_eq!( - f1.metadata().cloned().unwrap(), + f1.metadata().clone(), [ ("foo".to_string(), "bar".to_string()), ("foo2".to_string(), "bar2".to_string()) @@ -617,17 +611,17 @@ mod tests { ); // 4. Some + None. - let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata( [("foo".to_string(), "bar".to_string())] .iter() .cloned() .collect(), - )); + ); let f2 = Field::new("first_name", DataType::Utf8, false); assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); + assert!(!f1.metadata().is_empty()); assert_eq!( - f1.metadata().cloned().unwrap(), + f1.metadata().clone(), [("foo".to_string(), "bar".to_string())] .iter() .cloned() @@ -638,7 +632,7 @@ mod tests { let mut f1 = Field::new("first_name", DataType::Utf8, false); let f2 = Field::new("first_name", DataType::Utf8, false); assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_none()); + assert!(f1.metadata().is_empty()); } #[test] diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 35b70a0485c..a720d439cc9 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -2021,7 +2021,7 @@ mod tests { .collect(); let schema_with_metadata = - Arc::new(Schema::new(vec![field.with_metadata(Some(metadata))])); + Arc::new(Schema::new(vec![field.with_metadata(metadata)])); assert_ne!(schema_with_metadata, schema_without_metadata); diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs index 2334a5601b4..4ff9c7a3956 100644 --- a/parquet/src/arrow/schema/complex.rs +++ b/parquet/src/arrow/schema/complex.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use std::collections::BTreeMap; + use crate::arrow::schema::primitive::convert_primitive; use crate::arrow::ProjectionMask; use crate::basic::{ConvertedType, Repetition}; @@ -343,13 +345,17 @@ impl Visitor { (Some(key), Some(value)) => { let key_field = convert_field(map_key, &key, arrow_key); let value_field = convert_field(map_value, &value, arrow_value); + let field_metadata = match arrow_map { + Some(field) => field.metadata().clone(), + _ => BTreeMap::default(), + }; let map_field = Field::new( map_key_value.name(), DataType::Struct(vec![key_field, value_field]), false, // The inner map field is always non-nullable (#1697) ) - .with_metadata(arrow_map.and_then(|f| f.metadata().cloned())); + .with_metadata(field_metadata); Ok(Some(ParquetField { rep_level, @@ -539,7 +545,7 @@ fn convert_field( _ => Field::new(name, data_type, nullable), }; - field.with_metadata(hint.metadata().cloned()) + field.with_metadata(hint.metadata().clone()) } None => Field::new(name, data_type, nullable), }