From 528a0728359946436454678625630ad55e6499fb Mon Sep 17 00:00:00 2001 From: askoa <112126368+askoa@users.noreply.github.com> Date: Mon, 21 Nov 2022 17:48:08 -0500 Subject: [PATCH] Don't Skip Serializing Empty Metadata (#3082) (#3126) * remove skip_serializing_if to please postcard * add serde postcard roundtrip tests * fix formatting issues * use bincode for serialization tests Co-authored-by: askoa --- arrow-schema/Cargo.toml | 1 + arrow-schema/src/datatype.rs | 8 ++++---- arrow-schema/src/field.rs | 37 ++++++++++++++++++++++++++++++++---- arrow-schema/src/schema.rs | 4 ---- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index 3b809f23ed4..d88632d1040 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -45,3 +45,4 @@ default = [] [dev-dependencies] serde_json = "1.0" +bincode = { version = "1.3.3", default-features = false } diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index acf3691450a..572d6f67da6 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -415,11 +415,11 @@ mod tests { assert_eq!( "{\"Struct\":[\ {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\ - {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ + {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\ {\"name\":\"address\",\"data_type\":{\"Struct\":\ - [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ - {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\ - ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}", + [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\ + {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}\ + ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}]}", serialized ); diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index cd9024747b4..9eed03ed24e 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -35,10 +35,6 @@ pub struct Field { dict_id: i64, dict_is_ordered: bool, /// A map of key-value pairs containing additional custom meta data. - #[cfg_attr( - feature = "serde", - serde(skip_serializing_if = "HashMap::is_empty", default) - )] metadata: HashMap, } @@ -654,4 +650,37 @@ mod test { assert!(!field1.contains(&field2)); assert!(!field2.contains(&field1)); } + + #[cfg(feature = "serde")] + fn assert_binary_serde_round_trip(field: Field) { + let serialized = bincode::serialize(&field).unwrap(); + let deserialized: Field = bincode::deserialize(&serialized).unwrap(); + assert_eq!(field, deserialized) + } + + #[cfg(feature = "serde")] + #[test] + fn test_field_without_metadata_serde() { + let field = Field::new("name", DataType::Boolean, true); + assert_binary_serde_round_trip(field) + } + + #[cfg(feature = "serde")] + #[test] + fn test_field_with_empty_metadata_serde() { + let field = + Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new()); + + assert_binary_serde_round_trip(field) + } + + #[cfg(feature = "serde")] + #[test] + fn test_field_with_nonempty_metadata_serde() { + let mut metadata = HashMap::new(); + metadata.insert("hi".to_owned(), "".to_owned()); + let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata); + + assert_binary_serde_round_trip(field) + } } diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 8ff40866d51..e45cedfb676 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -34,10 +34,6 @@ pub type SchemaRef = std::sync::Arc; pub struct Schema { pub fields: Vec, /// A map of key-value pairs containing additional meta data. - #[cfg_attr( - feature = "serde", - serde(skip_serializing_if = "HashMap::is_empty", default) - )] pub metadata: HashMap, }