Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Option from Field::metadata #3091

Merged
merged 8 commits into from Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-array/src/builder/struct_builder.rs
Expand Up @@ -396,7 +396,7 @@ mod tests {

#[test]
#[should_panic(
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported"
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
let mut fields = vec![Field::new("f1", DataType::Int16, false)];
Expand Down
10 changes: 5 additions & 5 deletions arrow-integration-test/src/field.rs
Expand Up @@ -53,7 +53,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
// Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
let metadata = match map.get("metadata") {
Some(&Value::Array(ref values)) => {
let mut res: BTreeMap<String, String> = BTreeMap::new();
let mut res: BTreeMap<String, String> = BTreeMap::default();
for value in values {
match value.as_object() {
Some(map) => {
Expand Down Expand Up @@ -87,12 +87,12 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
}
}
}
Some(res)
res
}
// We also support map format, because Schema's metadata supports this.
// See https://github.com/apache/arrow/pull/5907
Some(&Value::Object(ref values)) => {
let mut res: BTreeMap<String, String> = BTreeMap::new();
let mut res: BTreeMap<String, String> = BTreeMap::default();
for (k, v) in values {
if let Some(str_value) = v.as_str() {
res.insert(k.clone(), str_value.to_string().clone());
Expand All @@ -103,14 +103,14 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
)));
}
}
Some(res)
res
}
Some(_) => {
return Err(ArrowError::ParseError(
"Field `metadata` is not json array".to_string(),
));
}
_ => None,
_ => BTreeMap::default(),
};

// if data_type is a struct or list, get its children
Expand Down
171 changes: 84 additions & 87 deletions arrow-integration-test/src/lib.rs
Expand Up @@ -83,10 +83,10 @@ pub struct ArrowJsonField {

impl From<&Field> for ArrowJsonField {
fn from(field: &Field) -> Self {
let metadata_value = match field.metadata() {
Some(kv_list) => {
let metadata_value = match field.metadata().is_empty() {
false => {
let mut array = Vec::new();
for (k, v) in kv_list {
for (k, v) in field.metadata() {
let mut kv_map = SJMap::new();
kv_map.insert(k.clone(), Value::String(v.clone()));
array.push(Value::Object(kv_map));
Expand Down Expand Up @@ -1120,90 +1120,87 @@ mod tests {
let micros_tz = Some("UTC".to_string());
let nanos_tz = Some("Africa/Johannesburg".to_string());

let schema =
Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true)
.with_metadata(Some(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
)),
Field::new("bools-with-metadata-vec", DataType::Boolean, true)
.with_metadata(Some(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
)),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Field::new("int32s", DataType::Int32, true),
Field::new("int64s", DataType::Int64, true),
Field::new("uint8s", DataType::UInt8, true),
Field::new("uint16s", DataType::UInt16, true),
Field::new("uint32s", DataType::UInt32, true),
Field::new("uint64s", DataType::UInt64, true),
Field::new("float32s", DataType::Float32, true),
Field::new("float64s", DataType::Float64, true),
Field::new("date_days", DataType::Date32, true),
Field::new("date_millis", DataType::Date64, true),
Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
Field::new(
"ts_millis",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micros",
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
),
Field::new(
"ts_nanos",
DataType::Timestamp(TimeUnit::Nanosecond, None),
true,
),
Field::new(
"ts_secs_tz",
DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
true,
),
Field::new(
"ts_millis_tz",
DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
true,
),
Field::new(
"ts_micros_tz",
DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
true,
),
Field::new(
"ts_nanos_tz",
DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
true,
),
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
true,
),
Field::new(
"structs",
DataType::Struct(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
]),
true,
),
]);
let schema = Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Field::new("int32s", DataType::Int32, true),
Field::new("int64s", DataType::Int64, true),
Field::new("uint8s", DataType::UInt8, true),
Field::new("uint16s", DataType::UInt16, true),
Field::new("uint32s", DataType::UInt32, true),
Field::new("uint64s", DataType::UInt64, true),
Field::new("float32s", DataType::Float32, true),
Field::new("float64s", DataType::Float64, true),
Field::new("date_days", DataType::Date32, true),
Field::new("date_millis", DataType::Date64, true),
Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
Field::new(
"ts_millis",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micros",
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
),
Field::new(
"ts_nanos",
DataType::Timestamp(TimeUnit::Nanosecond, None),
true,
),
Field::new(
"ts_secs_tz",
DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
true,
),
Field::new(
"ts_millis_tz",
DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
true,
),
Field::new(
"ts_micros_tz",
DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
true,
),
Field::new(
"ts_nanos_tz",
DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
true,
),
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
true,
),
Field::new(
"structs",
DataType::Struct(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
]),
true,
),
]);

let bools_with_metadata_map =
BooleanArray::from(vec![Some(true), None, Some(false)]);
Expand Down
30 changes: 13 additions & 17 deletions arrow-ipc/src/convert.rs
Expand Up @@ -86,18 +86,16 @@ impl<'a> From<crate::Field<'a>> for Field {
)
};

let mut metadata = None;
let mut metadata_map = BTreeMap::default();
if let Some(list) = field.custom_metadata() {
let mut metadata_map = BTreeMap::default();
for kv in list {
if let (Some(k), Some(v)) = (kv.key(), kv.value()) {
metadata_map.insert(k.to_string(), v.to_string());
}
}
metadata = Some(metadata_map);
}

arrow_field.with_metadata(metadata)
arrow_field.with_metadata(metadata_map)
}
}

Expand Down Expand Up @@ -424,19 +422,17 @@ pub(crate) fn build_field<'a>(
) -> WIPOffset<crate::Field<'a>> {
// Optional custom metadata.
let mut fb_metadata = None;
if let Some(metadata) = field.metadata() {
if !metadata.is_empty() {
let mut kv_vec = vec![];
for (k, v) in metadata {
let kv_args = crate::KeyValueArgs {
key: Some(fbb.create_string(k.as_str())),
value: Some(fbb.create_string(v.as_str())),
};
let kv_offset = crate::KeyValue::create(fbb, &kv_args);
kv_vec.push(kv_offset);
}
fb_metadata = Some(fbb.create_vector(&kv_vec));
if !field.metadata().is_empty() {
let mut kv_vec = vec![];
for (k, v) in field.metadata() {
let kv_args = crate::KeyValueArgs {
key: Some(fbb.create_string(k.as_str())),
value: Some(fbb.create_string(v.as_str())),
};
let kv_offset = crate::KeyValue::create(fbb, &kv_args);
kv_vec.push(kv_offset);
}
fb_metadata = Some(fbb.create_vector(&kv_vec));
};

let fb_field_name = fbb.create_string(field.name().as_str());
Expand Down Expand Up @@ -822,7 +818,7 @@ mod tests {
.collect();
let schema = Schema::new_with_metadata(
vec![
Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)),
Field::new("uint8", DataType::UInt8, false).with_metadata(field_md),
Field::new("uint16", DataType::UInt16, true),
Field::new("uint32", DataType::UInt32, false),
Field::new("uint64", DataType::UInt64, true),
Expand Down
6 changes: 3 additions & 3 deletions arrow-schema/src/datatype.rs
Expand Up @@ -387,12 +387,12 @@ mod tests {
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();

// Non-empty map: should be converted as JSON obj { ... }
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));
let first_name =
Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);

// Empty map: should be omitted.
let last_name = Field::new("last_name", DataType::Utf8, false)
.with_metadata(Some(BTreeMap::default()));
.with_metadata(BTreeMap::default());

let person = DataType::Struct(vec![
first_name,
Expand Down