Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move JSON Test Format To integration-testing #2724

Merged
merged 2 commits into from Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
344 changes: 0 additions & 344 deletions arrow/src/datatypes/datatype.rs

Large diffs are not rendered by default.

277 changes: 0 additions & 277 deletions arrow/src/datatypes/field.rs
Expand Up @@ -250,283 +250,6 @@ impl Field {
}
}

/// Parse a `Field` definition from a JSON representation.
///
/// `json` must be a JSON object with the following attributes:
///
/// * `name` – string, required.
/// * `nullable` – boolean, required.
/// * `type` – required; parsed with `DataType::from`.
/// * `metadata` – optional; either an array of `{"key": .., "value": ..}`
///   objects (both strings) or a plain object mapping keys to string values.
/// * `children` – required when `type` is a list, large list, fixed-size
///   list, struct, map or union. List types need exactly one child; a map
///   needs exactly one child that is a struct with two fields.
/// * `dictionary` – optional; an object with `indexType`, `id` and
///   `isOrdered`. When present the parsed type becomes the dictionary's value
///   type and the resulting field is dictionary-encoded.
///
/// # Errors
///
/// Returns `ArrowError::ParseError` when `json` is not an object, when a
/// required attribute is missing or has the wrong JSON type, or when the
/// dictionary `id` is a number that does not fit in an `i64`. Errors from
/// parsing nested types or child fields are propagated unchanged.
#[cfg(feature = "json")]
pub fn from(json: &serde_json::Value) -> Result<Self> {
    use serde_json::Value;

    let map = match json {
        Value::Object(map) => map,
        _ => {
            return Err(ArrowError::ParseError(
                "Invalid json value type for field".to_string(),
            ));
        }
    };

    let name = match map.get("name") {
        Some(Value::String(name)) => name.to_string(),
        _ => {
            return Err(ArrowError::ParseError(
                "Field missing 'name' attribute".to_string(),
            ));
        }
    };
    let nullable = match map.get("nullable") {
        Some(&Value::Bool(b)) => b,
        _ => {
            return Err(ArrowError::ParseError(
                "Field missing 'nullable' attribute".to_string(),
            ));
        }
    };
    let data_type = match map.get("type") {
        Some(t) => DataType::from(t)?,
        None => {
            return Err(ArrowError::ParseError(
                "Field missing 'type' attribute".to_string(),
            ));
        }
    };

    // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
    let metadata = match map.get("metadata") {
        Some(Value::Array(values)) => {
            let mut res: BTreeMap<String, String> = BTreeMap::new();
            for value in values {
                let entry = match value.as_object() {
                    Some(entry) => entry,
                    None => {
                        return Err(ArrowError::ParseError(
                            "Field 'metadata' contains non-object key-value pair".to_string(),
                        ));
                    }
                };
                if entry.len() != 2 {
                    return Err(ArrowError::ParseError(
                        "Field 'metadata' must have exact two entries for each key-value map".to_string(),
                    ));
                }
                match (entry.get("key"), entry.get("value")) {
                    (Some(k), Some(v)) => match (k.as_str(), v.as_str()) {
                        (Some(k_str), Some(v_str)) => {
                            res.insert(k_str.to_string(), v_str.to_string());
                        }
                        _ => {
                            return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string()));
                        }
                    },
                    _ => {
                        return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
                    }
                }
            }
            Some(res)
        }
        // We also support map format, because Schema's metadata supports this.
        // See https://github.com/apache/arrow/pull/5907
        Some(Value::Object(values)) => {
            let mut res: BTreeMap<String, String> = BTreeMap::new();
            for (k, v) in values {
                match v.as_str() {
                    Some(str_value) => {
                        res.insert(k.clone(), str_value.to_string());
                    }
                    None => {
                        return Err(ArrowError::ParseError(
                            format!("Field 'metadata' contains non-string value for key {}", k),
                        ));
                    }
                }
            }
            Some(res)
        }
        Some(_) => {
            return Err(ArrowError::ParseError(
                "Field `metadata` is not json array".to_string(),
            ));
        }
        None => None,
    };

    // Shared error constructors for the nested-type handling below.
    let missing_children =
        || ArrowError::ParseError("Field missing 'children' attribute".to_string());
    let children_not_array =
        || ArrowError::ParseError("Field 'children' must be an array".to_string());

    // if data_type is a struct or list, get its children
    let data_type = match data_type {
        DataType::List(_)
        | DataType::LargeList(_)
        | DataType::FixedSizeList(_, _) => match map.get("children") {
            Some(Value::Array(values)) => {
                let child_json = match values.as_slice() {
                    [only] => only,
                    _ => {
                        return Err(ArrowError::ParseError(
                            "Field 'children' must have one element for a list data type".to_string(),
                        ));
                    }
                };
                let child = Box::new(Self::from(child_json)?);
                match data_type {
                    DataType::List(_) => DataType::List(child),
                    DataType::LargeList(_) => DataType::LargeList(child),
                    DataType::FixedSizeList(_, int) => {
                        DataType::FixedSizeList(child, int)
                    }
                    _ => unreachable!(
                        "Data type should be a list, largelist or fixedsizelist"
                    ),
                }
            }
            Some(_) => return Err(children_not_array()),
            None => return Err(missing_children()),
        },
        DataType::Struct(mut fields) => match map.get("children") {
            Some(Value::Array(values)) => {
                let struct_fields: Vec<Field> =
                    values.iter().map(Field::from).collect::<Result<_>>()?;
                fields.extend(struct_fields);
                DataType::Struct(fields)
            }
            Some(_) => return Err(children_not_array()),
            None => return Err(missing_children()),
        },
        DataType::Map(_, keys_sorted) => match map.get("children") {
            Some(Value::Array(values)) if values.len() == 1 => {
                let child = Self::from(&values[0])?;
                // child must be a struct
                match child.data_type() {
                    DataType::Struct(map_fields) if map_fields.len() == 2 => {
                        DataType::Map(Box::new(child), keys_sorted)
                    }
                    t => {
                        return Err(ArrowError::ParseError(
                            format!("Map children should be a struct with 2 fields, found {:?}", t)
                        ));
                    }
                }
            }
            Some(_) => {
                return Err(ArrowError::ParseError(
                    "Field 'children' must be an array with 1 element"
                        .to_string(),
                ));
            }
            None => return Err(missing_children()),
        },
        DataType::Union(_, type_ids, mode) => match map.get("children") {
            Some(Value::Array(values)) => {
                let union_fields: Vec<Field> =
                    values.iter().map(Field::from).collect::<Result<_>>()?;
                DataType::Union(union_fields, type_ids, mode)
            }
            Some(_) => return Err(children_not_array()),
            None => return Err(missing_children()),
        },
        _ => data_type,
    };

    // A `dictionary` attribute wraps the type parsed so far as the value type
    // of a dictionary; without it the defaults (id 0, unordered) are used.
    let (data_type, dict_id, dict_is_ordered) = match map.get("dictionary") {
        Some(dictionary) => {
            let index_type = match dictionary.get("indexType") {
                Some(t) => DataType::from(t)?,
                None => {
                    return Err(ArrowError::ParseError(
                        "Field missing 'indexType' attribute".to_string(),
                    ));
                }
            };
            let id = match dictionary.get("id") {
                // `as_i64` yields `None` for floats and for integers above
                // `i64::MAX`; report that as a parse error instead of panicking.
                Some(Value::Number(n)) => n.as_i64().ok_or_else(|| {
                    ArrowError::ParseError(
                        "Field 'id' attribute must be a 64-bit signed integer".to_string(),
                    )
                })?,
                _ => {
                    return Err(ArrowError::ParseError(
                        "Field missing 'id' attribute".to_string(),
                    ));
                }
            };
            let is_ordered = match dictionary.get("isOrdered") {
                Some(&Value::Bool(n)) => n,
                _ => {
                    return Err(ArrowError::ParseError(
                        "Field missing 'isOrdered' attribute".to_string(),
                    ));
                }
            };
            (
                DataType::Dictionary(Box::new(index_type), Box::new(data_type)),
                id,
                is_ordered,
            )
        }
        None => (data_type, 0, false),
    };

    Ok(Field {
        name,
        data_type,
        nullable,
        dict_id,
        dict_is_ordered,
        metadata,
    })
}

/// Generate a JSON representation of the `Field`.
///
/// The result is an object with `name`, `nullable`, `type` and `children`.
/// `children` holds the JSON of every struct member, or of the single child
/// field of a list, large list, fixed-size list or map; for every other data
/// type it is an empty array.
///
/// For a dictionary field, `type` is the JSON of the dictionary's value type
/// and an extra `dictionary` object carries `id`, `indexType` and
/// `isOrdered`.
#[cfg(feature = "json")]
pub fn to_json(&self) -> serde_json::Value {
    // Collect the nested fields first; both output shapes share this array.
    let children: Vec<serde_json::Value> = match self.data_type() {
        DataType::List(child)
        | DataType::LargeList(child)
        | DataType::FixedSizeList(child, _)
        | DataType::Map(child, _) => vec![child.to_json()],
        DataType::Struct(members) => members.iter().map(Self::to_json).collect(),
        _ => Vec::new(),
    };

    // Dictionary fields report their value type and add a `dictionary` section.
    if let DataType::Dictionary(index_type, value_type) = self.data_type() {
        return serde_json::json!({
            "name": self.name,
            "nullable": self.nullable,
            "type": value_type.to_json(),
            "children": children,
            "dictionary": {
                "id": self.dict_id,
                "indexType": index_type.to_json(),
                "isOrdered": self.dict_is_ordered
            }
        });
    }

    serde_json::json!({
        "name": self.name,
        "nullable": self.nullable,
        "type": self.data_type.to_json(),
        "children": children
    })
}

/// Merge this field into self if it is compatible.
///
/// Struct fields are merged recursively.
Expand Down