Skip to content

Commit

Permalink
Move JSON Test Format To integration-testing (#2724)
Browse files Browse the repository at this point in the history
* Move JSON Test Format To integration-testing

* Fix RAT
  • Loading branch information
tustvold committed Sep 14, 2022
1 parent 4f52a25 commit 5146663
Show file tree
Hide file tree
Showing 9 changed files with 1,716 additions and 1,642 deletions.
344 changes: 0 additions & 344 deletions arrow/src/datatypes/datatype.rs

Large diffs are not rendered by default.

277 changes: 0 additions & 277 deletions arrow/src/datatypes/field.rs
Expand Up @@ -250,283 +250,6 @@ impl Field {
}
}

/// Parse a `Field` definition from a JSON representation.
///
/// The input must be a JSON object carrying:
///
/// * `name` (string), `nullable` (bool) and `type` (parsed by `DataType::from`);
/// * optionally `metadata`, either as an array of `{"key": .., "value": ..}`
///   objects or as a plain object mapping string keys to string values;
/// * `children` (array) whenever `type` is a list, large list, fixed-size
///   list, struct, map or union;
/// * optionally `dictionary`, an object with `indexType`, `id` and
///   `isOrdered`, which turns the field into a dictionary-encoded one.
///
/// # Errors
///
/// Returns `ArrowError::ParseError` when the value is not an object, when a
/// required attribute is missing or has the wrong JSON type, when `children`
/// has the wrong shape for the data type, or when the dictionary `id` is a
/// number that cannot be represented as an `i64`. Errors produced while
/// parsing `type`, `indexType` or a child field are propagated unchanged.
#[cfg(feature = "json")]
pub fn from(json: &serde_json::Value) -> Result<Self> {
    use serde_json::Value;

    /// Build a `ParseError` from a static message.
    fn parse_error(message: &str) -> ArrowError {
        ArrowError::ParseError(message.to_string())
    }

    /// Return the elements of the `children` attribute, which must be an array.
    fn children_of(map: &serde_json::Map<String, Value>) -> Result<&[Value]> {
        match map.get("children") {
            Some(Value::Array(values)) => Ok(values.as_slice()),
            Some(_) => Err(parse_error("Field 'children' must be an array")),
            None => Err(parse_error("Field missing 'children' attribute")),
        }
    }

    /// Parse the optional `metadata` attribute in either of its two layouts.
    fn parse_metadata(
        metadata: Option<&Value>,
    ) -> Result<Option<BTreeMap<String, String>>> {
        match metadata {
            // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
            Some(Value::Array(entries)) => {
                let mut res: BTreeMap<String, String> = BTreeMap::new();
                for entry in entries {
                    let pair = entry.as_object().ok_or_else(|| {
                        parse_error("Field 'metadata' contains non-object key-value pair")
                    })?;
                    if pair.len() != 2 {
                        return Err(parse_error(
                            "Field 'metadata' must have exact two entries for each key-value map",
                        ));
                    }
                    let (key, value) = match (pair.get("key"), pair.get("value")) {
                        (Some(key), Some(value)) => (key, value),
                        _ => {
                            return Err(parse_error(
                                "Field 'metadata' lacks map keys named \"key\" or \"value\"",
                            ));
                        }
                    };
                    match (key.as_str(), value.as_str()) {
                        (Some(key), Some(value)) => {
                            res.insert(key.to_string(), value.to_string());
                        }
                        _ => {
                            return Err(parse_error(
                                "Field 'metadata' must have map value of string type",
                            ));
                        }
                    }
                }
                Ok(Some(res))
            }
            // We also support map format, because Schema's metadata supports this.
            // See https://github.com/apache/arrow/pull/5907
            Some(Value::Object(entries)) => {
                let mut res: BTreeMap<String, String> = BTreeMap::new();
                for (k, v) in entries {
                    match v.as_str() {
                        Some(str_value) => {
                            res.insert(k.clone(), str_value.to_string());
                        }
                        None => {
                            return Err(ArrowError::ParseError(format!(
                                "Field 'metadata' contains non-string value for key {}",
                                k
                            )));
                        }
                    }
                }
                Ok(Some(res))
            }
            Some(_) => Err(parse_error("Field `metadata` is not json array")),
            None => Ok(None),
        }
    }

    let map = match json {
        Value::Object(map) => map,
        _ => return Err(parse_error("Invalid json value type for field")),
    };

    let name = match map.get("name") {
        Some(Value::String(name)) => name.clone(),
        _ => return Err(parse_error("Field missing 'name' attribute")),
    };
    let nullable = match map.get("nullable") {
        Some(&Value::Bool(b)) => b,
        _ => return Err(parse_error("Field missing 'nullable' attribute")),
    };
    let data_type = match map.get("type") {
        Some(t) => DataType::from(t)?,
        None => return Err(parse_error("Field missing 'type' attribute")),
    };
    let metadata = parse_metadata(map.get("metadata"))?;

    // Nested types are parsed from "type" without their child fields; the
    // children live in a sibling "children" array and are filled in here.
    let data_type = match data_type {
        DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
            let child = match children_of(map)? {
                [only] => Box::new(Self::from(only)?),
                _ => {
                    return Err(parse_error(
                        "Field 'children' must have one element for a list data type",
                    ));
                }
            };
            match data_type {
                DataType::List(_) => DataType::List(child),
                DataType::LargeList(_) => DataType::LargeList(child),
                DataType::FixedSizeList(_, size) => DataType::FixedSizeList(child, size),
                _ => unreachable!("Data type should be a list, largelist or fixedsizelist"),
            }
        }
        DataType::Struct(mut fields) => {
            let parsed = children_of(map)?
                .iter()
                .map(Field::from)
                .collect::<Result<Vec<Field>>>()?;
            fields.extend(parsed);
            DataType::Struct(fields)
        }
        DataType::Map(_, keys_sorted) => match map.get("children") {
            Some(Value::Array(values)) if values.len() == 1 => {
                let child = Self::from(&values[0])?;
                // child must be a struct with exactly two fields
                let is_entries_struct = matches!(
                    child.data_type(),
                    DataType::Struct(map_fields) if map_fields.len() == 2
                );
                if !is_entries_struct {
                    return Err(ArrowError::ParseError(format!(
                        "Map children should be a struct with 2 fields, found {:?}",
                        child.data_type()
                    )));
                }
                DataType::Map(Box::new(child), keys_sorted)
            }
            Some(_) => {
                return Err(parse_error(
                    "Field 'children' must be an array with 1 element",
                ));
            }
            None => return Err(parse_error("Field missing 'children' attribute")),
        },
        DataType::Union(_, type_ids, mode) => {
            let union_fields = children_of(map)?
                .iter()
                .map(Field::from)
                .collect::<Result<Vec<Field>>>()?;
            DataType::Union(union_fields, type_ids, mode)
        }
        other => other,
    };

    // A "dictionary" attribute wraps the type parsed so far as the value type
    // of a dictionary; without it the dictionary settings keep their defaults.
    let (data_type, dict_id, dict_is_ordered) = match map.get("dictionary") {
        Some(dictionary) => {
            let index_type = match dictionary.get("indexType") {
                Some(t) => DataType::from(t)?,
                None => return Err(parse_error("Field missing 'indexType' attribute")),
            };
            let dict_id = match dictionary.get("id") {
                // `as_i64` yields `None` for floats and for integers above
                // `i64::MAX`; report that as a parse error instead of panicking.
                Some(Value::Number(n)) => n.as_i64().ok_or_else(|| {
                    ArrowError::ParseError(format!(
                        "Field 'id' attribute must be an integer that fits in i64, found {}",
                        n
                    ))
                })?,
                _ => return Err(parse_error("Field missing 'id' attribute")),
            };
            let dict_is_ordered = match dictionary.get("isOrdered") {
                Some(&Value::Bool(ordered)) => ordered,
                _ => return Err(parse_error("Field missing 'isOrdered' attribute")),
            };
            (
                DataType::Dictionary(Box::new(index_type), Box::new(data_type)),
                dict_id,
                dict_is_ordered,
            )
        }
        None => (data_type, 0, false),
    };

    Ok(Field {
        name,
        data_type,
        nullable,
        dict_id,
        dict_is_ordered,
        metadata,
    })
}

/// Serialize this `Field` into a `serde_json::Value`.
///
/// The result is a JSON object with the keys `name`, `nullable`, `type` and
/// `children`. For a dictionary-encoded field, `type` describes the
/// dictionary's value type and an extra `dictionary` object carries `id`,
/// `indexType` and `isOrdered`.
///
/// `children` holds the serialized child fields of struct, list, large list,
/// fixed-size list and map types, and is an empty array for every other type.
// NOTE(review): union fields also get an empty `children` array here —
// confirm whether that is intended.
#[cfg(feature = "json")]
pub fn to_json(&self) -> serde_json::Value {
    // Collect the serialized child fields, if the data type has any.
    let mut children: Vec<serde_json::Value> = Vec::new();
    match self.data_type() {
        DataType::Struct(members) => {
            children.extend(members.iter().map(Field::to_json));
        }
        DataType::List(child)
        | DataType::LargeList(child)
        | DataType::FixedSizeList(child, _)
        | DataType::Map(child, _) => {
            children.push(child.to_json());
        }
        _ => {}
    }

    // Dictionary-encoded fields report the value type under "type" and put
    // the index type and dictionary settings in a dedicated sub-object.
    if let DataType::Dictionary(index_type, value_type) = self.data_type() {
        return serde_json::json!({
            "name": self.name,
            "nullable": self.nullable,
            "type": value_type.to_json(),
            "children": children,
            "dictionary": {
                "id": self.dict_id,
                "indexType": index_type.to_json(),
                "isOrdered": self.dict_is_ordered
            }
        });
    }

    serde_json::json!({
        "name": self.name,
        "nullable": self.nullable,
        "type": self.data_type.to_json(),
        "children": children
    })
}

/// Merge this field into self if it is compatible.
///
/// Struct fields are merged recursively.
Expand Down

0 comments on commit 5146663

Please sign in to comment.