Skip to content

Commit

Permalink
Add json feature (apache#2300)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Aug 28, 2022
1 parent c6e7680 commit e23ba78
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 48 deletions.
9 changes: 5 additions & 4 deletions arrow/Cargo.toml
Expand Up @@ -44,9 +44,8 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

[dependencies]
serde = { version = "1.0", default-features = false }
serde_derive = { version = "1.0", default-features = false }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
indexmap = { version = "1.9", default-features = false, features = ["std"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
num = { version = "0.4", default-features = false, features = ["std"] }
Expand All @@ -69,10 +68,11 @@ bitflags = { version = "1.2.1", default-features = false }
zstd = { version = "0.11.1", default-features = false, optional = true }

[features]
default = ["csv", "ipc"]
default = ["csv", "ipc", "json"]
ipc_compression = ["ipc", "zstd", "lz4"]
csv = ["csv_crate"]
ipc = ["flatbuffers"]
json = ["serde", "serde_json"]
simd = ["packed_simd"]
prettyprint = ["comfy-table"]
# The test utils feature enables code used in benchmarks and tests but
Expand Down Expand Up @@ -183,6 +183,7 @@ harness = false
[[bench]]
name = "json_reader"
harness = false
required-features = ["json"]

[[bench]]
name = "equal"
Expand Down
5 changes: 3 additions & 2 deletions arrow/src/array/mod.rs
Expand Up @@ -190,11 +190,12 @@ use crate::datatypes::*;
pub use self::array::Array;
pub use self::array::ArrayAccessor;
pub use self::array::ArrayRef;
pub(crate) use self::data::layout;
pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;
pub(crate) use self::data::BufferSpec;

#[cfg(feature = "ipc")]
pub(crate) use self::data::{layout, BufferSpec};

pub use self::array_binary::BinaryArray;
pub use self::array_binary::LargeBinaryArray;
Expand Down
25 changes: 15 additions & 10 deletions arrow/src/datatypes/datatype.rs
Expand Up @@ -19,9 +19,6 @@ use num::BigInt;
use std::cmp::Ordering;
use std::fmt;

use serde_derive::{Deserialize, Serialize};
use serde_json::{json, Value, Value::String as VString};

use crate::error::{ArrowError, Result};
use crate::util::decimal::singed_cmp_le_bytes;

Expand All @@ -42,7 +39,8 @@ use super::Field;
/// Nested types can themselves be nested within other arrays.
/// For more information on these types please see
/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DataType {
/// Null type
Null,
Expand Down Expand Up @@ -222,7 +220,8 @@ pub enum DataType {
}

/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TimeUnit {
/// Time in seconds.
Second,
Expand All @@ -235,7 +234,8 @@ pub enum TimeUnit {
}

/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum IntervalUnit {
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
YearMonth,
Expand All @@ -253,7 +253,8 @@ pub enum IntervalUnit {
}

// Sparse or Dense union layouts
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum UnionMode {
Sparse,
Dense,
Expand Down Expand Up @@ -1052,7 +1053,9 @@ pub(crate) fn validate_decimal256_precision_with_lt_bytes(

impl DataType {
/// Parse a data type from a JSON representation.
pub(crate) fn from(json: &Value) -> Result<DataType> {
#[cfg(feature = "json")]
pub(crate) fn from(json: &serde_json::Value) -> Result<DataType> {
use serde_json::Value;
let default_field = Field::new("", DataType::Boolean, true);
match *json {
Value::Object(ref map) => match map.get("name") {
Expand Down Expand Up @@ -1121,7 +1124,7 @@ impl DataType {
};
let tz = match map.get("timezone") {
None => Ok(None),
Some(VString(tz)) => Ok(Some(tz.clone())),
Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())),
_ => Err(ArrowError::ParseError(
"timezone must be a string".to_string(),
)),
Expand Down Expand Up @@ -1300,7 +1303,9 @@ impl DataType {
}

/// Generate a JSON representation of the data type.
pub fn to_json(&self) -> Value {
#[cfg(feature = "json")]
pub fn to_json(&self) -> serde_json::Value {
use serde_json::json;
match self {
DataType::Null => json!({"name": "null"}),
DataType::Boolean => json!({"name": "bool"}),
Expand Down
24 changes: 12 additions & 12 deletions arrow/src/datatypes/field.rs
Expand Up @@ -15,30 +15,27 @@
// specific language governing permissions and limitations
// under the License.

use crate::error::{ArrowError, Result};
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};

use serde_derive::{Deserialize, Serialize};
use serde_json::{json, Value};

use crate::error::{ArrowError, Result};

use super::DataType;

/// Describes a single column in a [`Schema`](super::Schema).
///
/// A [`Schema`](super::Schema) is an ordered collection of
/// [`Field`] objects.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
name: String,
data_type: DataType,
nullable: bool,
dict_id: i64,
dict_is_ordered: bool,
/// A map of key-value pairs containing additional custom metadata.
#[serde(skip_serializing_if = "Option::is_none")]
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
metadata: Option<BTreeMap<String, String>>,
}

Expand Down Expand Up @@ -254,7 +251,9 @@ impl Field {
}

/// Parse a `Field` definition from a JSON representation.
pub fn from(json: &Value) -> Result<Self> {
#[cfg(feature = "json")]
pub fn from(json: &serde_json::Value) -> Result<Self> {
use serde_json::Value;
match *json {
Value::Object(ref map) => {
let name = match map.get("name") {
Expand Down Expand Up @@ -497,8 +496,9 @@ impl Field {
}

/// Generate a JSON representation of the `Field`.
pub fn to_json(&self) -> Value {
let children: Vec<Value> = match self.data_type() {
#[cfg(feature = "json")]
pub fn to_json(&self) -> serde_json::Value {
let children: Vec<serde_json::Value> = match self.data_type() {
DataType::Struct(fields) => fields.iter().map(|f| f.to_json()).collect(),
DataType::List(field)
| DataType::LargeList(field)
Expand All @@ -507,7 +507,7 @@ impl Field {
_ => vec![],
};
match self.data_type() {
DataType::Dictionary(ref index_type, ref value_type) => json!({
DataType::Dictionary(ref index_type, ref value_type) => serde_json::json!({
"name": self.name,
"nullable": self.nullable,
"type": value_type.to_json(),
Expand All @@ -518,7 +518,7 @@ impl Field {
"isOrdered": self.dict_is_ordered
}
}),
_ => json!({
_ => serde_json::json!({
"name": self.name,
"nullable": self.nullable,
"type": self.data_type.to_json(),
Expand Down
27 changes: 21 additions & 6 deletions arrow/src/datatypes/mod.rs
Expand Up @@ -50,12 +50,15 @@ pub type SchemaRef = Arc<Schema>;
mod tests {
use super::*;
use crate::error::Result;
use std::collections::{BTreeMap, HashMap};

#[cfg(feature = "json")]
use crate::json::JsonSerializable;
use serde_json::Value::{Bool, Number as VNumber, String as VString};
use serde_json::{Number, Value};
use std::{
collections::{BTreeMap, HashMap},
f32::NAN,

#[cfg(feature = "json")]
use serde_json::{
Number, Value,
Value::{Bool, Number as VNumber, String as VString},
};

#[test]
Expand Down Expand Up @@ -107,6 +110,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn create_struct_type() {
let _person = DataType::Struct(vec![
Field::new("first_name", DataType::Utf8, false),
Expand All @@ -123,6 +127,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn serde_struct_type() {
let kv_array = [("k".to_string(), "v".to_string())];
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
Expand Down Expand Up @@ -170,6 +175,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn struct_field_to_json() {
let f = Field::new(
"address",
Expand Down Expand Up @@ -213,6 +219,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn map_field_to_json() {
let f = Field::new(
"my_map",
Expand Down Expand Up @@ -273,6 +280,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn primitive_field_to_json() {
let f = Field::new("first_name", DataType::Utf8, false);
let value: Value = serde_json::from_str(
Expand All @@ -289,6 +297,7 @@ mod tests {
assert_eq!(value, f.to_json());
}
#[test]
#[cfg(feature = "json")]
fn parse_struct_from_json() {
let json = r#"
{
Expand Down Expand Up @@ -335,6 +344,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn parse_map_from_json() {
let json = r#"
{
Expand Down Expand Up @@ -398,6 +408,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn parse_union_from_json() {
let json = r#"
{
Expand Down Expand Up @@ -453,6 +464,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn parse_utf8_from_json() {
let json = "{\"name\":\"utf8\"}";
let value: Value = serde_json::from_str(json).unwrap();
Expand All @@ -461,6 +473,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn parse_int32_from_json() {
let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
let value: Value = serde_json::from_str(json).unwrap();
Expand All @@ -469,6 +482,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn schema_json() {
// Add some custom metadata
let metadata: HashMap<String, String> =
Expand Down Expand Up @@ -1229,6 +1243,7 @@ mod tests {
}

#[test]
#[cfg(feature = "json")]
fn test_arrow_native_type_to_json() {
assert_eq!(Some(Bool(true)), true.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
Expand All @@ -1248,7 +1263,7 @@ mod tests {
Some(VNumber(Number::from_f64(0.01f64).unwrap())),
0.01f64.into_json_value()
);
assert_eq!(None, NAN.into_json_value());
assert_eq!(None, f32::NAN.into_json_value());
}

fn person_schema() -> Schema {
Expand Down

0 comments on commit e23ba78

Please sign in to comment.