Skip to content

Commit

Permalink
feat: JSON encoding of FixedSizeList (#5646)
Browse files Browse the repository at this point in the history
Added ability to encode the FixedSizeList type in JSON. The implementation
of FixedSizeListEncoder is very similar to that of ListEncoder, but is
somewhat simpler, because of the constant offset.

A test was added to verify behaviour of the JSON encoder with and without
explicit nulls.
  • Loading branch information
hiltontj committed Apr 15, 2024
1 parent 0124307 commit 0d031cc
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 2 deletions.
85 changes: 84 additions & 1 deletion arrow-json/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,8 @@ mod tests {
use serde_json::json;

use arrow_array::builder::{
FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
FixedSizeBinaryBuilder, FixedSizeListBuilder, Int32Builder, Int64Builder, MapBuilder,
StringBuilder,
};
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::ArrayData;
Expand Down Expand Up @@ -2215,4 +2216,86 @@ mod tests {
);
}
}

/// Checks JSON output for a nullable `FixedSizeList<Int32>` column, both with
/// explicit nulls enabled and with the default `ArrayWriter` settings.
#[test]
fn test_writer_fixed_size_list() {
    // Every list in the column holds exactly `value_length` nullable Int32 items.
    let value_length = 3;
    let item_field = FieldRef::new(Field::new("item", DataType::Int32, true));
    let list_type = DataType::FixedSizeList(item_field, value_length);
    let schema = SchemaRef::new(Schema::new(vec![Field::new("list", list_type, true)]));

    // Three valid lists (each containing one null item) followed by a null list.
    let rows = [
        Some([Some(1), Some(2), None]),
        Some([Some(3), None, Some(4)]),
        Some([None, Some(5), Some(6)]),
        None,
    ];
    let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), value_length);
    for row in rows {
        let is_valid = row.is_some();
        if let Some(items) = row {
            for item in items {
                if let Some(v) = item {
                    builder.values().append_value(v);
                } else {
                    builder.values().append_null();
                }
            }
        } else {
            // Pad the child values with `value_length` nulls before marking
            // the list itself as null.
            for _ in 0..value_length {
                builder.values().append_null();
            }
        }
        builder.append(is_valid);
    }
    let column = Arc::new(builder.finish()) as ArrayRef;
    let batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    // Explicit nulls enabled: the null list is written as `"list": null`.
    let mut explicit_buf = Vec::new();
    let mut explicit_writer = WriterBuilder::new()
        .with_explicit_nulls(true)
        .build::<_, JsonArray>(&mut explicit_buf);
    explicit_writer.write(&batch).unwrap();
    explicit_writer.close().unwrap();
    let with_explicit_nulls: Value = serde_json::from_slice(&explicit_buf).unwrap();
    assert_eq!(
        json!([
            {"list": [1, 2, null]},
            {"list": [3, null, 4]},
            {"list": [null, 5, 6]},
            {"list": null},
        ]),
        with_explicit_nulls
    );

    // Default writer: the null list's key is left out of its row object.
    let mut default_buf = Vec::new();
    let mut default_writer = ArrayWriter::new(&mut default_buf);
    default_writer.write(&batch).unwrap();
    default_writer.close().unwrap();
    let without_explicit_nulls: Value = serde_json::from_slice(&default_buf).unwrap();
    assert_eq!(
        json!([
            {"list": [1, 2, null]},
            {"list": [3, null, 4]},
            {"list": [null, 5, 6]},
            {}, // empty because nulls are omitted
        ]),
        without_explicit_nulls
    );
}
}
53 changes: 52 additions & 1 deletion arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ fn make_encoder_impl<'a>(
let array = array.as_list::<i64>();
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}
DataType::FixedSizeList(_, _) => {
let array = array.as_fixed_size_list();
(Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}

DataType::Dictionary(_, _) => downcast_dictionary_array! {
array => (Box::new(DictionaryEncoder::try_new(array, options)?) as _, array.logical_nulls()),
Expand All @@ -100,7 +104,7 @@ fn make_encoder_impl<'a>(
}

DataType::FixedSizeBinary(_) => {
let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
let array = array.as_fixed_size_binary();
(Box::new(FixedSizeBinaryEncoder::new(array)) as _, array.nulls().cloned())
}

Expand Down Expand Up @@ -329,6 +333,53 @@ impl<'a, O: OffsetSizeTrait> Encoder for ListEncoder<'a, O> {
}
}

/// JSON encoder for a `FixedSizeListArray`: each list is written as a JSON
/// array of `value_length` consecutive child values.
struct FixedSizeListEncoder<'a> {
/// Number of child values per list, taken from `array.value_length()`.
value_length: usize,
/// Null buffer returned by `make_encoder_impl` for the child values array;
/// child slots marked null here are written as `null`.
nulls: Option<NullBuffer>,
/// Encoder for the child values, addressed by child index.
encoder: Box<dyn Encoder + 'a>,
}

impl<'a> FixedSizeListEncoder<'a> {
    /// Creates an encoder for `array` by building an encoder for its child
    /// values with the same `options`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `make_encoder_impl` for the child
    /// values array.
    fn try_new(
        array: &'a FixedSizeListArray,
        options: &EncoderOptions,
    ) -> Result<Self, ArrowError> {
        let child_values = array.values().as_ref();
        make_encoder_impl(child_values, options).map(|(encoder, nulls)| Self {
            value_length: array.value_length().as_usize(),
            nulls,
            encoder,
        })
    }
}

impl<'a> Encoder for FixedSizeListEncoder<'a> {
    /// Writes the list at position `idx` to `out` as a JSON array.
    ///
    /// The list's children occupy the child index range
    /// `idx * value_length .. (idx + 1) * value_length`; a child marked null in
    /// `self.nulls` is written as `null`, any other child is delegated to the
    /// child encoder.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
        let first = idx * self.value_length;
        let past_last = first + self.value_length;

        out.push(b'[');
        // The presence of a null buffer is checked once, outside the loops.
        if let Some(child_nulls) = self.nulls.as_ref() {
            for child in first..past_last {
                // Comma before every element except the first.
                if child > first {
                    out.push(b',');
                }
                if child_nulls.is_null(child) {
                    out.extend_from_slice(b"null");
                } else {
                    self.encoder.encode(child, out);
                }
            }
        } else {
            for child in first..past_last {
                if child > first {
                    out.push(b',');
                }
                self.encoder.encode(child, out);
            }
        }
        out.push(b']');
    }
}

struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
keys: ScalarBuffer<K::Native>,
encoder: Box<dyn Encoder + 'a>,
Expand Down

0 comments on commit 0d031cc

Please sign in to comment.