Skip to content

Commit

Permalink
PrettyPrint support for StringViewArray and BinaryViewArray (#5634)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Apr 15, 2024
1 parent a999fb8 commit 905c46b
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 8 deletions.
19 changes: 19 additions & 0 deletions arrow-cast/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ fn make_formatter<'a>(
DataType::Boolean => array_format(as_boolean_array(array), options),
DataType::Utf8 => array_format(array.as_string::<i32>(), options),
DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
DataType::Utf8View => array_format(array.as_string_view(), options),
DataType::Binary => array_format(array.as_binary::<i32>(), options),
DataType::BinaryView => array_format(array.as_binary_view(), options),
DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
DataType::FixedSizeBinary(_) => {
let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
Expand Down Expand Up @@ -733,6 +735,13 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericStringArray<O> {
}
}

/// Formats individual rows of a `StringViewArray`.
impl<'a> DisplayIndex for &'a StringViewArray {
    /// Writes the string stored at row `idx` to `f` via its `Display` form.
    ///
    /// Any error raised by the underlying writer is propagated to the caller.
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
        let text = self.value(idx);
        write!(f, "{text}")?;
        Ok(())
    }
}

impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let v = self.value(idx);
Expand All @@ -743,6 +752,16 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
}
}

/// Formats individual rows of a `BinaryViewArray`.
impl<'a> DisplayIndex for &'a BinaryViewArray {
    /// Writes the bytes stored at row `idx` to `f` as lowercase hexadecimal,
    /// two zero-padded digits per byte and no separators.
    ///
    /// Stops at, and propagates, the first error raised by the writer.
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
        self.value(idx)
            .iter()
            .try_for_each(|byte| write!(f, "{byte:02x}"))?;
        Ok(())
    }
}

impl<'a> DisplayIndex for &'a FixedSizeBinaryArray {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let v = self.value(idx);
Expand Down
98 changes: 90 additions & 8 deletions arrow-cast/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
//! Utilities for pretty printing record batches. Note this module is not
//! available unless `feature = "prettyprint"` is enabled.

use crate::display::{ArrayFormatter, FormatOptions};
use std::fmt::Display;

use comfy_table::{Cell, Table};

use arrow_array::{Array, ArrayRef, RecordBatch};
use arrow_schema::ArrowError;
use comfy_table::{Cell, Table};
use std::fmt::Display;

use crate::display::{ArrayFormatter, FormatOptions};

/// Create a visual representation of record batches
pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
Expand Down Expand Up @@ -131,17 +134,20 @@ fn create_column(

#[cfg(test)]
mod tests {
use std::fmt::Write;
use std::sync::Arc;

use half::f16;

use super::*;
use crate::display::array_value_to_string;
use arrow_array::builder::*;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::Buffer;
use arrow_schema::*;
use half::f16;
use std::fmt::Write;
use std::sync::Arc;

use crate::display::array_value_to_string;

use super::*;

#[test]
fn test_pretty_format_batches() {
Expand Down Expand Up @@ -317,6 +323,82 @@ mod tests {
assert_eq!(expected, actual, "Actual result:\n{table}");
}

/// Pretty-printing a nullable `Utf8View` column renders each string as-is and
/// each null as an empty cell.
#[test]
fn test_pretty_format_string_view() {
    let schema = Arc::new(Schema::new(vec![Field::new(
        "d1",
        DataType::Utf8View,
        true,
    )]));

    // Mix short values, longer values and nulls.
    // NOTE(review): the original comment says the small capacity is meant to
    // produce multiple views; confirm `with_capacity(20)` has that effect.
    let mut builder = StringViewBuilder::with_capacity(20);
    builder.append_value("hello");
    builder.append_null();
    builder.append_value("longer than 12 bytes");
    builder.append_value("another than 12 bytes");
    builder.append_null();
    builder.append_value("small");

    let array: ArrayRef = Arc::new(builder.finish());
    let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
    let table = pretty_format_batches(&[batch]).unwrap().to_string();

    // Every row is padded to the column width set by the longest value
    // (21 characters), so each line is exactly as wide as the border.
    let expected = vec![
        "+-----------------------+",
        "| d1                    |",
        "+-----------------------+",
        "| hello                 |",
        "|                       |",
        "| longer than 12 bytes  |",
        "| another than 12 bytes |",
        "|                       |",
        "| small                 |",
        "+-----------------------+",
    ];

    let actual: Vec<&str> = table.lines().collect();

    assert_eq!(expected, actual, "Actual result:\n{table:#?}");
}

/// Pretty-printing a nullable `BinaryView` column renders each value as
/// lowercase hex and each null as an empty cell.
#[test]
fn test_pretty_format_binary_view() {
    let schema = Arc::new(Schema::new(vec![Field::new(
        "d1",
        DataType::BinaryView,
        true,
    )]));

    // Mix short values, longer values and nulls.
    // NOTE(review): the original comment says the small capacity is meant to
    // produce multiple views; confirm `with_capacity(20)` has that effect.
    let mut builder = BinaryViewBuilder::with_capacity(20);
    builder.append_value(b"hello");
    builder.append_null();
    builder.append_value(b"longer than 12 bytes");
    builder.append_value(b"another than 12 bytes");
    builder.append_null();
    builder.append_value(b"small");

    let array: ArrayRef = Arc::new(builder.finish());
    let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
    let table = pretty_format_batches(&[batch]).unwrap().to_string();

    // Every row is padded to the column width set by the longest hex string
    // (42 characters), so each line is exactly as wide as the border.
    let expected = vec![
        "+--------------------------------------------+",
        "| d1                                         |",
        "+--------------------------------------------+",
        "| 68656c6c6f                                 |",
        "|                                            |",
        "| 6c6f6e676572207468616e203132206279746573   |",
        "| 616e6f74686572207468616e203132206279746573 |",
        "|                                            |",
        "| 736d616c6c                                 |",
        "+--------------------------------------------+",
    ];

    let actual: Vec<&str> = table.lines().collect();

    assert_eq!(expected, actual, "Actual result:\n\n{table:#?}");
}

#[test]
fn test_pretty_format_fixed_size_binary() {
// define a schema.
Expand Down

0 comments on commit 905c46b

Please sign in to comment.