Skip to content

Commit

Permalink
Format Timestamps as RFC3339 (#2939)
Browse files Browse the repository at this point in the history
* standarize-tz-display

* only test named timezone while chrono-tz enabled

* fix docs

* fix doc
  • Loading branch information
waitingkuo committed Oct 28, 2022
1 parent 843a2e5 commit cbee739
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 30 deletions.
33 changes: 16 additions & 17 deletions arrow-array/src/array/primitive_array.rs
Expand Up @@ -18,15 +18,18 @@
use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
use crate::iterator::PrimitiveIter;
use crate::raw_pointer::RawPtrBox;
use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time};
use crate::temporal_conversions::{
as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time,
};
use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::types::*;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{i256, ArrowNativeType, Buffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime};
use half::f16;
use std::any::Any;

Expand Down Expand Up @@ -116,40 +119,40 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// # Example: UTC timestamps post epoch
/// ```
/// # use arrow_array::TimestampSecondArray;
/// use chrono::FixedOffset;
/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00
/// let arr = TimestampSecondArray::from(vec![11111111]);
/// // OR
/// let arr = TimestampSecondArray::from(vec![Some(11111111)]);
/// let utc_offset = FixedOffset::east(0);
/// let utc_tz: Tz = "+00:00".parse().unwrap();
///
/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11")
/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00")
/// ```
///
/// # Example: UTC timestamps pre epoch
/// ```
/// # use arrow_array::TimestampSecondArray;
/// use chrono::FixedOffset;
/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00
/// let arr = TimestampSecondArray::from(vec![-11111111]);
/// // OR
/// let arr = TimestampSecondArray::from(vec![Some(-11111111)]);
/// let utc_offset = FixedOffset::east(0);
/// let utc_tz: Tz = "+00:00".parse().unwrap();
///
/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49")
/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00")
/// ```
///
/// # Example: With timezone specified
/// ```
/// # use arrow_array::TimestampSecondArray;
/// use chrono::FixedOffset;
/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00
/// let arr = TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string());
/// // OR
/// let arr = TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string());
/// let sydney_offset = FixedOffset::east(10 * 60 * 60);
/// let sydney_tz: Tz = "+10:00".parse().unwrap();
///
/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11")
/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00")
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
Expand Down Expand Up @@ -503,12 +506,8 @@ where
///
/// functionally it is the same as `value_as_datetime`, however the result is
/// returned as a `DateTime<Tz>` in the passed time zone `tz` instead of a `NaiveDateTime`
pub fn value_as_datetime_with_tz(
&self,
i: usize,
tz: FixedOffset,
) -> Option<NaiveDateTime> {
as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime + tz)
pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> Option<DateTime<Tz>> {
as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz)
}

/// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
Expand Down
80 changes: 71 additions & 9 deletions arrow/src/util/display.rs
Expand Up @@ -33,6 +33,7 @@ use crate::{array, datatypes::IntervalUnit};
use array::DictionaryArray;

use crate::error::{ArrowError, Result};
use arrow_array::timezone::Tz;

macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
Expand Down Expand Up @@ -190,14 +191,37 @@ macro_rules! make_string_datetime {
} else {
array
.value_as_datetime($row)
.map(|d| d.to_string())
.map(|d| format!("{:?}", d))
.unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
};

Ok(s)
}};
}

// Formats the timestamp stored at `$row` of `$column` (downcast to
// `$array_type`) using the time zone named by `$tz_string`.
//
// Evaluates to `Ok(String)` where the string is:
// * empty when the slot is null;
// * the RFC 3339 rendering (including the UTC offset) when `$tz_string`
//   parses as a `Tz`;
// * the zone-less datetime followed by `(Unknown Time Zone '<tz>')` when
//   `$tz_string` does not parse, so the value is still shown instead of
//   failing the whole display call;
// * "ERROR CONVERTING DATE" when the raw value cannot be converted.
macro_rules! make_string_datetime_with_tz {
    ($array_type:ty, $tz_string: ident, $column: ident, $row: ident) => {{
        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();

        let s = if array.is_null($row) {
            "".to_string()
        } else {
            match $tz_string.parse::<Tz>() {
                Ok(tz) => array
                    .value_as_datetime_with_tz($row, tz)
                    // `to_rfc3339` already returns an owned `String`; no
                    // extra `format!` round-trip is needed.
                    .map(|d| d.to_rfc3339())
                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
                Err(_) => array
                    .value_as_datetime($row)
                    .map(|d| format!("{:?} (Unknown Time Zone '{}')", d, $tz_string))
                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
            }
        };

        Ok(s)
    }};
}

// It's not possible to do array.value($row).to_string() for &[u8], let's format it as hex
macro_rules! make_string_hex {
($array_type:ty, $column: ident, $row: ident) => {{
Expand Down Expand Up @@ -334,17 +358,55 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
DataType::Float32 => make_string!(array::Float32Array, column, row),
DataType::Float64 => make_string!(array::Float64Array, column, row),
DataType::Decimal128(..) => make_string_from_decimal(column, row),
DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
make_string_datetime!(array::TimestampSecondArray, column, row)
DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Second => {
match tz_string_opt {
Some(tz_string) => make_string_datetime_with_tz!(
array::TimestampSecondArray,
tz_string,
column,
row
),
None => make_string_datetime!(array::TimestampSecondArray, column, row),
}
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
make_string_datetime!(array::TimestampMillisecondArray, column, row)
DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Millisecond => {
match tz_string_opt {
Some(tz_string) => make_string_datetime_with_tz!(
array::TimestampMillisecondArray,
tz_string,
column,
row
),
None => {
make_string_datetime!(array::TimestampMillisecondArray, column, row)
}
}
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
make_string_datetime!(array::TimestampMicrosecondArray, column, row)
DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Microsecond => {
match tz_string_opt {
Some(tz_string) => make_string_datetime_with_tz!(
array::TimestampMicrosecondArray,
tz_string,
column,
row
),
None => {
make_string_datetime!(array::TimestampMicrosecondArray, column, row)
}
}
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
make_string_datetime!(array::TimestampNanosecondArray, column, row)
DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Nanosecond => {
match tz_string_opt {
Some(tz_string) => make_string_datetime_with_tz!(
array::TimestampNanosecondArray,
tz_string,
column,
row
),
None => {
make_string_datetime!(array::TimestampNanosecondArray, column, row)
}
}
}
DataType::Date32 => make_string_date!(array::Date32Array, column, row),
DataType::Date64 => make_string_date!(array::Date64Array, column, row),
Expand Down
129 changes: 125 additions & 4 deletions arrow/src/util/pretty.rs
Expand Up @@ -370,13 +370,134 @@ mod tests {
};
}

/// Builds a `$ARRAYTYPE` column holding `$VALUE` followed by a null, tags it
/// with the time zone `$TZ_STRING`, renders it through
/// `pretty_format_batches`, and asserts that the rendered lines equal
/// `$EXPECTED_RESULT`.
macro_rules! check_datetime_with_timezone {
    ($ARRAYTYPE:ident, $VALUE:expr, $TZ_STRING:expr, $EXPECTED_RESULT:expr) => {
        let mut values = $ARRAYTYPE::builder(10);
        values.append_value($VALUE);
        values.append_null();
        let column = values.finish().with_timezone($TZ_STRING);

        // Single nullable field named "f" carrying the timezone-aware type.
        let field = Field::new("f", column.data_type().clone(), true);
        let schema = Arc::new(Schema::new(vec![field]));
        let batch = RecordBatch::try_new(schema, vec![Arc::new(column)]).unwrap();

        let rendered = pretty_format_batches(&[batch])
            .expect("formatting batches")
            .to_string();

        let actual = rendered.lines().collect::<Vec<&str>>();
        let expected = $EXPECTED_RESULT;

        assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", actual);
    };
}

/// A named zone ("UTC") is rendered as RFC 3339 with a `+00:00` offset.
///
/// Gated on the `chrono-tz` feature (the cfg key is `feature`, singular;
/// `features` is never set, which silently disabled this test).
#[test]
#[cfg(feature = "chrono-tz")]
fn test_pretty_format_timestamp_second_with_utc_timezone() {
    let expected = vec![
        "+---------------------------+",
        "| f                         |",
        "+---------------------------+",
        "| 1970-05-09T14:25:11+00:00 |",
        "|                           |",
        "+---------------------------+",
    ];
    check_datetime_with_timezone!(
        TimestampSecondArray,
        11111111,
        "UTC".to_string(),
        expected
    );
}

/// A named non-UTC zone ("Asia/Taipei") is rendered as RFC 3339 with its
/// `+08:00` offset applied to the wall-clock time.
///
/// Gated on the `chrono-tz` feature (the cfg key is `feature`, singular;
/// `features` is never set, which silently disabled this test).
#[test]
#[cfg(feature = "chrono-tz")]
fn test_pretty_format_timestamp_second_with_non_utc_timezone() {
    let expected = vec![
        "+---------------------------+",
        "| f                         |",
        "+---------------------------+",
        "| 1970-05-09T22:25:11+08:00 |",
        "|                           |",
        "+---------------------------+",
    ];
    check_datetime_with_timezone!(
        TimestampSecondArray,
        11111111,
        "Asia/Taipei".to_string(),
        expected
    );
}

/// A fixed-offset zone string ("+08:00") is rendered as RFC 3339 with that
/// offset; the null slot renders as an empty cell.
#[test]
fn test_pretty_format_timestamp_second_with_fixed_offset_timezone() {
    // Every row is padded to the border width, as the table formatter emits.
    let expected = vec![
        "+---------------------------+",
        "| f                         |",
        "+---------------------------+",
        "| 1970-05-09T22:25:11+08:00 |",
        "|                           |",
        "+---------------------------+",
    ];
    check_datetime_with_timezone!(
        TimestampSecondArray,
        11111111,
        "+08:00".to_string(),
        expected
    );
}

/// An offset string without a sign ("08:00") is not a valid time zone: the
/// value falls back to the zone-less datetime plus an
/// `(Unknown Time Zone '...')` marker.
#[test]
fn test_pretty_format_timestamp_second_with_incorrect_fixed_offset_timezone() {
    // Every row is padded to the border width, as the table formatter emits.
    let expected = vec![
        "+-------------------------------------------------+",
        "| f                                               |",
        "+-------------------------------------------------+",
        "| 1970-05-09T14:25:11 (Unknown Time Zone '08:00') |",
        "|                                                 |",
        "+-------------------------------------------------+",
    ];
    check_datetime_with_timezone!(
        TimestampSecondArray,
        11111111,
        "08:00".to_string(),
        expected
    );
}

/// An unrecognised zone name ("Unknown") falls back to the zone-less
/// datetime plus an `(Unknown Time Zone '...')` marker.
#[test]
fn test_pretty_format_timestamp_second_with_unknown_timezone() {
    // Every row is padded to the border width, as the table formatter emits.
    let expected = vec![
        "+---------------------------------------------------+",
        "| f                                                 |",
        "+---------------------------------------------------+",
        "| 1970-05-09T14:25:11 (Unknown Time Zone 'Unknown') |",
        "|                                                   |",
        "+---------------------------------------------------+",
    ];
    check_datetime_with_timezone!(
        TimestampSecondArray,
        11111111,
        "Unknown".to_string(),
        expected
    );
}

#[test]
fn test_pretty_format_timestamp_second() {
let expected = vec![
"+---------------------+",
"| f |",
"+---------------------+",
"| 1970-05-09 14:25:11 |",
"| 1970-05-09T14:25:11 |",
"| |",
"+---------------------+",
];
Expand All @@ -389,7 +510,7 @@ mod tests {
"+-------------------------+",
"| f |",
"+-------------------------+",
"| 1970-01-01 03:05:11.111 |",
"| 1970-01-01T03:05:11.111 |",
"| |",
"+-------------------------+",
];
Expand All @@ -402,7 +523,7 @@ mod tests {
"+----------------------------+",
"| f |",
"+----------------------------+",
"| 1970-01-01 00:00:11.111111 |",
"| 1970-01-01T00:00:11.111111 |",
"| |",
"+----------------------------+",
];
Expand All @@ -415,7 +536,7 @@ mod tests {
"+-------------------------------+",
"| f |",
"+-------------------------------+",
"| 1970-01-01 00:00:00.011111111 |",
"| 1970-01-01T00:00:00.011111111 |",
"| |",
"+-------------------------------+",
];
Expand Down

0 comments on commit cbee739

Please sign in to comment.