diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 016e5306cf8..eb3618f7c30 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -18,7 +18,10 @@
 use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
 use crate::iterator::PrimitiveIter;
 use crate::raw_pointer::RawPtrBox;
-use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time};
+use crate::temporal_conversions::{
+    as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time,
+};
+use crate::timezone::Tz;
 use crate::trusted_len::trusted_len_unzip;
 use crate::types::*;
 use crate::{print_long_array, Array, ArrayAccessor};
@@ -26,7 +29,7 @@ use arrow_buffer::{i256, ArrowNativeType, Buffer};
 use arrow_data::bit_iterator::try_for_each_valid_idx;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
-use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
+use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime};
 use half::f16;
 use std::any::Any;
 
@@ -116,40 +119,40 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
 /// # Example: UTC timestamps post epoch
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00
 /// let arr = TimestampSecondArray::from(vec![11111111]);
 /// // OR
 /// let arr = TimestampSecondArray::from(vec![Some(11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00")
 /// ```
 ///
 /// # Example: UTC timestamps pre epoch
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00
 /// let arr = TimestampSecondArray::from(vec![-11111111]);
 /// // OR
 /// let arr = TimestampSecondArray::from(vec![Some(-11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00")
 /// ```
 ///
 /// # Example: With timezone specified
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00
 /// let arr = TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string());
 /// // OR
 /// let arr = TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string());
-/// let sydney_offset = FixedOffset::east(10 * 60 * 60);
+/// let sydney_tz: Tz = "+10:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00")
 /// ```
 ///
 pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
@@ -503,12 +506,8 @@ where
     ///
-    /// functionally it is same as `value_as_datetime`, however it adds
-    /// the passed tz to the to-be-returned NaiveDateTime
-    pub fn value_as_datetime_with_tz(
-        &self,
-        i: usize,
-        tz: FixedOffset,
-    ) -> Option<NaiveDateTime> {
-        as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime + tz)
+    /// functionally it is same as `value_as_datetime`, however it attaches
+    /// the passed tz and returns a timezone-aware `DateTime<Tz>`
+    pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> Option<DateTime<Tz>> {
+        as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz)
     }
 
     /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 7c0b5a28f89..f5bef1605ef 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -33,6 +33,7 @@ use crate::{array, datatypes::IntervalUnit};
 use array::DictionaryArray;
 
 use crate::error::{ArrowError, Result};
+use arrow_array::timezone::Tz;
 
 macro_rules! make_string {
     ($array_type:ty, $column: ident, $row: ident) => {{
@@ -190,7 +191,7 @@ macro_rules! make_string_datetime {
         } else {
             array
                 .value_as_datetime($row)
-                .map(|d| d.to_string())
+                .map(|d| format!("{:?}", d))
                 .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
         };
 
@@ -198,6 +199,29 @@ macro_rules! make_string_datetime {
     }};
 }
 
+macro_rules! make_string_datetime_with_tz {
+    ($array_type:ty, $tz_string: ident, $column: ident, $row: ident) => {{
+        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+
+        let s = if array.is_null($row) {
+            "".to_string()
+        } else {
+            match $tz_string.parse::<Tz>() {
+                Ok(tz) => array
+                    .value_as_datetime_with_tz($row, tz)
+                    .map(|d| d.to_rfc3339())
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+                Err(_) => array
+                    .value_as_datetime($row)
+                    .map(|d| format!("{:?} (Unknown Time Zone '{}')", d, $tz_string))
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+            }
+        };
+
+        Ok(s)
+    }};
+}
+
 // It's not possible to do array.value($row).to_string() for &[u8], let's format it as hex
 macro_rules! make_string_hex {
     ($array_type:ty, $column: ident, $row: ident) => {{
@@ -334,17 +358,55 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
         DataType::Float32 => make_string!(array::Float32Array, column, row),
         DataType::Float64 => make_string!(array::Float64Array, column, row),
         DataType::Decimal128(..) => make_string_from_decimal(column, row),
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
-            make_string_datetime!(array::TimestampSecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Second => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampSecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => make_string_datetime!(array::TimestampSecondArray, column, row),
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
-            make_string_datetime!(array::TimestampMillisecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Millisecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampMillisecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampMillisecondArray, column, row)
+                }
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
-            make_string_datetime!(array::TimestampMicrosecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Microsecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampMicrosecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampMicrosecondArray, column, row)
+                }
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
-            make_string_datetime!(array::TimestampNanosecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Nanosecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampNanosecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampNanosecondArray, column, row)
+                }
+            }
         }
         DataType::Date32 => make_string_date!(array::Date32Array, column, row),
         DataType::Date64 => make_string_date!(array::Date64Array, column, row),
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f819e389f96..8d811223cbb 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -370,13 +370,134 @@ mod tests {
         };
     }
 
+    /// Generate an array with type $ARRAYTYPE with a numeric value of
+    /// $VALUE, and compare $EXPECTED_RESULT to the output of
+    /// formatting that array with `pretty_format_batches`
+    macro_rules! check_datetime_with_timezone {
+        ($ARRAYTYPE:ident, $VALUE:expr, $TZ_STRING:expr, $EXPECTED_RESULT:expr) => {
+            let mut builder = $ARRAYTYPE::builder(10);
+            builder.append_value($VALUE);
+            builder.append_null();
+            let array = builder.finish();
+            let array = array.with_timezone($TZ_STRING);
+
+            let schema = Arc::new(Schema::new(vec![Field::new(
+                "f",
+                array.data_type().clone(),
+                true,
+            )]));
+            let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();
+
+            let table = pretty_format_batches(&[batch])
+                .expect("formatting batches")
+                .to_string();
+
+            let expected = $EXPECTED_RESULT;
+            let actual: Vec<&str> = table.lines().collect();
+
+            assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", actual);
+        };
+    }
+
+    #[test]
+    #[cfg(feature = "chrono-tz")]
+    fn test_pretty_format_timestamp_second_with_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T14:25:11+00:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "UTC".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "chrono-tz")]
+    fn test_pretty_format_timestamp_second_with_non_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Asia/Taipei".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_pretty_format_timestamp_second_with_fixed_offset_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "+08:00".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_pretty_format_timestamp_second_with_incorrect_fixed_offset_timezone() {
+        let expected = vec![
+            "+-------------------------------------------------+",
+            "| f                                               |",
+            "+-------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone '08:00') |",
+            "|                                                 |",
+            "+-------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "08:00".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_pretty_format_timestamp_second_with_unknown_timezone() {
+        let expected = vec![
+            "+---------------------------------------------------+",
+            "| f                                                 |",
+            "+---------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone 'Unknown') |",
+            "|                                                   |",
+            "+---------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Unknown".to_string(),
+            expected
+        );
+    }
+
     #[test]
     fn test_pretty_format_timestamp_second() {
         let expected = vec![
             "+---------------------+",
             "| f                   |",
             "+---------------------+",
-            "| 1970-05-09 14:25:11 |",
+            "| 1970-05-09T14:25:11 |",
             "|                     |",
             "+---------------------+",
         ];
@@ -389,7 +510,7 @@ mod tests {
             "+-------------------------+",
             "| f                       |",
             "+-------------------------+",
-            "| 1970-01-01 03:05:11.111 |",
+            "| 1970-01-01T03:05:11.111 |",
             "|                         |",
             "+-------------------------+",
         ];
@@ -402,7 +523,7 @@ mod tests {
             "+----------------------------+",
             "| f                          |",
             "+----------------------------+",
-            "| 1970-01-01 00:00:11.111111 |",
+            "| 1970-01-01T00:00:11.111111 |",
             "|                            |",
             "+----------------------------+",
         ];
@@ -415,7 +536,7 @@ mod tests {
             "+-------------------------------+",
             "| f                             |",
             "+-------------------------------+",
-            "| 1970-01-01 00:00:00.011111111 |",
+            "| 1970-01-01T00:00:00.011111111 |",
             "|                               |",
             "+-------------------------------+",
         ];