From 1aac86ae5437758d8eade9b8c70a9775c80e92e5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 29 Aug 2022 23:00:04 -0700 Subject: [PATCH] Cast timestamp array to string array with timezone. --- arrow/src/compute/kernels/cast.rs | 96 +++++++++++++++++++++------ arrow/src/compute/kernels/mod.rs | 1 + arrow/src/compute/kernels/temporal.rs | 80 ++++++++++++++-------- 3 files changed, 128 insertions(+), 49 deletions(-) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 3df0c861c70..5f157768279 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -35,6 +35,8 @@ //! assert_eq!(7.0, c.value(2)); //! ``` +use chrono::format::strftime::StrftimeItems; +use chrono::format::{parse, Parsed}; use chrono::Timelike; use std::ops::{Div, Mul}; use std::str; @@ -45,6 +47,9 @@ use crate::compute::divide_scalar; use crate::compute::kernels::arithmetic::{divide, multiply}; use crate::compute::kernels::arity::unary; use crate::compute::kernels::cast_utils::string_to_timestamp_nanos; +use crate::compute::kernels::temporal::extract_component_from_array; +use crate::compute::kernels::temporal::return_compute_error_with; +use crate::compute::using_chrono_tz_and_utc_naive_date_time; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::temporal_conversions::{ @@ -728,18 +733,18 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), - Timestamp(unit, _) => match unit { + Timestamp(unit, tz) => match unit { TimeUnit::Nanosecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Microsecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Millisecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Second => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } }, Date32 => cast_date32_to_string::(array), @@ -784,18 +789,18 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), - Timestamp(unit, _) => match unit { + Timestamp(unit, tz) => match unit { TimeUnit::Nanosecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Microsecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Millisecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Second => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } }, Date32 => cast_date32_to_string::(array), @@ -1482,7 +1487,10 @@ where } /// Cast timestamp types to Utf8/LargeUtf8 -fn cast_timestamp_to_string(array: &ArrayRef) -> Result +fn cast_timestamp_to_string( + array: &ArrayRef, + tz: &Option, +) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From<::Native>, @@ -1490,17 +1498,24 @@ where { let array = array.as_any().downcast_ref::>().unwrap(); - Ok(Arc::new( - (0..array.len()) - .map(|ix| { - if array.is_null(ix) { - None - } else { - array.value_as_datetime(ix).map(|v| v.to_string()) - } - }) - .collect::>(), - )) + let mut builder = GenericStringBuilder::::new(); + + if let Some(tz) = tz { + let mut scratch = Parsed::new(); + extract_component_from_array!( + array, + builder, + to_string, + value_as_datetime_with_tz, + tz, + scratch, + |h| h + ) + } else { + extract_component_from_array!(array, builder, to_string, value_as_datetime, |h| h) + } + + Ok(Arc::new(builder.finish()) as ArrayRef) } /// Cast date32 types to Utf8/LargeUtf8 @@ -3602,6 +3617,7 @@ mod tests { } #[test] + #[cfg(feature = "chrono-tz")] fn test_cast_timestamp_to_string() { let a = TimestampMillisecondArray::from_opt_vec( vec![Some(864000000005), Some(1545696000001), None], @@ -5127,6 +5143,7 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] // running forever + #[cfg(feature = "chrono-tz")] fn test_can_cast_types() { // this function attempts to ensure that can_cast_types stays // in sync with cast. It simply tries all combinations of @@ -5490,4 +5507,39 @@ mod tests { assert_eq!(&out1, &out2.slice(1, 2)) } + + #[test] + #[cfg(feature = "chrono-tz")] + fn test_timestamp_cast_utf8() { + let array: PrimitiveArray = + vec![Some(37800000000), None, Some(86339000000)].into(); + let out = cast(&(Arc::new(array) as ArrayRef), &DataType::Utf8).unwrap(); + + let expected = StringArray::from(vec![ + Some("1970-01-01 10:30:00"), + None, + Some("1970-01-01 23:58:59"), + ]); + + assert_eq!( + out.as_any().downcast_ref::().unwrap(), + &expected + ); + + let array: PrimitiveArray = + vec![Some(37800000000), None, Some(86339000000)].into(); + let array = array.with_timezone("Australia/Sydney".to_string()); + let out = cast(&(Arc::new(array) as ArrayRef), &DataType::Utf8).unwrap(); + + let expected = StringArray::from(vec![ + Some("1970-01-01 20:30:00"), + None, + Some("1970-01-02 09:58:59"), + ]); + + assert_eq!( + out.as_any().downcast_ref::().unwrap(), + &expected + ); + } } diff --git a/arrow/src/compute/kernels/mod.rs b/arrow/src/compute/kernels/mod.rs index c615d3a55e1..505d0d07d7c 100644 --- a/arrow/src/compute/kernels/mod.rs +++ b/arrow/src/compute/kernels/mod.rs @@ -35,5 +35,6 @@ pub mod sort; pub mod substring; pub mod take; pub mod temporal; +// pub(crate) use temporal::extract_component_from_array; pub mod window; pub mod zip; diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index b24a6333f5f..1bec1d84f68 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -28,33 +28,33 @@ use chrono::format::{parse, Parsed}; use chrono::FixedOffset; macro_rules! extract_component_from_array { - ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => { + ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $convert:expr) => { for i in 0..$array.len() { if $array.is_null(i) { $builder.append_null(); } else { match $array.$using(i) { - Some(dt) => $builder.append_value(dt.$extract_fn() as i32), + Some(dt) => $builder.append_value($convert(dt.$extract_fn())), None => $builder.append_null(), } } } }; - ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, $using:ident) => { + ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, $using:ident, $convert:expr) => { for i in 0..$array.len() { if $array.is_null(i) { $builder.append_null(); } else { match $array.$using(i) { Some(dt) => { - $builder.append_value(dt.$extract_fn1().$extract_fn2() as i32); + $builder.append_value($convert(dt.$extract_fn1().$extract_fn2())); } None => $builder.append_null(), } } } }; - ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, $parsed:ident) => { + ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, $parsed:ident, $convert:expr) => { if ($tz.starts_with('+') || $tz.starts_with('-')) && !$tz.contains(':') { return_compute_error_with!( "Invalid timezone", @@ -90,7 +90,7 @@ macro_rules! extract_component_from_array { }; match $array.$using(i, fixed_offset) { Some(dt) => { - $builder.append_value(dt.$extract_fn() as i32); + $builder.append_value($convert(dt.$extract_fn())); } None => $builder.append_null(), } @@ -112,6 +112,9 @@ macro_rules! return_compute_error_with { }; } +pub(crate) use extract_component_from_array; +pub(crate) use return_compute_error_with; + // Internal trait, which is used for mapping values from DateLike structures trait ChronoDateExt { /// Returns a value in range `1..=4` indicating the quarter this date falls into @@ -177,10 +180,10 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Time32(_) | &DataType::Time64(_) => { - extract_component_from_array!(array, b, hour, value_as_time) + extract_component_from_array!(array, b, hour, value_as_time, |h| h as i32) } &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, hour, value_as_datetime) + extract_component_from_array!(array, b, hour, value_as_datetime, |h| h as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -190,7 +193,8 @@ where hour, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("hour does not support", dt), @@ -208,7 +212,7 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => { - extract_component_from_array!(array, b, year, value_as_datetime) + extract_component_from_array!(array, b, year, value_as_datetime, |h| h as i32) } dt => return_compute_error_with!("year does not support", dt), } @@ -225,7 +229,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, quarter, value_as_datetime) + extract_component_from_array!(array, b, quarter, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -235,7 +240,8 @@ where quarter, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("quarter does not support", dt), @@ -253,7 +259,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, month, value_as_datetime) + extract_component_from_array!(array, b, month, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -263,7 +270,8 @@ where month, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("month does not support", dt), @@ -290,7 +298,8 @@ where array, b, num_days_from_monday, - value_as_datetime + value_as_datetime, + |h| h as i32 ) } &DataType::Timestamp(_, Some(ref tz)) => { @@ -301,7 +310,8 @@ where num_days_from_monday, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("weekday does not support", dt), @@ -328,7 +338,8 @@ where array, b, num_days_from_sunday, - value_as_datetime + value_as_datetime, + |h| h as i32 ) } &DataType::Timestamp(_, Some(ref tz)) => { @@ -339,7 +350,8 @@ where num_days_from_sunday, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("num_days_from_sunday does not support", dt), @@ -357,7 +369,7 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, day, value_as_datetime) + extract_component_from_array!(array, b, day, value_as_datetime, |h| h as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -367,7 +379,8 @@ where day, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("day does not support", dt), @@ -386,7 +399,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, ordinal, value_as_datetime) + extract_component_from_array!(array, b, ordinal, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -396,7 +410,8 @@ where ordinal, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("doy does not support", dt), @@ -414,7 +429,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, minute, value_as_datetime) + extract_component_from_array!(array, b, minute, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -424,7 +440,8 @@ where minute, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("minute does not support", dt), @@ -443,7 +460,14 @@ where match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, iso_week, week, value_as_datetime) + extract_component_from_array!( + array, + b, + iso_week, + week, + value_as_datetime, + |h| h as i32 + ) } dt => return_compute_error_with!("week does not support", dt), } @@ -460,7 +484,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, second, value_as_datetime) + extract_component_from_array!(array, b, second, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -470,7 +495,8 @@ where second, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("second does not support", dt),