diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 3df0c861c70..6b4f224708d 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -35,6 +35,8 @@ //! assert_eq!(7.0, c.value(2)); //! ``` +use chrono::format::strftime::StrftimeItems; +use chrono::format::{parse, Parsed}; use chrono::Timelike; use std::ops::{Div, Mul}; use std::str; @@ -45,6 +47,9 @@ use crate::compute::divide_scalar; use crate::compute::kernels::arithmetic::{divide, multiply}; use crate::compute::kernels::arity::unary; use crate::compute::kernels::cast_utils::string_to_timestamp_nanos; +use crate::compute::kernels::temporal::extract_component_from_array; +use crate::compute::kernels::temporal::return_compute_error_with; +use crate::compute::using_chrono_tz_and_utc_naive_date_time; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::temporal_conversions::{ @@ -728,18 +733,18 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), - Timestamp(unit, _) => match unit { + Timestamp(unit, tz) => match unit { TimeUnit::Nanosecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Microsecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Millisecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Second => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } }, Date32 => cast_date32_to_string::(array), @@ -784,18 +789,18 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), - Timestamp(unit, _) => match unit { + Timestamp(unit, tz) => match unit { TimeUnit::Nanosecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } 
TimeUnit::Microsecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Millisecond => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } TimeUnit::Second => { - cast_timestamp_to_string::(array) + cast_timestamp_to_string::(array, tz) } }, Date32 => cast_date32_to_string::(array), @@ -1482,7 +1487,10 @@ where } /// Cast timestamp types to Utf8/LargeUtf8 -fn cast_timestamp_to_string(array: &ArrayRef) -> Result +fn cast_timestamp_to_string( + array: &ArrayRef, + tz: &Option, +) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From<::Native>, @@ -1490,17 +1498,28 @@ where { let array = array.as_any().downcast_ref::>().unwrap(); - Ok(Arc::new( - (0..array.len()) - .map(|ix| { - if array.is_null(ix) { - None - } else { - array.value_as_datetime(ix).map(|v| v.to_string()) - } - }) - .collect::>(), - )) + let mut builder = GenericStringBuilder::::new(); + + if let Some(tz) = tz { + let mut scratch = Parsed::new(); + // The macro calls `value_as_datetime_with_tz` on timestamp values of the array. + // After applying the timezone offset to the datetime, `to_string` is called to get + // the strings. + extract_component_from_array!( + array, + builder, + to_string, + value_as_datetime_with_tz, + tz, + scratch, + |h| h + ) + } else { + // No timezone available; simply call `to_string` on the datetime value. 
+ extract_component_from_array!(array, builder, to_string, value_as_datetime, |h| h) + } + + Ok(Arc::new(builder.finish()) as ArrayRef) } /// Cast date32 types to Utf8/LargeUtf8 @@ -3602,6 +3621,7 @@ mod tests { } #[test] + #[cfg(feature = "chrono-tz")] fn test_cast_timestamp_to_string() { let a = TimestampMillisecondArray::from_opt_vec( vec![Some(864000000005), Some(1545696000001), None], @@ -5127,6 +5147,7 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] // running forever + #[cfg(feature = "chrono-tz")] fn test_can_cast_types() { // this function attempts to ensure that can_cast_types stays // in sync with cast. It simply tries all combinations of @@ -5194,6 +5215,7 @@ mod tests { } /// Create instances of arrays with varying types for cast tests + #[cfg(feature = "chrono-tz")] fn get_arrays_of_all_types() -> Vec { let tz_name = String::from("America/New_York"); let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"]; @@ -5334,6 +5356,7 @@ mod tests { LargeListArray::from(list_data) } + #[cfg(feature = "chrono-tz")] fn make_fixed_size_list_array() -> FixedSizeListArray { // Construct a value array let value_data = ArrayData::builder(DataType::Int32) @@ -5355,6 +5378,7 @@ mod tests { FixedSizeListArray::from(list_data) } + #[cfg(feature = "chrono-tz")] fn make_fixed_size_binary_array() -> FixedSizeBinaryArray { let values: [u8; 15] = *b"hellotherearrow"; @@ -5366,6 +5390,7 @@ mod tests { FixedSizeBinaryArray::from(array_data) } + #[cfg(feature = "chrono-tz")] fn make_union_array() -> UnionArray { let mut builder = UnionBuilder::with_capacity_dense(7); builder.append::("a", 1).unwrap(); @@ -5374,6 +5399,7 @@ mod tests { } /// Creates a dictionary with primitive dictionary values, and keys of type K + #[cfg(feature = "chrono-tz")] fn make_dictionary_primitive() -> ArrayRef { let keys_builder = PrimitiveBuilder::::new(); // Pick Int32 arbitrarily for dictionary values @@ -5385,6 +5411,7 @@ mod tests { } /// Creates a dictionary with utf8 values, and keys of type K + 
#[cfg(feature = "chrono-tz")] fn make_dictionary_utf8() -> ArrayRef { let keys_builder = PrimitiveBuilder::::new(); // Pick Int32 arbitrarily for dictionary values @@ -5396,6 +5423,7 @@ mod tests { } // Get a selection of datatypes to try and cast to + #[cfg(feature = "chrono-tz")] fn get_all_types() -> Vec { use DataType::*; let tz_name = String::from("America/New_York"); @@ -5490,4 +5518,39 @@ mod tests { assert_eq!(&out1, &out2.slice(1, 2)) } + + #[test] + #[cfg(feature = "chrono-tz")] + fn test_timestamp_cast_utf8() { + let array: PrimitiveArray = + vec![Some(37800000000), None, Some(86339000000)].into(); + let out = cast(&(Arc::new(array) as ArrayRef), &DataType::Utf8).unwrap(); + + let expected = StringArray::from(vec![ + Some("1970-01-01 10:30:00"), + None, + Some("1970-01-01 23:58:59"), + ]); + + assert_eq!( + out.as_any().downcast_ref::().unwrap(), + &expected + ); + + let array: PrimitiveArray = + vec![Some(37800000000), None, Some(86339000000)].into(); + let array = array.with_timezone("Australia/Sydney".to_string()); + let out = cast(&(Arc::new(array) as ArrayRef), &DataType::Utf8).unwrap(); + + let expected = StringArray::from(vec![ + Some("1970-01-01 20:30:00"), + None, + Some("1970-01-02 09:58:59"), + ]); + + assert_eq!( + out.as_any().downcast_ref::().unwrap(), + &expected + ); + } } diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index b24a6333f5f..1bec1d84f68 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -28,33 +28,33 @@ use chrono::format::{parse, Parsed}; use chrono::FixedOffset; macro_rules! 
extract_component_from_array { - ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => { + ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $convert:expr) => { for i in 0..$array.len() { if $array.is_null(i) { $builder.append_null(); } else { match $array.$using(i) { - Some(dt) => $builder.append_value(dt.$extract_fn() as i32), + Some(dt) => $builder.append_value($convert(dt.$extract_fn())), None => $builder.append_null(), } } } }; - ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, $using:ident) => { + ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, $using:ident, $convert:expr) => { for i in 0..$array.len() { if $array.is_null(i) { $builder.append_null(); } else { match $array.$using(i) { Some(dt) => { - $builder.append_value(dt.$extract_fn1().$extract_fn2() as i32); + $builder.append_value($convert(dt.$extract_fn1().$extract_fn2())); } None => $builder.append_null(), } } } }; - ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, $parsed:ident) => { + ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, $parsed:ident, $convert:expr) => { if ($tz.starts_with('+') || $tz.starts_with('-')) && !$tz.contains(':') { return_compute_error_with!( "Invalid timezone", @@ -90,7 +90,7 @@ macro_rules! extract_component_from_array { }; match $array.$using(i, fixed_offset) { Some(dt) => { - $builder.append_value(dt.$extract_fn() as i32); + $builder.append_value($convert(dt.$extract_fn())); } None => $builder.append_null(), } @@ -112,6 +112,9 @@ macro_rules! 
return_compute_error_with { }; } +pub(crate) use extract_component_from_array; +pub(crate) use return_compute_error_with; + // Internal trait, which is used for mapping values from DateLike structures trait ChronoDateExt { /// Returns a value in range `1..=4` indicating the quarter this date falls into @@ -177,10 +180,10 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Time32(_) | &DataType::Time64(_) => { - extract_component_from_array!(array, b, hour, value_as_time) + extract_component_from_array!(array, b, hour, value_as_time, |h| h as i32) } &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, hour, value_as_datetime) + extract_component_from_array!(array, b, hour, value_as_datetime, |h| h as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -190,7 +193,8 @@ where hour, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("hour does not support", dt), @@ -208,7 +212,7 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => { - extract_component_from_array!(array, b, year, value_as_datetime) + extract_component_from_array!(array, b, year, value_as_datetime, |h| h as i32) } dt => return_compute_error_with!("year does not support", dt), } @@ -225,7 +229,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, quarter, value_as_datetime) + extract_component_from_array!(array, b, quarter, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -235,7 +240,8 @@ where quarter, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => 
return_compute_error_with!("quarter does not support", dt), @@ -253,7 +259,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, month, value_as_datetime) + extract_component_from_array!(array, b, month, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -263,7 +270,8 @@ where month, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("month does not support", dt), @@ -290,7 +298,8 @@ where array, b, num_days_from_monday, - value_as_datetime + value_as_datetime, + |h| h as i32 ) } &DataType::Timestamp(_, Some(ref tz)) => { @@ -301,7 +310,8 @@ where num_days_from_monday, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("weekday does not support", dt), @@ -328,7 +338,8 @@ where array, b, num_days_from_sunday, - value_as_datetime + value_as_datetime, + |h| h as i32 ) } &DataType::Timestamp(_, Some(ref tz)) => { @@ -339,7 +350,8 @@ where num_days_from_sunday, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("num_days_from_sunday does not support", dt), @@ -357,7 +369,7 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, day, value_as_datetime) + extract_component_from_array!(array, b, day, value_as_datetime, |h| h as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -367,7 +379,8 @@ where day, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("day does not support", dt), @@ -386,7 +399,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match 
array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, ordinal, value_as_datetime) + extract_component_from_array!(array, b, ordinal, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -396,7 +410,8 @@ where ordinal, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("doy does not support", dt), @@ -414,7 +429,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, minute, value_as_datetime) + extract_component_from_array!(array, b, minute, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -424,7 +440,8 @@ where minute, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("minute does not support", dt), @@ -443,7 +460,14 @@ where match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, iso_week, week, value_as_datetime) + extract_component_from_array!( + array, + b, + iso_week, + week, + value_as_datetime, + |h| h as i32 + ) } dt => return_compute_error_with!("week does not support", dt), } @@ -460,7 +484,8 @@ where let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { - extract_component_from_array!(array, b, second, value_as_datetime) + extract_component_from_array!(array, b, second, value_as_datetime, |h| h + as i32) } &DataType::Timestamp(_, Some(ref tz)) => { let mut scratch = Parsed::new(); @@ -470,7 +495,8 @@ where second, value_as_datetime_with_tz, tz, - scratch + scratch, + |h| h as i32 ) } dt => return_compute_error_with!("second does not support", 
dt),