Skip to content

Commit

Permalink
Cast timestamp array to string array with timezone.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Aug 30, 2022
1 parent 24036e9 commit 1aac86a
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 49 deletions.
96 changes: 74 additions & 22 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -35,6 +35,8 @@
//! assert_eq!(7.0, c.value(2));
//! ```

use chrono::format::strftime::StrftimeItems;
use chrono::format::{parse, Parsed};
use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
Expand All @@ -45,6 +47,9 @@ use crate::compute::divide_scalar;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::compute::kernels::arity::unary;
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
use crate::compute::kernels::temporal::extract_component_from_array;
use crate::compute::kernels::temporal::return_compute_error_with;
use crate::compute::using_chrono_tz_and_utc_naive_date_time;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::{
Expand Down Expand Up @@ -728,18 +733,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i32>(array)
cast_timestamp_to_string::<TimestampSecondType, i32>(array, tz)
}
},
Date32 => cast_date32_to_string::<i32>(array),
Expand Down Expand Up @@ -784,18 +789,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i64>(array)
cast_timestamp_to_string::<TimestampSecondType, i64>(array, tz)
}
},
Date32 => cast_date32_to_string::<i64>(array),
Expand Down Expand Up @@ -1482,25 +1487,35 @@ where
}

/// Cast timestamp types to Utf8/LargeUtf8
///
/// `array` is downcast to `PrimitiveArray<T>`; each value is rendered with
/// `to_string` and collected into a `GenericStringArray<OffsetSize>`.
///
/// In the builder-based body, a `Some` timezone in `tz` routes values through
/// `value_as_datetime_with_tz`, while `None` routes them through
/// `value_as_datetime`.
///
/// # Panics
///
/// Panics if `array` is not a `PrimitiveArray<T>` (the downcast is unwrapped).
// NOTE(review): this span is a rendered diff, so two versions of the function
// are interleaved. The single-line signature and the iterator-based body look
// like the pre-commit version — confirm against the repository.
fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
fn cast_timestamp_to_string<T, OffsetSize>(
array: &ArrayRef,
tz: &Option<String>,
) -> Result<ArrayRef>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<<T as ArrowPrimitiveType>::Native>,
OffsetSize: OffsetSizeTrait,
{
// The caller selects `T` from the timestamp unit, so a failed downcast is
// treated as a bug rather than a recoverable error.
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();

// Iterator-based body: null slots stay null, and any value for which
// `value_as_datetime` returns `None` also becomes a null string.
Ok(Arc::new(
(0..array.len())
.map(|ix| {
if array.is_null(ix) {
None
} else {
array.value_as_datetime(ix).map(|v| v.to_string())
}
})
.collect::<GenericStringArray<OffsetSize>>(),
))
// Builder-based body: the macro below fills `builder` one slot at a time.
let mut builder = GenericStringBuilder::<OffsetSize>::new();

if let Some(tz) = tz {
// `scratch` is handed to the macro together with the timezone string.
// NOTE(review): presumably used as parse state for resolving `tz` — the
// macro body is not visible here, confirm in `kernels::temporal`.
let mut scratch = Parsed::new();
extract_component_from_array!(
array,
builder,
to_string,
value_as_datetime_with_tz,
tz,
scratch,
|h| h
)
} else {
// No timezone recorded on the type: use the plain `value_as_datetime` accessor.
extract_component_from_array!(array, builder, to_string, value_as_datetime, |h| h)
}

Ok(Arc::new(builder.finish()) as ArrayRef)
}

/// Cast date32 types to Utf8/LargeUtf8
Expand Down Expand Up @@ -3602,6 +3617,7 @@ mod tests {
}

#[test]
#[cfg(feature = "chrono-tz")]
fn test_cast_timestamp_to_string() {
let a = TimestampMillisecondArray::from_opt_vec(
vec![Some(864000000005), Some(1545696000001), None],
Expand Down Expand Up @@ -5127,6 +5143,7 @@ mod tests {

#[test]
#[cfg_attr(miri, ignore)] // running forever
#[cfg(feature = "chrono-tz")]
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
Expand Down Expand Up @@ -5490,4 +5507,39 @@ mod tests {

assert_eq!(&out1, &out2.slice(1, 2))
}

#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_cast_utf8() {
    // Microseconds since the epoch: 10:30:00 and 23:58:59 on 1970-01-01,
    // with a null slot between them.
    let micros: Vec<Option<i64>> = vec![Some(37800000000), None, Some(86339000000)];

    // Case 1: no timezone attached, so the strings show the stored wall-clock values.
    let naive = PrimitiveArray::<TimestampMicrosecondType>::from(micros.clone());
    let naive: ArrayRef = Arc::new(naive);
    let rendered = cast(&naive, &DataType::Utf8).unwrap();
    let rendered = rendered.as_any().downcast_ref::<StringArray>().unwrap();

    let want = StringArray::from(vec![
        Some("1970-01-01 10:30:00"),
        None,
        Some("1970-01-01 23:58:59"),
    ]);
    assert_eq!(rendered, &want);

    // Case 2: the same values tagged with a timezone; the expected strings are
    // ten hours ahead, which rolls the second value over to the next day.
    let zoned = PrimitiveArray::<TimestampMicrosecondType>::from(micros)
        .with_timezone("Australia/Sydney".to_string());
    let zoned: ArrayRef = Arc::new(zoned);
    let rendered = cast(&zoned, &DataType::Utf8).unwrap();
    let rendered = rendered.as_any().downcast_ref::<StringArray>().unwrap();

    let want = StringArray::from(vec![
        Some("1970-01-01 20:30:00"),
        None,
        Some("1970-01-02 09:58:59"),
    ]);
    assert_eq!(rendered, &want);
}
}
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/mod.rs
Expand Up @@ -35,5 +35,6 @@ pub mod sort;
pub mod substring;
pub mod take;
pub mod temporal;
// pub(crate) use temporal::extract_component_from_array;
pub mod window;
pub mod zip;

0 comments on commit 1aac86a

Please sign in to comment.