Skip to content

Commit

Permalink
Cast timestamp array to string array with timezone.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Aug 30, 2022
1 parent 24036e9 commit 9d1594a
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 49 deletions.
103 changes: 81 additions & 22 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -35,6 +35,8 @@
//! assert_eq!(7.0, c.value(2));
//! ```

use chrono::format::strftime::StrftimeItems;
use chrono::format::{parse, Parsed};
use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
Expand All @@ -45,6 +47,9 @@ use crate::compute::divide_scalar;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::compute::kernels::arity::unary;
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
use crate::compute::kernels::temporal::extract_component_from_array;
use crate::compute::kernels::temporal::return_compute_error_with;
use crate::compute::using_chrono_tz_and_utc_naive_date_time;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::{
Expand Down Expand Up @@ -728,18 +733,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i32>(array)
cast_timestamp_to_string::<TimestampSecondType, i32>(array, tz)
}
},
Date32 => cast_date32_to_string::<i32>(array),
Expand Down Expand Up @@ -784,18 +789,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i64>(array)
cast_timestamp_to_string::<TimestampSecondType, i64>(array, tz)
}
},
Date32 => cast_date32_to_string::<i64>(array),
Expand Down Expand Up @@ -1482,25 +1487,35 @@ where
}

/// Cast timestamp types to Utf8/LargeUtf8
fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
fn cast_timestamp_to_string<T, OffsetSize>(
array: &ArrayRef,
tz: &Option<String>,
) -> Result<ArrayRef>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<<T as ArrowPrimitiveType>::Native>,
OffsetSize: OffsetSizeTrait,
{
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();

Ok(Arc::new(
(0..array.len())
.map(|ix| {
if array.is_null(ix) {
None
} else {
array.value_as_datetime(ix).map(|v| v.to_string())
}
})
.collect::<GenericStringArray<OffsetSize>>(),
))
let mut builder = GenericStringBuilder::<OffsetSize>::new();

if let Some(tz) = tz {
let mut scratch = Parsed::new();
extract_component_from_array!(
array,
builder,
to_string,
value_as_datetime_with_tz,
tz,
scratch,
|h| h
)
} else {
extract_component_from_array!(array, builder, to_string, value_as_datetime, |h| h)
}

Ok(Arc::new(builder.finish()) as ArrayRef)
}

/// Cast date32 types to Utf8/LargeUtf8
Expand Down Expand Up @@ -3602,6 +3617,7 @@ mod tests {
}

#[test]
#[cfg(feature = "chrono-tz")]
fn test_cast_timestamp_to_string() {
let a = TimestampMillisecondArray::from_opt_vec(
vec![Some(864000000005), Some(1545696000001), None],
Expand Down Expand Up @@ -5127,6 +5143,7 @@ mod tests {

#[test]
#[cfg_attr(miri, ignore)] // running forever
#[cfg(feature = "chrono-tz")]
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
Expand Down Expand Up @@ -5194,6 +5211,7 @@ mod tests {
}

/// Create instances of arrays with varying types for cast tests
#[cfg(feature = "chrono-tz")]
fn get_arrays_of_all_types() -> Vec<ArrayRef> {
let tz_name = String::from("America/New_York");
let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
Expand Down Expand Up @@ -5334,6 +5352,7 @@ mod tests {
LargeListArray::from(list_data)
}

#[cfg(feature = "chrono-tz")]
fn make_fixed_size_list_array() -> FixedSizeListArray {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
Expand All @@ -5355,6 +5374,7 @@ mod tests {
FixedSizeListArray::from(list_data)
}

#[cfg(feature = "chrono-tz")]
fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
let values: [u8; 15] = *b"hellotherearrow";

Expand All @@ -5366,6 +5386,7 @@ mod tests {
FixedSizeBinaryArray::from(array_data)
}

#[cfg(feature = "chrono-tz")]
fn make_union_array() -> UnionArray {
let mut builder = UnionBuilder::with_capacity_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();
Expand All @@ -5374,6 +5395,7 @@ mod tests {
}

/// Creates a dictionary with primitive dictionary values, and keys of type K
#[cfg(feature = "chrono-tz")]
fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
Expand All @@ -5385,6 +5407,7 @@ mod tests {
}

/// Creates a dictionary with utf8 values, and keys of type K
#[cfg(feature = "chrono-tz")]
fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
Expand All @@ -5396,6 +5419,7 @@ mod tests {
}

// Get a selection of datatypes to try and cast to
#[cfg(feature = "chrono-tz")]
fn get_all_types() -> Vec<DataType> {
use DataType::*;
let tz_name = String::from("America/New_York");
Expand Down Expand Up @@ -5490,4 +5514,39 @@ mod tests {

assert_eq!(&out1, &out2.slice(1, 2))
}

/// Casts a microsecond timestamp array to `Utf8` twice: once with no timezone
/// attached and once tagged with "Australia/Sydney". Each run is compared
/// against the expected rendered strings, and the null slot must stay null.
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_cast_utf8() {
    // Raw microsecond values shared by both scenarios.
    let micros: Vec<Option<i64>> = vec![Some(37800000000), None, Some(86339000000)];

    // Runs the cast under test and hands back the resulting array.
    let to_utf8 = |input: PrimitiveArray<TimestampMicrosecondType>| {
        let input: ArrayRef = Arc::new(input);
        cast(&input, &DataType::Utf8).unwrap()
    };

    // Scenario 1: the array carries no timezone.
    let naive: PrimitiveArray<TimestampMicrosecondType> = micros.clone().into();
    let rendered = to_utf8(naive);
    let want = StringArray::from(vec![
        Some("1970-01-01 10:30:00"),
        None,
        Some("1970-01-01 23:58:59"),
    ]);
    assert_eq!(
        rendered.as_any().downcast_ref::<StringArray>().unwrap(),
        &want
    );

    // Scenario 2: the same values, with a timezone attached to the array.
    let zoned: PrimitiveArray<TimestampMicrosecondType> = micros.into();
    let zoned = zoned.with_timezone("Australia/Sydney".to_string());
    let rendered = to_utf8(zoned);
    let want = StringArray::from(vec![
        Some("1970-01-01 20:30:00"),
        None,
        Some("1970-01-02 09:58:59"),
    ]);
    assert_eq!(
        rendered.as_any().downcast_ref::<StringArray>().unwrap(),
        &want
    );
}
}
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/mod.rs
Expand Up @@ -35,5 +35,6 @@ pub mod sort;
pub mod substring;
pub mod take;
pub mod temporal;
// pub(crate) use temporal::extract_component_from_array;
pub mod window;
pub mod zip;

0 comments on commit 9d1594a

Please sign in to comment.