Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cast timestamp array to string array with timezone #2608

Merged
merged 2 commits into from Sep 1, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
103 changes: 81 additions & 22 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -35,6 +35,8 @@
//! assert_eq!(7.0, c.value(2));
//! ```

use chrono::format::strftime::StrftimeItems;
use chrono::format::{parse, Parsed};
use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
Expand All @@ -45,6 +47,9 @@ use crate::compute::divide_scalar;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::compute::kernels::arity::unary;
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
use crate::compute::kernels::temporal::extract_component_from_array;
use crate::compute::kernels::temporal::return_compute_error_with;
use crate::compute::using_chrono_tz_and_utc_naive_date_time;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::{
Expand Down Expand Up @@ -728,18 +733,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i32>(array)
cast_timestamp_to_string::<TimestampSecondType, i32>(array, tz)
}
},
Date32 => cast_date32_to_string::<i32>(array),
Expand Down Expand Up @@ -784,18 +789,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
Timestamp(unit, _) => match unit {
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array)
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array)
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i64>(array)
cast_timestamp_to_string::<TimestampSecondType, i64>(array, tz)
}
},
Date32 => cast_date32_to_string::<i64>(array),
Expand Down Expand Up @@ -1482,25 +1487,35 @@ where
}

/// Cast timestamp types to Utf8/LargeUtf8, taking the array's optional timezone (`tz`) into account
fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
fn cast_timestamp_to_string<T, OffsetSize>(
array: &ArrayRef,
tz: &Option<String>,
) -> Result<ArrayRef>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<<T as ArrowPrimitiveType>::Native>,
OffsetSize: OffsetSizeTrait,
{
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();

Ok(Arc::new(
(0..array.len())
.map(|ix| {
if array.is_null(ix) {
None
} else {
array.value_as_datetime(ix).map(|v| v.to_string())
}
})
.collect::<GenericStringArray<OffsetSize>>(),
))
let mut builder = GenericStringBuilder::<OffsetSize>::new();

if let Some(tz) = tz {
let mut scratch = Parsed::new();
extract_component_from_array!(
viirya marked this conversation as resolved.
Show resolved Hide resolved
array,
builder,
to_string,
value_as_datetime_with_tz,
tz,
scratch,
|h| h
)
} else {
extract_component_from_array!(array, builder, to_string, value_as_datetime, |h| h)
}

Ok(Arc::new(builder.finish()) as ArrayRef)
}

/// Cast date32 types to Utf8/LargeUtf8
Expand Down Expand Up @@ -3602,6 +3617,7 @@ mod tests {
}

#[test]
#[cfg(feature = "chrono-tz")]
fn test_cast_timestamp_to_string() {
let a = TimestampMillisecondArray::from_opt_vec(
vec![Some(864000000005), Some(1545696000001), None],
Expand Down Expand Up @@ -5127,6 +5143,7 @@ mod tests {

#[test]
#[cfg_attr(miri, ignore)] // running forever
#[cfg(feature = "chrono-tz")]
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
Expand Down Expand Up @@ -5194,6 +5211,7 @@ mod tests {
}

/// Create instances of arrays with varying types for cast tests
#[cfg(feature = "chrono-tz")]
fn get_arrays_of_all_types() -> Vec<ArrayRef> {
let tz_name = String::from("America/New_York");
let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
Expand Down Expand Up @@ -5334,6 +5352,7 @@ mod tests {
LargeListArray::from(list_data)
}

#[cfg(feature = "chrono-tz")]
fn make_fixed_size_list_array() -> FixedSizeListArray {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
Expand All @@ -5355,6 +5374,7 @@ mod tests {
FixedSizeListArray::from(list_data)
}

#[cfg(feature = "chrono-tz")]
fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
let values: [u8; 15] = *b"hellotherearrow";

Expand All @@ -5366,6 +5386,7 @@ mod tests {
FixedSizeBinaryArray::from(array_data)
}

#[cfg(feature = "chrono-tz")]
fn make_union_array() -> UnionArray {
let mut builder = UnionBuilder::with_capacity_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();
Expand All @@ -5374,6 +5395,7 @@ mod tests {
}

/// Creates a dictionary with primitive dictionary values, and keys of type K
#[cfg(feature = "chrono-tz")]
fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
Expand All @@ -5385,6 +5407,7 @@ mod tests {
}

/// Creates a dictionary with utf8 values, and keys of type K
#[cfg(feature = "chrono-tz")]
fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
Expand All @@ -5396,6 +5419,7 @@ mod tests {
}

// Get a selection of datatypes to try and cast to
#[cfg(feature = "chrono-tz")]
fn get_all_types() -> Vec<DataType> {
use DataType::*;
let tz_name = String::from("America/New_York");
Expand Down Expand Up @@ -5490,4 +5514,39 @@ mod tests {

assert_eq!(&out1, &out2.slice(1, 2))
}

/// Casting a microsecond timestamp array to `Utf8` must produce the expected
/// strings both without a timezone and with one attached to the array.
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_cast_utf8() {
    // Microseconds since the epoch: 37_800 s (10:30:00) and 86_339 s (23:58:59)
    // on 1970-01-01, with a null slot in between.
    let micros: Vec<Option<i64>> = vec![Some(37800000000), None, Some(86339000000)];

    // Case 1: no timezone on the timestamp array.
    let naive: PrimitiveArray<TimestampMicrosecondType> = micros.clone().into();
    let naive_ref: ArrayRef = Arc::new(naive);
    let naive_strings = cast(&naive_ref, &DataType::Utf8).unwrap();
    let naive_expected = StringArray::from(vec![
        Some("1970-01-01 10:30:00"),
        None,
        Some("1970-01-01 23:58:59"),
    ]);
    assert_eq!(
        naive_strings.as_any().downcast_ref::<StringArray>().unwrap(),
        &naive_expected
    );

    // Case 2: same instants, but the array carries the "Australia/Sydney" timezone.
    let zoned: PrimitiveArray<TimestampMicrosecondType> = micros.into();
    let zoned = zoned.with_timezone("Australia/Sydney".to_string());
    let zoned_ref: ArrayRef = Arc::new(zoned);
    let zoned_strings = cast(&zoned_ref, &DataType::Utf8).unwrap();
    let zoned_expected = StringArray::from(vec![
        Some("1970-01-01 20:30:00"),
        None,
        Some("1970-01-02 09:58:59"),
    ]);
    assert_eq!(
        zoned_strings.as_any().downcast_ref::<StringArray>().unwrap(),
        &zoned_expected
    );
}
}
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/mod.rs
Expand Up @@ -35,5 +35,6 @@ pub mod sort;
pub mod substring;
pub mod take;
pub mod temporal;
// pub(crate) use temporal::extract_component_from_array;
viirya marked this conversation as resolved.
Show resolved Hide resolved
pub mod window;
pub mod zip;