diff --git a/arrow/src/compute/kernels/cast_utils.rs b/arrow/src/compute/kernels/cast_utils.rs index e43961b4ab8..718ea5ac64a 100644 --- a/arrow/src/compute/kernels/cast_utils.rs +++ b/arrow/src/compute/kernels/cast_utils.rs @@ -16,7 +16,7 @@ // under the License. use crate::error::{ArrowError, Result}; -use chrono::{prelude::*, LocalResult}; +use chrono::prelude::*; /// Accepts a string in RFC3339 / ISO8601 standard format and some /// variants and converts it to a nanosecond precision timestamp. @@ -96,27 +96,27 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result { // without a timezone specifier as a local time, using T as a separator // Example: 2020-09-08T13:42:29.190855 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") { - return naive_datetime_to_timestamp(s, ts); + return Ok(ts.timestamp_nanos()); } // without a timezone specifier as a local time, using T as a // separator, no fractional seconds // Example: 2020-09-08T13:42:29 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { - return naive_datetime_to_timestamp(s, ts); + return Ok(ts.timestamp_nanos()); } // without a timezone specifier as a local time, using ' ' as a separator // Example: 2020-09-08 13:42:29.190855 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f") { - return naive_datetime_to_timestamp(s, ts); + return Ok(ts.timestamp_nanos()); } // without a timezone specifier as a local time, using ' ' as a // separator, no fractional seconds // Example: 2020-09-08 13:42:29 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { - return naive_datetime_to_timestamp(s, ts); + return Ok(ts.timestamp_nanos()); } // Note we don't pass along the error message from the underlying @@ -130,30 +130,6 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result { ))) } -/// Converts the naive datetime (which has no specific timezone) to a -/// nanosecond epoch timestamp relative to UTC. -fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result { - let l = Local {}; - - match l.from_local_datetime(&datetime) { - LocalResult::None => Err(ArrowError::CastError(format!( - "Error parsing '{}' as timestamp: local time representation is invalid", - s - ))), - LocalResult::Single(local_datetime) => { - Ok(local_datetime.with_timezone(&Utc).timestamp_nanos()) - } - // Ambiguous times can happen if the timestamp is exactly when - // a daylight savings time transition occurs, for example, and - // so the datetime could validly be said to be in two - // potential offsets. However, since we are about to convert - // to UTC anyways, we can pick one arbitrarily - LocalResult::Ambiguous(local_datetime, _) => { - Ok(local_datetime.with_timezone(&Utc).timestamp_nanos()) - } - } -} - #[cfg(test)] mod tests { use super::*; @@ -202,23 +178,6 @@ mod tests { Ok(()) } - /// Interprets a naive_datetime (with no explicit timezone offset) - /// using the local timezone and returns the timestamp in UTC (0 - /// offset) - fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 { - // Note: Use chrono APIs that are different than - // naive_datetime_to_timestamp to compute the utc offset to - // try and double check the logic - let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) { - LocalResult::Single(local_offset) => { - local_offset.fix().local_minus_utc() as i64 - } - _ => panic!("Unexpected failure converting to local datetime"), - }; - let utc_offset_nanos = utc_offset_secs * 1_000_000_000; - naive_datetime.timestamp_nanos() - utc_offset_nanos - } - #[test] #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime fn string_to_timestamp_no_timezone() -> Result<()> { @@ -232,12 +191,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime_to_timestamp(&naive_datetime), + naive_datetime.timestamp_nanos(), parse_timestamp("2020-09-08T13:42:29.190855")? ); assert_eq!( - naive_datetime_to_timestamp(&naive_datetime), + naive_datetime.timestamp_nanos(), parse_timestamp("2020-09-08 13:42:29.190855")? ); @@ -250,12 +209,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime_to_timestamp(&naive_datetime_whole_secs), + naive_datetime_whole_secs.timestamp_nanos(), parse_timestamp("2020-09-08T13:42:29")? ); assert_eq!( - naive_datetime_to_timestamp(&naive_datetime_whole_secs), + naive_datetime_whole_secs.timestamp_nanos(), parse_timestamp("2020-09-08 13:42:29")? ); @@ -297,4 +256,43 @@ mod tests { } } } + + #[test] + fn string_without_timezone_to_timestamp() -> Result<()> { + // string without timezone should always output the same regardless the local or session timezone + + let naive_datetime = NaiveDateTime::new( + NaiveDate::from_ymd(2020, 9, 8), + NaiveTime::from_hms_nano(13, 42, 29, 190855000), + ); + + // Ensure both T and ' ' variants work + assert_eq!( + naive_datetime.timestamp_nanos(), + parse_timestamp("2020-09-08T13:42:29.190855")? + ); + + assert_eq!( + naive_datetime.timestamp_nanos(), + parse_timestamp("2020-09-08 13:42:29.190855")? + ); + + let naive_datetime = NaiveDateTime::new( + NaiveDate::from_ymd(2020, 9, 8), + NaiveTime::from_hms_nano(13, 42, 29, 0), + ); + + // Ensure both T and ' ' variants work + assert_eq!( + naive_datetime.timestamp_nanos(), + parse_timestamp("2020-09-08T13:42:29")? + ); + + assert_eq!( + naive_datetime.timestamp_nanos(), + parse_timestamp("2020-09-08 13:42:29")? + ); + + Ok(()) + } } diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index a8c71a8e019..0c7536053ff 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -1136,7 +1136,7 @@ mod tests { use crate::array::*; use crate::compute::cast; use crate::datatypes::Field; - use chrono::{prelude::*, LocalResult}; + use chrono::prelude::*; #[test] fn test_csv() { @@ -1696,26 +1696,6 @@ mod tests { } } - /// Interprets a naive_datetime (with no explicit timezone offset) - /// using the local timezone and returns the timestamp in UTC (0 - /// offset) - fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 { - // Note: Use chrono APIs that are different than - // naive_datetime_to_timestamp to compute the utc offset to - // try and double check the logic - let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) { - LocalResult::Single(local_offset) => { - local_offset.fix().local_minus_utc() as i64 - } - _ => panic!( - "Unexpected failure converting {} to local datetime", - naive_datetime - ), - }; - let utc_offset_nanos = utc_offset_secs * 1_000_000_000; - naive_datetime.timestamp_nanos() - utc_offset_nanos - } - #[test] fn test_parse_timestamp_microseconds() { assert_eq!( @@ -1728,11 +1708,11 @@ mod tests { ); assert_eq!( parse_item::("2018-11-13T17:11:10").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) / 1000 + naive_datetime.timestamp_nanos() / 1000 ); assert_eq!( parse_item::("2018-11-13 17:11:10").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) / 1000 + naive_datetime.timestamp_nanos() / 1000 ); let naive_datetime = NaiveDateTime::new( NaiveDate::from_ymd(2018, 11, 13), @@ -1740,7 +1720,7 @@ mod tests { ); assert_eq!( parse_item::("2018-11-13T17:11:10.011").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) / 1000 + naive_datetime.timestamp_nanos() / 1000 ); let naive_datetime = NaiveDateTime::new( NaiveDate::from_ymd(1900, 2, 28), @@ -1748,7 +1728,7 @@ mod tests { ); assert_eq!( parse_item::("1900-02-28T12:34:56").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) / 1000 + naive_datetime.timestamp_nanos() / 1000 ); } @@ -1764,11 +1744,11 @@ mod tests { ); assert_eq!( parse_item::("2018-11-13T17:11:10").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) + naive_datetime.timestamp_nanos() ); assert_eq!( parse_item::("2018-11-13 17:11:10").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) + naive_datetime.timestamp_nanos() ); let naive_datetime = NaiveDateTime::new( NaiveDate::from_ymd(2018, 11, 13), @@ -1776,7 +1756,7 @@ mod tests { ); assert_eq!( parse_item::("2018-11-13T17:11:10.011").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) + naive_datetime.timestamp_nanos() ); let naive_datetime = NaiveDateTime::new( NaiveDate::from_ymd(1900, 2, 28), @@ -1784,7 +1764,7 @@ mod tests { ); assert_eq!( parse_item::("1900-02-28T12:34:56").unwrap(), - naive_datetime_to_timestamp(&naive_datetime) + naive_datetime.timestamp_nanos() ); }