Skip to content

Commit

Permalink
fix timestamp parsing while no explicit timezone given (#2814)
Browse files Browse the repository at this point in the history
* fix timestamp parsing

* add test cases
  • Loading branch information
waitingkuo committed Oct 3, 2022
1 parent 4df1f3c commit 15f8cfd
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 79 deletions.
98 changes: 48 additions & 50 deletions arrow/src/compute/kernels/cast_utils.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::error::{ArrowError, Result};
use chrono::{prelude::*, LocalResult};
use chrono::prelude::*;

/// Accepts a string in RFC3339 / ISO8601 standard format and some
/// variants and converts it to a nanosecond precision timestamp.
Expand Down Expand Up @@ -96,27 +96,27 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
// without a timezone specifier as a local time, using T as a separator
// Example: 2020-09-08T13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using T as a
// separator, no fractional seconds
// Example: 2020-09-08T13:42:29
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using ' ' as a separator
// Example: 2020-09-08 13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using ' ' as a
// separator, no fractional seconds
// Example: 2020-09-08 13:42:29
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// Note we don't pass along the error message from the underlying
Expand All @@ -130,30 +130,6 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
)))
}

/// Converts the naive datetime (which has no specific timezone) to a
/// nanosecond epoch timestamp relative to UTC.
fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64> {
let l = Local {};

match l.from_local_datetime(&datetime) {
LocalResult::None => Err(ArrowError::CastError(format!(
"Error parsing '{}' as timestamp: local time representation is invalid",
s
))),
LocalResult::Single(local_datetime) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
// Ambiguous times can happen if the timestamp is exactly when
// a daylight savings time transition occurs, for example, and
// so the datetime could validly be said to be in two
// potential offsets. However, since we are about to convert
// to UTC anyways, we can pick one arbitrarily
LocalResult::Ambiguous(local_datetime, _) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -202,23 +178,6 @@ mod tests {
Ok(())
}

/// Interprets a naive_datetime (with no explicit timezone offset)
/// using the local timezone and returns the timestamp in UTC (0
/// offset)
fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
// Note: Use chrono APIs that are different than
// naive_datetime_to_timestamp to compute the utc offset to
// try and double check the logic
let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) {
LocalResult::Single(local_offset) => {
local_offset.fix().local_minus_utc() as i64
}
_ => panic!("Unexpected failure converting to local datetime"),
};
let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
naive_datetime.timestamp_nanos() - utc_offset_nanos
}

#[test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
fn string_to_timestamp_no_timezone() -> Result<()> {
Expand All @@ -232,12 +191,12 @@ mod tests {

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime_to_timestamp(&naive_datetime),
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29.190855")?
);

assert_eq!(
naive_datetime_to_timestamp(&naive_datetime),
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29.190855")?
);

Expand All @@ -250,12 +209,12 @@ mod tests {

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime_to_timestamp(&naive_datetime_whole_secs),
naive_datetime_whole_secs.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29")?
);

assert_eq!(
naive_datetime_to_timestamp(&naive_datetime_whole_secs),
naive_datetime_whole_secs.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29")?
);

Expand Down Expand Up @@ -297,4 +256,43 @@ mod tests {
}
}
}

#[test]
fn string_without_timezone_to_timestamp() -> Result<()> {
// string without timezone should always output the same regardless the local or session timezone

let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2020, 9, 8),
NaiveTime::from_hms_nano(13, 42, 29, 190855000),
);

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29.190855")?
);

assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29.190855")?
);

let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2020, 9, 8),
NaiveTime::from_hms_nano(13, 42, 29, 0),
);

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29")?
);

assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29")?
);

Ok(())
}
}
38 changes: 9 additions & 29 deletions arrow/src/csv/reader.rs
Expand Up @@ -1136,7 +1136,7 @@ mod tests {
use crate::array::*;
use crate::compute::cast;
use crate::datatypes::Field;
use chrono::{prelude::*, LocalResult};
use chrono::prelude::*;

#[test]
fn test_csv() {
Expand Down Expand Up @@ -1696,26 +1696,6 @@ mod tests {
}
}

/// Interprets a naive_datetime (with no explicit timezone offset)
/// using the local timezone and returns the timestamp in UTC (0
/// offset)
fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
// Note: Use chrono APIs that are different than
// naive_datetime_to_timestamp to compute the utc offset to
// try and double check the logic
let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) {
LocalResult::Single(local_offset) => {
local_offset.fix().local_minus_utc() as i64
}
_ => panic!(
"Unexpected failure converting {} to local datetime",
naive_datetime
),
};
let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
naive_datetime.timestamp_nanos() - utc_offset_nanos
}

#[test]
fn test_parse_timestamp_microseconds() {
assert_eq!(
Expand All @@ -1728,27 +1708,27 @@ mod tests {
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13T17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13 17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2018, 11, 13),
NaiveTime::from_hms_nano(17, 11, 10, 11000000),
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13T17:11:10.011").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(1900, 2, 28),
NaiveTime::from_hms_nano(12, 34, 56, 0),
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("1900-02-28T12:34:56").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
}

Expand All @@ -1764,27 +1744,27 @@ mod tests {
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13T17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13 17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2018, 11, 13),
NaiveTime::from_hms_nano(17, 11, 10, 11000000),
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13T17:11:10.011").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(1900, 2, 28),
NaiveTime::from_hms_nano(12, 34, 56, 0),
);
assert_eq!(
parse_item::<TimestampNanosecondType>("1900-02-28T12:34:56").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
}

Expand Down

0 comments on commit 15f8cfd

Please sign in to comment.