Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix timestamp parsing while no explicit timezone given #2814

Merged
merged 2 commits into from Oct 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 48 additions & 50 deletions arrow/src/compute/kernels/cast_utils.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::error::{ArrowError, Result};
use chrono::{prelude::*, LocalResult};
use chrono::prelude::*;

/// Accepts a string in RFC3339 / ISO8601 standard format and some
/// variants and converts it to a nanosecond precision timestamp.
Expand Down Expand Up @@ -96,27 +96,27 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
// without a timezone specifier as a local time, using T as a separator
// Example: 2020-09-08T13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using T as a
// separator, no fractional seconds
// Example: 2020-09-08T13:42:29
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using ' ' as a separator
// Example: 2020-09-08 13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// without a timezone specifier as a local time, using ' ' as a
// separator, no fractional seconds
// Example: 2020-09-08 13:42:29
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
return naive_datetime_to_timestamp(s, ts);
return Ok(ts.timestamp_nanos());
}

// Note we don't pass along the error message from the underlying
Expand All @@ -130,30 +130,6 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
)))
}

/// Converts the naive datetime (which has no specific timezone) to a
/// nanosecond epoch timestamp relative to UTC.
fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64> {
let l = Local {};

match l.from_local_datetime(&datetime) {
LocalResult::None => Err(ArrowError::CastError(format!(
"Error parsing '{}' as timestamp: local time representation is invalid",
s
))),
LocalResult::Single(local_datetime) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
// Ambiguous times can happen if the timestamp is exactly when
// a daylight savings time transition occurs, for example, and
// so the datetime could validly be said to be in two
// potential offsets. However, since we are about to convert
// to UTC anyways, we can pick one arbitrarily
LocalResult::Ambiguous(local_datetime, _) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -202,23 +178,6 @@ mod tests {
Ok(())
}

/// Interprets a naive_datetime (with no explicit timezone offset)
/// using the local timezone and returns the timestamp in UTC (0
/// offset)
fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
// Note: Use chrono APIs that are different than
// naive_datetime_to_timestamp to compute the utc offset to
// try and double check the logic
let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) {
LocalResult::Single(local_offset) => {
local_offset.fix().local_minus_utc() as i64
}
_ => panic!("Unexpected failure converting to local datetime"),
};
let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
naive_datetime.timestamp_nanos() - utc_offset_nanos
}

#[test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
fn string_to_timestamp_no_timezone() -> Result<()> {
Expand All @@ -232,12 +191,12 @@ mod tests {

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime_to_timestamp(&naive_datetime),
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29.190855")?
);

assert_eq!(
naive_datetime_to_timestamp(&naive_datetime),
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29.190855")?
);

Expand All @@ -250,12 +209,12 @@ mod tests {

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime_to_timestamp(&naive_datetime_whole_secs),
naive_datetime_whole_secs.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29")?
);

assert_eq!(
naive_datetime_to_timestamp(&naive_datetime_whole_secs),
naive_datetime_whole_secs.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29")?
);

Expand Down Expand Up @@ -297,4 +256,43 @@ mod tests {
}
}
}

#[test]
fn string_without_timezone_to_timestamp() -> Result<()> {
// string without timezone should always output the same regardless the local or session timezone

let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2020, 9, 8),
NaiveTime::from_hms_nano(13, 42, 29, 190855000),
);

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29.190855")?
);

assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29.190855")?
);

let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2020, 9, 8),
NaiveTime::from_hms_nano(13, 42, 29, 0),
);

// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08T13:42:29")?
);

assert_eq!(
naive_datetime.timestamp_nanos(),
parse_timestamp("2020-09-08 13:42:29")?
);

Ok(())
}
}
38 changes: 9 additions & 29 deletions arrow/src/csv/reader.rs
Expand Up @@ -1136,7 +1136,7 @@ mod tests {
use crate::array::*;
use crate::compute::cast;
use crate::datatypes::Field;
use chrono::{prelude::*, LocalResult};
use chrono::prelude::*;

#[test]
fn test_csv() {
Expand Down Expand Up @@ -1696,26 +1696,6 @@ mod tests {
}
}

/// Interprets a naive_datetime (with no explicit timezone offset)
/// using the local timezone and returns the timestamp in UTC (0
/// offset)
fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
// Note: Use chrono APIs that are different than
// naive_datetime_to_timestamp to compute the utc offset to
// try and double check the logic
let utc_offset_secs = match Local.offset_from_local_datetime(naive_datetime) {
LocalResult::Single(local_offset) => {
local_offset.fix().local_minus_utc() as i64
}
_ => panic!(
"Unexpected failure converting {} to local datetime",
naive_datetime
),
};
let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
naive_datetime.timestamp_nanos() - utc_offset_nanos
}

#[test]
fn test_parse_timestamp_microseconds() {
assert_eq!(
Expand All @@ -1728,27 +1708,27 @@ mod tests {
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13T17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13 17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2018, 11, 13),
NaiveTime::from_hms_nano(17, 11, 10, 11000000),
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("2018-11-13T17:11:10.011").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(1900, 2, 28),
NaiveTime::from_hms_nano(12, 34, 56, 0),
);
assert_eq!(
parse_item::<TimestampMicrosecondType>("1900-02-28T12:34:56").unwrap(),
naive_datetime_to_timestamp(&naive_datetime) / 1000
naive_datetime.timestamp_nanos() / 1000
);
}

Expand All @@ -1764,27 +1744,27 @@ mod tests {
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13T17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13 17:11:10").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(2018, 11, 13),
NaiveTime::from_hms_nano(17, 11, 10, 11000000),
);
assert_eq!(
parse_item::<TimestampNanosecondType>("2018-11-13T17:11:10.011").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd(1900, 2, 28),
NaiveTime::from_hms_nano(12, 34, 56, 0),
);
assert_eq!(
parse_item::<TimestampNanosecondType>("1900-02-28T12:34:56").unwrap(),
naive_datetime_to_timestamp(&naive_datetime)
naive_datetime.timestamp_nanos()
);
}

Expand Down