Skip to content

Commit

Permalink
Parse nanoseconds for intervals (#4186)
Browse files Browse the repository at this point in the history
* Parse nanoseconds for intervals

* Formatting

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* Empty commit

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
  • Loading branch information
Jefffrey and waynexia committed Nov 15, 2022
1 parent d91ce06 commit 8bfc0ea
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 68 deletions.
143 changes: 78 additions & 65 deletions datafusion/common/src/parsers.rs
Expand Up @@ -19,8 +19,8 @@
use crate::{DataFusionError, Result, ScalarValue};
use std::str::FromStr;

const SECONDS_PER_HOUR: f32 = 3_600_f32;
const MILLIS_PER_SECOND: f32 = 1_000_f32;
const SECONDS_PER_HOUR: f64 = 3_600_f64;
const NANOS_PER_SECOND: f64 = 1_000_000_000_f64;

/// Parses a string with an interval like `'0.5 MONTH'` to an
/// appropriately typed [`ScalarValue`]
Expand All @@ -29,73 +29,74 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
// INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
// INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
let align_interval_parts =
|month_part: f32, mut day_part: f32, mut milles_part: f32| -> (i32, i32, f32) {
|month_part: f64, mut day_part: f64, mut nanos_part: f64| -> (i64, i64, f64) {
// Convert fractional month to days, It's not supported by Arrow types, but anyway
day_part += (month_part - (month_part as i32) as f32) * 30_f32;
day_part += (month_part - (month_part as i64) as f64) * 30_f64;

// Convert fractional days to hours
milles_part += (day_part - ((day_part as i32) as f32))
* 24_f32
nanos_part += (day_part - ((day_part as i64) as f64))
* 24_f64
* SECONDS_PER_HOUR
* MILLIS_PER_SECOND;
* NANOS_PER_SECOND;

(month_part as i32, day_part as i32, milles_part)
(month_part as i64, day_part as i64, nanos_part)
};

let calculate_from_part =
|interval_period_str: &str, interval_type: &str| -> Result<(i32, i32, f32)> {
// @todo It's better to use Decimal in order to protect rounding errors
// Wait https://github.com/apache/arrow/pull/9232
let interval_period = match f32::from_str(interval_period_str) {
Ok(n) => n,
Err(_) => {
return Err(DataFusionError::NotImplemented(format!(
"Unsupported Interval Expression with value {:?}",
value
)));
}
};

if interval_period > (i32::MAX as f32) {
let calculate_from_part = |interval_period_str: &str,
interval_type: &str|
-> Result<(i64, i64, f64)> {
// @todo It's better to use Decimal in order to protect rounding errors
// Wait https://github.com/apache/arrow/pull/9232
let interval_period = match f64::from_str(interval_period_str) {
Ok(n) => n,
Err(_) => {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
"Unsupported Interval Expression with value {:?}",
value
)));
}
};

match interval_type.to_lowercase().as_str() {
"century" | "centuries" => {
Ok(align_interval_parts(interval_period * 1200_f32, 0.0, 0.0))
}
"decade" | "decades" => {
Ok(align_interval_parts(interval_period * 120_f32, 0.0, 0.0))
}
"year" | "years" => {
Ok(align_interval_parts(interval_period * 12_f32, 0.0, 0.0))
}
"month" | "months" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
"week" | "weeks" => {
Ok(align_interval_parts(0.0, interval_period * 7_f32, 0.0))
}
"day" | "days" => Ok(align_interval_parts(0.0, interval_period, 0.0)),
"hour" | "hours" => {
Ok((0, 0, interval_period * SECONDS_PER_HOUR * MILLIS_PER_SECOND))
}
"minute" | "minutes" => {
Ok((0, 0, interval_period * 60_f32 * MILLIS_PER_SECOND))
}
"second" | "seconds" => Ok((0, 0, interval_period * MILLIS_PER_SECOND)),
"millisecond" | "milliseconds" => Ok((0, 0, interval_period)),
_ => Err(DataFusionError::NotImplemented(format!(
"Invalid input syntax for type interval: {:?}",
value
))),
if interval_period > (i64::MAX as f64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
)));
}

match interval_type.to_lowercase().as_str() {
"century" | "centuries" => {
Ok(align_interval_parts(interval_period * 1200_f64, 0.0, 0.0))
}
};
"decade" | "decades" => {
Ok(align_interval_parts(interval_period * 120_f64, 0.0, 0.0))
}
"year" | "years" => {
Ok(align_interval_parts(interval_period * 12_f64, 0.0, 0.0))
}
"month" | "months" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
"week" | "weeks" => {
Ok(align_interval_parts(0.0, interval_period * 7_f64, 0.0))
}
"day" | "days" => Ok(align_interval_parts(0.0, interval_period, 0.0)),
"hour" | "hours" => {
Ok((0, 0, interval_period * SECONDS_PER_HOUR * NANOS_PER_SECOND))
}
"minute" | "minutes" => {
Ok((0, 0, interval_period * 60_f64 * NANOS_PER_SECOND))
}
"second" | "seconds" => Ok((0, 0, interval_period * NANOS_PER_SECOND)),
"millisecond" | "milliseconds" => Ok((0, 0, interval_period * 1_000_000f64)),
_ => Err(DataFusionError::NotImplemented(format!(
"Invalid input syntax for type interval: {:?}",
value
))),
}
};

let mut result_month: i64 = 0;
let mut result_days: i64 = 0;
let mut result_millis: i64 = 0;
let mut result_nanos: i128 = 0;

let mut parts = value.split_whitespace();

Expand All @@ -107,7 +108,7 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {

let unit = parts.next().unwrap_or(leading_field);

let (diff_month, diff_days, diff_millis) =
let (diff_month, diff_days, diff_nanos) =
calculate_from_part(interval_period_str.unwrap(), unit)?;

result_month += diff_month as i64;
Expand All @@ -128,9 +129,9 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
)));
}

result_millis += diff_millis as i64;
result_nanos += diff_nanos as i128;

if result_millis > (i32::MAX as i64) {
if result_nanos > (i64::MAX as i128) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
Expand All @@ -142,11 +143,13 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
// 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days != 1 month
// The true way to store and calculate intervals is to store it as it defined
// It's why we there are 3 different interval types in Arrow
if result_month != 0 && (result_days != 0 || result_millis != 0) {
let result: i128 = ((result_month as i128) << 96)
| ((result_days as i128) << 64)
// IntervalMonthDayNano uses nanos, but IntervalDayTime uses milles
| ((result_millis * 1_000_000_i64) as i128);

// If have a unit smaller than milliseconds then must use IntervalMonthDayNano
if (result_nanos % 1_000_000 != 0)
|| (result_month != 0 && (result_days != 0 || result_nanos != 0))
{
let result: i128 =
((result_month as i128) << 96) | ((result_days as i128) << 64) | result_nanos;

return Ok(ScalarValue::IntervalMonthDayNano(Some(result)));
}
Expand All @@ -156,7 +159,8 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
return Ok(ScalarValue::IntervalYearMonth(Some(result_month as i32)));
}

let result: i64 = (result_days << 32) | result_millis;
// IntervalMonthDayNano uses nanos, but IntervalDayTime uses millis
let result: i64 = (result_days << 32) | (result_nanos as i64 / 1_000_000);
Ok(ScalarValue::IntervalDayTime(Some(result)))
}

Expand All @@ -165,6 +169,8 @@ mod test {
use super::*;
use crate::assert_contains;

const MILLIS_PER_SECOND: f64 = 1_000_f64;

#[test]
fn test_parse_ym() {
assert_eq!(
Expand Down Expand Up @@ -212,13 +218,20 @@ mod test {
}

#[test]
// https://github.com/apache/arrow-rs/pull/2235
#[ignore]
fn test_mdn() {
// these should be the same, I think
assert_eq!(
parse_interval("months", "1 year 25 millisecond").unwrap(),
ScalarValue::new_interval_mdn(12, 0, 25 * 1_000_000)
);

assert_eq!(
parse_interval("months", "1 year 1 day 0.000000001 seconds").unwrap(),
ScalarValue::new_interval_mdn(12, 1, 1)
);

assert_eq!(
parse_interval("months", "1 year 1 day 0.1 milliseconds").unwrap(),
ScalarValue::new_interval_mdn(12, 1, 1_00 * 1_000)
);
}
}
6 changes: 3 additions & 3 deletions datafusion/core/tests/sql/expr.rs
Expand Up @@ -917,19 +917,19 @@ async fn test_interval_expressions() -> Result<()> {
);
test_expression!(
"interval '0.499 day'",
"0 years 0 mons 0 days 11 hours 58 mins 33.596 secs"
"0 years 0 mons 0 days 11 hours 58 mins 33.600 secs"
);
test_expression!(
"interval '0.4999 day'",
"0 years 0 mons 0 days 11 hours 59 mins 51.364 secs"
"0 years 0 mons 0 days 11 hours 59 mins 51.360 secs"
);
test_expression!(
"interval '0.49999 day'",
"0 years 0 mons 0 days 11 hours 59 mins 59.136 secs"
);
test_expression!(
"interval '0.49999999999 day'",
"0 years 0 mons 0 days 12 hours 0 mins 0.00 secs"
"0 years 0 mons 0 days 11 hours 59 mins 59.999999136 secs"
);
test_expression!(
"interval '5 day'",
Expand Down

0 comments on commit 8bfc0ea

Please sign in to comment.