Skip to content

Commit

Permalink
Add support for month & year intervals (#2797)
Browse files Browse the repository at this point in the history
  • Loading branch information
avantgardnerio committed Jul 12, 2022
1 parent d25e822 commit 5a63c87
Show file tree
Hide file tree
Showing 6 changed files with 673 additions and 77 deletions.
15 changes: 10 additions & 5 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};

/// Represents a dynamically typed, nullable single value.
/// This is the single-valued counterpart of arrow's `Array`.
/// <https://arrow.apache.org/docs/python/api/datatypes.html>
/// <https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375>
#[derive(Clone)]
pub enum ScalarValue {
/// represents `DataType::Null` (castable to/from any other type)
Expand Down Expand Up @@ -76,9 +78,9 @@ pub enum ScalarValue {
LargeBinary(Option<Vec<u8>>),
/// list of nested ScalarValue
List(Option<Vec<ScalarValue>>, Box<DataType>),
/// Date stored as a signed 32bit int
/// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
Date32(Option<i32>),
/// Date stored as a signed 64bit int
/// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
Date64(Option<i64>),
/// Timestamp Second
TimestampSecond(Option<i64>, Option<String>),
Expand All @@ -88,11 +90,14 @@ pub enum ScalarValue {
TimestampMicrosecond(Option<i64>, Option<String>),
/// Timestamp Nanoseconds
TimestampNanosecond(Option<i64>, Option<String>),
/// Interval with YearMonth unit
/// Number of elapsed whole months
IntervalYearMonth(Option<i32>),
/// Interval with DayTime unit
/// Number of elapsed days and milliseconds (no leap seconds)
/// stored as 2 contiguous 32-bit signed integers
IntervalDayTime(Option<i64>),
/// Interval with MonthDayNano unit
/// A triple of the number of elapsed months, days, and nanoseconds.
/// Months and days are encoded as 32-bit signed integers.
/// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
IntervalMonthDayNano(Option<i128>),
/// struct of nested ScalarValue
Struct(Option<Vec<ScalarValue>>, Box<Vec<Field>>),
Expand Down
120 changes: 120 additions & 0 deletions datafusion/core/tests/sql/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,3 +814,123 @@ async fn group_by_timestamp_millis() -> Result<()> {
assert_batches_eq!(expected, &actual);
Ok(())
}

/// Checks that adding `interval '1' year` to the date literal `1994-01-01`
/// produces `1995-01-01`.
#[tokio::test]
async fn interval_year() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-01-01' + interval '1' year as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1995-01-01 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}

/// Checks that adding `interval '1' month` to `1994-01-31` yields
/// `1994-02-28`, i.e. the result lands on the last day of the shorter month.
#[tokio::test]
async fn add_interval_month() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-01-31' + interval '1' month as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1994-02-28 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}

/// Checks that subtracting `interval '1' month` from `1994-03-31` yields
/// `1994-02-28`, i.e. the result lands on the last day of the shorter month.
#[tokio::test]
async fn sub_interval_month() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-03-31' - interval '1' month as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1994-02-28 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}

/// Checks that subtracting `interval '1' month` from a January date wraps
/// into December of the previous year (`1994-01-15` -> `1993-12-15`).
#[tokio::test]
async fn sub_month_wrap() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-01-15' - interval '1' month as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1993-12-15 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}

/// Checks that adding `interval '1' day` to `1994-01-15` yields `1994-01-16`.
#[tokio::test]
async fn add_interval_day() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-01-15' + interval '1' day as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1994-01-16 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}

/// Checks that subtracting `interval '1' day` from `1994-01-01` crosses the
/// year boundary and yields `1993-12-31`.
#[tokio::test]
async fn sub_interval_day() -> Result<()> {
    let ctx = SessionContext::new();

    let sql = "select date '1994-01-01' - interval '1' day as date;";
    let results = execute_to_batches(&ctx, sql).await;

    // Every row of the expected table must have the same width as the border:
    // the header cell is padded to the width of the date values below it.
    let expected = vec![
        "+------------+",
        "| date       |",
        "+------------+",
        "| 1993-12-31 |",
        "+------------+",
    ];

    assert_batches_eq!(expected, &results);

    Ok(())
}
8 changes: 4 additions & 4 deletions datafusion/optimizer/src/simplify_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1946,7 +1946,7 @@ mod tests {
let date_plus_interval_expr = to_timestamp_expr(ts_string)
.cast_to(&DataType::Date32, schema)
.unwrap()
+ Expr::Literal(ScalarValue::IntervalDayTime(Some(123)));
+ Expr::Literal(ScalarValue::IntervalDayTime(Some(123i64 << 32)));

let plan = LogicalPlanBuilder::from(table_scan.clone())
.project(vec![date_plus_interval_expr])
Expand All @@ -1958,10 +1958,10 @@ mod tests {

// Note that constant folder runs and folds the entire
// expression down to a single constant (true)
let expected = "Projection: Date32(\"18636\") AS CAST(totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) AS Date32) + IntervalDayTime(\"123\")\
\n TableScan: test";
let expected = r#"Projection: Date32("18636") AS CAST(totimestamp(Utf8("2020-09-08T12:05:00+00:00")) AS Date32) + IntervalDayTime("528280977408")
TableScan: test"#;
let actual = get_optimized_plan_formatted(&plan, &time);

assert_eq!(expected, actual);
assert_eq!(actual, expected);
}
}

0 comments on commit 5a63c87

Please sign in to comment.