Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to arrow 26, change timezones #4039

Merged
merged 12 commits
Nov 4, 2022
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,8 @@ opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "26.0.0"}
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "26.0.0"}
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "26.0.0"}
4 changes: 2 additions & 2 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ simd = ["datafusion/simd"]
snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = "25.0.0"
arrow = "26.0.0"
datafusion = { path = "../datafusion/core" }
env_logger = "0.9"
futures = "0.3"
mimalloc = { version = "0.1", optional = true, default-features = false }
num_cpus = "1.13.0"
object_store = "0.5.0"
parquet = "25.0.0"
parquet = "26.0.0"
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.78"
Expand Down
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.62"
readme = "README.md"

[dependencies]
arrow = "25.0.0"
arrow = "26.0.0"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "13.0.0" }
dirs = "4.0.0"
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow = "25.0.0"
arrow-flight = "25.0.0"
arrow = "26.0.0"
arrow-flight = "26.0.0"
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
datafusion-common = { path = "../datafusion/common" }
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
apache-avro = { version = "0.14", default-features = false, features = ["snappy"], optional = true }
arrow = { version = "25.0.0", default-features = false }
arrow = { version = "26.0.0", default-features = false }
chrono = { version = "0.4", default-features = false }
cranelift-module = { version = "0.89.0", optional = true }
object_store = { version = "0.5.0", default-features = false, optional = true }
ordered-float = "3.0"
parquet = { version = "25.0.0", default-features = false, optional = true }
parquet = { version = "26.0.0", default-features = false, optional = true }
pyo3 = { version = "0.17.1", optional = true }
sqlparser = "0.26"
6 changes: 3 additions & 3 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion
[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.14", optional = true }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow = { version = "26.0.0", features = ["prettyprint"] }
async-compression = { version = "0.3.14", features = ["bzip2", "gzip", "futures-io", "tokio"] }
async-trait = "0.1.41"
bytes = "1.1"
Expand All @@ -81,7 +81,7 @@ num_cpus = "1.13.0"
object_store = "0.5.0"
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "25.0.0", features = ["arrow", "async"] }
parquet = { version = "26.0.0", features = ["arrow", "async"] }
paste = "^1.0"
percent-encoding = "2.2.0"
pin-project-lite = "^0.2.7"
Expand All @@ -98,7 +98,7 @@ url = "2.2"
uuid = { version = "1.0", features = ["v4"] }

[dev-dependencies]
arrow = { version = "25.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow = { version = "26.0.0", features = ["prettyprint"] }
async-trait = "0.1.53"
criterion = "0.4"
csv = "1.1.6"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ impl BuiltInConfigs {
"The session time zone which some function require \
e.g. EXTRACT(HOUR from SOME_TIME) shift the underline datetime according to the time zone,
then extract the hour.",
"UTC".into()
"+00:00".into()
),
ConfigDefinition::new_bool(
OPT_PARQUET_PUSHDOWN_FILTERS,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/listing/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! The table implementation.

use ahash::HashMap;
use hashbrown::HashMap;
use std::str::FromStr;
use std::{any::Any, sync::Arc};

Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/physical_optimizer/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ mod tests {
"+-------------------------------+",
"| s1_min |",
"+-------------------------------+",
"| 1970-01-01 00:00:00.000000010 |",
"| 1970-01-01T00:00:00.000000010 |",
"+-------------------------------+",
];

Expand Down
102 changes: 51 additions & 51 deletions datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,16 +672,16 @@ mod tests {
"+----+---+-------------------------------+",
"| a | b | c |",
"+----+---+-------------------------------+",
"| 1 | a | 1970-01-01 00:00:00.000000008 |",
"| 10 | b | 1970-01-01 00:00:00.000000004 |",
"| 2 | c | 1970-01-01 00:00:00.000000007 |",
"| 20 | d | 1970-01-01 00:00:00.000000006 |",
"| 7 | e | 1970-01-01 00:00:00.000000006 |",
"| 70 | f | 1970-01-01 00:00:00.000000002 |",
"| 9 | g | 1970-01-01 00:00:00.000000005 |",
"| 90 | h | 1970-01-01 00:00:00.000000002 |",
"| 30 | j | 1970-01-01 00:00:00.000000006 |", // input b2 before b1
"| 3 | j | 1970-01-01 00:00:00.000000008 |",
"| 1 | a | 1970-01-01T00:00:00.000000008 |",
"| 10 | b | 1970-01-01T00:00:00.000000004 |",
"| 2 | c | 1970-01-01T00:00:00.000000007 |",
"| 20 | d | 1970-01-01T00:00:00.000000006 |",
"| 7 | e | 1970-01-01T00:00:00.000000006 |",
"| 70 | f | 1970-01-01T00:00:00.000000002 |",
"| 9 | g | 1970-01-01T00:00:00.000000005 |",
"| 90 | h | 1970-01-01T00:00:00.000000002 |",
"| 30 | j | 1970-01-01T00:00:00.000000006 |", // input b2 before b1
"| 3 | j | 1970-01-01T00:00:00.000000008 |",
"+----+---+-------------------------------+",
],
task_ctx,
Expand Down Expand Up @@ -721,16 +721,16 @@ mod tests {
"+-----+---+-------------------------------+",
"| a | b | c |",
"+-----+---+-------------------------------+",
"| 1 | a | 1970-01-01 00:00:00.000000008 |",
"| 2 | b | 1970-01-01 00:00:00.000000007 |",
"| 70 | c | 1970-01-01 00:00:00.000000004 |",
"| 7 | c | 1970-01-01 00:00:00.000000006 |",
"| 9 | d | 1970-01-01 00:00:00.000000005 |",
"| 90 | d | 1970-01-01 00:00:00.000000006 |",
"| 30 | e | 1970-01-01 00:00:00.000000002 |",
"| 3 | e | 1970-01-01 00:00:00.000000008 |",
"| 100 | f | 1970-01-01 00:00:00.000000002 |",
"| 110 | g | 1970-01-01 00:00:00.000000006 |",
"| 1 | a | 1970-01-01T00:00:00.000000008 |",
"| 2 | b | 1970-01-01T00:00:00.000000007 |",
"| 70 | c | 1970-01-01T00:00:00.000000004 |",
"| 7 | c | 1970-01-01T00:00:00.000000006 |",
"| 9 | d | 1970-01-01T00:00:00.000000005 |",
"| 90 | d | 1970-01-01T00:00:00.000000006 |",
"| 30 | e | 1970-01-01T00:00:00.000000002 |",
"| 3 | e | 1970-01-01T00:00:00.000000008 |",
"| 100 | f | 1970-01-01T00:00:00.000000002 |",
"| 110 | g | 1970-01-01T00:00:00.000000006 |",
"+-----+---+-------------------------------+",
],
task_ctx,
Expand Down Expand Up @@ -770,16 +770,16 @@ mod tests {
"+----+---+-------------------------------+",
"| a | b | c |",
"+----+---+-------------------------------+",
"| 1 | a | 1970-01-01 00:00:00.000000008 |",
"| 2 | b | 1970-01-01 00:00:00.000000007 |",
"| 7 | c | 1970-01-01 00:00:00.000000006 |",
"| 9 | d | 1970-01-01 00:00:00.000000005 |",
"| 3 | e | 1970-01-01 00:00:00.000000008 |",
"| 10 | f | 1970-01-01 00:00:00.000000004 |",
"| 20 | g | 1970-01-01 00:00:00.000000006 |",
"| 70 | h | 1970-01-01 00:00:00.000000002 |",
"| 90 | i | 1970-01-01 00:00:00.000000002 |",
"| 30 | j | 1970-01-01 00:00:00.000000006 |",
"| 1 | a | 1970-01-01T00:00:00.000000008 |",
"| 2 | b | 1970-01-01T00:00:00.000000007 |",
"| 7 | c | 1970-01-01T00:00:00.000000006 |",
"| 9 | d | 1970-01-01T00:00:00.000000005 |",
"| 3 | e | 1970-01-01T00:00:00.000000008 |",
"| 10 | f | 1970-01-01T00:00:00.000000004 |",
"| 20 | g | 1970-01-01T00:00:00.000000006 |",
"| 70 | h | 1970-01-01T00:00:00.000000002 |",
"| 90 | i | 1970-01-01T00:00:00.000000002 |",
"| 30 | j | 1970-01-01T00:00:00.000000006 |",
"+----+---+-------------------------------+",
],
task_ctx,
Expand Down Expand Up @@ -831,21 +831,21 @@ mod tests {
"+-----+---+-------------------------------+",
"| a | b | c |",
"+-----+---+-------------------------------+",
"| 1 | a | 1970-01-01 00:00:00.000000008 |",
"| 2 | b | 1970-01-01 00:00:00.000000007 |",
"| 7 | c | 1970-01-01 00:00:00.000000006 |",
"| 9 | d | 1970-01-01 00:00:00.000000005 |",
"| 10 | e | 1970-01-01 00:00:00.000000040 |",
"| 100 | f | 1970-01-01 00:00:00.000000004 |",
"| 3 | f | 1970-01-01 00:00:00.000000008 |",
"| 200 | g | 1970-01-01 00:00:00.000000006 |",
"| 20 | g | 1970-01-01 00:00:00.000000060 |",
"| 700 | h | 1970-01-01 00:00:00.000000002 |",
"| 70 | h | 1970-01-01 00:00:00.000000020 |",
"| 900 | i | 1970-01-01 00:00:00.000000002 |",
"| 90 | i | 1970-01-01 00:00:00.000000020 |",
"| 300 | j | 1970-01-01 00:00:00.000000006 |",
"| 30 | j | 1970-01-01 00:00:00.000000060 |",
"| 1 | a | 1970-01-01T00:00:00.000000008 |",
"| 2 | b | 1970-01-01T00:00:00.000000007 |",
"| 7 | c | 1970-01-01T00:00:00.000000006 |",
"| 9 | d | 1970-01-01T00:00:00.000000005 |",
"| 10 | e | 1970-01-01T00:00:00.000000040 |",
"| 100 | f | 1970-01-01T00:00:00.000000004 |",
"| 3 | f | 1970-01-01T00:00:00.000000008 |",
"| 200 | g | 1970-01-01T00:00:00.000000006 |",
"| 20 | g | 1970-01-01T00:00:00.000000060 |",
"| 700 | h | 1970-01-01T00:00:00.000000002 |",
"| 70 | h | 1970-01-01T00:00:00.000000020 |",
"| 900 | i | 1970-01-01T00:00:00.000000002 |",
"| 90 | i | 1970-01-01T00:00:00.000000020 |",
"| 300 | j | 1970-01-01T00:00:00.000000006 |",
"| 30 | j | 1970-01-01T00:00:00.000000060 |",
"+-----+---+-------------------------------+",
],
task_ctx,
Expand Down Expand Up @@ -1155,16 +1155,16 @@ mod tests {
"+---+---+-------------------------------+",
"| a | b | c |",
"+---+---+-------------------------------+",
"| 1 | | 1970-01-01 00:00:00.000000008 |",
"| 1 | | 1970-01-01 00:00:00.000000008 |",
"| 1 | | 1970-01-01T00:00:00.000000008 |",
"| 1 | | 1970-01-01T00:00:00.000000008 |",
"| 2 | a | |",
"| 7 | b | 1970-01-01 00:00:00.000000006 |",
"| 7 | b | 1970-01-01T00:00:00.000000006 |",
"| 2 | b | |",
"| 9 | d | |",
"| 3 | e | 1970-01-01 00:00:00.000000004 |",
"| 3 | g | 1970-01-01 00:00:00.000000005 |",
"| 3 | e | 1970-01-01T00:00:00.000000004 |",
"| 3 | g | 1970-01-01T00:00:00.000000005 |",
"| 4 | h | |",
"| 5 | i | 1970-01-01 00:00:00.000000004 |",
"| 5 | i | 1970-01-01T00:00:00.000000004 |",
"+---+---+-------------------------------+",
],
collected.as_slice()
Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/parquet_pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -822,10 +822,10 @@ fn make_timestamp_batch(offset: Duration) -> RecordBatch {
.map(|(i, _)| format!("Row {} + {}", i, offset))
.collect::<Vec<_>>();

let arr_nanos = TimestampNanosecondArray::from_opt_vec(ts_nanos, None);
let arr_micros = TimestampMicrosecondArray::from_opt_vec(ts_micros, None);
let arr_millis = TimestampMillisecondArray::from_opt_vec(ts_millis, None);
let arr_seconds = TimestampSecondArray::from_opt_vec(ts_seconds, None);
let arr_nanos = TimestampNanosecondArray::from(ts_nanos);
let arr_micros = TimestampMicrosecondArray::from(ts_micros);
let arr_millis = TimestampMillisecondArray::from(ts_millis);
let arr_seconds = TimestampSecondArray::from(ts_seconds);

let names = names.iter().map(|s| s.as_str()).collect::<Vec<_>>();
let arr_names = StringArray::from(names);
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sql/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1475,7 +1475,7 @@ async fn aggregate_timestamps_min() -> Result<()> {
"+----------------------------+----------------------------+-------------------------+---------------------+",
"| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |",
"+----------------------------+----------------------------+-------------------------+---------------------+",
"| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 |",
"| 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123 | 2011-12-13T11:13:10 |",
"+----------------------------+----------------------------+-------------------------+---------------------+",
];
assert_batches_sorted_eq!(expected, &results);
Expand All @@ -1498,7 +1498,7 @@ async fn aggregate_timestamps_max() -> Result<()> {
"+-------------------------+-------------------------+-------------------------+---------------------+",
"| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |",
"+-------------------------+-------------------------+-------------------------+---------------------+",
"| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 |",
"| 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10 |",
"+-------------------------+-------------------------+-------------------------+---------------------+",
];
assert_batches_sorted_eq!(expected, &results);
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sql/create_drop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ async fn create_external_table_with_timestamps() {
"+--------+-------------------------+",
"| name | ts |",
"+--------+-------------------------+",
"| Andrew | 2018-11-13 17:11:10.011 |",
"| Jorge | 2018-12-13 12:12:10.011 |",
"| Andrew | 2018-11-13T17:11:10.011 |",
"| Jorge | 2018-12-13T12:12:10.011 |",
"+--------+-------------------------+",
];
assert_batches_sorted_eq!(expected, &result);
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sql/group_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ async fn group_by_date_trunc() -> Result<()> {
"+---------------------+--------------+",
"| week | SUM(test.c2) |",
"+---------------------+--------------+",
"| 2020-12-07 00:00:00 | 24 |",
"| 2020-12-14 00:00:00 | 156 |",
"| 2020-12-07T00:00:00 | 24 |",
"| 2020-12-14T00:00:00 | 156 |",
"+---------------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
Expand Down
5 changes: 1 addition & 4 deletions datafusion/core/tests/sql/information_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,7 @@ fn table_with_many_types() -> Arc<dyn TableProvider> {
Arc::new(LargeStringArray::from(vec![Some("bar")])),
Arc::new(BinaryArray::from_slice(&[b"foo" as &[u8]])),
Arc::new(LargeBinaryArray::from_slice(&[b"foo" as &[u8]])),
Arc::new(TimestampNanosecondArray::from_opt_vec(
vec![Some(123)],
None,
)),
Arc::new(TimestampNanosecondArray::from(vec![Some(123)])),
],
)
.unwrap();
Expand Down
12 changes: 6 additions & 6 deletions datafusion/core/tests/sql/joins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,9 +768,9 @@ async fn test_join_timestamp() -> Result<()> {
"+-------------------------------+-------------------------------+",
"| time | time |",
"+-------------------------------+-------------------------------+",
"| 1970-01-02 12:39:24.190213133 | 1970-01-02 12:39:24.190213133 |",
"| 1970-01-02 12:39:24.190213134 | 1970-01-02 12:39:24.190213134 |",
"| 1970-01-02 12:39:24.190213135 | 1970-01-02 12:39:24.190213135 |",
"| 1970-01-02T12:39:24.190213133 | 1970-01-02T12:39:24.190213133 |",
"| 1970-01-02T12:39:24.190213134 | 1970-01-02T12:39:24.190213134 |",
"| 1970-01-02T12:39:24.190213135 | 1970-01-02T12:39:24.190213135 |",
"+-------------------------------+-------------------------------+",
];
assert_batches_eq!(expected, &actual);
Expand Down Expand Up @@ -1025,9 +1025,9 @@ async fn join_timestamp() -> Result<()> {
"+-------------------------------+----------------------------+-------------------------+---------------------+-------+-------------------------------+----------------------------+-------------------------+---------------------+-------+",
"| nanos | micros | millis | secs | name | nanos | micros | millis | secs | name |",
"+-------------------------------+----------------------------+-------------------------+---------------------+-------+-------------------------------+----------------------------+-------------------------+---------------------+-------+",
"| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 | Row 1 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 | Row 1 |",
"| 2018-11-13 17:11:10.011375885 | 2018-11-13 17:11:10.011375 | 2018-11-13 17:11:10.011 | 2018-11-13 17:11:10 | Row 0 | 2018-11-13 17:11:10.011375885 | 2018-11-13 17:11:10.011375 | 2018-11-13 17:11:10.011 | 2018-11-13 17:11:10 | Row 0 |",
"| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 | Row 3 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 | Row 3 |",
"| 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123 | 2011-12-13T11:13:10 | Row 1 | 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123450 | 2011-12-13T11:13:10.123 | 2011-12-13T11:13:10 | Row 1 |",
"| 2018-11-13T17:11:10.011375885 | 2018-11-13T17:11:10.011375 | 2018-11-13T17:11:10.011 | 2018-11-13T17:11:10 | Row 0 | 2018-11-13T17:11:10.011375885 | 2018-11-13T17:11:10.011375 | 2018-11-13T17:11:10.011 | 2018-11-13T17:11:10 | Row 0 |",
"| 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10 | Row 3 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10.432 | 2021-01-01T05:11:10 | Row 3 |",
"+-------------------------------+----------------------------+-------------------------+---------------------+-------+-------------------------------+----------------------------+-------------------------+---------------------+-------+",
];

Expand Down