Skip to content

Commit

Permalink
Upgrade to arrow 16.0.0 (#2718)
Browse files Browse the repository at this point in the history
* Move from deprecated API

* Update parquet tests

* Update test now that Cast from UTF8 -> Boolean is supported

* Update to arrow 16.0.0 now that it is released
  • Loading branch information
alamb committed Jun 14, 2022
1 parent f100f3b commit 5096ff4
Show file tree
Hide file tree
Showing 14 changed files with 34 additions and 36 deletions.
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.59"
readme = "README.md"

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "9.0.0" }
dirs = "4.0.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow-flight = { version = "15.0.0" }
arrow-flight = { version = "16.0.0" }
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
futures = "0.3"
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ jit = ["cranelift-module"]
pyarrow = ["pyo3"]

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.84.0", optional = true }
ordered-float = "3.0"
parquet = { version = "15.0.0", features = ["arrow"], optional = true }
parquet = { version = "16.0.0", features = ["arrow"], optional = true }
pyo3 = { version = "0.16", optional = true }
sqlparser = "0.18"
4 changes: 2 additions & 2 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
chrono = { version = "0.4", default-features = false }
Expand All @@ -77,7 +77,7 @@ num-traits = { version = "0.2", optional = true }
num_cpus = "1.13.0"
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "15.0.0", features = ["arrow"] }
parquet = { version = "16.0.0", features = ["arrow"] }
paste = "^1.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.16", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
6 changes: 3 additions & 3 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ fn fetch_statistics(
let file_schema = arrow_reader.get_schema()?;
let num_fields = table_schema.fields().len();
let fields = table_schema.fields().to_vec();
let meta_data = arrow_reader.get_metadata();
let meta_data = arrow_reader.metadata();

let mut num_rows = 0;
let mut total_byte_size = 0;
Expand Down Expand Up @@ -560,8 +560,8 @@ mod tests {
let _ = collect(exec.clone(), task_ctx.clone()).await?;
let _ = collect(exec_projected.clone(), task_ctx).await?;

assert_bytes_scanned(exec, 2522);
assert_bytes_scanned(exec_projected, 1924);
assert_bytes_scanned(exec, 1409);
assert_bytes_scanned(exec_projected, 811);

Ok(())
}
Expand Down
36 changes: 17 additions & 19 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1543,6 +1543,7 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
#[cfg(test)]
mod tests {
use super::*;
use crate::assert_contains;
use crate::execution::context::TaskContext;
use crate::execution::options::CsvReadOptions;
use crate::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
Expand Down Expand Up @@ -1826,10 +1827,7 @@ mod tests {
#[tokio::test]
async fn in_list_types() -> Result<()> {
// expression: "a in ('a', 1)"
let list = vec![
Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
Expr::Literal(ScalarValue::Int64(Some(1))),
];
let list = vec![lit("a"), lit(1i64)];
let logical_plan = test_csv_scan()
.await?
// filter clause needs the type coercion rule applied
Expand All @@ -1841,11 +1839,9 @@ mod tests {
let expected = "InListExpr { expr: Column { name: \"c1\", index: 0 }, list: [Literal { value: Utf8(\"a\") }, CastExpr { expr: Literal { value: Int64(1) }, cast_type: Utf8, cast_options: CastOptions { safe: false } }], negated: false, set: None }";
assert!(format!("{:?}", execution_plan).contains(expected));

// expression: "a in (true, 'a')"
let list = vec![
Expr::Literal(ScalarValue::Boolean(Some(true))),
Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
];
// expression: "a in (struct::null, 'a')"
let list = vec![struct_literal(), lit("a")];

let logical_plan = test_csv_scan()
.await?
// filter clause needs the type coercion rule applied
Expand All @@ -1854,20 +1850,22 @@ mod tests {
.build()?;
let execution_plan = plan(&logical_plan).await;

let expected_error = "Unsupported CAST from Utf8 to Boolean";
match execution_plan {
Ok(_) => panic!("Expected planning failure"),
Err(e) => assert!(
e.to_string().contains(expected_error),
"Error '{}' did not contain expected error '{}'",
e,
expected_error
),
}
let e = execution_plan.unwrap_err().to_string();
assert_contains!(&e, "Unsupported CAST from Struct");
assert_contains!(&e, "to Boolean");

Ok(())
}

/// Build a `null` literal expression whose type is a struct with a single
/// non-nullable boolean field named `foo`, i.e. `{ foo: bool }`.
fn struct_literal() -> Expr {
    // The struct carries no value (`None`); only its field layout is given.
    let fields = vec![Field::new("foo", DataType::Boolean, false)];
    lit(ScalarValue::Struct(None, Box::new(fields)))
}

#[tokio::test]
async fn in_set_test() -> Result<()> {
// OPTIMIZER_INSET_THRESHOLD = 10
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ path = "src/lib.rs"

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "9.0.0" }
sqlparser = "0.18"
2 changes: 1 addition & 1 deletion datafusion/jit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ path = "src/lib.rs"
jit = []

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
cranelift = "0.84.0"
cranelift-jit = "0.84.0"
cranelift-module = "0.84.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ default = ["unicode_expressions"]
unicode_expressions = []

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "9.0.0" }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
chrono = { version = "0.4", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/proto/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ path = "src/lib.rs"
[features]

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
datafusion = { path = "../core", version = "9.0.0" }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-expr = { path = "../expr", version = "9.0.0" }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/row/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ path = "src/lib.rs"
jit = ["datafusion-jit"]

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-jit = { path = "../jit", version = "9.0.0", optional = true }
paste = "^1.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ unicode_expressions = []

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-expr = { path = "../expr", version = "9.0.0" }
hashbrown = "0.12"
Expand Down

0 comments on commit 5096ff4

Please sign in to comment.