Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to arrow 16.0.0 #2718

Merged
merged 4 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.59"
readme = "README.md"

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "9.0.0" }
dirs = "4.0.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow-flight = { version = "15.0.0" }
arrow-flight = { version = "16.0.0" }
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
futures = "0.3"
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ jit = ["cranelift-module"]
pyarrow = ["pyo3"]

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.84.0", optional = true }
ordered-float = "3.0"
parquet = { version = "15.0.0", features = ["arrow"], optional = true }
parquet = { version = "16.0.0", features = ["arrow"], optional = true }
pyo3 = { version = "0.16", optional = true }
sqlparser = "0.18"
4 changes: 2 additions & 2 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
chrono = { version = "0.4", default-features = false }
Expand All @@ -77,7 +77,7 @@ num-traits = { version = "0.2", optional = true }
num_cpus = "1.13.0"
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "15.0.0", features = ["arrow"] }
parquet = { version = "16.0.0", features = ["arrow"] }
paste = "^1.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.16", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
6 changes: 3 additions & 3 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ fn fetch_statistics(
let file_schema = arrow_reader.get_schema()?;
let num_fields = table_schema.fields().len();
let fields = table_schema.fields().to_vec();
let meta_data = arrow_reader.get_metadata();
let meta_data = arrow_reader.metadata();

let mut num_rows = 0;
let mut total_byte_size = 0;
Expand Down Expand Up @@ -560,8 +560,8 @@ mod tests {
let _ = collect(exec.clone(), task_ctx.clone()).await?;
let _ = collect(exec_projected.clone(), task_ctx).await?;

assert_bytes_scanned(exec, 2522);
assert_bytes_scanned(exec_projected, 1924);
assert_bytes_scanned(exec, 1409);
assert_bytes_scanned(exec_projected, 811);

Ok(())
}
Expand Down
36 changes: 17 additions & 19 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,7 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
#[cfg(test)]
mod tests {
use super::*;
use crate::assert_contains;
use crate::execution::context::TaskContext;
use crate::execution::options::CsvReadOptions;
use crate::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
Expand Down Expand Up @@ -1516,10 +1517,7 @@ mod tests {
#[tokio::test]
async fn in_list_types() -> Result<()> {
// expression: "a in ('a', 1)"
let list = vec![
Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
Expr::Literal(ScalarValue::Int64(Some(1))),
];
let list = vec![lit("a"), lit(1i64)];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Used a more concise construction of the `ScalarValue` literals.

let logical_plan = test_csv_scan()
.await?
// filter clause needs the type coercion rule applied
Expand All @@ -1531,11 +1529,9 @@ mod tests {
let expected = "InListExpr { expr: Column { name: \"c1\", index: 0 }, list: [Literal { value: Utf8(\"a\") }, CastExpr { expr: Literal { value: Int64(1) }, cast_type: Utf8, cast_options: CastOptions { safe: false } }], negated: false, set: None }";
assert!(format!("{:?}", execution_plan).contains(expected));

// expression: "a in (true, 'a')"
let list = vec![
Expr::Literal(ScalarValue::Boolean(Some(true))),
Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
];
// expression: "a in (struct::null, 'a')"
let list = vec![struct_literal(), lit("a")];

let logical_plan = test_csv_scan()
.await?
// filter clause needs the type coercion rule applied
Expand All @@ -1544,20 +1540,22 @@ mod tests {
.build()?;
let execution_plan = plan(&logical_plan).await;

let expected_error = "Unsupported CAST from Utf8 to Boolean";
match execution_plan {
Ok(_) => panic!("Expected planning failure"),
Err(e) => assert!(
e.to_string().contains(expected_error),
"Error '{}' did not contain expected error '{}'",
e,
expected_error
),
}
let e = execution_plan.unwrap_err().to_string();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great -- arrow 16.0.0 contains support for casting Utf8 to bool (apache/arrow-rs#1738 - Thanks @MazterQyou) -- so I updated the test to try to cast Utf8 to some struct.

I also took the opportunity to clean up the test in general

assert_contains!(&e, "Unsupported CAST from Struct");
assert_contains!(&e, "to Boolean");

Ok(())
}

/// Return a `null` literal representing a struct type like: `{ foo: bool }`
fn struct_literal() -> Expr {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Casting from Utf8 to Struct is still not supported

let struct_literal = ScalarValue::Struct(
None,
Box::new(vec![Field::new("foo", DataType::Boolean, false)]),
);
lit(struct_literal)
}

#[tokio::test]
async fn in_set_test() -> Result<()> {
// OPTIMIZER_INSET_THRESHOLD = 10
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ path = "src/lib.rs"

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "9.0.0" }
sqlparser = "0.18"
2 changes: 1 addition & 1 deletion datafusion/jit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ path = "src/lib.rs"
jit = []

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
cranelift = "0.84.0"
cranelift-jit = "0.84.0"
cranelift-module = "0.84.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ default = ["unicode_expressions"]
unicode_expressions = []

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "9.0.0" }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
chrono = { version = "0.4", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/proto/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ path = "src/lib.rs"
[features]

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
datafusion = { path = "../core", version = "9.0.0" }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-expr = { path = "../expr", version = "9.0.0" }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/row/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ path = "src/lib.rs"
jit = ["datafusion-jit"]

[dependencies]
arrow = { version = "15.0.0" }
arrow = { version = "16.0.0" }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-jit = { path = "../jit", version = "9.0.0", optional = true }
paste = "^1.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ unicode_expressions = []

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { version = "15.0.0", features = ["prettyprint"] }
arrow = { version = "16.0.0", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "9.0.0" }
datafusion-expr = { path = "../expr", version = "9.0.0" }
hashbrown = "0.12"
Expand Down