From 6495d2a001f314488cfbc5077e8c2df048ff3ced Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 6 Oct 2022 14:53:31 -0600 Subject: [PATCH 1/3] update version --- benchmarks/Cargo.toml | 2 +- datafusion-cli/Cargo.toml | 4 ++-- datafusion-examples/Cargo.toml | 2 +- datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 16 ++++++++-------- datafusion/expr/Cargo.toml | 4 ++-- datafusion/jit/Cargo.toml | 6 +++--- datafusion/optimizer/Cargo.toml | 10 +++++----- datafusion/physical-expr/Cargo.toml | 8 ++++---- datafusion/proto/Cargo.toml | 8 ++++---- datafusion/row/Cargo.toml | 6 +++--- datafusion/sql/Cargo.toml | 6 +++--- 12 files changed, 37 insertions(+), 37 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 5da6daeba1e..7367e9682be 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "12.0.0" +version = "13.0.0" edition = "2021" authors = ["Apache Arrow "] homepage = "https://github.com/apache/arrow-datafusion" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 84c4bab48c6..61328a8bfd7 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." 
-version = "12.0.0" +version = "13.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "arrow", "datafusion", "query", "sql" ] @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = "24.0.0" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "12.0.0" } +datafusion = { path = "../datafusion/core", version = "13.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 12c267832fb..844bdba7a0f 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-examples" description = "DataFusion usage examples" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow "] diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 51529ccc764..382f66e5dd9 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-common" description = "Common functionality for DataFusion query engine" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 1ba9f769f3f..097ee3158ae 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../../README.md" @@ -60,13 +60,13 @@ arrow = { version = "24.0.0", features = 
["prettyprint"] } async-trait = "0.1.41" bytes = "1.1" chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "12.0.0", features = ["parquet", "object_store"] } -datafusion-expr = { path = "../expr", version = "12.0.0" } -datafusion-jit = { path = "../jit", version = "12.0.0", optional = true } -datafusion-optimizer = { path = "../optimizer", version = "12.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "12.0.0" } -datafusion-row = { path = "../row", version = "12.0.0" } -datafusion-sql = { path = "../sql", version = "12.0.0" } +datafusion-common = { path = "../common", version = "13.0.0", features = ["parquet", "object_store"] } +datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion-jit = { path = "../jit", version = "13.0.0", optional = true } +datafusion-optimizer = { path = "../optimizer", version = "13.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "13.0.0" } +datafusion-row = { path = "../row", version = "13.0.0" } +datafusion-sql = { path = "../sql", version = "13.0.0" } futures = "0.3" glob = "0.3.0" hashbrown = { version = "0.12", features = ["raw"] } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 1d466264e7a..3280628a42e 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,5 +37,5 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { version = "24.0.0", default-features = false } -datafusion-common = { path = "../common", version = "12.0.0" } +datafusion-common = { path = "../common", version = 
"13.0.0" } sqlparser = "0.25" diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 6166e91747e..571ddb4bb60 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-jit" description = "Just In Time (JIT) compilation support for DataFusion query engine" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,7 +41,7 @@ cranelift = "0.88.0" cranelift-jit = "0.88.0" cranelift-module = "0.88.0" cranelift-native = "0.88.0" -datafusion-common = { path = "../common", version = "12.0.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "12.0.0" } +datafusion-common = { path = "../common", version = "13.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", version = "13.0.0" } parking_lot = "0.12" diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 862f5102426..ee4d4016dcd 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-optimizer" description = "DataFusion Query Optimizer" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -40,14 +40,14 @@ unicode_expressions = [] arrow = { version = "24.0.0", features = ["prettyprint"] } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "12.0.0" } -datafusion-expr = { path = "../expr", version = "12.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "12.0.0" } +datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "13.0.0" } 
hashbrown = { version = "0.12", features = ["raw"] } log = "^0.4" [dev-dependencies] ctor = "0.1.22" -datafusion-sql = { path = "../sql", version = "12.0.0" } +datafusion-sql = { path = "../sql", version = "13.0.0" } env_logger = "0.9.0" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index a2473fb7533..4707b0e7912 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -44,9 +44,9 @@ arrow = { version = "24.0.0", features = ["prettyprint"] } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "12.0.0" } -datafusion-expr = { path = "../expr", version = "12.0.0" } -datafusion-row = { path = "../row", version = "12.0.0" } +datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion-row = { path = "../row", version = "13.0.0" } hashbrown = { version = "0.12", features = ["raw"] } lazy_static = { version = "^1.4.0" } md-5 = { version = "^0.10.0", optional = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 122b980cfa7..569fe5c4493 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,9 +41,9 @@ json 
= ["pbjson", "pbjson-build", "serde", "serde_json"] [dependencies] arrow = "24.0.0" -datafusion = { path = "../core", version = "12.0.0" } -datafusion-common = { path = "../common", version = "12.0.0" } -datafusion-expr = { path = "../expr", version = "12.0.0" } +datafusion = { path = "../core", version = "13.0.0" } +datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-expr = { path = "../expr", version = "13.0.0" } pbjson = { version = "0.5", optional = true } pbjson-types = { version = "0.5", optional = true } prost = "0.11.0" diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index 306ab7b3cba..bc32b07d9fb 100644 --- a/datafusion/row/Cargo.toml +++ b/datafusion/row/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-row" description = "Row backed by raw bytes for DataFusion query engine" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,7 +38,7 @@ jit = ["datafusion-jit"] [dependencies] arrow = "24.0.0" -datafusion-common = { path = "../common", version = "12.0.0" } -datafusion-jit = { path = "../jit", version = "12.0.0", optional = true } +datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-jit = { path = "../jit", version = "13.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 47cf6e8ac36..4633e225ae5 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-sql" description = "DataFusion SQL Query Planner" -version = "12.0.0" +version = "13.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,6 +38,6 @@ unicode_expressions = [] [dependencies] arrow = { version = "24.0.0", default-features = false } -datafusion-common = { path = 
"../common", version = "12.0.0" } -datafusion-expr = { path = "../expr", version = "12.0.0" } +datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-expr = { path = "../expr", version = "13.0.0" } sqlparser = "0.25" From c37693d13d194203318656052168ca3d899f3e84 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 6 Oct 2022 15:18:00 -0600 Subject: [PATCH 2/3] update cli Cargo.lock --- datafusion-cli/Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index bc4e084f10d..0a08e57248c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -452,7 +452,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "12.0.0" +version = "13.0.0" dependencies = [ "ahash 0.8.0", "arrow", @@ -490,7 +490,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "12.0.0" +version = "13.0.0" dependencies = [ "arrow", "clap", @@ -506,7 +506,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "12.0.0" +version = "13.0.0" dependencies = [ "arrow", "object_store", @@ -517,7 +517,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "12.0.0" +version = "13.0.0" dependencies = [ "ahash 0.8.0", "arrow", @@ -527,7 +527,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "12.0.0" +version = "13.0.0" dependencies = [ "arrow", "async-trait", @@ -541,7 +541,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "12.0.0" +version = "13.0.0" dependencies = [ "ahash 0.8.0", "arrow", @@ -564,7 +564,7 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "12.0.0" +version = "13.0.0" dependencies = [ "arrow", "datafusion-common", @@ -574,7 +574,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "12.0.0" +version = "13.0.0" dependencies = [ "arrow", "datafusion-common", @@ -1978,9 +1978,9 @@ checksum = 
"6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.24.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac9c312566fdfc45a38ecf1924013c82af2a7d5315e46f67b1cc987f12be260" +checksum = "0781f2b6bd03e5adf065c8e772b49eaea9f640d06a1b9130330fe8bd2563f4fd" dependencies = [ "log", ] From 4f34109db6c7cdaecf915745fd4708b4d628a9c7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 6 Oct 2022 15:19:48 -0600 Subject: [PATCH 3/3] CHANGELOG --- datafusion/CHANGELOG.md | 216 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index 62f970eabec..6cffc354f11 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,222 @@ # Changelog +## [13.0.0](https://github.com/apache/arrow-datafusion/tree/13.0.0) (2022-10-06) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/12.0.0...13.0.0) + +**Breaking changes:** + +- Make ObjectStoreProvider fallible \(return `Result` rather than `Option`\) [\#3584](https://github.com/apache/arrow-datafusion/pull/3584) ([tustvold](https://github.com/tustvold)) +- Make `OptimizerConfig` a builder style API [\#3525](https://github.com/apache/arrow-datafusion/pull/3525) ([alamb](https://github.com/alamb)) + +**Implemented enhancements:** + +- remove `type coercion` for ScalarUDF in the physical phase [\#3734](https://github.com/apache/arrow-datafusion/issues/3734) +- Allow with statements to specify their columns alongside their expression names [\#3716](https://github.com/apache/arrow-datafusion/issues/3716) +- Support SQLDataType::Timestamp\(TimezoneInfo\) [\#3693](https://github.com/apache/arrow-datafusion/issues/3693) +- support `type coercion` for case when expr [\#3673](https://github.com/apache/arrow-datafusion/issues/3673) +- Add simplification rules for the `Modulo` operator 
[\#3664](https://github.com/apache/arrow-datafusion/issues/3664) +- Add TIMESTAMPTZ [\#3659](https://github.com/apache/arrow-datafusion/issues/3659) +- Simplify `A * 0` and `A * null`. [\#3626](https://github.com/apache/arrow-datafusion/issues/3626) +- change rule of `PreCastLitInComparisonExpressions` to unwrap cast rule after \#3582 [\#3622](https://github.com/apache/arrow-datafusion/issues/3622) +- Optimize regex\_replace with a known pattern / replacement [\#3613](https://github.com/apache/arrow-datafusion/issues/3613) +- Simplify `CONCAT_WS(NULL, ..)` to `NULL` [\#3607](https://github.com/apache/arrow-datafusion/issues/3607) +- Add OctoSQL to list of systems powered by DataFusion [\#3605](https://github.com/apache/arrow-datafusion/issues/3605) +- Prevent over-allocation \(and spills\) on TopK queries [\#3596](https://github.com/apache/arrow-datafusion/issues/3596) +- Allow ObjectStoreProvider to return None \(return Result\ rather than Result\) [\#3594](https://github.com/apache/arrow-datafusion/issues/3594) +- simplify between expr should consider the data type [\#3587](https://github.com/apache/arrow-datafusion/issues/3587) +- make type coercion simple and remove the evaluate logic [\#3585](https://github.com/apache/arrow-datafusion/issues/3585) +- ReduceOuterJoin optimizer support `cast or try_cast` expr. 
[\#3565](https://github.com/apache/arrow-datafusion/issues/3565) +- Support type coercion for subquery [\#3557](https://github.com/apache/arrow-datafusion/issues/3557) +- Make `ParquetScanOptions` public and expose a reference to the scan options from `ParquetExec` [\#3550](https://github.com/apache/arrow-datafusion/issues/3550) +- Use `fetch` limit in `get_sorted_iter` [\#3544](https://github.com/apache/arrow-datafusion/issues/3544) +- Push limit to sort [\#3528](https://github.com/apache/arrow-datafusion/issues/3528) +- Execute sorts in parallel when limit is used after sort [\#3526](https://github.com/apache/arrow-datafusion/issues/3526) +- Consolidate optimizer passes in optimizer module for better testing [\#3524](https://github.com/apache/arrow-datafusion/issues/3524) +- Support Top-K query optimization for `ORDER BY \ \[ASC [\#3515](https://github.com/apache/arrow-datafusion/issues/3515) +- support the type coercion for `like` `unlike` `istrue` `isfalse` `isunknown` [\#3509](https://github.com/apache/arrow-datafusion/issues/3509) +- Automate the pushing of releases to Homebrew [\#3506](https://github.com/apache/arrow-datafusion/issues/3506) +- Add extra DATE\_PART units that are already supported in arrow-rs [\#3502](https://github.com/apache/arrow-datafusion/issues/3502) +- Release datafusion-cli 12.0.0 on Homebrew [\#3501](https://github.com/apache/arrow-datafusion/issues/3501) +- Make `from_proto_binary_op` public [\#3489](https://github.com/apache/arrow-datafusion/issues/3489) +- coercion between decimal and other types lacking, compared to other numeric types [\#3479](https://github.com/apache/arrow-datafusion/issues/3479) +- move type coercion for inlist from physical phase to logical phase [\#3468](https://github.com/apache/arrow-datafusion/issues/3468) +- Make `datafusion::physical_plan::file_format::file_stream::FileStream` public [\#3466](https://github.com/apache/arrow-datafusion/issues/3466) +- Support using offset index in 
`ParquetRecordBatchStream` when pushing down `RowFilter` [\#3456](https://github.com/apache/arrow-datafusion/issues/3456) +- Support timestamp data type in In\_list node [\#3449](https://github.com/apache/arrow-datafusion/issues/3449) +- Evaluate expressions after type coercion [\#3431](https://github.com/apache/arrow-datafusion/issues/3431) +- Make a convenience function to register a single `RecordBatch` as a table from SessionContext [\#3426](https://github.com/apache/arrow-datafusion/issues/3426) +- add datafusion-cli support of external table locations that object\_store supports [\#3424](https://github.com/apache/arrow-datafusion/issues/3424) +- pruning support cast/try\_cast expr [\#3414](https://github.com/apache/arrow-datafusion/issues/3414) +- Add documentation on querying against files in object store such as S3 [\#3399](https://github.com/apache/arrow-datafusion/issues/3399) +- Remove type-coercion from physical planner [\#3388](https://github.com/apache/arrow-datafusion/issues/3388) +- support `Statement::ShowVariable` to show session configs [\#3364](https://github.com/apache/arrow-datafusion/issues/3364) +- Support `RowFilter` in `ParquetExec` [\#3360](https://github.com/apache/arrow-datafusion/issues/3360) +- Apply `TypeCoercion` rule before `FilterPushDown` [\#3289](https://github.com/apache/arrow-datafusion/issues/3289) +- Add support for `get` / `show` timezone [\#3255](https://github.com/apache/arrow-datafusion/issues/3255) +- Consider adding DataFusion to ClickBench benchmarks [\#2902](https://github.com/apache/arrow-datafusion/issues/2902) +- `filter_push_down` panics on semi/anti join with join filters [\#2888](https://github.com/apache/arrow-datafusion/issues/2888) +- Migrate the `cross join -> inner join optimization` from the planner to the optimizer [\#2859](https://github.com/apache/arrow-datafusion/issues/2859) +- ObjectStore write support [\#2185](https://github.com/apache/arrow-datafusion/issues/2185) +- DataFusion should scan Parquet 
statistics once per query [\#871](https://github.com/apache/arrow-datafusion/issues/871) +- Extend & generalize constant folding / evaluation in logical optimizer [\#237](https://github.com/apache/arrow-datafusion/issues/237) + +**Fixed bugs:** + +- `projection_push_down` produces invalid aggregate plans in some cases [\#3738](https://github.com/apache/arrow-datafusion/issues/3738) +- `Time With Time Zone` should raise error until `DataType::Time64` support tz [\#3715](https://github.com/apache/arrow-datafusion/issues/3715) +- SQL Planner doesn't distinguish normal CTEs from the recursive ones. [\#3713](https://github.com/apache/arrow-datafusion/issues/3713) +- Fix inconsistency between column name formats [\#3711](https://github.com/apache/arrow-datafusion/issues/3711) +- Optimizer rule 'projection\_push\_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/arrow-datafusion/issues/3704) +- Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/arrow-datafusion/issues/3690) +- Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/arrow-datafusion/issues/3685) +- Specialized regexp\_replace should early-abort when the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/arrow-datafusion/issues/3646) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/arrow-datafusion/issues/3645) +- Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/arrow-datafusion/issues/3644) +- LEFT JOIN not working as expected, 
error message is confusing [\#3639](https://github.com/apache/arrow-datafusion/issues/3639) +- `INTERSECT` and `EXCEPT` don't return an error when 2 sets have the different number of columns [\#3632](https://github.com/apache/arrow-datafusion/issues/3632) +- The datafusion-cli panics when `union` 2 table with different number of columns. [\#3630](https://github.com/apache/arrow-datafusion/issues/3630) +- The expression `col(a) / null` is not optimized. [\#3624](https://github.com/apache/arrow-datafusion/issues/3624) +- `s3_build_error` test may fail in some environments [\#3601](https://github.com/apache/arrow-datafusion/issues/3601) +- New clippy errors appear to break the CI on the master [\#3597](https://github.com/apache/arrow-datafusion/issues/3597) +- `StringConcat` gives inconsistent result with `concat` when containing `null` [\#3569](https://github.com/apache/arrow-datafusion/issues/3569) +- simplify\_expressions don't support different data type for binary [\#3556](https://github.com/apache/arrow-datafusion/issues/3556) +- Broken logical plan serialization for aggregation queries [\#3555](https://github.com/apache/arrow-datafusion/issues/3555) +- Aggregate filters do not get pushed down to table scan [\#3546](https://github.com/apache/arrow-datafusion/issues/3546) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) +- DataFusion serialization doesn't handle `ScalarValue::Dictionary, Binary, LargeBinary, Time64, IntervalMonthDayNano, Struct` [\#3531](https://github.com/apache/arrow-datafusion/issues/3531) +- What should be returned when trying to get a config in invalid format? 
[\#3505](https://github.com/apache/arrow-datafusion/issues/3505) +- Dividing decimal type gives wrong error: "170141183460469231731687303715884105727 is too large to store in a Decimal128 [\#3498](https://github.com/apache/arrow-datafusion/issues/3498) +- Add BitwiseXor in function `from_proto_binary_op` [\#3495](https://github.com/apache/arrow-datafusion/issues/3495) +- comparison operations with a scalar null and decimal array panics [\#3487](https://github.com/apache/arrow-datafusion/issues/3487) +- Union columns with different types [\#3467](https://github.com/apache/arrow-datafusion/issues/3467) +- Can't get the right logical plan after optimizer [\#3421](https://github.com/apache/arrow-datafusion/issues/3421) +- Fix conflict between simplify\_expression rule and CAST expressions [\#3409](https://github.com/apache/arrow-datafusion/issues/3409) +- Empty array giving error [\#2439](https://github.com/apache/arrow-datafusion/issues/2439) +- Internal error: Unsupported data type in hasher: FixedSizeBinary\(16\) [\#1516](https://github.com/apache/arrow-datafusion/issues/1516) +- Predicates on to\_timestamp do not work as expected with "naive" timestamp strings [\#765](https://github.com/apache/arrow-datafusion/issues/765) +- Address performance/execution plan of TPCH query 19 [\#78](https://github.com/apache/arrow-datafusion/issues/78) +- Bug fix: expr\_visitor was not visiting aggregate filter expressions [\#3548](https://github.com/apache/arrow-datafusion/pull/3548) ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- Publish 8.0.0 user guide [\#2558](https://github.com/apache/arrow-datafusion/issues/2558) +- MINOR: Add Dask SQL to list of projects powered by DataFusion [\#3581](https://github.com/apache/arrow-datafusion/pull/3581) ([andygrove](https://github.com/andygrove)) +- Add Parseable as Datafusion user [\#3471](https://github.com/apache/arrow-datafusion/pull/3471) ([nitisht](https://github.com/nitisht)) + +**Closed issues:** + +- 
Upgrade to Arrow 24.0.0 [\#3689](https://github.com/apache/arrow-datafusion/issues/3689) +- what's the best practice to get a single value from arrow array? [\#3497](https://github.com/apache/arrow-datafusion/issues/3497) +- The data type of predicate in the row filter should be same in the binary expr [\#3469](https://github.com/apache/arrow-datafusion/issues/3469) +- Extend constant folding and parquet filtering support [\#188](https://github.com/apache/arrow-datafusion/issues/188) +- Add FORMAT to explain plan and an easy to visualize format [\#96](https://github.com/apache/arrow-datafusion/issues/96) + +**Merged pull requests:** + +- Build aggregate schema in Aggregate::try\_new [\#3739](https://github.com/apache/arrow-datafusion/pull/3739) ([andygrove](https://github.com/andygrove)) +- delete type coercion for scalar udf in the physical phase [\#3735](https://github.com/apache/arrow-datafusion/pull/3735) ([liukun4515](https://github.com/liukun4515)) +- Consolidate coercion code in `datafusion_expr::type_coercion` and submodules [\#3728](https://github.com/apache/arrow-datafusion/pull/3728) ([alamb](https://github.com/alamb)) +- Skip filter push down on semi/anti joins [\#3723](https://github.com/apache/arrow-datafusion/pull/3723) ([andygrove](https://github.com/andygrove)) +- Raise `Unsupported SQL type` for `Time(WithTimeZone)` and `Time(Tz)` [\#3718](https://github.com/apache/arrow-datafusion/pull/3718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Support column aliases specified by `WITH` statements [\#3717](https://github.com/apache/arrow-datafusion/pull/3717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- Reject recursive CTEs before processing the sub-expressions [\#3714](https://github.com/apache/arrow-datafusion/pull/3714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([isidentical](https://github.com/isidentical)) +- Make column name consistent between Expr::name and Display/Debug [\#3712](https://github.com/apache/arrow-datafusion/pull/3712) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Fix aggregate type coercion bug [\#3710](https://github.com/apache/arrow-datafusion/pull/3710) ([alamb](https://github.com/alamb)) +- MINOR: Add `Expr::canonical_name` and improve docs on `Expr::name` [\#3706](https://github.com/apache/arrow-datafusion/pull/3706) ([andygrove](https://github.com/andygrove)) +- Remove type coercions from ScalarValue and aggregation function code [\#3705](https://github.com/apache/arrow-datafusion/pull/3705) ([ozankabak](https://github.com/ozankabak)) +- `unwrap_cast_in_comparison`: fix bug which can find the field for the schema [\#3699](https://github.com/apache/arrow-datafusion/pull/3699) ([liukun4515](https://github.com/liukun4515)) +- bump sql-parser 0.25 [\#3698](https://github.com/apache/arrow-datafusion/pull/3698) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Move optimizer init to optimizer crate [\#3692](https://github.com/apache/arrow-datafusion/pull/3692) ([andygrove](https://github.com/andygrove)) +- Upgrade `arrow` `parquet` and `arrow-flight` to 24.0.0 [\#3691](https://github.com/apache/arrow-datafusion/pull/3691) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix bug in dictionary coercion and allow better coercion [\#3688](https://github.com/apache/arrow-datafusion/pull/3688) ([alamb](https://github.com/alamb)) +- \[MINOR\] Improve docstrings in binary\_rule.rs [\#3687](https://github.com/apache/arrow-datafusion/pull/3687) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add `ScalarValue::new_utf8`, clean up creation of literals in casting tests [\#3680](https://github.com/apache/arrow-datafusion/pull/3680) 
([alamb](https://github.com/alamb)) +- Disable code coverage until we figure out why it is broken [\#3679](https://github.com/apache/arrow-datafusion/pull/3679) ([alamb](https://github.com/alamb)) +- move `type coercion` for case when expr [\#3676](https://github.com/apache/arrow-datafusion/pull/3676) ([liukun4515](https://github.com/liukun4515)) +- Update sqlparser to 0.24.0 [\#3675](https://github.com/apache/arrow-datafusion/pull/3675) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fail if field lengths are not same in INTERSECT and EXCEPT [\#3674](https://github.com/apache/arrow-datafusion/pull/3674) ([askoa](https://github.com/askoa)) +- Simplification Rules for Modulo Operator [\#3669](https://github.com/apache/arrow-datafusion/pull/3669) ([askoa](https://github.com/askoa)) +- change pre\_cast\_lit\_in\_comparison to unwrap\_cast\_in\_comparison [\#3662](https://github.com/apache/arrow-datafusion/pull/3662) ([liukun4515](https://github.com/liukun4515)) +- restore optimization for `between` in simplify expression rule [\#3661](https://github.com/apache/arrow-datafusion/pull/3661) ([liukun4515](https://github.com/liukun4515)) +- add timestamptz [\#3660](https://github.com/apache/arrow-datafusion/pull/3660) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- remove the type coercion in the simplify\_expressions rule [\#3657](https://github.com/apache/arrow-datafusion/pull/3657) ([liukun4515](https://github.com/liukun4515)) +- Cache collected file statistics [\#3649](https://github.com/apache/arrow-datafusion/pull/3649) ([mateuszkj](https://github.com/mateuszkj)) +- make regexp\_replace early abort with empty input [\#3648](https://github.com/apache/arrow-datafusion/pull/3648) ([isidentical](https://github.com/isidentical)) +- Check each query has same number of columns when building the UNION plan 
[\#3638](https://github.com/apache/arrow-datafusion/pull/3638) ([HaoYang670](https://github.com/HaoYang670)) +- move the `type coercion` to the beginning of the optimizer rule and support type coercion for subquery [\#3636](https://github.com/apache/arrow-datafusion/pull/3636) ([liukun4515](https://github.com/liukun4515)) +- Add documentation for querying S3 data with CLI [\#3631](https://github.com/apache/arrow-datafusion/pull/3631) ([andygrove](https://github.com/andygrove)) +- Simplify multiplication by `0` and by `null` [\#3627](https://github.com/apache/arrow-datafusion/pull/3627) ([HaoYang670](https://github.com/HaoYang670)) +- Simplify null division. [\#3625](https://github.com/apache/arrow-datafusion/pull/3625) ([HaoYang670](https://github.com/HaoYang670)) +- support cast/try\_cast expr in reduceOuterJoin [\#3621](https://github.com/apache/arrow-datafusion/pull/3621) ([AssHero](https://github.com/AssHero)) +- MINOR: fix TPC-H conversion function to not miss a row of data [\#3620](https://github.com/apache/arrow-datafusion/pull/3620) ([kmitchener](https://github.com/kmitchener)) +- Document ObjectStoreProvider [\#3619](https://github.com/apache/arrow-datafusion/pull/3619) ([tustvold](https://github.com/tustvold)) +- \[feat\] Support using offset index in ParquetRecordBatchStream when pu… [\#3616](https://github.com/apache/arrow-datafusion/pull/3616) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize `regex_replace` for scalar patterns [\#3614](https://github.com/apache/arrow-datafusion/pull/3614) ([isidentical](https://github.com/isidentical)) +- Simplify `concat_ws(null, ..)` to `null` [\#3608](https://github.com/apache/arrow-datafusion/pull/3608) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: improve docstrings on SessionContext [\#3603](https://github.com/apache/arrow-datafusion/pull/3603) ([alamb](https://github.com/alamb)) +- Merge s3\_success and s3\_build\_error tests into one test 
[\#3602](https://github.com/apache/arrow-datafusion/pull/3602) ([Licht-T](https://github.com/Licht-T)) +- add `register_batch` and `read_batch` to `SessionContext` to register a single RecordBatch as a table [\#3600](https://github.com/apache/arrow-datafusion/pull/3600) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- \[CI\] Fix the newly added linting errors to make clippy happy [\#3598](https://github.com/apache/arrow-datafusion/pull/3598) ([isidentical](https://github.com/isidentical)) +- Prevent over-allocations \(and spills\) on sorts with a fixed limit [\#3593](https://github.com/apache/arrow-datafusion/pull/3593) ([isidentical](https://github.com/isidentical)) +- update datafusion cli deps [\#3588](https://github.com/apache/arrow-datafusion/pull/3588) ([Jimexist](https://github.com/Jimexist)) +- Update cranelift\* dependencies `0.87` --\> `0.88` [\#3586](https://github.com/apache/arrow-datafusion/pull/3586) ([alamb](https://github.com/alamb)) +- Fix docs.rs [\#3580](https://github.com/apache/arrow-datafusion/pull/3580) ([avantgardnerio](https://github.com/avantgardnerio)) +- Fix build [\#3576](https://github.com/apache/arrow-datafusion/pull/3576) ([alamb](https://github.com/alamb)) +- Use consistent name for `TimeUnit::Millisecond` [\#3575](https://github.com/apache/arrow-datafusion/pull/3575) ([alamb](https://github.com/alamb)) +- Fix logical plan serialization [\#3574](https://github.com/apache/arrow-datafusion/pull/3574) ([thinkharderdev](https://github.com/thinkharderdev)) +- Custom window frame logic \(support `ROWS`, `RANGE`, `PRECEDING` and `FOLLOWING` for window functions\) [\#3570](https://github.com/apache/arrow-datafusion/pull/3570) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([metesynnada](https://github.com/metesynnada)) +- fix comparison of decimal array with null scalar [\#3567](https://github.com/apache/arrow-datafusion/pull/3567) ([kmitchener](https://github.com/kmitchener)) +- Reduce dependencies of `datafusion-sql` crate 
[\#3566](https://github.com/apache/arrow-datafusion/pull/3566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- Update pbjson-types requirement from 0.3 to 0.5 [\#3560](https://github.com/apache/arrow-datafusion/pull/3560) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson requirement from 0.3 to 0.5 [\#3559](https://github.com/apache/arrow-datafusion/pull/3559) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson-build requirement from 0.3 to 0.5 [\#3558](https://github.com/apache/arrow-datafusion/pull/3558) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable q19 in TPCH [\#3553](https://github.com/apache/arrow-datafusion/pull/3553) ([kmitchener](https://github.com/kmitchener)) +- MINOR: remove out-of-date is\_dictionary checks from binary\_rule.rs [\#3552](https://github.com/apache/arrow-datafusion/pull/3552) ([kmitchener](https://github.com/kmitchener)) +- Make ParquetScanOptions public and add method to get a reference from… [\#3551](https://github.com/apache/arrow-datafusion/pull/3551) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix coercion of null for decimal math in binary\_rules [\#3549](https://github.com/apache/arrow-datafusion/pull/3549) ([kmitchener](https://github.com/kmitchener)) +- Use `fetch` limit in get\_sorted\_iter [\#3545](https://github.com/apache/arrow-datafusion/pull/3545) ([Dandandan](https://github.com/Dandandan)) +- feat: allow object store registration from datafusion-cli [\#3540](https://github.com/apache/arrow-datafusion/pull/3540) ([turbo1912](https://github.com/turbo1912)) +- Actually test that `ScalarValue`s are the same after round trip serialization [\#3537](https://github.com/apache/arrow-datafusion/pull/3537) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Struct` [\#3536](https://github.com/apache/arrow-datafusion/pull/3536) ([alamb](https://github.com/alamb)) +- Add 
serialization of `ScalarValue::IntervalMonthDayNano` [\#3535](https://github.com/apache/arrow-datafusion/pull/3535) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Binary` and `ScalarValue::LargeBinary`, `ScalarValue::Time64` [\#3534](https://github.com/apache/arrow-datafusion/pull/3534) ([alamb](https://github.com/alamb)) +- MINOR: Impl `Debug` for TableReference and ResolvedTableReference [\#3533](https://github.com/apache/arrow-datafusion/pull/3533) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add support for serializing `ScalarValue::Dictionary` to datafusion-proto [\#3532](https://github.com/apache/arrow-datafusion/pull/3532) ([alamb](https://github.com/alamb)) +- Push down limit to sort [\#3530](https://github.com/apache/arrow-datafusion/pull/3530) ([Dandandan](https://github.com/Dandandan)) +- Execute sort in parallel when a limit is used after sort [\#3527](https://github.com/apache/arrow-datafusion/pull/3527) ([Dandandan](https://github.com/Dandandan)) +- Config support type conversion [\#3522](https://github.com/apache/arrow-datafusion/pull/3522) ([comphead](https://github.com/comphead)) +- MINOR: Add more execs to list of supported execs [\#3519](https://github.com/apache/arrow-datafusion/pull/3519) ([andygrove](https://github.com/andygrove)) +- fix divide by zero not throwing proper error for decimal [\#3517](https://github.com/apache/arrow-datafusion/pull/3517) ([kmitchener](https://github.com/kmitchener)) +- Make FileStream and FileOpener public [\#3514](https://github.com/apache/arrow-datafusion/pull/3514) ([thinkharderdev](https://github.com/thinkharderdev)) +- feat: Union types coercion [\#3513](https://github.com/apache/arrow-datafusion/pull/3513) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) +- \[DataFrame\] - Add cache function for DataFrame 
[\#3512](https://github.com/apache/arrow-datafusion/pull/3512) ([francis-du](https://github.com/francis-du)) +- type coercion: support is/is\_not\_`bool`/like/unknown expr [\#3510](https://github.com/apache/arrow-datafusion/pull/3510) ([liukun4515](https://github.com/liukun4515)) +- MINOR: remove unused dependencies [\#3508](https://github.com/apache/arrow-datafusion/pull/3508) ([waynexia](https://github.com/waynexia)) +- Automate postrelease publishing to Homebrew [\#3507](https://github.com/apache/arrow-datafusion/pull/3507) ([iajoiner](https://github.com/iajoiner)) +- Add additional DATE\_PART units [\#3503](https://github.com/apache/arrow-datafusion/pull/3503) ([jonmmease](https://github.com/jonmmease)) +- Add BitwiseXor in function from\_proto\_binary\_op [\#3496](https://github.com/apache/arrow-datafusion/pull/3496) ([askoa](https://github.com/askoa)) +- Make the function from\_proto\_binary\_op public [\#3490](https://github.com/apache/arrow-datafusion/pull/3490) ([askoa](https://github.com/askoa)) +- minor: fix bug in `downcast_value!` macro \(`T` --\> `$T`\) [\#3486](https://github.com/apache/arrow-datafusion/pull/3486) ([alamb](https://github.com/alamb)) +- add time\_zone into ConfigOptions [\#3485](https://github.com/apache/arrow-datafusion/pull/3485) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- \[MINOR\] Change `downcast_value!` macro so it does not need to use `use std::any::type_name;` [\#3484](https://github.com/apache/arrow-datafusion/pull/3484) ([alamb](https://github.com/alamb)) +- Convert more cross joins to inner joins \(Address performance/execution plan of TPCH query 19\) [\#3482](https://github.com/apache/arrow-datafusion/pull/3482) ([DhamoPS](https://github.com/DhamoPS)) +- \[minor\] Remove unused arg in macro in Inlist [\#3474](https://github.com/apache/arrow-datafusion/pull/3474) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- inlist: move type coercion to logical phase 
[\#3472](https://github.com/apache/arrow-datafusion/pull/3472) ([liukun4515](https://github.com/liukun4515)) +- Use the column data type as the NULL data type in the row filter [\#3470](https://github.com/apache/arrow-datafusion/pull/3470) ([liukun4515](https://github.com/liukun4515)) +- apply type coercion before filter pushdown [\#3459](https://github.com/apache/arrow-datafusion/pull/3459) ([liukun4515](https://github.com/liukun4515)) +- add FixedSizeBinary support to create\_hashes [\#3458](https://github.com/apache/arrow-datafusion/pull/3458) ([mcassels](https://github.com/mcassels)) +- Support ShowVariable Statement [\#3455](https://github.com/apache/arrow-datafusion/pull/3455) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add additional pruning tests with casts, handle unsupported predicates better [\#3454](https://github.com/apache/arrow-datafusion/pull/3454) ([alamb](https://github.com/alamb)) +- Add `InList` support for timestamp type. \(\#3449\) [\#3450](https://github.com/apache/arrow-datafusion/pull/3450) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Evaluate expressions after type coercion [\#3444](https://github.com/apache/arrow-datafusion/pull/3444) ([Dandandan](https://github.com/Dandandan)) +- remove type coercion in the binary physical expr [\#3396](https://github.com/apache/arrow-datafusion/pull/3396) ([liukun4515](https://github.com/liukun4515)) +- Use arrow row format in SortPreservingMerge ~50-70% faster [\#3386](https://github.com/apache/arrow-datafusion/pull/3386) ([tustvold](https://github.com/tustvold)) +- Pushdown `RowFilter` in `ParquetExec` [\#3380](https://github.com/apache/arrow-datafusion/pull/3380) ([thinkharderdev](https://github.com/thinkharderdev)) + + ## [12.0.0](https://github.com/apache/arrow-datafusion/tree/12.0.0) (2022-09-12) [Full Changelog](https://github.com/apache/arrow-datafusion/compare/11.0.0...12.0.0)