diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index adb9c7dbddc6..3f041dd7eafa 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "10.0.0" +version = "11.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "arrow", "datafusion", "query", "sql" ] @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = { version = "20.0.0" } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "10.0.0" } +datafusion = { path = "../datafusion/core", version = "11.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index 8aca2af71748..2f00aa60b0b0 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,250 @@ # Changelog +## [11.0.0](https://github.com/apache/arrow-datafusion/tree/11.0.0) (2022-08-16) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0-rc1...11.0.0) + +**Breaking changes:** + +- Implement exact median, add `AggregateState` [\#3009](https://github.com/apache/arrow-datafusion/pull/3009) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) + +**Implemented enhancements:** + +- Make RowAccumulator public [\#3138](https://github.com/apache/arrow-datafusion/issues/3138) +- docs: proposal for consolidating docs into a Contributor Guide [\#3127](https://github.com/apache/arrow-datafusion/issues/3127) +- feat: support Timestamp +/- Interval [\#3103](https://github.com/apache/arrow-datafusion/issues/3103) +- a `arrow_typeof` like posgresql's `pg_typeof` [\#3095](https://github.com/apache/arrow-datafusion/issues/3095) +- Add DataFrame section to user guide [\#3066](https://github.com/apache/arrow-datafusion/issues/3066) +- 
Document all scalar SQL functions in user guide [\#3065](https://github.com/apache/arrow-datafusion/issues/3065) +- Simplify implementation of approx\_median so that it can be exposed in Python [\#3063](https://github.com/apache/arrow-datafusion/issues/3063) +- Support double quoted literal strings for dialects\(such as mysql,bigquery\) [\#3055](https://github.com/apache/arrow-datafusion/issues/3055) +- Simplify / speed up implementation of character\_length to unicode points [\#3049](https://github.com/apache/arrow-datafusion/issues/3049) +- Follow-up on Clickbench benchmark [\#3048](https://github.com/apache/arrow-datafusion/issues/3048) +- Why the PhysicalPlanner is an async trait ? [\#3032](https://github.com/apache/arrow-datafusion/issues/3032) +- Optimize file stream metrics. [\#3024](https://github.com/apache/arrow-datafusion/issues/3024) +- Proposal: Enable typed strings expressions for VALUES clause [\#3017](https://github.com/apache/arrow-datafusion/issues/3017) +- Proposal: Add `date_bin` function [\#3015](https://github.com/apache/arrow-datafusion/issues/3015) +- The upcoming release of Arrow \(20?\) breaks datafusion [\#3006](https://github.com/apache/arrow-datafusion/issues/3006) +- Can I select some files for query based on the filtering rules in the directory? 
[\#2993](https://github.com/apache/arrow-datafusion/issues/2993) +- Rename FormatReader to FileOpener [\#2990](https://github.com/apache/arrow-datafusion/issues/2990) +- Derive `Hash` trait for `JoinType` [\#2971](https://github.com/apache/arrow-datafusion/issues/2971) +- CAST from Utf8 to Boolean [\#2967](https://github.com/apache/arrow-datafusion/issues/2967) +- Add baseline\_metrics for FileStream to record metrics like elapsed time, record output, etc [\#2961](https://github.com/apache/arrow-datafusion/issues/2961) +- Example to show how to convert query result into rust struct [\#2959](https://github.com/apache/arrow-datafusion/issues/2959) +- simplify not clause [\#2957](https://github.com/apache/arrow-datafusion/issues/2957) +- Implement Debug for ColumnarValue [\#2950](https://github.com/apache/arrow-datafusion/issues/2950) +- Parallel fetching of column chunks when reading parquet files [\#2949](https://github.com/apache/arrow-datafusion/issues/2949) +- Extension mechanism for `SessionConfig` [\#2939](https://github.com/apache/arrow-datafusion/issues/2939) +- Streaming CSV/JSON Object Store Read [\#2935](https://github.com/apache/arrow-datafusion/issues/2935) +- Support CSV Limit Pushdown to Object Storage [\#2930](https://github.com/apache/arrow-datafusion/issues/2930) +- Add support for `pow` scalar function [\#2926](https://github.com/apache/arrow-datafusion/issues/2926) +- Add support for exact `median` aggregate function [\#2925](https://github.com/apache/arrow-datafusion/issues/2925) +- Support `mean` as synonym for `avg` [\#2922](https://github.com/apache/arrow-datafusion/issues/2922) +- Rename a column name [\#2919](https://github.com/apache/arrow-datafusion/issues/2919) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `core` [\#2913](https://github.com/apache/arrow-datafusion/issues/2913) +- Fail gracefully if optimization rule fails [\#2908](https://github.com/apache/arrow-datafusion/issues/2908) +- Make 
ObjectStoreRegistry as a trait which can allow Ballista to introduce a self registry ObjectStoreRegistry [\#2905](https://github.com/apache/arrow-datafusion/issues/2905) +- Remove datafusion-data-access crate [\#2903](https://github.com/apache/arrow-datafusion/issues/2903) +- Improve formatting of logical plans containing subquery expressions [\#2898](https://github.com/apache/arrow-datafusion/issues/2898) +- Atan2 added to built-in functions [\#2897](https://github.com/apache/arrow-datafusion/issues/2897) +- The explain statements only print logical plans for debug/other purpose. [\#2894](https://github.com/apache/arrow-datafusion/issues/2894) +- JSON version of `display_indent()` [\#2889](https://github.com/apache/arrow-datafusion/issues/2889) +- It would be nice to have a way to generate unique IDs in optimizer rules [\#2886](https://github.com/apache/arrow-datafusion/issues/2886) +- Add support for `TIME` literal values [\#2883](https://github.com/apache/arrow-datafusion/issues/2883) +- Add h2o benchmark [\#2879](https://github.com/apache/arrow-datafusion/issues/2879) +- Implement `from_unixtime` function [\#2871](https://github.com/apache/arrow-datafusion/issues/2871) +- Add `cast` function for creating logical cast expression [\#2870](https://github.com/apache/arrow-datafusion/issues/2870) +- Release DataFusion 10.0.0 [\#2862](https://github.com/apache/arrow-datafusion/issues/2862) +- Implement `information_schema.views` [\#2857](https://github.com/apache/arrow-datafusion/issues/2857) +- Migrate from avro\_rs to apache\_avro [\#2783](https://github.com/apache/arrow-datafusion/issues/2783) +- Add optimizer rule to remove `OFFSET 0` [\#2584](https://github.com/apache/arrow-datafusion/issues/2584) +- Preserve Element Name in ScalarValue::List [\#2450](https://github.com/apache/arrow-datafusion/issues/2450) +- Add EXISTS subquery support to Ballista [\#2338](https://github.com/apache/arrow-datafusion/issues/2338) +- Add documentation on supported functions to 
datafusion website [\#1487](https://github.com/apache/arrow-datafusion/issues/1487) +- documentations for datafusion-cli can be consolidated a bit more [\#1352](https://github.com/apache/arrow-datafusion/issues/1352) +- Optimizer: Predicate Rewrite pass for TPCH Q19 [\#217](https://github.com/apache/arrow-datafusion/issues/217) +- feat: add optimize rule `rewrite_disjunctive_predicate` [\#2858](https://github.com/apache/arrow-datafusion/pull/2858) ([xudong963](https://github.com/xudong963)) + +**Fixed bugs:** + +- Regression in SQL support for `ORDER BY` and aliased expressions [\#3160](https://github.com/apache/arrow-datafusion/issues/3160) +- panic when deal with `@` operator [\#3137](https://github.com/apache/arrow-datafusion/issues/3137) +- Incorrect type coercion rule for date + interval [\#3093](https://github.com/apache/arrow-datafusion/issues/3093) +- Cast string to timestamp crash while we input time before 1970 with floating number second [\#3082](https://github.com/apache/arrow-datafusion/issues/3082) +- INTEGER type doesn't work while importing csv [\#3059](https://github.com/apache/arrow-datafusion/issues/3059) +- Cannot GROUP BY Binary [\#3050](https://github.com/apache/arrow-datafusion/issues/3050) +- incorrect i32 coercion for `to_timestamp` [\#3046](https://github.com/apache/arrow-datafusion/issues/3046) +- Error pruning `IsNull` expressions: Column 'instance\_null\_count' is declared as non-nullable but contains null values [\#3042](https://github.com/apache/arrow-datafusion/issues/3042) +- I want to query some files in a directory. Is there any way?
[\#3013](https://github.com/apache/arrow-datafusion/issues/3013) +- The expression to get an indexed field is only valid for `List` types \(`common_sub_expression_eliminate`\) [\#3002](https://github.com/apache/arrow-datafusion/issues/3002) +- Double to\_timestamp\_seconds produces abnormal result [\#2998](https://github.com/apache/arrow-datafusion/issues/2998) +- External parquet table fails when schema contains differing key / value metadata [\#2982](https://github.com/apache/arrow-datafusion/issues/2982) +- SELECT on column with uppercase column name fails with FieldNotFound error [\#2978](https://github.com/apache/arrow-datafusion/issues/2978) +- panic reading AWS-generated parquet file [\#2963](https://github.com/apache/arrow-datafusion/issues/2963) +- Can't filter rowgroup for parquet prune for some data type [\#2962](https://github.com/apache/arrow-datafusion/issues/2962) +- CI test is failing with ` final link failed: No space left on device` [\#2947](https://github.com/apache/arrow-datafusion/issues/2947) +- bug: new ObjectStore breaks backward compatibility with contrib plugins [\#2931](https://github.com/apache/arrow-datafusion/issues/2931) +- bug: file types handled wrong [\#2929](https://github.com/apache/arrow-datafusion/issues/2929) +- bug: changing the number of partitions does not increase concurrency [\#2928](https://github.com/apache/arrow-datafusion/issues/2928) +- csv\_explain fails on RC verifier [\#2916](https://github.com/apache/arrow-datafusion/issues/2916) +- index out of range error from datafusion\_row::write::write\_field [\#2910](https://github.com/apache/arrow-datafusion/issues/2910) +- Optimization rule `CommonSubexprEliminate` creates invalid projections [\#2907](https://github.com/apache/arrow-datafusion/issues/2907) +- serde\_json requires that either `std` \(default\) or `alloc` feature is enabled [\#2896](https://github.com/apache/arrow-datafusion/issues/2896) +- Inconsistent type coercion rules with comparison expressions 
[\#2890](https://github.com/apache/arrow-datafusion/issues/2890) +- Doc Error: the test directory link 404 which is in CONTRIBUTING.md [\#2880](https://github.com/apache/arrow-datafusion/issues/2880) +- Round trips through `ScalarValue`'s sometimes don't preserve types \(e.g. change types from `DictionaryArray`\) [\#2874](https://github.com/apache/arrow-datafusion/issues/2874) +- Error with CASE and DictionaryArrays: `ArrowError(InvalidArgumentError("arguments need to have the same data type"))` [\#2873](https://github.com/apache/arrow-datafusion/issues/2873) +- window functions not supported in expressions [\#2869](https://github.com/apache/arrow-datafusion/issues/2869) +- Unable to work with month intervals [\#2796](https://github.com/apache/arrow-datafusion/issues/2796) +- Discord invite link in communication page has expired [\#2743](https://github.com/apache/arrow-datafusion/issues/2743) +- Test \(path normalization\) failures while verifying release candidate 9.0.0 RC1 [\#2719](https://github.com/apache/arrow-datafusion/issues/2719) +- Reading parquet with \(pre-release\) arrow fails with "out of order projection is not supported" [\#2543](https://github.com/apache/arrow-datafusion/issues/2543) +- Fix SQL planner bug when resolving columns with same name as a relation [\#3003](https://github.com/apache/arrow-datafusion/pull/3003) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- fix `RowWriter` index out of bounds error [\#2968](https://github.com/apache/arrow-datafusion/pull/2968) ([comphead](https://github.com/comphead)) +- fix: support decimal statistic for row group prune [\#2966](https://github.com/apache/arrow-datafusion/pull/2966) ([liukun4515](https://github.com/liukun4515)) +- Fix invalid projection in `CommonSubexprEliminate` [\#2915](https://github.com/apache/arrow-datafusion/pull/2915) ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- MINOR: Fix broken links 
in contrib guide [\#3135](https://github.com/apache/arrow-datafusion/pull/3135) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move expressions to top-level page [\#3134](https://github.com/apache/arrow-datafusion/pull/3134) ([andygrove](https://github.com/andygrove)) +- User Guide: Combine CLI pages [\#3133](https://github.com/apache/arrow-datafusion/pull/3133) ([andygrove](https://github.com/andygrove)) +- User Guide: Add documentation for JOIN syntax [\#3130](https://github.com/apache/arrow-datafusion/pull/3130) ([andygrove](https://github.com/andygrove)) +- separate contributors guide [\#3128](https://github.com/apache/arrow-datafusion/pull/3128) ([kmitchener](https://github.com/kmitchener)) +- minor: remove python docs, now they're in another project [\#3119](https://github.com/apache/arrow-datafusion/pull/3119) ([kmitchener](https://github.com/kmitchener)) +- minor: doc fixes: fix link to datafusion-python project and add link to slides for rece… [\#3118](https://github.com/apache/arrow-datafusion/pull/3118) ([kmitchener](https://github.com/kmitchener)) +- Add all scalar SQL functions to user guide [\#3090](https://github.com/apache/arrow-datafusion/pull/3090) ([andygrove](https://github.com/andygrove)) +- Add DataFrame reference to the user guide [\#3067](https://github.com/apache/arrow-datafusion/pull/3067) ([andygrove](https://github.com/andygrove)) +- MINOR: Add CeresDB to list of products using DataFusion [\#3060](https://github.com/apache/arrow-datafusion/pull/3060) ([andygrove](https://github.com/andygrove)) +- Minor: improve some docstrings about pruning [\#3041](https://github.com/apache/arrow-datafusion/pull/3041) ([alamb](https://github.com/alamb)) +- doc: add a new video link about datafusion [\#3025](https://github.com/apache/arrow-datafusion/pull/3025) ([xudong963](https://github.com/xudong963)) +- Update README.md to add CnosDB into the Known Uses [\#2933](https://github.com/apache/arrow-datafusion/pull/2933) 
([cnoshb](https://github.com/cnoshb)) + +**Performance improvements:** + +- Use code points instead of grapheme clusters for string functions [\#3054](https://github.com/apache/arrow-datafusion/pull/3054) ([Dandandan](https://github.com/Dandandan)) + +**Closed issues:** + +- Rename `do_data_time_math()` to `do_date_time_math()` [\#3172](https://github.com/apache/arrow-datafusion/issues/3172) +- Automatic version updates for github actions with dependabot [\#3106](https://github.com/apache/arrow-datafusion/issues/3106) +- \[EPIC\] Proposal for Date/Time enhancement [\#3100](https://github.com/apache/arrow-datafusion/issues/3100) +- Upgrade prost/tonic everywhere [\#3028](https://github.com/apache/arrow-datafusion/issues/3028) +- \[Question\] interested in helping with documentation [\#2866](https://github.com/apache/arrow-datafusion/issues/2866) +- Introducing a new optimizer framework for datafusion. [\#2633](https://github.com/apache/arrow-datafusion/issues/2633) +- Enable discussion tab? 
[\#2350](https://github.com/apache/arrow-datafusion/issues/2350) +- Add support for AVG\(Timestamp\) types [\#200](https://github.com/apache/arrow-datafusion/issues/200) +- TPC-H Query 22 [\#175](https://github.com/apache/arrow-datafusion/issues/175) +- TPC-H Query 21 [\#172](https://github.com/apache/arrow-datafusion/issues/172) +- TPC-H Query 20 [\#171](https://github.com/apache/arrow-datafusion/issues/171) +- TPC-H Query 17 [\#168](https://github.com/apache/arrow-datafusion/issues/168) +- TPC-H Query 11 [\#163](https://github.com/apache/arrow-datafusion/issues/163) +- TPC-H Query 4 [\#160](https://github.com/apache/arrow-datafusion/issues/160) +- TPC-H Query 2 [\#159](https://github.com/apache/arrow-datafusion/issues/159) +- \[Datafusion\] Optimize literal expression evaluation [\#106](https://github.com/apache/arrow-datafusion/issues/106) + +**Merged pull requests:** + +- Rename do\_data\_time\_math\(\) to do\_date\_time\_math\(\) [\#3173](https://github.com/apache/arrow-datafusion/pull/3173) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- \[Minor\] Remove some redundant code [\#3169](https://github.com/apache/arrow-datafusion/pull/3169) ([alamb](https://github.com/alamb)) +- Support `INTEGER` again in addition to `INT` in `CREATE TABLE` and `CAST` statements [\#3167](https://github.com/apache/arrow-datafusion/pull/3167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix regression in SQL parser related to resolution of aliased expressions [\#3165](https://github.com/apache/arrow-datafusion/pull/3165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- update cargo lock [\#3164](https://github.com/apache/arrow-datafusion/pull/3164) ([waitingkuo](https://github.com/waitingkuo)) +- add test case for cast\_timestamp\_before\_1970 [\#3163](https://github.com/apache/arrow-datafusion/pull/3163) ([waitingkuo](https://github.com/waitingkuo)) +- Return 
proper error message for ill formed variable reference [\#3162](https://github.com/apache/arrow-datafusion/pull/3162) ([alamb](https://github.com/alamb)) +- Remove outdated license text left over from arrow repo [\#3154](https://github.com/apache/arrow-datafusion/pull/3154) ([alamb](https://github.com/alamb)) +- Expose RowAccumulator in physical\_plan [\#3151](https://github.com/apache/arrow-datafusion/pull/3151) ([iajoiner](https://github.com/iajoiner)) +- Rename `DateIntervalExpr` to `DateTimeIntervalExpr` [\#3150](https://github.com/apache/arrow-datafusion/pull/3150) ([alamb](https://github.com/alamb)) +- Bump actions/labeler from 4.0.0 to 4.0.1 [\#3144](https://github.com/apache/arrow-datafusion/pull/3144) ([dependabot[bot]](https://github.com/apps/dependabot)) +- User Guide: Add documentation for subquery syntax [\#3132](https://github.com/apache/arrow-datafusion/pull/3132) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move Data Types and Information Schema to their own pages [\#3131](https://github.com/apache/arrow-datafusion/pull/3131) ([andygrove](https://github.com/andygrove)) +- Minor: Clean up `array` test [\#3121](https://github.com/apache/arrow-datafusion/pull/3121) ([alamb](https://github.com/alamb)) +- add arrow\_typeof [\#3120](https://github.com/apache/arrow-datafusion/pull/3120) ([waitingkuo](https://github.com/waitingkuo)) +- Bump actions/labeler from 2.2.0 to 4.0.0 [\#3114](https://github.com/apache/arrow-datafusion/pull/3114) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/checkout from 2 to 3 [\#3113](https://github.com/apache/arrow-datafusion/pull/3113) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-node from 2 to 3 [\#3112](https://github.com/apache/arrow-datafusion/pull/3112) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-python from 3 to 4 [\#3111](https://github.com/apache/arrow-datafusion/pull/3111) 
([dependabot[bot]](https://github.com/apps/dependabot)) +- Feature/support timestamp plus minus interval [\#3110](https://github.com/apache/arrow-datafusion/pull/3110) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- docs: fix typo [\#3109](https://github.com/apache/arrow-datafusion/pull/3109) ([dzvon](https://github.com/dzvon)) +- Remove offset if its zero [\#3102](https://github.com/apache/arrow-datafusion/pull/3102) ([turbo1912](https://github.com/turbo1912)) +- Hash binary values [\#3098](https://github.com/apache/arrow-datafusion/pull/3098) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Update to object\_store 0.4 [\#3089](https://github.com/apache/arrow-datafusion/pull/3089) ([tustvold](https://github.com/tustvold)) +- Add cast function for creating cast expression [\#3084](https://github.com/apache/arrow-datafusion/pull/3084) ([turbo1912](https://github.com/turbo1912)) +- Upgrade to arrow 20.0.0 \(but no change to object\_store\), including `prost`, and `tonic` [\#3083](https://github.com/apache/arrow-datafusion/pull/3083) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- impl Debug for ColumnarValue, add some docs [\#3076](https://github.com/apache/arrow-datafusion/pull/3076) ([alamb](https://github.com/alamb)) +- \[Minor\] run cargo update in datafusion-cli directory [\#3075](https://github.com/apache/arrow-datafusion/pull/3075) ([alamb](https://github.com/alamb)) +- update cargo.lock in `datafusion-cli` [\#3074](https://github.com/apache/arrow-datafusion/pull/3074) ([waitingkuo](https://github.com/waitingkuo)) +- Update sql parser to v0.20.0 [\#3072](https://github.com/apache/arrow-datafusion/pull/3072) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add opening, scanning, processing metrics in file stream 
[\#3070](https://github.com/apache/arrow-datafusion/pull/3070) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Simplify `approx_median` implementation, expose via `DataFrame` API [\#3064](https://github.com/apache/arrow-datafusion/pull/3064) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- docs: fix PruningStatistics example and some typos [\#3062](https://github.com/apache/arrow-datafusion/pull/3062) ([roeap](https://github.com/roeap)) +- feat: support double quoted literal strings for dialects\(such as mysql,bigquery,spark\) [\#3056](https://github.com/apache/arrow-datafusion/pull/3056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Rachelint](https://github.com/Rachelint)) +- Allow Overriding AsyncFileReader used by ParquetExec [\#3051](https://github.com/apache/arrow-datafusion/pull/3051) ([Cheappie](https://github.com/Cheappie)) +- to\_timestamp i32 coerced to i64 [\#3047](https://github.com/apache/arrow-datafusion/pull/3047) ([waitingkuo](https://github.com/waitingkuo)) +- Fix `IsNull` pruning expression generation without null\_count statistics [\#3044](https://github.com/apache/arrow-datafusion/pull/3044) ([alamb](https://github.com/alamb)) +- feat: Support `week`, `decade`, `century` for Interval literal [\#3038](https://github.com/apache/arrow-datafusion/pull/3038) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- feat: Support Binary bitwise shift operators \(\<\< and \>\>\) [\#3037](https://github.com/apache/arrow-datafusion/pull/3037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- Use concat\_elements\_utf8 from arrow rather than custom kernel [\#3036](https://github.com/apache/arrow-datafusion/pull/3036) ([alamb](https://github.com/alamb)) +- minor: update minimal rust version to 1.62, matching arrow-rs [\#3035](https://github.com/apache/arrow-datafusion/pull/3035) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- feat: Add `date_bin` built-in function [\#3034](https://github.com/apache/arrow-datafusion/pull/3034) ([stuartcarnie](https://github.com/stuartcarnie)) +- Split `binary_expr.rs` into smaller modules [\#3026](https://github.com/apache/arrow-datafusion/pull/3026) ([alamb](https://github.com/alamb)) +- feat: Enable typed strings expressions for VALUES clause [\#3018](https://github.com/apache/arrow-datafusion/pull/3018) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- fix typo for PR3003 [\#3011](https://github.com/apache/arrow-datafusion/pull/3011) ([waitingkuo](https://github.com/waitingkuo)) +- feat: Add support for TIME literal values [\#3010](https://github.com/apache/arrow-datafusion/pull/3010) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- add TimeUnit::Second as signature for ToTimestampSeconds [\#3004](https://github.com/apache/arrow-datafusion/pull/3004) ([waitingkuo](https://github.com/waitingkuo)) +- Rename FileReader to FileOpener \(\#2990\) [\#2991](https://github.com/apache/arrow-datafusion/pull/2991) ([tustvold](https://github.com/tustvold)) +- minor: collation the prune test [\#2986](https://github.com/apache/arrow-datafusion/pull/2986) ([liukun4515](https://github.com/liukun4515)) +- Optionally skip metadata from schema when merging parquet files [\#2985](https://github.com/apache/arrow-datafusion/pull/2985) ([alamb](https://github.com/alamb)) +- \[Minor\] Extract interval parsing logic, add unit tests [\#2984](https://github.com/apache/arrow-datafusion/pull/2984) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Update sqlparser to 0.19 [\#2981](https://github.com/apache/arrow-datafusion/pull/2981) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- test: add file/SQL level test for pruning parquet row group with decimal data type. [\#2977](https://github.com/apache/arrow-datafusion/pull/2977) ([liukun4515](https://github.com/liukun4515)) +- Derive Hash for JoinType [\#2972](https://github.com/apache/arrow-datafusion/pull/2972) ([liurenjie1024](https://github.com/liurenjie1024)) +- Example that shows how to convert query result into rust struct \#2959 [\#2969](https://github.com/apache/arrow-datafusion/pull/2969) ([thomas-k-cameron](https://github.com/thomas-k-cameron)) +- Add baseline\_metrics for FileStream to record metrics like elapsed ti… [\#2965](https://github.com/apache/arrow-datafusion/pull/2965) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- test: add test for decimal and pruning for decimal column [\#2960](https://github.com/apache/arrow-datafusion/pull/2960) ([liukun4515](https://github.com/liukun4515)) +- Simplify expressions with `NOT` clause [\#2958](https://github.com/apache/arrow-datafusion/pull/2958) ([AssHero](https://github.com/AssHero)) +- chore: update jit-related dependencies [\#2956](https://github.com/apache/arrow-datafusion/pull/2956) ([xudong963](https://github.com/xudong963)) +- Update to arrow `19.0.0` [\#2955](https://github.com/apache/arrow-datafusion/pull/2955) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Remove CI Caching to preserve diskspace [\#2948](https://github.com/apache/arrow-datafusion/pull/2948) ([alamb](https://github.com/alamb)) +- Add metadata\_size\_hint for optimistic fetching of parquet metadata [\#2946](https://github.com/apache/arrow-datafusion/pull/2946) ([thinkharderdev](https://github.com/thinkharderdev)) +- Minor: Remove left over debugging statement [\#2944](https://github.com/apache/arrow-datafusion/pull/2944) ([alamb](https://github.com/alamb)) +- add Atan2 
[\#2942](https://github.com/apache/arrow-datafusion/pull/2942) ([waitingkuo](https://github.com/waitingkuo)) +- Use `Arc` and remove ObjectStoreRegistry::clone [\#2941](https://github.com/apache/arrow-datafusion/pull/2941) ([tustvold](https://github.com/tustvold)) +- add extension system to `SessionConfig` [\#2940](https://github.com/apache/arrow-datafusion/pull/2940) ([crepererum](https://github.com/crepererum)) +- Update prost-build requirement from 0.7 to 0.10 [\#2937](https://github.com/apache/arrow-datafusion/pull/2937) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add streaming JSON and CSV reading, `NewlineDelimitedStream` \(\#2935\) [\#2936](https://github.com/apache/arrow-datafusion/pull/2936) ([tustvold](https://github.com/tustvold)) +- feat\(catalog\): Implement information\_schema.views [\#2934](https://github.com/apache/arrow-datafusion/pull/2934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Support `window` functions in expressions by re-write projection after building window plan [\#2932](https://github.com/apache/arrow-datafusion/pull/2932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add pow as synonym for power [\#2927](https://github.com/apache/arrow-datafusion/pull/2927) ([andygrove](https://github.com/andygrove)) +- Add `from_unixtime` function [\#2924](https://github.com/apache/arrow-datafusion/pull/2924) ([waitingkuo](https://github.com/waitingkuo)) +- fix\(aggregate\): support mean as synonym avg [\#2923](https://github.com/apache/arrow-datafusion/pull/2923) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add `DataFrame::with_column_renamed` [\#2920](https://github.com/apache/arrow-datafusion/pull/2920) ([andygrove](https://github.com/andygrove)) +- Run clippy with optional features [\#2918](https://github.com/apache/arrow-datafusion/pull/2918) ([tustvold](https://github.com/tustvold)) +- Fix release
verification script by not overriding `ARROW_TEST_DATA` or `PARQUET_TEST_DATA` [\#2917](https://github.com/apache/arrow-datafusion/pull/2917) ([alamb](https://github.com/alamb)) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `datafusion_core` [\#2914](https://github.com/apache/arrow-datafusion/pull/2914) ([alamb](https://github.com/alamb)) +- Optimizer should have option to skip failing rules [\#2909](https://github.com/apache/arrow-datafusion/pull/2909) ([andygrove](https://github.com/andygrove)) +- Introduce ObjectStoreProvider to create an object store based on the url [\#2906](https://github.com/apache/arrow-datafusion/pull/2906) ([yahoNanJing](https://github.com/yahoNanJing)) +- Remove datafusion-data-access crate [\#2904](https://github.com/apache/arrow-datafusion/pull/2904) ([yahoNanJing](https://github.com/yahoNanJing)) +- Combine all comparison coercion rules [\#2901](https://github.com/apache/arrow-datafusion/pull/2901) ([andygrove](https://github.com/andygrove)) +- Add `Projection::try_new` and `Projection::try_new_with_schema` [\#2900](https://github.com/apache/arrow-datafusion/pull/2900) ([andygrove](https://github.com/andygrove)) +- Improve formatting of logical plans containing subqueries [\#2899](https://github.com/apache/arrow-datafusion/pull/2899) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- add session option 'datafusion.explain.logical\_plan'. when set to true, the explain statement will only print logical plans. 
[\#2895](https://github.com/apache/arrow-datafusion/pull/2895) ([AssHero](https://github.com/AssHero)) +- Preserve field name in `ScalarValue::List` [\#2893](https://github.com/apache/arrow-datafusion/pull/2893) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) +- Adds optional serde support to datafusion-proto [\#2892](https://github.com/apache/arrow-datafusion/pull/2892) ([tustvold](https://github.com/tustvold)) +- Implement `ScalarValue::Dictionary` and preserve type through conversion back/forth to Array [\#2891](https://github.com/apache/arrow-datafusion/pull/2891) ([alamb](https://github.com/alamb)) +- Add an ID generator in preparation for PR 2885 [\#2887](https://github.com/apache/arrow-datafusion/pull/2887) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add support for correlated subqueries & fix all related TPC-H benchmark issues [\#2885](https://github.com/apache/arrow-datafusion/pull/2885) ([avantgardnerio](https://github.com/avantgardnerio)) +- fix\(doc\): update test directory link in CONTRIBUTING.md [\#2882](https://github.com/apache/arrow-datafusion/pull/2882) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add h2o bench groupby queries [\#2881](https://github.com/apache/arrow-datafusion/pull/2881) ([andygrove](https://github.com/andygrove)) +- Add support for month & year intervals [\#2797](https://github.com/apache/arrow-datafusion/pull/2797) ([avantgardnerio](https://github.com/avantgardnerio)) +- Migrate from avro\_rs \(0.13\) to apache\_avro \(0.14\) [\#2784](https://github.com/apache/arrow-datafusion/pull/2784) ([martin-g](https://github.com/martin-g)) + +## [10.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/10.0.0-rc1) (2022-07-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0...10.0.0-rc1) + + ## [10.0.0](https://github.com/apache/arrow-datafusion/tree/10.0.0) (2022-07-12) [Full 
Changelog](https://github.com/apache/arrow-datafusion/compare/9.0.0...10.0.0) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 36bb2d69f9b5..20cb0967022f 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-common" description = "Common functionality for DataFusion query engine" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index f417f9ef299b..7e69243039df 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../../README.md" @@ -60,13 +60,13 @@ arrow = { version = "20.0.0", features = ["prettyprint"] } async-trait = "0.1.41" bytes = "1.1" chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "10.0.0", features = ["parquet", "object_store"] } -datafusion-expr = { path = "../expr", version = "10.0.0" } -datafusion-jit = { path = "../jit", version = "10.0.0", optional = true } -datafusion-optimizer = { path = "../optimizer", version = "10.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "10.0.0" } -datafusion-row = { path = "../row", version = "10.0.0" } -datafusion-sql = { path = "../sql", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0", features = ["parquet", "object_store"] } +datafusion-expr = { path = "../expr", version = "11.0.0" } +datafusion-jit = { path = "../jit", version = "11.0.0", optional = true } +datafusion-optimizer = { 
path = "../optimizer", version = "11.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "11.0.0" } +datafusion-row = { path = "../row", version = "11.0.0" } +datafusion-sql = { path = "../sql", version = "11.0.0" } futures = "0.3" glob = "0.3.0" hashbrown = { version = "0.12", features = ["raw"] } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index a3c4dd4acb0f..2241e474b503 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,5 +37,5 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.7", default-features = false } arrow = { version = "20.0.0", features = ["prettyprint"] } -datafusion-common = { path = "../common", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0" } sqlparser = "0.20" diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 8da87bbd7607..a9216e928122 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-jit" description = "Just In Time (JIT) compilation support for DataFusion query engine" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,7 +41,7 @@ cranelift = "0.86.1" cranelift-jit = "0.86.1" cranelift-module = "0.86.1" cranelift-native = "0.86.1" -datafusion-common = { path = "../common", version = "10.0.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", 
version = "11.0.0" } parking_lot = "0.12" diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 695f5be9b185..b1a64384c83b 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-optimizer" description = "DataFusion Query Optimizer" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -40,9 +40,9 @@ unicode_expressions = [] arrow = { version = "20.0.0", features = ["prettyprint"] } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "10.0.0" } -datafusion-expr = { path = "../expr", version = "10.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0" } +datafusion-expr = { path = "../expr", version = "11.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "11.0.0" } hashbrown = { version = "0.12", features = ["raw"] } log = "^0.4" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 5f25b2e4f188..5cd65ca0ad07 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -44,9 +44,9 @@ arrow = { version = "20.0.0", features = ["prettyprint"] } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "10.0.0" } -datafusion-expr 
= { path = "../expr", version = "10.0.0" } -datafusion-row = { path = "../row", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0" } +datafusion-expr = { path = "../expr", version = "11.0.0" } +datafusion-row = { path = "../row", version = "11.0.0" } hashbrown = { version = "0.12", features = ["raw"] } lazy_static = { version = "^1.4.0" } md-5 = { version = "^0.10.0", optional = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index dc8991975bd5..88a74b088aad 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,9 +38,9 @@ json = ["pbjson", "pbjson-build", "serde", "serde_json"] [dependencies] arrow = { version = "20.0.0" } -datafusion = { path = "../core", version = "10.0.0" } -datafusion-common = { path = "../common", version = "10.0.0" } -datafusion-expr = { path = "../expr", version = "10.0.0" } +datafusion = { path = "../core", version = "11.0.0" } +datafusion-common = { path = "../common", version = "11.0.0" } +datafusion-expr = { path = "../expr", version = "11.0.0" } pbjson = { version = "0.3", optional = true } pbjson-types = { version = "0.3", optional = true } prost = "0.11.0" diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index 1621a216e1a9..7ee2f01d8527 100644 --- a/datafusion/row/Cargo.toml +++ b/datafusion/row/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-row" description = "Row backed by raw bytes for DataFusion query engine" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,7 +38,7 @@ jit = 
["datafusion-jit"] [dependencies] arrow = { version = "20.0.0" } -datafusion-common = { path = "../common", version = "10.0.0" } -datafusion-jit = { path = "../jit", version = "10.0.0", optional = true } +datafusion-common = { path = "../common", version = "11.0.0" } +datafusion-jit = { path = "../jit", version = "11.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index a6499a7f998d..5fbc5e9f2174 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-sql" description = "DataFusion SQL Query Planner" -version = "10.0.0" +version = "11.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -39,8 +39,8 @@ unicode_expressions = [] [dependencies] ahash = { version = "0.7", default-features = false } arrow = { version = "20.0.0", features = ["prettyprint"] } -datafusion-common = { path = "../common", version = "10.0.0" } -datafusion-expr = { path = "../expr", version = "10.0.0" } +datafusion-common = { path = "../common", version = "11.0.0" } +datafusion-expr = { path = "../expr", version = "11.0.0" } hashbrown = "0.12" sqlparser = "0.20" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } diff --git a/dev/release/README.md b/dev/release/README.md index 222f5f3432e3..924f65198f5b 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -23,7 +23,7 @@ ### Major Release -DataFusion typically has major releases from the `master` branch every 3 months, including breaking API changes. +DataFusion typically has major releases from the `master` branch every 4 weeks, including breaking API changes. 
### Minor Release diff --git a/docs/source/index.rst b/docs/source/index.rst index 34e9b135be47..86b3b7e2c8ff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -22,14 +22,6 @@ Apache Arrow DataFusion Table of Contents ================= -.. _toc.usage: - -.. toctree:: - :maxdepth: 1 - :caption: Supported Environments - - Rust - .. _toc.guide: .. toctree:: @@ -45,6 +37,7 @@ Table of Contents user-guide/sql/index user-guide/configs user-guide/faq + Rust Crate Documentation .. _toc.contributor-guide: diff --git a/docs/source/user-guide/cli.md b/docs/source/user-guide/cli.md index ed6131fe957e..5e993774a9fc 100644 --- a/docs/source/user-guide/cli.md +++ b/docs/source/user-guide/cli.md @@ -79,7 +79,7 @@ this to work. ```bash git clone https://github.com/apache/arrow-datafusion -git checkout 8.0.0 +git checkout 11.0.0 cd arrow-datafusion docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli docker run -it -v $(your_data_location):/data datafusion-cli diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index de8cae295579..48ca791ffe6f 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -24,7 +24,7 @@ Add the following to your `Cargo.toml` file: ```toml -datafusion = "8.0.0" +datafusion = "11.0" tokio = "1.0" ``` diff --git a/docs/source/user-guide/library.md b/docs/source/user-guide/library.md index 422c9d6d1ec4..688520f9c770 100644 --- a/docs/source/user-guide/library.md +++ b/docs/source/user-guide/library.md @@ -44,7 +44,7 @@ To get started, add the following to your `Cargo.toml` file: ```toml [dependencies] -datafusion = "8.0.0" +datafusion = "11.0" ``` ## Create a main function @@ -76,7 +76,7 @@ worth noting that using the settings in the `[profile.release]` section will sig ```toml [dependencies] -datafusion = { version = "7.0" , features = ["simd"]} +datafusion = { version = "11.0" , features = ["simd"]} tokio = { version = "^1.0", features = 
["rt-multi-thread"] } snmalloc-rs = "0.2" diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md index 75ec0f6cb0fa..ee73370e0c25 100644 --- a/docs/source/user-guide/sql/ddl.md +++ b/docs/source/user-guide/sql/ddl.md @@ -30,9 +30,17 @@ STORED AS PARQUET LOCATION '/mnt/nyctaxi/tripdata.parquet'; ``` -CSV data sources can also be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is necessary to -provide schema information for CSV files since DataFusion does not automatically infer the schema when using SQL -to query CSV files. +CSV data sources can also be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. The schema will be +inferred based on scanning a subset of the file. + +```sql +CREATE EXTERNAL TABLE test +STORED AS CSV +WITH HEADER ROW +LOCATION '/path/to/aggregate_simple.csv'; +``` + +It is also possible to specify the schema manually. ```sql CREATE EXTERNAL TABLE test (