diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index cbc5418867dd..e73e52cba1ea 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -95,11 +95,11 @@ jobs: - name: Build tests run: | export PATH=$PATH:$HOME/d/protoc/bin - cargo test --features avro,jit,scheduler,json --no-run + cargo test --features avro,jit,scheduler,json,dictionary_expressions --no-run - name: Run tests run: | export PATH=$PATH:$HOME/d/protoc/bin - cargo test --features avro,jit,scheduler,json + cargo test --features avro,jit,scheduler,json,dictionary_expressions - name: Run examples run: | export PATH=$PATH:$HOME/d/protoc/bin diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index becc733eb2eb..00ca39f47137 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -43,6 +43,9 @@ avro = ["apache-avro", "num-traits", "datafusion-common/avro"] compression = ["xz2", "bzip2", "flate2", "async-compression"] crypto_expressions = ["datafusion-physical-expr/crypto_expressions"] default = ["crypto_expressions", "regex_expressions", "unicode_expressions", "compression"] +# Enables support for non-scalar, binary operations on dictionaries +# Note: this results in significant additional codegen +dictionary_expressions = ["datafusion-physical-expr/dictionary_expressions"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) force_hash_collisions = [] # Used to enable JIT code generation @@ -102,7 +105,6 @@ xz2 = { version = "0.1", optional = true } [dev-dependencies] -arrow = { version = "31.0.0", features = ["prettyprint", "dyn_cmp_dict"] } async-trait = "0.1.53" bigdecimal = "0.3.0" criterion = "0.4" diff --git a/datafusion/core/tests/path_partition.rs b/datafusion/core/tests/path_partition.rs index 2d257d49a389..670b508c3347 100644 --- a/datafusion/core/tests/path_partition.rs +++ b/datafusion/core/tests/path_partition.rs @@ -204,7 +204,7 @@ async fn csv_filter_with_file_col() -> Result<()> { ); let result = ctx - .sql("SELECT c1, c2 FROM t WHERE date='2021-10-27' and date!=c1 LIMIT 5") + .sql("SELECT c1, c2 FROM t WHERE date='2021-10-27' and c1!='2021-10-27' LIMIT 5") .await? .collect() .await?; diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index f65835101c52..124f25d36a88 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -621,6 +621,7 @@ async fn query_nested_get_indexed_field_on_struct() -> Result<()> { } #[tokio::test] +#[cfg(feature = "dictionary_expressions")] async fn query_on_string_dictionary() -> Result<()> { // Test to ensure DataFusion can operate on dictionary types // Use StringDictionary (32 bit indexes = keys) diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 5b25d97075df..6ab1bb7fa9d5 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -24,7 +24,7 @@ repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" authors = ["Apache Arrow "] license = "Apache-2.0" -keywords = [ "arrow", "query", "sql" ] +keywords = ["arrow", "query", "sql"] edition = "2021" rust-version = "1.62" @@ -35,12 +35,15 @@ path = "src/lib.rs" [features] crypto_expressions = ["md-5", "sha2", "blake2", "blake3"] default = ["crypto_expressions", "regex_expressions", "unicode_expressions"] +# Enables support for non-scalar, binary operations on dictionaries +# Note: this results in significant additional codegen +dictionary_expressions = ["arrow/dyn_cmp_dict", "arrow/dyn_arith_dict"] regex_expressions = ["regex"] unicode_expressions = ["unicode-segmentation"] [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } -arrow = { version = "31.0.0", features = ["prettyprint", "dyn_cmp_dict"] } +arrow = { version = "31.0.0", features = ["prettyprint"] } arrow-buffer = "31.0.0" arrow-schema = "31.0.0" blake2 = { version = "^0.10.2", optional = true } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index df90be163864..d2346d278dc4 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -1502,6 +1502,7 @@ mod tests { // is no way at the time of this writing to create a dictionary // array using the `From` trait #[test] + #[cfg(feature = "dictionary_expressions")] fn test_dictionary_type_to_array_coersion() -> Result<()> { // Test string a string dictionary let dict_type =