Skip to content

Commit

Permalink
Add dictionary_expressions feature (#4386) (#4999)
Browse files Browse the repository at this point in the history
* Add dictionary_expressions feature (#4386)

* Toml format
  • Loading branch information
tustvold committed Jan 24, 2023
1 parent 5d4038a commit 9f498bb
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 6 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Expand Up @@ -95,11 +95,11 @@ jobs:
- name: Build tests
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test --features avro,jit,scheduler,json --no-run
cargo test --features avro,jit,scheduler,json,dictionary_expressions --no-run
- name: Run tests
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test --features avro,jit,scheduler,json
cargo test --features avro,jit,scheduler,json,dictionary_expressions
- name: Run examples
run: |
export PATH=$PATH:$HOME/d/protoc/bin
Expand Down
4 changes: 3 additions & 1 deletion datafusion/core/Cargo.toml
Expand Up @@ -43,6 +43,9 @@ avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
compression = ["xz2", "bzip2", "flate2", "async-compression"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions"]
default = ["crypto_expressions", "regex_expressions", "unicode_expressions", "compression"]
# Enables support for non-scalar, binary operations on dictionaries
# Note: this results in significant additional codegen
dictionary_expressions = ["datafusion-physical-expr/dictionary_expressions"]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
force_hash_collisions = []
# Used to enable JIT code generation
Expand Down Expand Up @@ -102,7 +105,6 @@ xz2 = { version = "0.1", optional = true }


[dev-dependencies]
arrow = { version = "31.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
async-trait = "0.1.53"
bigdecimal = "0.3.0"
criterion = "0.4"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/path_partition.rs
Expand Up @@ -204,7 +204,7 @@ async fn csv_filter_with_file_col() -> Result<()> {
);

let result = ctx
.sql("SELECT c1, c2 FROM t WHERE date='2021-10-27' and date!=c1 LIMIT 5")
.sql("SELECT c1, c2 FROM t WHERE date='2021-10-27' and c1!='2021-10-27' LIMIT 5")
.await?
.collect()
.await?;
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/tests/sql/select.rs
Expand Up @@ -621,6 +621,7 @@ async fn query_nested_get_indexed_field_on_struct() -> Result<()> {
}

#[tokio::test]
#[cfg(feature = "dictionary_expressions")]
async fn query_on_string_dictionary() -> Result<()> {
// Test to ensure DataFusion can operate on dictionary types
// Use StringDictionary (32 bit indexes = keys)
Expand Down
7 changes: 5 additions & 2 deletions datafusion/physical-expr/Cargo.toml
Expand Up @@ -24,7 +24,7 @@ repository = "https://github.com/apache/arrow-datafusion"
readme = "README.md"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = [ "arrow", "query", "sql" ]
keywords = ["arrow", "query", "sql"]
edition = "2021"
rust-version = "1.62"

Expand All @@ -35,12 +35,15 @@ path = "src/lib.rs"
[features]
crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
# Enables support for non-scalar, binary operations on dictionaries
# Note: this results in significant additional codegen
dictionary_expressions = ["arrow/dyn_cmp_dict", "arrow/dyn_arith_dict"]
regex_expressions = ["regex"]
unicode_expressions = ["unicode-segmentation"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { version = "31.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow = { version = "31.0.0", features = ["prettyprint"] }
arrow-buffer = "31.0.0"
arrow-schema = "31.0.0"
blake2 = { version = "^0.10.2", optional = true }
Expand Down
1 change: 1 addition & 0 deletions datafusion/physical-expr/src/expressions/binary.rs
Expand Up @@ -1502,6 +1502,7 @@ mod tests {
// is no way at the time of this writing to create a dictionary
// array using the `From` trait
#[test]
#[cfg(feature = "dictionary_expressions")]
fn test_dictionary_type_to_array_coersion() -> Result<()> {
// Test a string dictionary
let dict_type =
Expand Down

0 comments on commit 9f498bb

Please sign in to comment.