Skip to content

Commit

Permalink
Improve performance of LIKE / NOT LIKE with `%pat%` scalar patterns (>3x speedup) (apache#2521)
Browse files: browse the repository at this point in the history
* Special-case `contains` for patterns of the form `%pat%`

* Fix
  • Loading branch information
Dandandan authored and amrltqt committed Aug 20, 2022
1 parent 8a6520a commit da22f0d
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions arrow/src/compute/kernels/comparison.rs
Expand Up @@ -263,11 +263,23 @@ pub fn like_utf8_scalar<OffsetSize: OffsetSizeTrait>(
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
// fast path, can use ends_with
let ends_with = &right[1..];

for i in 0..left.len() {
if left.value(i).ends_with(ends_with) {
bit_util::set_bit(bool_slice, i);
}
}
} else if right.starts_with('%')
&& right.ends_with('%')
&& !right[1..right.len() - 1].contains(is_like_pattern)
{
// fast path, can use contains
let contains = &right[1..right.len() - 1];
for i in 0..left.len() {
if left.value(i).contains(contains) {
bit_util::set_bit(bool_slice, i);
}
}
} else {
let re_pattern = replace_like_wildcards(right)?;
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
Expand Down Expand Up @@ -383,6 +395,15 @@ pub fn nlike_utf8_scalar<OffsetSize: OffsetSizeTrait>(
for i in 0..left.len() {
result.append(!left.value(i).ends_with(&right[1..]));
}
} else if right.starts_with('%')
&& right.ends_with('%')
&& !right[1..right.len() - 1].contains(is_like_pattern)
{
// fast path, can use contains
let contains = &right[1..right.len() - 1];
for i in 0..left.len() {
result.append(!left.value(i).contains(contains));
}
} else {
let re_pattern = replace_like_wildcards(right)?;
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
Expand Down

0 comments on commit da22f0d

Please sign in to comment.