From b2f0c65e15a40c7f2f01e1d96335ea8ec362f65d Mon Sep 17 00:00:00 2001 From: Vrishabh Date: Tue, 23 Aug 2022 16:10:22 +0530 Subject: [PATCH] Fix ilike_utf8_scalar kernels (#2545) * fix ilike kernels * minor refactor for perf improvements * Remove wrongly added file --- arrow/src/compute/kernels/comparison.rs | 50 ++++++++++--------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 39828b64fcf..714b4b1b75f 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -467,29 +467,24 @@ pub fn ilike_utf8_scalar( if !right.contains(is_like_pattern) { // fast path, can use equals + let right_uppercase = right.to_uppercase(); for i in 0..left.len() { - result.append(left.value(i) == right); + result.append(left.value(i).to_uppercase() == right_uppercase); } } else if right.ends_with('%') && !right.ends_with("\\%") && !right[..right.len() - 1].contains(is_like_pattern) { - // fast path, can use ends_with + // fast path, can use starts_with + let start_str = &right[..right.len() - 1].to_uppercase(); for i in 0..left.len() { - result.append( - left.value(i) - .to_uppercase() - .starts_with(&right[..right.len() - 1].to_uppercase()), - ); + result.append(left.value(i).to_uppercase().starts_with(start_str)); } } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { - // fast path, can use starts_with + // fast path, can use ends_with + let ends_str = &right[1..].to_uppercase(); for i in 0..left.len() { - result.append( - left.value(i) - .to_uppercase() - .ends_with(&right[1..].to_uppercase()), - ); + result.append(left.value(i).to_uppercase().ends_with(ends_str)); } } else { let re_pattern = replace_like_wildcards(right)?; @@ -550,31 +545,24 @@ pub fn nilike_utf8_scalar( if !right.contains(is_like_pattern) { // fast path, can use equals + let right_uppercase = right.to_uppercase(); for i in 0..left.len() { - 
result.append(left.value(i) != right); + result.append(left.value(i).to_uppercase() != right_uppercase); } } else if right.ends_with('%') && !right.ends_with("\\%") && !right[..right.len() - 1].contains(is_like_pattern) { - // fast path, can use ends_with + // fast path, can use starts_with + let start_str = &right[..right.len() - 1].to_uppercase(); for i in 0..left.len() { - result.append( - !left - .value(i) - .to_uppercase() - .starts_with(&right[..right.len() - 1].to_uppercase()), - ); + result.append(!left.value(i).to_uppercase().starts_with(start_str)); } } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { - // fast path, can use starts_with + // fast path, can use ends_with + let end_str = &right[1..].to_uppercase(); for i in 0..left.len() { - result.append( - !left - .value(i) - .to_uppercase() - .ends_with(&right[1..].to_uppercase()), - ); + result.append(!left.value(i).to_uppercase().ends_with(end_str)); } } else { let re_pattern = replace_like_wildcards(right)?; @@ -4181,7 +4169,7 @@ mod tests { test_utf8_scalar!( test_utf8_array_ilike_scalar_equals, vec!["arrow", "parrow", "arrows", "arr"], - "arrow", + "Arrow", ilike_utf8_scalar, vec![true, false, false, false] ); @@ -4234,8 +4222,8 @@ mod tests { test_utf8_scalar!( test_utf8_array_nilike_scalar_equals, - vec!["arrow", "parrow", "arrows", "arr"], - "arrow", + vec!["arRow", "parrow", "arrows", "arr"], + "Arrow", nilike_utf8_scalar, vec![false, true, true, true] );