Add `like_utf8_scalar_dyn`: a dynamically typed entry point for SQL `LIKE`
against a scalar pattern. It dispatches on `left.data_type()` to `like_scalar`
(Utf8 / LargeUtf8) or `like_dict_scalar` (Dictionary) and returns a
`ComputeError` for every other data type. The test hunks mirror the existing
`like_utf8_scalar` and `like_dict_scalar` cases through the new function.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation follows rustfmt defaults, and the generic arguments
(`Result<BooleanArray>`, `DictionaryArray<Int8Type>`,
`ArrayAccessor<Item = &'a str>`) were restored -- confirm against the target
tree before applying.

diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 6438acc3b11..05c8b7aa615 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -278,6 +278,39 @@ fn like_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
     like_scalar_op(left, right, |x| x)
 }
 
+/// Perform SQL `left LIKE right` operation on [`StringArray`] /
+/// [`LargeStringArray`], or [`DictionaryArray`] with values
+/// [`StringArray`]/[`LargeStringArray`] and a scalar.
+///
+/// See the documentation on [`like_utf8`] for more details.
+pub fn like_utf8_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray> {
+    match left.data_type() {
+        DataType::Utf8 => {
+            let left = as_string_array(left);
+            like_scalar(left, right)
+        }
+        DataType::LargeUtf8 => {
+            let left = as_largestring_array(left);
+            like_scalar(left, right)
+        }
+        DataType::Dictionary(_, _) => {
+            downcast_dictionary_array!(
+                left => {
+                    like_dict_scalar(left, right)
+                }
+                t => Err(ArrowError::ComputeError(format!(
+                    "Should be DictionaryArray but got: {}", t
+                )))
+            )
+        }
+        _ => {
+            Err(ArrowError::ComputeError(
+                "like_utf8_scalar_dyn only supports Utf8, LargeUtf8 or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
+            ))
+        }
+    }
+}
+
 /// Perform SQL `left LIKE right` operation on [`StringArray`] /
 /// [`LargeStringArray`] and a scalar.
 ///
@@ -4471,6 +4504,14 @@ mod tests {
         vec![true, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_testing,
+        vec!["varchar(255)", "int(255)", "varchar", "int"],
+        "%(%)%",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_escape_regex,
         vec![".*", "a", "*"],
@@ -4479,6 +4520,14 @@ mod tests {
         vec![true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_regex,
+        vec![".*", "a", "*"],
+        ".*",
+        like_utf8_scalar_dyn,
+        vec![true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_escape_regex_dot,
         vec![".", "a", "*"],
@@ -4487,6 +4536,14 @@ mod tests {
         vec![true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_regex_dot,
+        vec![".", "a", "*"],
+        ".",
+        like_utf8_scalar_dyn,
+        vec![true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar,
         vec!["arrow", "parquet", "datafusion", "flight"],
@@ -4494,6 +4551,15 @@ mod tests {
         like_utf8_scalar,
         vec![true, true, false, false]
     );
+
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn,
+        vec!["arrow", "parquet", "datafusion", "flight"],
+        "%ar%",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_start,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4502,6 +4568,14 @@ mod tests {
         vec![true, false, true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_start,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "arrow%",
+        like_utf8_scalar_dyn,
+        vec![true, false, true, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_end,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4510,6 +4584,14 @@ mod tests {
         vec![true, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_end,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "%arrow",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_equals,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4518,6 +4600,14 @@ mod tests {
         vec![true, false, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_equals,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "arrow",
+        like_utf8_scalar_dyn,
+        vec![true, false, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_one,
         vec!["arrow", "arrows", "parrow", "arr"],
@@ -4526,6 +4616,14 @@ mod tests {
         vec![false, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_one,
+        vec!["arrow", "arrows", "parrow", "arr"],
+        "arrow_",
+        like_utf8_scalar_dyn,
+        vec![false, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_scalar_like_escape,
         vec!["a%", "a\\x"],
@@ -4534,6 +4632,14 @@ mod tests {
         vec![true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_scalar_like_dyn_escape,
+        vec!["a%", "a\\x"],
+        "a\\%",
+        like_utf8_scalar_dyn,
+        vec![true, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_scalar_like_escape_contains,
         vec!["ba%", "ba\\x"],
@@ -4542,6 +4648,14 @@ mod tests {
         vec![true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_scalar_like_dyn_escape_contains,
+        vec!["ba%", "ba\\x"],
+        "%a\\%",
+        like_utf8_scalar_dyn,
+        vec![true, false]
+    );
+
     test_utf8!(
         test_utf8_scalar_ilike_regex,
         vec!["%%%"],
@@ -6138,6 +6252,12 @@ mod tests {
 
         let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
 
+        let data =
+            vec![Some("Earth"), Some("Fire"), Some("Water"), Some("Air"), None, Some("Air")];
+
+        let dict_arrayref: DictionaryArray<Int8Type> = data.into_iter().collect();
+        let dict_arrayref = Arc::new(dict_arrayref) as ArrayRef;
+
         assert_eq!(
             like_dict_scalar(&dict_array, "Air").unwrap(),
             BooleanArray::from(
@@ -6145,6 +6265,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "Air").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(false), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "Wa%").unwrap(),
             BooleanArray::from(
@@ -6152,6 +6279,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "Wa%").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(true), Some(false), None, Some(false)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%r").unwrap(),
             BooleanArray::from(
@@ -6159,6 +6293,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%r").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(true), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%i%").unwrap(),
             BooleanArray::from(
@@ -6166,12 +6307,26 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%i%").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(true), Some(false), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%a%r%").unwrap(),
             BooleanArray::from(
                 vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
             ),
         );
+
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%a%r%").unwrap(),
+            BooleanArray::from(
+                vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
+            ),
+        );
     }
 
     #[test]