From 68ce0bc617cf7313d685c21931a1975d74269819 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Mon, 13 Jun 2022 09:38:41 +0800 Subject: [PATCH 1/5] clean up Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/substring.rs | 811 +++++++------------------ 1 file changed, 232 insertions(+), 579 deletions(-) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index 625a37514d1..ca54aa4450e 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -445,64 +445,62 @@ mod tests { use super::*; use crate::datatypes::*; - #[allow(clippy::type_complexity)] + macro_rules! gen_test_cases { + ($input:expr, $(($start:expr, $len:expr, $result:expr)), *) => { + [ + $( + ($input.clone(), $start, $len, $result), + )* + ] + }; + } + + macro_rules! do_test { + ($cases:expr, $array_ty:ty, $substring_fn:ident) => { + $cases.into_iter().try_for_each::<_, Result<()>>( + |(array, start, length, expected)| { + let array = <$array_ty>::from(array); + let result = $substring_fn(&array, start, length)?; + assert_eq!(array.len(), result.len()); + let result = result.as_any().downcast_ref::<$array_ty>().unwrap(); + let expected = <$array_ty>::from(expected); + assert_eq!(&expected, result); + Ok(()) + }, + ) + }; + } + fn with_nulls_generic_binary() -> Result<()> { - let cases: Vec<(Vec>, i64, Option, Vec>)> = vec![ - // all-nulls array is always identical - (vec![None, None, None], -1, Some(1), vec![None, None, None]), + let input = vec![ + Some("hello".as_bytes()), + None, + Some(&[0xf8, 0xf9, 0xff, 0xfa]), + ]; + // all-nulls array is always identical + let base_case = gen_test_cases!( + vec![None, None, None], + (-1, Some(1), vec![None, None, None]) + ); + let cases = gen_test_cases!( + input, // identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - None, - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), + (0, None, input.clone()), // 0 length -> Nothing - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - Some(0), - vec![Some(&[]), None, Some(&[])], - ), + (0, Some(0), vec![Some(&[]), None, Some(&[])]), // high start -> Nothing - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 1000, - Some(0), - vec![Some(&[]), None, Some(&[])], - ), + (1000, Some(0), vec![Some(&[]), None, Some(&[])]), // high negative start -> identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - -1000, - None, - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), + (-1000, None, input.clone()), // high length -> identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - Some(1000), - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), - ]; + (0, Some(1000), input.clone()) + ); - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericBinaryArray::::from(array); - let result: ArrayRef = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericBinaryArray::::from(expected); - assert_eq!(&expected, result); - Ok(()) - }, + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericBinaryArray, + substring )?; - Ok(()) } @@ -516,133 +514,43 @@ mod tests { with_nulls_generic_binary::() } - #[allow(clippy::type_complexity)] fn without_nulls_generic_binary() -> Result<()> { - let cases: Vec<(Vec<&[u8]>, i64, Option, Vec<&[u8]>)> = vec![ - // empty array is always identical - (vec![b"", b"", b""], 2, Some(1), vec![b"", b"", b""]), + let input = vec!["hello".as_bytes(), b"", &[0xf8, 0xf9, 0xff, 0xfa]]; + // empty array is always identical + let base_case = gen_test_cases!( + vec!["".as_bytes(), b"", b""], + (2, Some(1), vec!["".as_bytes(), b"", b""]) + ); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 0, - None, - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - None, - vec![b"ello", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 2, - None, - vec![b"llo", b"", &[0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 3, - None, - vec![b"lo", b"", &[0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 10, - None, - vec![b"", b"", b""], - ), + (1, None, vec![b"ello", b"", &[0xf9, 0xff, 0xfa]]), + (2, None, vec![b"llo", b"", &[0xff, 0xfa]]), + (3, None, vec![b"lo", b"", &[0xfa]]), + (10, None, vec![b"", b"", b""]), // increase start negatively - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -1, - None, - vec![b"o", b"", &[0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -2, - None, - vec![b"lo", b"", &[0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - None, - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -10, - None, - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - ), + (-1, None, vec![b"o", b"", &[0xfa]]), + (-2, None, vec![b"lo", b"", &[0xff, 0xfa]]), + (-3, None, vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]), + (-10, None, input.clone()), // increase length - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(1), - vec![b"e", b"", &[0xf9]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(2), - vec![b"el", b"", &[0xf9, 0xff]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(3), - vec![b"ell", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(4), - vec![b"ello", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(1), - vec![b"l", b"", &[0xf9]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(2), - vec![b"ll", b"", &[0xf9, 0xff]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(3), - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(4), - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericBinaryArray::::from(array); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericBinaryArray::::from(expected); - assert_eq!(&expected, result,); - Ok(()) - }, + (1, Some(1), vec![b"e", b"", &[0xf9]]), + (1, Some(2), vec![b"el", b"", &[0xf9, 0xff]]), + (1, Some(3), vec![b"ell", b"", &[0xf9, 0xff, 0xfa]]), + (1, Some(4), vec![b"ello", b"", &[0xf9, 0xff, 0xfa]]), + (-3, Some(1), vec![b"l", b"", &[0xf9]]), + (-3, Some(2), vec![b"ll", b"", &[0xf9, 0xff]]), + (-3, Some(3), vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]), + (-3, Some(4), vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericBinaryArray, + substring )?; - Ok(()) } @@ -700,114 +608,39 @@ mod tests { } #[test] - #[allow(clippy::type_complexity)] fn with_nulls_fixed_size_binary() -> Result<()> { - let cases: Vec<(Vec>, i64, Option, Vec>)> = vec![ - // all-nulls array is always identical - (vec![None, None, None], 3, Some(2), vec![None, None, None]), + let input = vec![Some("cat".as_bytes()), None, Some(&[0xf8, 0xf9, 0xff])]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (3, Some(2), vec![None, None, None])); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 0, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - None, - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 2, - None, - vec![Some(b"t"), None, Some(&[0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 3, - None, - vec![Some(b""), None, Some(&[])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 10, - None, - vec![Some(b""), None, Some(b"")], - ), + (1, None, vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (2, None, vec![Some(b"t"), None, Some(&[0xff])]), + (3, None, vec![Some(b""), None, Some(b"")]), + (10, None, vec![Some(b""), None, Some(b"")]), // increase start negatively - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -1, - None, - vec![Some(b"t"), None, Some(&[0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -2, - None, - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -10, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), + (-1, None, vec![Some(b"t"), None, Some(&[0xff])]), + (-2, None, vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (-3, None, input.clone()), + (-10, None, input.clone()), // increase length - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - Some(1), - vec![Some(b"a"), None, Some(&[0xf9])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - Some(2), - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - Some(3), - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(1), - vec![Some(b"c"), None, Some(&[0xf8])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(2), - vec![Some(b"ca"), None, Some(&[0xf8, 0xf9])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(3), - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(4), - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { + (1, Some(1), vec![Some(b"a"), None, Some(&[0xf9])]), + (1, Some(2), vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (1, Some(3), vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (-3, Some(1), vec![Some(b"c"), None, Some(&[0xf8])]), + (-3, Some(2), vec![Some(b"ca"), None, Some(&[0xf8, 0xf9])]), + (-3, Some(3), input.clone()), + (-3, Some(4), input.clone()) + ); + + [&base_case[..], &cases[..]] + .concat() + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { let array = FixedSizeBinaryArray::try_from_sparse_iter(array.into_iter()) .unwrap(); let result = substring(&array, start, length)?; @@ -821,121 +654,47 @@ mod tests { .unwrap(); assert_eq!(&expected, result,); Ok(()) - }, - )?; + })?; Ok(()) } #[test] - #[allow(clippy::type_complexity)] fn without_nulls_fixed_size_binary() -> Result<()> { - let cases: Vec<(Vec<&[u8]>, i64, Option, Vec<&[u8]>)> = vec![ - // empty array is always identical - (vec![b"", b"", &[]], 3, Some(2), vec![b"", b"", &[]]), + let input = vec!["cat".as_bytes(), b"dog", &[0xf8, 0xf9, 0xff]]; + // empty array is always identical + let base_case = gen_test_cases!( + vec!["".as_bytes(), &[], &[]], + (1, Some(2), vec!["".as_bytes(), &[], &[]]) + ); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 0, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - None, - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 2, - None, - vec![b"t", b"g", &[0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 3, - None, - vec![b"", b"", &[]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 10, - None, - vec![b"", b"", b""], - ), + (1, None, vec![b"at", b"og", &[0xf9, 0xff]]), + (2, None, vec![b"t", b"g", &[0xff]]), + (3, None, vec![&[], &[], &[]]), + (10, None, vec![&[], &[], &[]]), // increase start negatively - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -1, - None, - vec![b"t", b"g", &[0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -2, - None, - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -10, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), + (-1, None, vec![b"t", b"g", &[0xff]]), + (-2, None, vec![b"at", b"og", &[0xf9, 0xff]]), + (-3, None, input.clone()), + (-10, None, input.clone()), // increase length - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(1), - vec![b"a", b"o", &[0xf9]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(2), - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(3), - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(1), - vec![b"c", b"d", &[0xf8]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(2), - vec![b"ca", b"do", &[0xf8, 0xf9]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(3), - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(4), - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { + (1, Some(1), vec![b"a", b"o", &[0xf9]]), + (1, Some(2), vec![b"at", b"og", &[0xf9, 0xff]]), + (1, Some(3), vec![b"at", b"og", &[0xf9, 0xff]]), + (-3, Some(1), vec![b"c", b"d", &[0xf8]]), + (-3, Some(2), vec![b"ca", b"do", &[0xf8, 0xf9]]), + (-3, Some(3), input.clone()), + (-3, Some(4), input.clone()) + ); + + [&base_case[..], &cases[..]] + .concat() + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { let array = FixedSizeBinaryArray::try_from_iter(array.into_iter()).unwrap(); let result = substring(&array, start, length)?; @@ -948,8 +707,7 @@ mod tests { FixedSizeBinaryArray::try_from_iter(expected.into_iter()).unwrap(); assert_eq!(&expected, result,); Ok(()) - }, - )?; + })?; Ok(()) } @@ -985,62 +743,29 @@ mod tests { } fn with_nulls_generic_string() -> Result<()> { - let cases = vec![ - // all-nulls array is always identical - (vec![None, None, None], 0, None, vec![None, None, None]), + let input = vec![Some("hello"), None, Some("word")]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (0, None, vec![None, None, None])); + let cases = gen_test_cases!( + input, // identity - ( - vec![Some("hello"), None, Some("word")], - 0, - None, - vec![Some("hello"), None, Some("word")], - ), + (0, None, input.clone()), // 0 length -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(0), - vec![Some(""), None, Some("")], - ), + (0, Some(0), vec![Some(""), None, Some("")]), // high start -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 1000, - Some(0), - vec![Some(""), None, Some("")], - ), + (1000, Some(0), vec![Some(""), None, Some("")]), // high negative start -> identity - ( - vec![Some("hello"), None, Some("word")], - -1000, - None, - vec![Some("hello"), None, Some("word")], - ), + (-1000, None, input.clone()), // high length -> identity - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(1000), - vec![Some("hello"), None, Some("word")], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericStringArray::::from(array); - let result: ArrayRef = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); + (0, Some(1000), input.clone()) + ); - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericStringArray::::from(expected); - assert_eq!(&expected, result); - Ok(()) - }, + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring )?; - Ok(()) } @@ -1055,76 +780,38 @@ mod tests { } fn without_nulls_generic_string() -> Result<()> { - let cases = vec![ - // empty array is always identical - (vec!["", "", ""], 0, None, vec!["", "", ""]), - // increase start - ( - vec!["hello", "", "word"], - 0, - None, - vec!["hello", "", "word"], - ), - (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), - (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), - (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), - (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), + let input = vec!["hello", "", "word"]; + // empty array is always identical + let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), + (1, None, vec!["ello", "", "ord"]), + (2, None, vec!["llo", "", "rd"]), + (3, None, vec!["lo", "", "d"]), + (10, None, vec!["", "", ""]), // increase start negatively - (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), - (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), - (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), - ( - vec!["hello", "", "word"], - -10, - None, - vec!["hello", "", "word"], - ), + (-1, None, vec!["o", "", "d"]), + (-2, None, vec!["lo", "", "rd"]), + (-3, None, vec!["llo", "", "ord"]), + (-10, None, input.clone()), // increase length - (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), - (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), - ( - vec!["hello", "", "word"], - 1, - Some(3), - vec!["ell", "", "ord"], - ), - ( - vec!["hello", "", "word"], - 1, - Some(4), - vec!["ello", "", "ord"], - ), - (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), - (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), - ( - vec!["hello", "", "word"], - -3, - Some(3), - vec!["llo", "", "ord"], - ), - ( - vec!["hello", "", "word"], - -3, - Some(4), - vec!["llo", "", "ord"], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericStringArray::::from(array); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericStringArray::::from(expected); - assert_eq!(&expected, result,); - Ok(()) - }, + (1, Some(1), vec!["e", "", "o"]), + (1, Some(2), vec!["el", "", "or"]), + (1, Some(3), vec!["ell", "", "ord"]), + (1, Some(4), vec!["ello", "", "ord"]), + (-3, Some(1), vec!["l", "", "o"]), + (-3, Some(2), vec!["ll", "", "or"]), + (-3, Some(3), vec!["llo", "", "ord"]), + (-3, Some(4), vec!["llo", "", "ord"]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring )?; - Ok(()) } @@ -1181,59 +868,29 @@ mod tests { } fn with_nulls_generic_string_by_char() -> Result<()> { - let input_vals = vec![Some("hello"), None, Some("Γ ⊢x:T")]; - let cases = vec![ - // all-nulls array is always identical - (vec![None, None, None], 0, None, vec![None, None, None]), + let input = vec![Some("hello"), None, Some("Γ ⊢x:T")]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (0, None, vec![None, None, None])); + let cases = gen_test_cases!( + input, // identity - ( - input_vals.clone(), - 0, - None, - vec![Some("hello"), None, Some("Γ ⊢x:T")], - ), + (0, None, input.clone()), // 0 length -> Nothing - ( - input_vals.clone(), - 0, - Some(0), - vec![Some(""), None, Some("")], - ), + (0, Some(0), vec![Some(""), None, Some("")]), // high start -> Nothing - ( - input_vals.clone(), - 1000, - Some(0), - vec![Some(""), None, Some("")], - ), + (1000, Some(0), vec![Some(""), None, Some("")]), // high negative start -> identity - ( - input_vals.clone(), - -1000, - None, - vec![Some("hello"), None, Some("Γ ⊢x:T")], - ), + (-1000, None, input.clone()), // high length -> identity - ( - input_vals.clone(), - 0, - Some(1000), - vec![Some("hello"), None, Some("Γ ⊢x:T")], - ), - ]; + (0, Some(1000), input.clone()) + ); - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericStringArray::::from(array); - let result = substring_by_char(&array, start, length)?; - assert_eq!(array.len(), result.len()); - - let expected = GenericStringArray::::from(expected); - assert_eq!(expected, result); - Ok(()) - }, + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring_by_char )?; - Ok(()) } @@ -1248,43 +905,39 @@ mod tests { } fn without_nulls_generic_string_by_char() -> Result<()> { - let input_vals = vec!["hello", "", "Γ ⊢x:T"]; - let cases = vec![ - // empty array is always identical - (vec!["", "", ""], 0, None, vec!["", "", ""]), + let input = vec!["hello", "", "Γ ⊢x:T"]; + // empty array is always identical + let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); + let cases = gen_test_cases!( + input, + //identity + (0, None, input.clone()), // increase start - (input_vals.clone(), 0, None, vec!["hello", "", "Γ ⊢x:T"]), - (input_vals.clone(), 1, None, vec!["ello", "", " ⊢x:T"]), - (input_vals.clone(), 2, None, vec!["llo", "", "⊢x:T"]), - (input_vals.clone(), 3, None, vec!["lo", "", "x:T"]), - (input_vals.clone(), 10, None, vec!["", "", ""]), + (1, None, vec!["ello", "", " ⊢x:T"]), + (2, None, vec!["llo", "", "⊢x:T"]), + (3, None, vec!["lo", "", "x:T"]), + (10, None, vec!["", "", ""]), // increase start negatively - (input_vals.clone(), -1, None, vec!["o", "", "T"]), - (input_vals.clone(), -2, None, vec!["lo", "", ":T"]), - (input_vals.clone(), -4, None, vec!["ello", "", "⊢x:T"]), - (input_vals.clone(), -10, None, vec!["hello", "", "Γ ⊢x:T"]), + (-1, None, vec!["o", "", "T"]), + (-2, None, vec!["lo", "", ":T"]), + (-4, None, vec!["ello", "", "⊢x:T"]), + (-10, None, input.clone()), // increase length - (input_vals.clone(), 1, Some(1), vec!["e", "", " "]), - (input_vals.clone(), 1, Some(2), vec!["el", "", " ⊢"]), - (input_vals.clone(), 1, Some(3), vec!["ell", "", " ⊢x"]), - (input_vals.clone(), 1, Some(6), vec!["ello", "", " ⊢x:T"]), - (input_vals.clone(), -4, Some(1), vec!["e", "", "⊢"]), - (input_vals.clone(), -4, Some(2), vec!["el", "", "⊢x"]), - (input_vals.clone(), -4, Some(3), vec!["ell", "", "⊢x:"]), - (input_vals.clone(), -4, Some(4), vec!["ello", "", "⊢x:T"]), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericStringArray::::from(array); - let result = substring_by_char(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let expected = GenericStringArray::::from(expected); - assert_eq!(expected, result); - Ok(()) - }, + (1, Some(1), vec!["e", "", " "]), + (1, Some(2), vec!["el", "", " ⊢"]), + (1, Some(3), vec!["ell", "", " ⊢x"]), + (1, Some(6), vec!["ello", "", " ⊢x:T"]), + (-4, Some(1), vec!["e", "", "⊢"]), + (-4, Some(2), vec!["el", "", "⊢x"]), + (-4, Some(3), vec!["ell", "", "⊢x:"]), + (-4, Some(4), vec!["ello", "", "⊢x:T"]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring_by_char )?; - Ok(()) } From 5cdb7c0e745c9b3b9cc0644370e1856d46e2c8f5 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Mon, 13 Jun 2022 20:19:08 +0800 Subject: [PATCH 2/5] clean up fixed binary Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/substring.rs | 48 ++++++-------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index ca54aa4450e..b2c8a133348 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -461,7 +461,6 @@ mod tests { |(array, start, length, expected)| { let array = <$array_ty>::from(array); let result = $substring_fn(&array, start, length)?; - assert_eq!(array.len(), result.len()); let result = result.as_any().downcast_ref::<$array_ty>().unwrap(); let expected = <$array_ty>::from(expected); assert_eq!(&expected, result); @@ -637,25 +636,11 @@ mod tests { (-3, Some(4), input.clone()) ); - [&base_case[..], &cases[..]] - .concat() - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = FixedSizeBinaryArray::try_from_sparse_iter(array.into_iter()) - .unwrap(); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - let expected = - FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter()) - .unwrap(); - assert_eq!(&expected, result,); - Ok(()) - })?; - + do_test!( + [&base_case[..], &cases[..]].concat(), + FixedSizeBinaryArray, + substring + )?; Ok(()) } @@ -691,24 +676,11 @@ mod tests { (-3, Some(4), input.clone()) ); - [&base_case[..], &cases[..]] - .concat() - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = - FixedSizeBinaryArray::try_from_iter(array.into_iter()).unwrap(); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - let expected = - FixedSizeBinaryArray::try_from_iter(expected.into_iter()).unwrap(); - assert_eq!(&expected, result,); - Ok(()) - })?; - + do_test!( + [&base_case[..], &cases[..]].concat(), + FixedSizeBinaryArray, + substring + )?; Ok(()) } From 9004bba3339faddac04d413ff8886f8d44795c74 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 14 Jun 2022 09:36:44 +0800 Subject: [PATCH 3/5] trigger GitHub actions From c2dd7883931ea54a87da616ac4134232bc222a24 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Fri, 17 Jun 2022 09:14:33 +0800 Subject: [PATCH 4/5] directly panic Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/substring.rs | 149 +++++++++++-------------- 1 file changed, 66 insertions(+), 83 deletions(-) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index b2c8a133348..a900b3d4c9c 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -457,20 +457,19 @@ mod tests { macro_rules! do_test { ($cases:expr, $array_ty:ty, $substring_fn:ident) => { - $cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { + $cases + .into_iter() + .for_each(|(array, start, length, expected)| { let array = <$array_ty>::from(array); - let result = $substring_fn(&array, start, length)?; + let result = $substring_fn(&array, start, length).unwrap(); let result = result.as_any().downcast_ref::<$array_ty>().unwrap(); let expected = <$array_ty>::from(expected); assert_eq!(&expected, result); - Ok(()) - }, - ) + }) }; } - fn with_nulls_generic_binary() -> Result<()> { + fn with_nulls_generic_binary() { let input = vec![ Some("hello".as_bytes()), None, @@ -499,21 +498,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericBinaryArray, substring - )?; - Ok(()) + ); } #[test] - fn with_nulls_binary() -> Result<()> { + fn with_nulls_binary() { with_nulls_generic_binary::() } #[test] - fn with_nulls_large_binary() -> Result<()> { + fn with_nulls_large_binary() { with_nulls_generic_binary::() } - fn without_nulls_generic_binary() -> Result<()> { + fn without_nulls_generic_binary() { let input = vec!["hello".as_bytes(), b"", &[0xf8, 0xf9, 0xff, 0xfa]]; // empty array is always identical let base_case = gen_test_cases!( @@ -549,21 +547,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericBinaryArray, substring - )?; - Ok(()) + ); } #[test] - fn without_nulls_binary() -> Result<()> { + fn without_nulls_binary() { without_nulls_generic_binary::() } #[test] - fn without_nulls_large_binary() -> Result<()> { + fn without_nulls_large_binary() { without_nulls_generic_binary::() } - fn generic_binary_with_non_zero_offset() -> Result<()> { + fn generic_binary_with_non_zero_offset() { let values = 0_u8..15; let offsets = &[ O::zero(), @@ -580,11 +577,12 @@ mod tests { .add_buffer(Buffer::from_iter(values)) .null_bit_buffer(Some(Buffer::from(bitmap))) .offset(1) - .build()?; + .build() + .unwrap(); // array is `[null, [10, 11, 12, 13, 14]]` let array = GenericBinaryArray::::from(data); // result is `[null, [11, 12, 13, 14]]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::>() @@ -592,22 +590,20 @@ mod tests { let expected = GenericBinaryArray::::from_opt_vec(vec![None, Some(&[11_u8, 12, 13, 14])]); assert_eq!(result, &expected); - - Ok(()) } #[test] - fn binary_with_non_zero_offset() -> Result<()> { + fn binary_with_non_zero_offset() { generic_binary_with_non_zero_offset::() } #[test] - fn large_binary_with_non_zero_offset() -> Result<()> { + fn large_binary_with_non_zero_offset() { generic_binary_with_non_zero_offset::() } #[test] - fn with_nulls_fixed_size_binary() -> Result<()> { + fn with_nulls_fixed_size_binary() { let input = vec![Some("cat".as_bytes()), None, Some(&[0xf8, 0xf9, 0xff])]; // all-nulls array is always identical let base_case = @@ -640,12 +636,11 @@ mod tests { [&base_case[..], &cases[..]].concat(), FixedSizeBinaryArray, substring - )?; - Ok(()) + ); } #[test] - fn without_nulls_fixed_size_binary() -> Result<()> { + fn without_nulls_fixed_size_binary() { let input = vec!["cat".as_bytes(), b"dog", &[0xf8, 0xf9, 0xff]]; // empty array is always identical let base_case = gen_test_cases!( @@ -680,12 +675,11 @@ mod tests { [&base_case[..], &cases[..]].concat(), FixedSizeBinaryArray, substring - )?; - Ok(()) + ); } #[test] - fn fixed_size_binary_with_non_zero_offset() -> Result<()> { + fn fixed_size_binary_with_non_zero_offset() { let values: [u8; 15] = *b"hellotherearrow"; // set the first and third element to be valid let bits_v = [0b101_u8]; @@ -700,7 +694,7 @@ mod tests { // array is `[null, "arrow"]` let array = FixedSizeBinaryArray::from(data); // result is `[null, "rrow"]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::() @@ -710,11 +704,9 @@ mod tests { ) .unwrap(); assert_eq!(result, &expected); - - Ok(()) } - fn with_nulls_generic_string() -> Result<()> { + fn with_nulls_generic_string() { let input = vec![Some("hello"), None, Some("word")]; // all-nulls array is always identical let base_case = @@ -737,21 +729,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericStringArray, substring - )?; - Ok(()) + ); } #[test] - fn with_nulls_string() -> Result<()> { + fn with_nulls_string() { with_nulls_generic_string::() } #[test] - fn with_nulls_large_string() -> Result<()> { + fn with_nulls_large_string() { with_nulls_generic_string::() } - fn without_nulls_generic_string() -> Result<()> { + fn without_nulls_generic_string() { let input = vec!["hello", "", "word"]; // empty array is always identical let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); @@ -783,21 +774,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericStringArray, substring - )?; - Ok(()) + ); } #[test] - fn without_nulls_string() -> Result<()> { + fn without_nulls_string() { without_nulls_generic_string::() } #[test] - fn without_nulls_large_string() -> Result<()> { + fn without_nulls_large_string() { without_nulls_generic_string::() } - fn generic_string_with_non_zero_offset() -> Result<()> { + fn generic_string_with_non_zero_offset() { let values = "hellotherearrow"; let offsets = &[ O::zero(), @@ -814,32 +804,31 @@ mod tests { .add_buffer(Buffer::from(values)) .null_bit_buffer(Some(Buffer::from(bitmap))) .offset(1) - .build()?; + .build() + .unwrap(); // array is `[null, "arrow"]` let array = GenericStringArray::::from(data); // result is `[null, "rrow"]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::>() .unwrap(); let expected = GenericStringArray::::from(vec![None, Some("rrow")]); assert_eq!(result, &expected); - - Ok(()) } #[test] - fn string_with_non_zero_offset() -> Result<()> { + fn string_with_non_zero_offset() { generic_string_with_non_zero_offset::() } #[test] - fn large_string_with_non_zero_offset() -> Result<()> { + fn large_string_with_non_zero_offset() { generic_string_with_non_zero_offset::() } - fn with_nulls_generic_string_by_char() -> Result<()> { + fn with_nulls_generic_string_by_char() { let input = vec![Some("hello"), None, Some("Γ ⊢x:T")]; // all-nulls array is always identical let base_case = @@ -862,21 +851,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericStringArray, substring_by_char - )?; - Ok(()) + ); } #[test] - fn with_nulls_string_by_char() -> Result<()> { + fn with_nulls_string_by_char() { with_nulls_generic_string_by_char::() } #[test] - fn with_nulls_large_string_by_char() -> Result<()> { + fn with_nulls_large_string_by_char() { with_nulls_generic_string_by_char::() } - fn without_nulls_generic_string_by_char() -> Result<()> { + fn without_nulls_generic_string_by_char() { let input = vec!["hello", "", "Γ ⊢x:T"]; // empty array is always identical let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); @@ -909,21 +897,20 @@ mod tests { [&base_case[..], &cases[..]].concat(), GenericStringArray, substring_by_char - )?; - Ok(()) + ); } #[test] - fn without_nulls_string_by_char() -> Result<()> { + fn without_nulls_string_by_char() { without_nulls_generic_string_by_char::() } #[test] - fn without_nulls_large_string_by_char() -> Result<()> { + fn without_nulls_large_string_by_char() { without_nulls_generic_string_by_char::() } - fn generic_string_by_char_with_non_zero_offset() -> Result<()> { + fn generic_string_by_char_with_non_zero_offset() { let values = "S→T = Πx:S.T"; let offsets = &[ O::zero(), @@ -942,41 +929,39 @@ mod tests { .add_buffer(Buffer::from(values)) .null_bit_buffer(Some(Buffer::from(bitmap))) .offset(1) - .build()?; + .build() + .unwrap(); // array is `[null, "Πx:S.T"]` let array = GenericStringArray::::from(data); // result is `[null, "x:S.T"]` - let result = substring_by_char(&array, 1, None)?; + let result = substring_by_char(&array, 1, None).unwrap(); let expected = GenericStringArray::::from(vec![None, Some("x:S.T")]); assert_eq!(result, expected); - - Ok(()) } #[test] - fn string_with_non_zero_offset_by_char() -> Result<()> { + fn string_with_non_zero_offset_by_char() { generic_string_by_char_with_non_zero_offset::() } #[test] - fn large_string_with_non_zero_offset_by_char() -> Result<()> { + fn large_string_with_non_zero_offset_by_char() { generic_string_by_char_with_non_zero_offset::() } #[test] - fn dictionary() -> Result<()> { - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - Ok(()) - } - - fn _dictionary() -> Result<()> { + fn dictionary() { + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + } + + fn _dictionary() { const TOTAL: i32 = 100; let v = ["aaa", "bbb", "ccc", "ddd", "eee"]; @@ -996,7 +981,7 @@ mod tests { let expected: Vec> = data.iter().map(|opt| opt.map(|s| &s[1..3])).collect(); - let res = substring(&dict_array, 1, Some(2))?; + let res = substring(&dict_array, 1, Some(2)).unwrap(); let actual = res.as_any().downcast_ref::>().unwrap(); let actual: Vec> = actual .values() @@ -1009,8 +994,6 @@ mod tests { for i in 0..TOTAL as usize { assert_eq!(expected[i], actual[i],); } - - Ok(()) } #[test] From d788f9901108b8afae9a80d27d831c0c41c1bfc9 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Fri, 17 Jun 2022 09:40:13 +0800 Subject: [PATCH 5/5] add docs for helper macros Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/substring.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index a900b3d4c9c..024f5633fef 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -445,6 +445,16 @@ mod tests { use super::*; use crate::datatypes::*; + /// A helper macro to generate test cases. + /// # Arguments + /// * `input` - A vector which array can be built from. + /// * `start` - The start index of the substring. + /// * `len` - The length of the substring. + /// * `result` - The expected result of substring, which is a vector that array can be built from. + /// # Return + /// A vector of `(input, start, len, result)`. + /// + /// Users can provide any number of `(start, len, result)` to generate test cases for one `input`. macro_rules! gen_test_cases { ($input:expr, $(($start:expr, $len:expr, $result:expr)), *) => { [ @@ -455,6 +465,12 @@ mod tests { }; } + /// A helper macro to test the substring functions. + /// # Arguments + /// * `cases` - The test cases which is a vector of `(input, start, len, result)`. + /// Please look at [`gen_test_cases`] to find how to generate it. + /// * `array_ty` - The array type. + /// * `substring_fn` - Either [`substring`] or [`substring_by_char`]. macro_rules! do_test { ($cases:expr, $array_ty:ty, $substring_fn:ident) => { $cases