From 66c9636742162f832b434a513769e158f9723e67 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Oct 2022 13:26:15 -0400 Subject: [PATCH] Fix ignored limit on `lexsort_to_indices` (#2991) * Fix ignored limit on lexsort_to_indices * Update comments * Update arrow/src/compute/kernels/sort.rs Co-authored-by: Batuhan Taskaya Co-authored-by: Batuhan Taskaya --- arrow/src/compute/kernels/sort.rs | 40 +++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index b297622647e..a10e674ac9d 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -950,7 +950,7 @@ pub fn lexsort_to_indices( }); Ok(UInt32Array::from_iter_values( - value_indices.iter().map(|i| *i as u32), + value_indices.iter().take(len).map(|i| *i as u32), )) } @@ -1422,6 +1422,18 @@ mod tests { } } + /// slice all arrays in expected_output to offset/length + fn slice_arrays( + expected_output: Vec<ArrayRef>, + offset: usize, + length: usize, + ) -> Vec<ArrayRef> { + expected_output + .into_iter() + .map(|array| array.slice(offset, length)) + .collect() + } + fn test_sort_binary_arrays( data: Vec<Option<Vec<u8>>>, options: Option<SortOptions>, @@ -3439,8 +3451,10 @@ mod tests { Some(2), Some(17), ])) as ArrayRef]; - test_lex_sort_arrays(input.clone(), expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2)); + // Explicitly test a limit on the sort as a demonstration let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ Some(-1), Some(0), @@ -3519,7 +3533,8 @@ mod tests { Some(-2), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test mix of string and in64 with option let input = vec![ @@ -3562,7 +3577,8 @@ mod tests { Some("7"), ])) as ArrayRef, ]; - 
test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3)); // test sort with nulls first let input = vec![ @@ -3605,7 +3621,8 @@ mod tests { Some("world"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1)); // test sort with nulls last let input = vec![ @@ -3648,7 +3665,8 @@ mod tests { None, ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test sort with opposite options let input = vec![ @@ -3695,7 +3713,15 @@ mod tests { Some("foo"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays( + input.clone(), + slice_arrays(expected.clone(), 0, 5), + Some(5), + ); + + // Limiting by more rows than present is ok + test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10)); } #[test]