From cf34814a0c036081638f609be6b7e82ee73b8c1a Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 28 Nov 2022 10:30:41 +0100 Subject: [PATCH] perf: auto-vectorized try_check_offsets --- src/array/specification.rs | 40 +++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/array/specification.rs b/src/array/specification.rs index 6b9139feedf..fee67e1ad31 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -76,17 +76,35 @@ pub fn try_check_offsets_and_utf8(offsets: &[O], values: &[u8]) -> Re /// Checks that `offsets` is monotonically increasing, and all offsets are less than or equal to /// `values_len`. pub fn try_check_offsets(offsets: &[O], values_len: usize) -> Result<()> { - if offsets.windows(2).any(|window| window[0] > window[1]) { - Err(Error::oos("offsets must be monotonically increasing")) - } else if offsets - .last() - .map_or(true, |last| last.to_usize() > values_len) - { - Err(Error::oos( - "offsets must have at least one element and must not exceed values length", - )) - } else { - Ok(()) + // this code is carefully constructed to auto-vectorize, don't change naively! + match offsets.first() { + None => Ok(()), + Some(first) => { + let mut previous = *first; + let mut any_invalid = false; + + // This loop will auto-vectorize because there is not any break, + // an invalid value will be returned once the whole offsets buffer is processed. + for offset in offsets { + if previous > *offset { + any_invalid = true + } + previous = *offset; + } + + if any_invalid { + Err(Error::oos("offsets must be monotonically increasing")) + } else if offsets + .last() + .map_or(true, |last| last.to_usize() > values_len) + { + Err(Error::oos( + "offsets must have at least one element and must not exceed values length", + )) + } else { + Ok(()) + } + } } }