From cf34814a0c036081638f609be6b7e82ee73b8c1a Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 28 Nov 2022 10:30:41 +0100 Subject: [PATCH 1/2] perf: auto-vectorized try_check_offsets --- src/array/specification.rs | 40 +++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/array/specification.rs b/src/array/specification.rs index 6b9139feedf..fee67e1ad31 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -76,17 +76,35 @@ pub fn try_check_offsets_and_utf8(offsets: &[O], values: &[u8]) -> Re /// Checks that `offsets` is monotonically increasing, and all offsets are less than or equal to /// `values_len`. pub fn try_check_offsets(offsets: &[O], values_len: usize) -> Result<()> { - if offsets.windows(2).any(|window| window[0] > window[1]) { - Err(Error::oos("offsets must be monotonically increasing")) - } else if offsets - .last() - .map_or(true, |last| last.to_usize() > values_len) - { - Err(Error::oos( - "offsets must have at least one element and must not exceed values length", - )) - } else { - Ok(()) + // this code is carefully constructed to auto-vectorize, don't change naively! + match offsets.first() { + None => Ok(()), + Some(first) => { + let mut previous = *first; + let mut any_invalid = false; + + // This loop will auto-vectorize because there is not any break, + // an invalid value will be returned once the whole offsets buffer is processed. + for offset in offsets { + if previous > *offset { + any_invalid = true + } + previous = *offset; + } + + if any_invalid { + Err(Error::oos("offsets must be monotonically increasing")) + } else if offsets + .last() + .map_or(true, |last| last.to_usize() > values_len) + { + Err(Error::oos( + "offsets must have at least one element and must not exceed values length", + )) + } else { + Ok(()) + } + } } } From 2dc1f3f068dff378ae8a662d1b0d9946215e3ca5 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 28 Nov 2022 11:37:47 +0100 Subject: [PATCH 2/2] err on empty offsets --- src/array/specification.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/specification.rs b/src/array/specification.rs index fee67e1ad31..7b1e0d86640 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -78,7 +78,7 @@ pub fn try_check_offsets_and_utf8(offsets: &[O], values: &[u8]) -> Re pub fn try_check_offsets(offsets: &[O], values_len: usize) -> Result<()> { // this code is carefully constructed to auto-vectorize, don't change naively! match offsets.first() { - None => Ok(()), + None => Err(Error::oos("offsets must have at least one element")), Some(first) => { let mut previous = *first; let mut any_invalid = false;