From 189b424c697493cc9c5216ad2a6a4cf0477ba808 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 23 Sep 2021 21:01:51 +0200 Subject: [PATCH] Elide bounds checks in (fixedsize)list-iter Also adds a slice unchecked and dispatches slice method to the unchecked. This also reduces a double assert to a single assert in safe code. As the bounds were checked at the array buffer and at the validity. --- Cargo.toml | 4 ++ benches/iter_list.rs | 54 +++++++++++++++++++++++++++ src/array/binary/mod.rs | 23 +++++++++++- src/array/boolean/mod.rs | 25 ++++++++++++- src/array/dictionary/mod.rs | 15 ++++++++ src/array/fixed_size_binary/mod.rs | 25 ++++++++++++- src/array/fixed_size_list/iterator.rs | 4 +- src/array/fixed_size_list/mod.rs | 36 +++++++++++++++++- src/array/list/iterator.rs | 12 ++++-- src/array/list/mod.rs | 31 +++++++++++++-- src/array/mod.rs | 12 +++++- src/array/null.rs | 3 ++ src/array/primitive/mod.rs | 24 +++++++++++- src/array/struct_.rs | 23 +++++++++++- src/array/union/mod.rs | 20 ++++++++++ src/array/utf8/mod.rs | 22 ++++++++++- src/bitmap/immutable.rs | 12 +++++- src/buffer/immutable.rs | 13 ++++++- 18 files changed, 330 insertions(+), 28 deletions(-) create mode 100644 benches/iter_list.rs diff --git a/Cargo.toml b/Cargo.toml index e00dcbce12b..6b7d9562c6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -230,3 +230,7 @@ harness = false [[bench]] name = "iter_utf8" harness = false + +[[bench]] +name = "iter_list" +harness = false diff --git a/benches/iter_list.rs b/benches/iter_list.rs new file mode 100644 index 00000000000..87869943b85 --- /dev/null +++ b/benches/iter_list.rs @@ -0,0 +1,54 @@ +use arrow2::{ + array::{ListArray, PrimitiveArray}, + buffer::Buffer, + datatypes::DataType, +}; + +use arrow2::bitmap::Bitmap; +use arrow2::buffer::MutableBuffer; +use criterion::{criterion_group, criterion_main, Criterion}; +use std::iter::FromIterator; +use std::sync::Arc; + +fn add_benchmark(c: &mut Criterion) { + (10..=20).step_by(2).for_each(|log2_size| { + let size = 2usize.pow(log2_size); + + let values = Buffer::from_iter(0..size as i32); + let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + + let mut offsets = MutableBuffer::from_iter((0..size as i32).step_by(2)); + offsets.push(size as i32); + + let validity = (0..(offsets.len() - 1)) + .map(|i| i % 4 == 0) + .collect::(); + + let data_type = ListArray::::default_datatype(DataType::Int32); + let array = ListArray::::from_data( + data_type, + offsets.into(), + Arc::new(values), + Some(validity), + ); + + c.bench_function(&format!("list: iter_values 2^{}", log2_size), |b| { + b.iter(|| { + for x in array.values_iter() { + assert_eq!(x.len(), 2); + } + }) + }); + + c.bench_function(&format!("list: iter 2^{}", log2_size), |b| { + b.iter(|| { + for x in array.iter() { + assert_eq!(x.unwrap().len(), 2); + } + }) + }); + }) +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index c7b0f65992f..67908f2e810 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -90,8 +90,24 @@ impl BinaryArray { /// # Panics /// iff `offset + length > self.len()`. pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); - let offsets = self.offsets.clone().slice(offset, length + 1); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`]. + /// # Implementation + /// This function is `O(1)`: all data will be shared between both arrays. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); + let offsets = self.offsets.clone().slice_unchecked(offset, length + 1); Self { data_type: self.data_type.clone(), offsets, @@ -175,6 +191,9 @@ impl Array for BinaryArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index dfc8134f771..977760dc8f2 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -61,10 +61,27 @@ impl BooleanArray { /// This function panics iff `offset + length >= self.len()`. #[inline] pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`BooleanArray`]. + /// # Implementation + /// This operation is `O(1)` as it amounts to increase two ref counts. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + #[inline] + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); Self { data_type: self.data_type.clone(), - values: self.values.clone().slice(offset, length), + values: self.values.clone().slice_unchecked(offset, length), validity, offset: self.offset + offset, } @@ -130,6 +147,10 @@ impl Array for BooleanArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + #[inline] + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index e938b1e224b..a9e02edffec 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -74,6 +74,8 @@ impl DictionaryArray { } /// Creates a new [`DictionaryArray`] by slicing the existing [`DictionaryArray`]. + /// # Panics + /// iff `offset + length > self.len()`. pub fn slice(&self, offset: usize, length: usize) -> Self { Self { data_type: self.data_type.clone(), @@ -83,6 +85,16 @@ impl DictionaryArray { } } + /// Creates a new [`DictionaryArray`] by slicing the existing [`DictionaryArray`]. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + Self { + data_type: self.data_type.clone(), + keys: self.keys.clone().slice_unchecked(offset, length), + values: self.values.clone(), + offset: self.offset + offset, + } + } + /// Sets the validity bitmap on this [`Array`]. /// # Panic /// This function panics iff `validity.len() != self.len()`. @@ -149,6 +161,9 @@ impl Array for DictionaryArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 7d7d8a58506..dab08e2ca71 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -50,12 +50,30 @@ impl FixedSizeBinaryArray { /// Returns a slice of this [`FixedSizeBinaryArray`]. /// # Implementation /// This operation is `O(1)` as it amounts to increase 3 ref counts. + /// # Panics + /// panics iff `offset + length >= self.len()` pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`FixedSizeBinaryArray`]. + /// # Implementation + /// This operation is `O(1)` as it amounts to increase 3 ref counts. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); let values = self .values .clone() - .slice(offset * self.size as usize, length * self.size as usize); + .slice_unchecked(offset * self.size as usize, length * self.size as usize); Self { data_type: self.data_type.clone(), size: self.size, @@ -139,6 +157,9 @@ impl Array for FixedSizeBinaryArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/fixed_size_list/iterator.rs b/src/array/fixed_size_list/iterator.rs index 031f3f15f48..69bf7d595c3 100644 --- a/src/array/fixed_size_list/iterator.rs +++ b/src/array/fixed_size_list/iterator.rs @@ -6,8 +6,8 @@ use crate::{ use super::FixedSizeListArray; impl IterableListArray for FixedSizeListArray { - fn value(&self, i: usize) -> Box { - FixedSizeListArray::value(self, i) + unsafe fn value_unchecked(&self, i: usize) -> Box { + FixedSizeListArray::value_unchecked(self, i) } } diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index bc7ae65e025..c92d0de8d73 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -63,12 +63,30 @@ impl FixedSizeListArray { /// Returns a slice of this [`FixedSizeListArray`]. /// # Implementation /// This operation is `O(1)`. + /// # Panics + /// panics iff `offset + length >= self.len()` pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`FixedSizeListArray`]. + /// # Implementation + /// This operation is `O(1)`. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); let values = self .values .clone() - .slice(offset * self.size as usize, length * self.size as usize) + .slice_unchecked(offset * self.size as usize, length * self.size as usize) .into(); Self { data_type: self.data_type.clone(), @@ -85,12 +103,23 @@ impl FixedSizeListArray { } /// Returns the `Vec` at position `i`. + /// # Panic: + /// panics iff `i >= self.len()` #[inline] pub fn value(&self, i: usize) -> Box { self.values .slice(i * self.size as usize, self.size as usize) } + /// Returns the `Vec` at position `i`. + /// # Safety: + /// Caller must ensure that `i < self.len()` + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> Box { + self.values + .slice_unchecked(i * self.size as usize, self.size as usize) + } + /// Sets the validity bitmap on this [`FixedSizeListArray`]. /// # Panic /// This function panics iff `validity.len() != self.len()`. @@ -143,6 +172,9 @@ impl Array for FixedSizeListArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/list/iterator.rs b/src/array/list/iterator.rs index 9d6991df4f4..6885bf9d925 100644 --- a/src/array/list/iterator.rs +++ b/src/array/list/iterator.rs @@ -32,7 +32,9 @@ impl<'a, A: IterableListArray> Iterator for ListValuesIter<'a, A> { } let old = self.index; self.index += 1; - Some(self.array.value(old)) + // Safety: + // self.end is maximized by the length of the array + Some(unsafe { self.array.value_unchecked(old) }) } #[inline] @@ -50,14 +52,16 @@ impl<'a, A: IterableListArray> DoubleEndedIterator for ListValuesIter<'a, A> { None } else { self.end -= 1; - Some(self.array.value(self.end)) + // Safety: + // self.end is maximized by the length of the array + Some(unsafe { self.array.value_unchecked(self.end) }) } } } impl IterableListArray for ListArray { - fn value(&self, i: usize) -> Box { - ListArray::::value(self, i) + unsafe fn value_unchecked(&self, i: usize) -> Box { + ListArray::::value_unchecked(self, i) } } diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 1e84981df86..913c745215a 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -80,7 +80,10 @@ impl ListArray { let offset_1 = offsets[i + 1]; let length = (offset_1 - offset).to_usize(); - self.values.slice(offset.to_usize(), length) + // Safety: + // One of the invariants of the struct + // is that offsets are in bounds + unsafe { self.values.slice_unchecked(offset.to_usize(), length) } } } @@ -93,12 +96,29 @@ impl ListArray { let offset_1 = *self.offsets.as_ptr().add(i + 1); let length = (offset_1 - offset).to_usize(); - self.values.slice(offset.to_usize(), length) + self.values.slice_unchecked(offset.to_usize(), length) } + /// Returns a slice of this [`ListArray`]. + /// # Panics + /// panics iff `offset + length >= self.len()` pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); - let offsets = self.offsets.clone().slice(offset, length + 1); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`ListArray`]. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); + let offsets = self.offsets.clone().slice_unchecked(offset, length + 1); Self { data_type: self.data_type.clone(), offsets, @@ -186,6 +206,9 @@ impl Array for ListArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/mod.rs b/src/array/mod.rs index 5dbadb28846..8397b030ebd 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -88,6 +88,14 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// This function panics iff `offset + length >= self.len()`. fn slice(&self, offset: usize, length: usize) -> Box; + /// Slices the [`Array`], returning a new `Box`. + /// # Implementation + /// This operation is `O(1)` over `len`, as it amounts to increase two ref counts + /// and moving the struct to the heap. + /// # Safety + /// The caller must ensure that `offset + length < self.len()` + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box; + /// Sets the validity bitmap on this [`Array`]. /// # Panic /// This function panics iff `validity.len() < self.len()`. @@ -426,7 +434,9 @@ fn display_fmt>>( /// Trait that list arrays implement for the purposes of DRY. pub trait IterableListArray: Array { - fn value(&self, i: usize) -> Box; + // # Safety: + // The caller must ensure that `i < self.len()` + unsafe fn value_unchecked(&self, i: usize) -> Box; } /// Trait that [`BinaryArray`] and [`Utf8Array`] implement for the purposes of DRY. diff --git a/src/array/null.rs b/src/array/null.rs index d7c339f29e9..cec79e855af 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -63,6 +63,9 @@ impl Array for NullArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice(offset, length)) + } fn with_validity(&self, _: Option) -> Box { panic!("cannot set validity of a null array") } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 0cb84bec91b..980577bed77 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -86,10 +86,27 @@ impl PrimitiveArray { /// This function panics iff `offset + length >= self.len()`. #[inline] pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`PrimitiveArray`]. + /// # Implementation + /// This operation is `O(1)` as it amounts to increase two ref counts. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + #[inline] + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); Self { data_type: self.data_type.clone(), - values: self.values.clone().slice(offset, length), + values: self.values.clone().slice_unchecked(offset, length), validity, offset: self.offset + offset, } @@ -176,6 +193,9 @@ impl Array for PrimitiveArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/struct_.rs b/src/array/struct_.rs index ddb18724135..18818f7d0c6 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -105,13 +105,29 @@ impl StructArray { /// # Implementation /// This operation is `O(F)` where `F` is the number of fields. pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Creates a new [`StructArray`] that is a slice of `self`. + /// # Implementation + /// This operation is `O(F)` where `F` is the number of fields. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); Self { data_type: self.data_type.clone(), values: self .values .iter() - .map(|x| x.slice(offset, length).into()) + .map(|x| x.slice_unchecked(offset, length).into()) .collect(), validity, } @@ -175,6 +191,9 @@ impl Array for StructArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs index 68b93002222..52b98552fd9 100644 --- a/src/array/union/mod.rs +++ b/src/array/union/mod.rs @@ -190,6 +190,23 @@ impl UnionArray { offset: self.offset + offset, } } + + /// Returns a slice of this [`UnionArray`]. + /// # Implementation + /// This operation is `O(F)` where `F` is the number of fields. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + #[inline] + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + Self { + data_type: self.data_type.clone(), + fields: self.fields.clone(), + fields_hash: self.fields_hash.clone(), + types: self.types.clone().slice_unchecked(offset, length), + offsets: self.offsets.clone(), + offset: self.offset + offset, + } + } } impl Array for UnionArray { @@ -212,6 +229,9 @@ impl Array for UnionArray { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, _: Option) -> Box { panic!("cannot set validity of a union array") } diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 0e5ce24d9c2..ab1819e4359 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -165,9 +165,24 @@ impl Utf8Array { /// # Panic /// This function panics iff `offset + length >= self.len()`. pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + /// Returns a slice of this [`Utf8Array`]. + /// # Implementation + /// This operation is `O(1)` as it amounts to essentially increase two ref counts. + /// # Safety + /// The caller must ensure that `offset + length < self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); // + 1: `length == 0` implies that we take the first offset. - let offsets = self.offsets.clone().slice(offset, length + 1); + let offsets = self.offsets.clone().slice_unchecked(offset, length + 1); Self { data_type: self.data_type.clone(), offsets, @@ -225,6 +240,9 @@ impl Array for Utf8Array { fn slice(&self, offset: usize, length: usize) -> Box { Box::new(self.slice(offset, length)) } + unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box { + Box::new(self.slice_unchecked(offset, length)) + } fn with_validity(&self, validity: Option) -> Box { Box::new(self.with_validity(validity)) } diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index d15dcc80a77..b6d0df82e26 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -108,11 +108,19 @@ impl Bitmap { /// Slices `self`, offseting by `offset` and truncating up to `length` bits. /// # Panic - /// Panics iff `self.offset + offset + length <= self.bytes.len() * 8`, i.e. if the offset and `length` + /// Panics iff `self.offset + offset + length >= self.bytes.len() * 8`, i.e. if the offset and `length` /// exceeds the allocated capacity of `self`. #[inline] - pub fn slice(mut self, offset: usize, length: usize) -> Self { + pub fn slice(self, offset: usize, length: usize) -> Self { assert!(offset + length <= self.length); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Slices `self`, offseting by `offset` and truncating up to `length` bits. + /// # Safety + /// The caller must ensure that `self.offset + offset + length <= self.bytes.len() * 8` + #[inline] + pub unsafe fn slice_unchecked(mut self, offset: usize, length: usize) -> Self { self.offset += offset; self.length = length; self.null_count = count_zeros(&self.bytes, self.offset, self.length); diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 07f0d68239b..948bff115ed 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -86,11 +86,22 @@ impl Buffer { /// # Panics /// Panics iff `offset` is larger than `len`. #[inline] - pub fn slice(mut self, offset: usize, length: usize) -> Self { + pub fn slice(self, offset: usize, length: usize) -> Self { assert!( offset + length <= self.len(), "the offset of the new Buffer cannot exceed the existing length" ); + // Safety: + // we just check bounds + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`. + /// Doing so allows the same memory region to be shared between buffers. + /// # Safety + /// The caller must ensure `offset + length < self.len()` + #[inline] + pub unsafe fn slice_unchecked(mut self, offset: usize, length: usize) -> Self { self.offset += offset; self.length = length; self