From 6b3602828a6d86be904a5b5702f44ace062d3a22 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 30 Jan 2022 09:47:53 +0100 Subject: [PATCH 1/9] make Bytes wrap Vec --- src/array/mod.rs | 2 +- src/buffer/bytes.rs | 73 +++++++++++++++++++++++++-------------------- src/ffi/ffi.rs | 4 +-- 3 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/array/mod.rs b/src/array/mod.rs index ce205f32c89..fe979231c08 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -29,7 +29,7 @@ use crate::{ pub(self) mod physical_binary; /// A trait representing an immutable Arrow array. Arrow arrays are trait objects -/// that are infalibly downcasted to concrete types according to the [`Array::data_type`]. +/// that are infallibly downcasted to concrete types according to the [`Array::data_type`]. pub trait Array: Send + Sync { /// Convert to trait object. fn as_any(&self) -> &dyn Any; diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index 66d8839992d..f9ef7683802 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -1,7 +1,7 @@ //! This module contains an implementation of a contiguous immutable memory region that knows //! how to de-allocate itself, [`Bytes`]. -use std::slice; +use std::mem::ManuallyDrop; use std::{fmt::Debug, fmt::Formatter}; use std::{ptr::NonNull, sync::Arc}; @@ -11,7 +11,7 @@ use crate::types::NativeType; /// Mode of deallocating memory regions pub enum Deallocation { /// Native deallocation, using Rust deallocator with Arrow-specific memory aligment - Native(usize), + Native, // Foreign interface, via a callback Foreign(Arc), } @@ -19,8 +19,8 @@ pub enum Deallocation { impl Debug for Deallocation { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { - Deallocation::Native(capacity) => { - write!(f, "Deallocation::Native {{ capacity: {} }}", capacity) + Deallocation::Native => { + write!(f, "Deallocation::Native") } Deallocation::Foreign(_) => { write!(f, "Deallocation::Foreign {{ capacity: unknown }}") @@ -36,12 +36,8 @@ impl Debug for Deallocation { /// When the region is allocated by a foreign allocator, [Deallocation::Foreign], this calls the /// foreign deallocator to deallocate the region when it is no longer needed. pub struct Bytes { - /// The raw pointer to be begining of the region - ptr: NonNull, - - /// The number of bytes visible to this region. This is always smaller than its capacity (when avaliable). - len: usize, - + /// inner data + data: Vec, /// how to deallocate this region deallocation: Deallocation, } @@ -59,13 +55,20 @@ impl Bytes { /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed. + /// + /// # Panics + /// + /// This function panics if the give deallocation not is `Deallocation::Foreign` #[inline] - pub unsafe fn new(ptr: std::ptr::NonNull, len: usize, deallocation: Deallocation) -> Self { - Self { - ptr, - len, - deallocation, - } + pub unsafe fn from_ffi( + ptr: std::ptr::NonNull, + len: usize, + deallocation: Deallocation, + ) -> Self { + let data = Vec::from_raw_parts(ptr.as_ptr(), len, len); + assert!(matches!(deallocation, Deallocation::Foreign(_))); + + Self { data, deallocation } } #[inline] @@ -75,12 +78,13 @@ impl Bytes { #[inline] pub fn len(&self) -> usize { - self.len + self.data.len() } #[inline] pub fn ptr(&self) -> NonNull { - self.ptr + debug_assert!(!self.data.as_ptr().is_null()); + unsafe { NonNull::new_unchecked(self.data.as_ptr() as *mut T) } } } @@ -88,11 +92,13 @@ impl Drop for Bytes { #[inline] fn drop(&mut self) { match &self.deallocation { - Deallocation::Native(capacity) => unsafe { - let _ = Vec::from_raw_parts(self.ptr.as_ptr(), self.len, *capacity); - }, + // the Vec may be dropped + Deallocation::Native => {} // foreign interface knows how to deallocate itself. - Deallocation::Foreign(_) => (), + Deallocation::Foreign(_) => { + let data = std::mem::take(&mut self.data); + let _ = ManuallyDrop::new(data); + } } } } @@ -101,7 +107,7 @@ impl std::ops::Deref for Bytes { type Target = [T]; fn deref(&self) -> &[T] { - unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) } + &self.data } } @@ -113,7 +119,12 @@ impl PartialEq for Bytes { impl Debug for Bytes { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "Bytes {{ ptr: {:?}, len: {}, data: ", self.ptr, self.len,)?; + write!( + f, + "Bytes {{ ptr: {:?}, len: {}, data: ", + self.data.as_ptr(), + self.len(), + )?; f.debug_list().entries(self.iter()).finish()?; @@ -123,15 +134,11 @@ impl Debug for Bytes { impl From> for Bytes { #[inline] - fn from(mut data: Vec) -> Self { - let ptr = NonNull::new(data.as_mut_ptr()).unwrap(); - let len = data.len(); - let capacity = data.capacity(); - - let result = unsafe { Bytes::new(ptr, len, Deallocation::Native(capacity)) }; - // so that the memory region is not deallocated. - std::mem::forget(data); - result + fn from(data: Vec) -> Self { + Self { + data, + deallocation: Deallocation::Native, + } } } diff --git a/src/ffi/ffi.rs b/src/ffi/ffi.rs index 73c999a36ac..528a679dd5b 100644 --- a/src/ffi/ffi.rs +++ b/src/ffi/ffi.rs @@ -205,7 +205,7 @@ unsafe fn create_buffer( let len = buffer_len(array, data_type, index)?; let offset = buffer_offset(array, data_type, index); let bytes = ptr - .map(|ptr| Bytes::new(ptr, len, deallocation)) + .map(|ptr| Bytes::from_ffi(ptr, len, deallocation)) .ok_or_else(|| { ArrowError::OutOfSpec(format!("The buffer at position {} is null", index)) })?; @@ -240,7 +240,7 @@ unsafe fn create_bitmap( let bytes_len = bytes_for(offset + len); let ptr = NonNull::new(ptr as *mut u8); let bytes = ptr - .map(|ptr| Bytes::new(ptr, bytes_len, deallocation)) + .map(|ptr| Bytes::from_ffi(ptr, bytes_len, deallocation)) .ok_or_else(|| { ArrowError::OutOfSpec(format!( "The buffer {} is a null pointer and cannot be interpreted as a bitmap", From 3d21e569576557cd65dae0abfbc98186e39d2b18 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 30 Jan 2022 10:02:43 +0100 Subject: [PATCH 2/9] add get/make_vec --- src/buffer/bytes.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index f9ef7683802..3c3ab864c8d 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -86,6 +86,39 @@ impl Bytes { debug_assert!(!self.data.as_ptr().is_null()); unsafe { NonNull::new_unchecked(self.data.as_ptr() as *mut T) } } + + /// Returns a mutable reference to the `Vec` data if it is allocated in this process. + /// Returns `None` if allocated by a foreign interface. + /// + /// See also [`Bytes::make_vec`], which will clone the inner data when allocated via ffi. + pub fn get_vec(&mut self) -> Option<&mut Vec> { + match &self.deallocation { + Deallocation::Foreign(_) => None, + Deallocation::Native => Some(&mut self.data), + } + } + + /// Returns a mutable reference to the `Vec` data if it is allocated in this process. + /// This function will clone the inner data when allocated via ffi. + /// + /// See also [`Bytes::get_vec`], which will return `None` if this [`Bytes`] does not own its data. + pub fn make_vec(&mut self) -> &mut Vec { + match &self.deallocation { + // We clone the data, set that as native allocation + // and return a mutable reference to the new data + Deallocation::Foreign(_) => { + self.deallocation = Deallocation::Native; + // forget the memory because the foreign interface will dealocate + let data = std::mem::take(&mut self.data); + let new_data = data.clone(); + let _ = ManuallyDrop::new(data); + + self.data = new_data; + &mut self.data + } + Deallocation::Native => &mut self.data, + } + } } impl Drop for Bytes { From 8ee7ffb339085d36bfa6fc7d0248170d0afd17be Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 30 Jan 2022 13:44:06 +0100 Subject: [PATCH 3/9] add wrapper type with documented invariants --- src/buffer/bytes.rs | 80 ++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index 3c3ab864c8d..d89994cbd53 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -8,6 +8,37 @@ use std::{ptr::NonNull, sync::Arc}; use crate::ffi; use crate::types::NativeType; +/// Holds a `Vec` that may hold a pointer memory that is not +/// allocated by `Vec`. It is therefore not +/// safe to deallocate the inner type naively +struct MaybeUnaligned { + inner: Vec, + aligned: bool, +} + +impl Drop for MaybeUnaligned { + fn drop(&mut self) { + // this memory is not allocated by `Vec`, we leak it + // this memory will be deallocated by the foreign interface + if !self.aligned { + let data = std::mem::take(&mut self.inner); + let _ = ManuallyDrop::new(data); + } + } +} + +impl MaybeUnaligned { + /// # Safety + /// The caller must ensure that if `aligned` the `Vec` holds + /// all invariants set by https://doc.rust-lang.org/std/vec/struct.Vec.html#safety + unsafe fn new(data: Vec, aligned: bool) -> Self { + Self { + inner: data, + aligned, + } + } +} + /// Mode of deallocating memory regions pub enum Deallocation { /// Native deallocation, using Rust deallocator with Arrow-specific memory aligment @@ -37,7 +68,7 @@ impl Debug for Deallocation { /// foreign deallocator to deallocate the region when it is no longer needed. pub struct Bytes { /// inner data - data: Vec, + data: MaybeUnaligned, /// how to deallocate this region deallocation: Deallocation, } @@ -65,8 +96,9 @@ impl Bytes { len: usize, deallocation: Deallocation, ) -> Self { - let data = Vec::from_raw_parts(ptr.as_ptr(), len, len); assert!(matches!(deallocation, Deallocation::Foreign(_))); + let data = Vec::from_raw_parts(ptr.as_ptr(), len, len); + let data = MaybeUnaligned::new(data, false); Self { data, deallocation } } @@ -78,13 +110,13 @@ impl Bytes { #[inline] pub fn len(&self) -> usize { - self.data.len() + self.data.inner.len() } #[inline] pub fn ptr(&self) -> NonNull { - debug_assert!(!self.data.as_ptr().is_null()); - unsafe { NonNull::new_unchecked(self.data.as_ptr() as *mut T) } + debug_assert!(!self.data.inner.as_ptr().is_null()); + unsafe { NonNull::new_unchecked(self.data.inner.as_ptr() as *mut T) } } /// Returns a mutable reference to the `Vec` data if it is allocated in this process. @@ -94,7 +126,7 @@ impl Bytes { pub fn get_vec(&mut self) -> Option<&mut Vec> { match &self.deallocation { Deallocation::Foreign(_) => None, - Deallocation::Native => Some(&mut self.data), + Deallocation::Native => Some(&mut self.data.inner), } } @@ -108,30 +140,15 @@ impl Bytes { // and return a mutable reference to the new data Deallocation::Foreign(_) => { self.deallocation = Deallocation::Native; - // forget the memory because the foreign interface will dealocate - let data = std::mem::take(&mut self.data); - let new_data = data.clone(); - let _ = ManuallyDrop::new(data); - - self.data = new_data; - &mut self.data - } - Deallocation::Native => &mut self.data, - } - } -} + let new_data = self.data.inner.clone(); + // Safety: + // the new data is allocated by Vec itself + let data = unsafe { MaybeUnaligned::new(new_data, true) }; -impl Drop for Bytes { - #[inline] - fn drop(&mut self) { - match &self.deallocation { - // the Vec may be dropped - Deallocation::Native => {} - // foreign interface knows how to deallocate itself. - Deallocation::Foreign(_) => { - let data = std::mem::take(&mut self.data); - let _ = ManuallyDrop::new(data); + self.data = data; + &mut self.data.inner } + Deallocation::Native => &mut self.data.inner, } } } @@ -140,7 +157,7 @@ impl std::ops::Deref for Bytes { type Target = [T]; fn deref(&self) -> &[T] { - &self.data + &self.data.inner } } @@ -155,7 +172,7 @@ impl Debug for Bytes { write!( f, "Bytes {{ ptr: {:?}, len: {}, data: ", - self.data.as_ptr(), + self.data.inner.as_ptr(), self.len(), )?; @@ -168,6 +185,9 @@ impl Debug for Bytes { impl From> for Bytes { #[inline] fn from(data: Vec) -> Self { + // Safety: + // A `Vec` is allocated by `Vec` + let data = unsafe { MaybeUnaligned::new(data, true) }; Self { data, deallocation: Deallocation::Native, From 3f8136a2371f12408b7a42845cdbea171951f56c Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 30 Jan 2022 15:08:02 +0100 Subject: [PATCH 4/9] PrimitiveArray -> MutablePrimitiveArray --- src/array/primitive/mod.rs | 33 ++++++++++++++++ src/bitmap/immutable.rs | 17 ++++++++- src/buffer/bytes.rs | 24 ------------ src/buffer/immutable.rs | 11 ++++++ src/lib.rs | 1 + src/to_mutable.rs | 13 +++++++ tests/it/array/primitive/mod.rs | 1 + tests/it/array/primitive/to_mutable.rs | 52 ++++++++++++++++++++++++++ 8 files changed, 127 insertions(+), 25 deletions(-) create mode 100644 src/to_mutable.rs create mode 100644 tests/it/array/primitive/to_mutable.rs diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 6da02cc5313..d3143e093ff 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -14,6 +14,7 @@ mod from_natural; mod iterator; pub use iterator::*; mod mutable; +use crate::to_mutable::MaybeMut; pub use mutable::*; /// A [`PrimitiveArray`] is arrow's equivalent to `Vec>`, i.e. @@ -184,6 +185,38 @@ impl PrimitiveArray { validity: self.validity, } } + /// Try to convert this `PrimitiveArray` to a `MutablePrimitiveArray` + pub fn into_mutable(mut self) -> MaybeMut> { + match (self.validity, self.values.get_vec()) { + (None, None) => { + MaybeMut::Immutable(PrimitiveArray::from_data(self.data_type, self.values, None)) + } + (None, Some(v)) => { + let data = std::mem::take(v); + MaybeMut::Mutable(MutablePrimitiveArray::from_data(self.data_type, data, None)) + } + (Some(bitmap), None) => MaybeMut::Immutable(PrimitiveArray::from_data( + self.data_type, + self.values, + Some(bitmap), + )), + (Some(bitmap), Some(v)) => match bitmap.into_inner() { + MaybeMut::Immutable(bitmap) => MaybeMut::Immutable(PrimitiveArray::from_data( + self.data_type, + self.values, + Some(bitmap), + )), + MaybeMut::Mutable(mutable) => { + let data = std::mem::take(v); + MaybeMut::Mutable(MutablePrimitiveArray::from_data( + self.data_type, + data, + Some(mutable), + )) + } + }, + } + } } impl Array for PrimitiveArray { diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index eea39d78480..745984e0a6a 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -1,6 +1,7 @@ use std::iter::FromIterator; use std::sync::Arc; +use crate::to_mutable::MaybeMut; use crate::{buffer::bytes::Bytes, trusted_len::TrustedLen}; use super::{ @@ -107,7 +108,7 @@ impl Bitmap { self.null_count } - /// Slices `self`, offseting by `offset` and truncating up to `length` bits. + /// Slices `self`, offsetting by `offset` and truncating up to `length` bits. /// # Panic /// Panics iff `self.offset + offset + length >= self.bytes.len() * 8`, i.e. if the offset and `length` /// exceeds the allocated capacity of `self`. @@ -175,6 +176,20 @@ impl Bitmap { pub(crate) fn offset(&self) -> usize { self.offset } + + /// Try to convert this `Bitmap` to a `MutableBitmap` + pub fn into_inner(mut self) -> MaybeMut { + match ( + self.offset, + Arc::get_mut(&mut self.bytes).map(|b| b.get_vec()).flatten(), + ) { + (0, Some(v)) => { + let data = std::mem::take(v); + MaybeMut::Mutable(MutableBitmap::from_vec(data, self.length)) + } + _ => MaybeMut::Immutable(self), + } + } } impl> From

for Bitmap { diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index d89994cbd53..06fae114ca4 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -121,36 +121,12 @@ impl Bytes { /// Returns a mutable reference to the `Vec` data if it is allocated in this process. /// Returns `None` if allocated by a foreign interface. - /// - /// See also [`Bytes::make_vec`], which will clone the inner data when allocated via ffi. pub fn get_vec(&mut self) -> Option<&mut Vec> { match &self.deallocation { Deallocation::Foreign(_) => None, Deallocation::Native => Some(&mut self.data.inner), } } - - /// Returns a mutable reference to the `Vec` data if it is allocated in this process. - /// This function will clone the inner data when allocated via ffi. - /// - /// See also [`Bytes::get_vec`], which will return `None` if this [`Bytes`] does not own its data. - pub fn make_vec(&mut self) -> &mut Vec { - match &self.deallocation { - // We clone the data, set that as native allocation - // and return a mutable reference to the new data - Deallocation::Foreign(_) => { - self.deallocation = Deallocation::Native; - let new_data = self.data.inner.clone(); - // Safety: - // the new data is allocated by Vec itself - let data = unsafe { MaybeUnaligned::new(new_data, true) }; - - self.data = data; - &mut self.data.inner - } - Deallocation::Native => &mut self.data.inner, - } - } } impl std::ops::Deref for Bytes { diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index a890699afd3..678367f1ea3 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -123,6 +123,17 @@ impl Buffer { pub fn offset(&self) -> usize { self.offset } + + /// Try to get a hold to the inner data as `&mut Vec`. + /// This will only succeed if the `reference count == 1` and the data + /// is allocated by this program. + pub fn get_vec(&mut self) -> Option<&mut Vec> { + if self.offset != 0 { + None + } else { + Arc::get_mut(&mut self.data).map(|b| b.get_vec()).flatten() + } + } } impl Buffer { diff --git a/src/lib.rs b/src/lib.rs index 608bef98120..d27168e5dae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,3 +28,4 @@ pub mod util; // so that documentation gets test #[cfg(any(test, doctest))] mod docs; +pub mod to_mutable; diff --git a/src/to_mutable.rs b/src/to_mutable.rs new file mode 100644 index 00000000000..3f2e713aa20 --- /dev/null +++ b/src/to_mutable.rs @@ -0,0 +1,13 @@ +pub enum MaybeMut { + Immutable(I), + Mutable(M), +} + +impl MaybeMut { + pub fn unwrap_mut(self) -> M { + match self { + MaybeMut::Mutable(m) => m, + MaybeMut::Immutable(_) => panic!("was immutable"), + } + } +} diff --git a/tests/it/array/primitive/mod.rs b/tests/it/array/primitive/mod.rs index 1ca5114927a..341cc0615de 100644 --- a/tests/it/array/primitive/mod.rs +++ b/tests/it/array/primitive/mod.rs @@ -9,6 +9,7 @@ use arrow2::{ }; mod mutable; +mod to_mutable; #[test] fn basics() { diff --git a/tests/it/array/primitive/to_mutable.rs b/tests/it/array/primitive/to_mutable.rs new file mode 100644 index 00000000000..18dfb7d4301 --- /dev/null +++ b/tests/it/array/primitive/to_mutable.rs @@ -0,0 +1,52 @@ +use arrow2::array::PrimitiveArray; +use arrow2::bitmap::Bitmap; +use arrow2::datatypes::DataType; +use arrow2::to_mutable::MaybeMut; + +#[test] +fn array_to_mutable() { + let data = vec![1, 2, 3]; + let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); + + // to mutable push and freeze again + let mut mut_arr = arr.into_mutable().unwrap_mut(); + mut_arr.push(Some(5)); + let immut: PrimitiveArray = mut_arr.into(); + assert_eq!(immut.values().as_slice(), [1, 2, 3, 5]); + + // let's cause a realloc and see if miri is ok + let mut mut_arr = immut.into_mutable().unwrap_mut(); + mut_arr.extend_constant(256, Some(9)); + let immut: PrimitiveArray = mut_arr.into(); + assert_eq!(immut.values().len(), 256 + 4); +} + +#[test] +fn array_to_mutable_not_owned() { + let data = vec![1, 2, 3]; + let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); + let arr2 = arr.clone(); + + // to the `to_mutable` should fail and we should get back the original array + match arr2.into_mutable() { + MaybeMut::Immutable(arr2) => { + assert_eq!(arr, arr2); + } + _ => panic!(), + } +} + +#[test] +fn array_to_mutable_validity() { + let data = vec![1, 2, 3]; + + // both have a single reference should be ok + let bitmap = Bitmap::from_iter([true, false, true]); + let arr = PrimitiveArray::from_data(DataType::Int32, data.clone().into(), Some(bitmap)); + assert!(matches!(arr.into_mutable(), MaybeMut::Mutable(_))); + + // now we clone the bitmap increasing the ref count + let bitmap = Bitmap::from_iter([true, false, true]); + let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), Some(bitmap.clone())); + assert!(matches!(arr.into_mutable(), MaybeMut::Immutable(_))); +} From 423935f18889e00d4d1c3d7d56742017b992a47c Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 31 Jan 2022 07:46:59 +0100 Subject: [PATCH 5/9] consistent names --- src/array/primitive/mod.rs | 4 ++-- src/bitmap/immutable.rs | 2 +- tests/it/array/primitive/to_mutable.rs | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index d3143e093ff..c7474db5df4 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -186,7 +186,7 @@ impl PrimitiveArray { } } /// Try to convert this `PrimitiveArray` to a `MutablePrimitiveArray` - pub fn into_mutable(mut self) -> MaybeMut> { + pub fn into_mut(mut self) -> MaybeMut> { match (self.validity, self.values.get_vec()) { (None, None) => { MaybeMut::Immutable(PrimitiveArray::from_data(self.data_type, self.values, None)) @@ -200,7 +200,7 @@ impl PrimitiveArray { self.values, Some(bitmap), )), - (Some(bitmap), Some(v)) => match bitmap.into_inner() { + (Some(bitmap), Some(v)) => match bitmap.into_mut() { MaybeMut::Immutable(bitmap) => MaybeMut::Immutable(PrimitiveArray::from_data( self.data_type, self.values, diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index 745984e0a6a..bb2d8dbe834 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -178,7 +178,7 @@ impl Bitmap { } /// Try to convert this `Bitmap` to a `MutableBitmap` - pub fn into_inner(mut self) -> MaybeMut { + pub fn into_mut(mut self) -> MaybeMut { match ( self.offset, Arc::get_mut(&mut self.bytes).map(|b| b.get_vec()).flatten(), diff --git a/tests/it/array/primitive/to_mutable.rs b/tests/it/array/primitive/to_mutable.rs index 18dfb7d4301..62a24dded55 100644 --- a/tests/it/array/primitive/to_mutable.rs +++ b/tests/it/array/primitive/to_mutable.rs @@ -9,13 +9,13 @@ fn array_to_mutable() { let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); // to mutable push and freeze again - let mut mut_arr = arr.into_mutable().unwrap_mut(); + let mut mut_arr = arr.into_mut().unwrap_mut(); mut_arr.push(Some(5)); let immut: PrimitiveArray = mut_arr.into(); assert_eq!(immut.values().as_slice(), [1, 2, 3, 5]); // let's cause a realloc and see if miri is ok - let mut mut_arr = immut.into_mutable().unwrap_mut(); + let mut mut_arr = immut.into_mut().unwrap_mut(); mut_arr.extend_constant(256, Some(9)); let immut: PrimitiveArray = mut_arr.into(); assert_eq!(immut.values().len(), 256 + 4); @@ -28,7 +28,7 @@ fn array_to_mutable_not_owned() { let arr2 = arr.clone(); // to the `to_mutable` should fail and we should get back the original array - match arr2.into_mutable() { + match arr2.into_mut() { MaybeMut::Immutable(arr2) => { assert_eq!(arr, arr2); } @@ -37,16 +37,17 @@ fn array_to_mutable_not_owned() { } #[test] +#[allow(clippy::redundant_clone)] fn array_to_mutable_validity() { let data = vec![1, 2, 3]; // both have a single reference should be ok let bitmap = Bitmap::from_iter([true, false, true]); let arr = PrimitiveArray::from_data(DataType::Int32, data.clone().into(), Some(bitmap)); - assert!(matches!(arr.into_mutable(), MaybeMut::Mutable(_))); + assert!(matches!(arr.into_mut(), MaybeMut::Mutable(_))); // now we clone the bitmap increasing the ref count let bitmap = Bitmap::from_iter([true, false, true]); let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), Some(bitmap.clone())); - assert!(matches!(arr.into_mutable(), MaybeMut::Immutable(_))); + assert!(matches!(arr.into_mut(), MaybeMut::Immutable(_))); } From 525b5090fc537f11d92792ee8797c403d75ff1f9 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 31 Jan 2022 14:53:34 +0100 Subject: [PATCH 6/9] better encapsulate unsafe --- src/buffer/bytes.rs | 69 ++++++++++++++++--------------------------- src/buffer/foreign.rs | 48 ++++++++++++++++++++++++++++++ src/buffer/mod.rs | 1 + 3 files changed, 75 insertions(+), 43 deletions(-) create mode 100644 src/buffer/foreign.rs diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index 06fae114ca4..6142a0f6960 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -1,44 +1,13 @@ //! This module contains an implementation of a contiguous immutable memory region that knows //! how to de-allocate itself, [`Bytes`]. -use std::mem::ManuallyDrop; use std::{fmt::Debug, fmt::Formatter}; use std::{ptr::NonNull, sync::Arc}; +use super::foreign::MaybeForeign; use crate::ffi; use crate::types::NativeType; -/// Holds a `Vec` that may hold a pointer memory that is not -/// allocated by `Vec`. It is therefore not -/// safe to deallocate the inner type naively -struct MaybeUnaligned { - inner: Vec, - aligned: bool, -} - -impl Drop for MaybeUnaligned { - fn drop(&mut self) { - // this memory is not allocated by `Vec`, we leak it - // this memory will be deallocated by the foreign interface - if !self.aligned { - let data = std::mem::take(&mut self.inner); - let _ = ManuallyDrop::new(data); - } - } -} - -impl MaybeUnaligned { - /// # Safety - /// The caller must ensure that if `aligned` the `Vec` holds - /// all invariants set by https://doc.rust-lang.org/std/vec/struct.Vec.html#safety - unsafe fn new(data: Vec, aligned: bool) -> Self { - Self { - inner: data, - aligned, - } - } -} - /// Mode of deallocating memory regions pub enum Deallocation { /// Native deallocation, using Rust deallocator with Arrow-specific memory aligment @@ -68,7 +37,7 @@ impl Debug for Deallocation { /// foreign deallocator to deallocate the region when it is no longer needed. pub struct Bytes { /// inner data - data: MaybeUnaligned, + data: MaybeForeign, /// how to deallocate this region deallocation: Deallocation, } @@ -98,7 +67,7 @@ impl Bytes { ) -> Self { assert!(matches!(deallocation, Deallocation::Foreign(_))); let data = Vec::from_raw_parts(ptr.as_ptr(), len, len); - let data = MaybeUnaligned::new(data, false); + let data = MaybeForeign::new(data); Self { data, deallocation } } @@ -110,13 +79,13 @@ impl Bytes { #[inline] pub fn len(&self) -> usize { - self.data.inner.len() + self.data.len() } #[inline] pub fn ptr(&self) -> NonNull { - debug_assert!(!self.data.inner.as_ptr().is_null()); - unsafe { NonNull::new_unchecked(self.data.inner.as_ptr() as *mut T) } + debug_assert!(!self.data.as_ptr().is_null()); + unsafe { NonNull::new_unchecked(self.data.as_ptr() as *mut T) } } /// Returns a mutable reference to the `Vec` data if it is allocated in this process. @@ -124,7 +93,23 @@ impl Bytes { pub fn get_vec(&mut self) -> Option<&mut Vec> { match &self.deallocation { Deallocation::Foreign(_) => None, - Deallocation::Native => Some(&mut self.data.inner), + // Safety: + // The allocation is native so we can share the vec + Deallocation::Native => Some(unsafe { self.data.mut_vec() }), + } + } +} + +impl Drop for Bytes { + fn drop(&mut self) { + match self.deallocation { + // a foreign interface knows how to deallocate itself + Deallocation::Foreign(_) => {} + Deallocation::Native => { + // Safety: + // the allocation is native, so we can safely drop + unsafe { self.data.drop_local() } + } } } } @@ -133,7 +118,7 @@ impl std::ops::Deref for Bytes { type Target = [T]; fn deref(&self) -> &[T] { - &self.data.inner + &self.data } } @@ -148,7 +133,7 @@ impl Debug for Bytes { write!( f, "Bytes {{ ptr: {:?}, len: {}, data: ", - self.data.inner.as_ptr(), + self.data.as_ptr(), self.len(), )?; @@ -161,9 +146,7 @@ impl Debug for Bytes { impl From> for Bytes { #[inline] fn from(data: Vec) -> Self { - // Safety: - // A `Vec` is allocated by `Vec` - let data = unsafe { MaybeUnaligned::new(data, true) }; + let data = MaybeForeign::new(data); Self { data, deallocation: Deallocation::Native, diff --git a/src/buffer/foreign.rs b/src/buffer/foreign.rs new file mode 100644 index 00000000000..c8682eed253 --- /dev/null +++ b/src/buffer/foreign.rs @@ -0,0 +1,48 @@ +use crate::types::NativeType; +use std::mem::ManuallyDrop; +use std::ops::{Deref, DerefMut}; +// this code is in its own module so that inner types are not accessible +// as that might break invariants assumptions + +/// Holds a `Vec` that may hold a pointer memory that is not +/// allocated by `Vec`. It is therefore not +/// safe to deallocate the inner type naively +/// +/// # Safety +/// +/// it is unsafe to take and drop the inner value of `MaybeForeign` +/// Doing so is only allowed if the `Vec` was created from the native `Vec` allocator +pub(super) struct MaybeForeign { + inner: ManuallyDrop>, +} + +impl MaybeForeign { + pub(super) fn new(data: Vec) -> Self { + Self { + inner: ManuallyDrop::new(data), + } + } + + /// # Safety + /// This function may only be called if the inner `Vec` was allocated + /// in Rust and the default `Vec` allocator `A`. + pub(super) unsafe fn drop_local(&mut self) { + let data = std::mem::take(&mut self.inner); + let _data = ManuallyDrop::into_inner(data); + } + + /// # Safety + /// This function may only be called if the inner `Vec` was allocated + /// in Rust and the default `Vec` allocator `A`. + pub(super) unsafe fn mut_vec(&mut self) -> &mut Vec { + self.inner.deref_mut() + } +} + +impl Deref for MaybeForeign { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index dc055f8ce04..5841a147fd4 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -4,5 +4,6 @@ mod immutable; pub(crate) mod bytes; +mod foreign; pub use immutable::Buffer; From c45d2c2544fd0516d3f496dececb908c5fd2745c Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Tue, 1 Feb 2022 21:24:25 +0000 Subject: [PATCH 7/9] Improved docs --- src/bitmap/immutable.rs | 2 +- src/buffer/bytes.rs | 6 +++++- src/buffer/foreign.rs | 17 ++++++++++++----- src/buffer/immutable.rs | 10 ++++++---- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index bb2d8dbe834..3083ec88f20 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -181,7 +181,7 @@ impl Bitmap { pub fn into_mut(mut self) -> MaybeMut { match ( self.offset, - Arc::get_mut(&mut self.bytes).map(|b| b.get_vec()).flatten(), + Arc::get_mut(&mut self.bytes).and_then(|b| b.get_vec()), ) { (0, Some(v)) => { let data = std::mem::take(v); diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index 6142a0f6960..109302a4816 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -66,6 +66,10 @@ impl Bytes { deallocation: Deallocation, ) -> Self { assert!(matches!(deallocation, Deallocation::Foreign(_))); + // This line is technically outside the assumptions of `Vec::from_raw_parts`, since + // `ptr` was not allocated by `Vec`. However, one of the invariants of this struct + // is that we do not expose this region as a `Vec`; we only use `Vec` on it to provide + // immutable access to the region (via `Vec::deref` to `&[T]`). let data = Vec::from_raw_parts(ptr.as_ptr(), len, len); let data = MaybeForeign::new(data); @@ -88,7 +92,7 @@ impl Bytes { unsafe { NonNull::new_unchecked(self.data.as_ptr() as *mut T) } } - /// Returns a mutable reference to the `Vec` data if it is allocated in this process. + /// Returns a mutable reference to the internal [`Vec`] if it is natively allocated. /// Returns `None` if allocated by a foreign interface. pub fn get_vec(&mut self) -> Option<&mut Vec> { match &self.deallocation { diff --git a/src/buffer/foreign.rs b/src/buffer/foreign.rs index c8682eed253..ca5cb7f0225 100644 --- a/src/buffer/foreign.rs +++ b/src/buffer/foreign.rs @@ -1,13 +1,15 @@ +// this code is in its own module so that inner types are not accessible +// as that might break invariants assumptions use crate::types::NativeType; use std::mem::ManuallyDrop; use std::ops::{Deref, DerefMut}; -// this code is in its own module so that inner types are not accessible -// as that might break invariants assumptions -/// Holds a `Vec` that may hold a pointer memory that is not -/// allocated by `Vec`. It is therefore not +/// Holds a `Vec` that may hold a pointer that is not allocated by `Vec`. It is therefore not /// safe to deallocate the inner type naively /// +/// This struct exists to avoid holding an `enum` of a `Vec` or a foreign pointer, whose `deref` +/// is known to be least 50% more expensive than the deref of a `Vec`. +/// /// # Safety /// /// it is unsafe to take and drop the inner value of `MaybeForeign` @@ -17,6 +19,7 @@ pub(super) struct MaybeForeign { } impl MaybeForeign { + #[inline] pub(super) fn new(data: Vec) -> Self { Self { inner: ManuallyDrop::new(data), @@ -25,7 +28,8 @@ impl MaybeForeign { /// # Safety /// This function may only be called if the inner `Vec` was allocated - /// in Rust and the default `Vec` allocator `A`. + /// by `Vec` allocator `A`. + #[inline] pub(super) unsafe fn drop_local(&mut self) { let data = std::mem::take(&mut self.inner); let _data = ManuallyDrop::into_inner(data); @@ -34,6 +38,8 @@ impl MaybeForeign { /// # Safety /// This function may only be called if the inner `Vec` was allocated /// in Rust and the default `Vec` allocator `A`. + /// Otherwise, users may reallocate `Vec`, which is unsound + #[inline] pub(super) unsafe fn mut_vec(&mut self) -> &mut Vec { self.inner.deref_mut() } @@ -42,6 +48,7 @@ impl MaybeForeign { impl Deref for MaybeForeign { type Target = [T]; + #[inline] fn deref(&self) -> &Self::Target { &self.inner } diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 678367f1ea3..59e9c5db399 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -124,14 +124,16 @@ impl Buffer { self.offset } - /// Try to get a hold to the inner data as `&mut Vec`. - /// This will only succeed if the `reference count == 1` and the data - /// is allocated by this program. + /// Try to get the inner data as a mutable [`Vec`]. + /// This succeeds iff: + /// * This data was allocated by Rust (i.e. it does not come from the C data interface) + /// * This region is not being shared any other struct. + /// * This buffer has not been offsetted pub fn get_vec(&mut self) -> Option<&mut Vec> { if self.offset != 0 { None } else { - Arc::get_mut(&mut self.data).map(|b| b.get_vec()).flatten() + Arc::get_mut(&mut self.data).and_then(|b| b.get_vec()) } } } From e87b3cfe413a701f73dad71247387316fe5bfada Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 2 Feb 2022 09:50:52 +0100 Subject: [PATCH 8/9] user Either as sum type --- Cargo.toml | 1 + src/array/primitive/mod.rs | 53 ++++++++++++++------------ src/bitmap/immutable.rs | 8 ++-- src/buffer/immutable.rs | 15 ++++++-- src/lib.rs | 1 - src/to_mutable.rs | 13 ------- tests/it/array/primitive/to_mutable.rs | 12 +++--- 7 files changed, 50 insertions(+), 53 deletions(-) delete mode 100644 src/to_mutable.rs diff --git a/Cargo.toml b/Cargo.toml index e13c15dc143..42171337c0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ name = "arrow2" bench = false [dependencies] +either = "1.6" num-traits = "0.2" bytemuck = { version = "1", features = ["derive"] } chrono = { version = "0.4", default_features = false, features = ["std"] } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index c7474db5df4..1c2563e3e02 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -7,6 +7,7 @@ use crate::{ }; use super::Array; +use either::Either; mod display; mod ffi; @@ -14,7 +15,6 @@ mod from_natural; mod iterator; pub use iterator::*; mod mutable; -use crate::to_mutable::MaybeMut; pub use mutable::*; /// A [`PrimitiveArray`] is arrow's equivalent to `Vec>`, i.e. @@ -186,35 +186,38 @@ impl PrimitiveArray { } } /// Try to convert this `PrimitiveArray` to a `MutablePrimitiveArray` - pub fn into_mut(mut self) -> MaybeMut> { - match (self.validity, self.values.get_vec()) { - (None, None) => { - MaybeMut::Immutable(PrimitiveArray::from_data(self.data_type, self.values, None)) - } - (None, Some(v)) => { - let data = std::mem::take(v); - MaybeMut::Mutable(MutablePrimitiveArray::from_data(self.data_type, data, None)) - } - (Some(bitmap), None) => MaybeMut::Immutable(PrimitiveArray::from_data( - self.data_type, - self.values, - Some(bitmap), - )), - (Some(bitmap), Some(v)) => match bitmap.into_mut() { - MaybeMut::Immutable(bitmap) => MaybeMut::Immutable(PrimitiveArray::from_data( + pub fn into_mut(self) -> Either> { + use Either::*; + + if let Some(bitmap) = self.validity { + match bitmap.into_mut() { + Left(bitmap) => Left(PrimitiveArray::from_data( self.data_type, self.values, Some(bitmap), )), - MaybeMut::Mutable(mutable) => { - let data = std::mem::take(v); - MaybeMut::Mutable(MutablePrimitiveArray::from_data( + Right(mutable_bitmap) => match self.values.get_vec() { + Left(buffer) => Left(PrimitiveArray::from_data( + self.data_type, + buffer, + Some(mutable_bitmap.into()), + )), + Right(values) => Right(MutablePrimitiveArray::from_data( self.data_type, - data, - Some(mutable), - )) - } - }, + values, + Some(mutable_bitmap), + )), + }, + } + } else { + match self.values.get_vec() { + Left(buffer) => Left(PrimitiveArray::from_data(self.data_type, buffer, None)), + Right(values) => Right(MutablePrimitiveArray::from_data( + self.data_type, + values, + None, + )), + } } } } diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index 3083ec88f20..25d7f244968 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -1,7 +1,7 @@ +use either::Either; use std::iter::FromIterator; use std::sync::Arc; -use crate::to_mutable::MaybeMut; use crate::{buffer::bytes::Bytes, trusted_len::TrustedLen}; use super::{ @@ -178,16 +178,16 @@ impl Bitmap { } /// Try to convert this `Bitmap` to a `MutableBitmap` - pub fn into_mut(mut self) -> MaybeMut { + pub fn into_mut(mut self) -> Either { match ( self.offset, Arc::get_mut(&mut self.bytes).and_then(|b| b.get_vec()), ) { (0, Some(v)) => { let data = std::mem::take(v); - MaybeMut::Mutable(MutableBitmap::from_vec(data, self.length)) + Either::Right(MutableBitmap::from_vec(data, self.length)) } - _ => MaybeMut::Immutable(self), + _ => Either::Left(self), } } } diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 59e9c5db399..93ca7e41116 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -1,3 +1,4 @@ +use either::Either; use std::{iter::FromIterator, sync::Arc, usize}; use crate::{trusted_len::TrustedLen, types::NativeType}; @@ -128,12 +129,18 @@ impl Buffer { /// This succeeds iff: /// * This data was allocated by Rust (i.e. it does not come from the C data interface) /// * This region is not being shared any other struct. - /// * This buffer has not been offsetted - pub fn get_vec(&mut self) -> Option<&mut Vec> { + /// * This buffer has no offset + pub fn get_vec(mut self) -> Either> { if self.offset != 0 { - None + Either::Left(self) } else { - Arc::get_mut(&mut self.data).and_then(|b| b.get_vec()) + match Arc::get_mut(&mut self.data).and_then(|b| b.get_vec()) { + Some(v) => { + let data = std::mem::take(v); + Either::Right(data) + } + None => Either::Left(self), + } } } } diff --git a/src/lib.rs b/src/lib.rs index d27168e5dae..608bef98120 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,4 +28,3 @@ pub mod util; // so that documentation gets test #[cfg(any(test, doctest))] mod docs; -pub mod to_mutable; diff --git a/src/to_mutable.rs b/src/to_mutable.rs deleted file mode 100644 index 3f2e713aa20..00000000000 --- a/src/to_mutable.rs +++ /dev/null @@ -1,13 +0,0 @@ -pub enum MaybeMut { - Immutable(I), - Mutable(M), -} - -impl MaybeMut { - pub fn unwrap_mut(self) -> M { - match self { - MaybeMut::Mutable(m) => m, - MaybeMut::Immutable(_) => panic!("was immutable"), - } - } -} diff --git a/tests/it/array/primitive/to_mutable.rs b/tests/it/array/primitive/to_mutable.rs index 62a24dded55..eadd5fb853d 100644 --- a/tests/it/array/primitive/to_mutable.rs +++ b/tests/it/array/primitive/to_mutable.rs @@ -1,7 +1,7 @@ use arrow2::array::PrimitiveArray; use arrow2::bitmap::Bitmap; use arrow2::datatypes::DataType; -use arrow2::to_mutable::MaybeMut; +use either::Either; #[test] fn array_to_mutable() { @@ -9,13 +9,13 @@ fn array_to_mutable() { let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); // to mutable push and freeze again - let mut mut_arr = arr.into_mut().unwrap_mut(); + let mut mut_arr = arr.into_mut().unwrap_right(); mut_arr.push(Some(5)); let immut: PrimitiveArray = mut_arr.into(); assert_eq!(immut.values().as_slice(), [1, 2, 3, 5]); // let's cause a realloc and see if miri is ok - let mut mut_arr = immut.into_mut().unwrap_mut(); + let mut mut_arr = immut.into_mut().unwrap_right(); mut_arr.extend_constant(256, Some(9)); let immut: PrimitiveArray = mut_arr.into(); assert_eq!(immut.values().len(), 256 + 4); @@ -29,7 +29,7 @@ fn array_to_mutable_not_owned() { // to the `to_mutable` should fail and we should get back the original array match arr2.into_mut() { - MaybeMut::Immutable(arr2) => { + Either::Left(arr2) => { assert_eq!(arr, arr2); } _ => panic!(), @@ -44,10 +44,10 @@ fn array_to_mutable_validity() { // both have a single reference should be ok let bitmap = Bitmap::from_iter([true, false, true]); let arr = PrimitiveArray::from_data(DataType::Int32, data.clone().into(), Some(bitmap)); - assert!(matches!(arr.into_mut(), MaybeMut::Mutable(_))); + assert!(matches!(arr.into_mut(), Either::Right(_))); // now we clone the bitmap increasing the ref count let bitmap = Bitmap::from_iter([true, false, true]); let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), Some(bitmap.clone())); - assert!(matches!(arr.into_mut(), MaybeMut::Immutable(_))); + assert!(matches!(arr.into_mut(), Either::Left(_))); } From 5ed0c43a978cd2b40f4f51d7cace2bb479582278 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 2 Feb 2022 09:55:52 +0100 Subject: [PATCH 9/9] elide bounds check --- src/buffer/immutable.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 93ca7e41116..ec67febbe9f 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -85,7 +85,13 @@ impl Buffer { /// Returns the byte slice stored in this buffer #[inline] pub fn as_slice(&self) -> &[T] { - &self.data[self.offset..self.offset + self.length] + // Safety: + // invariant of this struct `offset + length <= data.len()` + debug_assert!(self.offset + self.length <= self.data.len()); + unsafe { + self.data + .get_unchecked(self.offset..self.offset + self.length) + } } /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.