diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 84cf3671999..c6d4fce470c 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -82,7 +82,7 @@ impl BinaryArray { if validity .as_ref() - .map_or(false, |validity| validity.len() != offsets.len()) + .map_or(false, |validity| validity.len() != offsets.len_proxy()) { return Err(Error::oos( "validity mask length must match the number of values", @@ -127,7 +127,7 @@ impl BinaryArray { /// Returns the length of this array #[inline] pub fn len(&self) -> usize { - self.offsets.len() + self.offsets.len_proxy() } /// Returns the element at index `i` diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index fa0ff32c01e..6d6fe22eb87 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -44,7 +44,7 @@ impl ListArray { if validity .as_ref() - .map_or(false, |validity| validity.len() != offsets.len()) + .map_or(false, |validity| validity.len() != offsets.len_proxy()) { return Err(Error::oos( "validity mask length must match the number of values", @@ -138,7 +138,7 @@ impl ListArray { /// Returns the length of this array #[inline] pub fn len(&self) -> usize { - self.offsets.len() + self.offsets.len_proxy() } /// Returns the element at index `i` diff --git a/src/array/map/mod.rs b/src/array/map/mod.rs index 150ebbeeb7b..96b9aacde21 100644 --- a/src/array/map/mod.rs +++ b/src/array/map/mod.rs @@ -59,7 +59,7 @@ impl MapArray { if validity .as_ref() - .map_or(false, |validity| validity.len() != offsets.len()) + .map_or(false, |validity| validity.len() != offsets.len_proxy()) { return Err(Error::oos( "validity mask length must match the number of values", @@ -154,7 +154,7 @@ impl MapArray { /// Returns the length of this array #[inline] pub fn len(&self) -> usize { - self.offsets.len() + self.offsets.len_proxy() } /// returns the offsets diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 9af4ee76256..c5a69576c5c 100644 --- a/src/array/utf8/mod.rs +++ 
b/src/array/utf8/mod.rs @@ -95,7 +95,7 @@ impl Utf8Array { try_check_utf8(&offsets, &values)?; if validity .as_ref() - .map_or(false, |validity| validity.len() != offsets.len()) + .map_or(false, |validity| validity.len() != offsets.len_proxy()) { return Err(Error::oos( "validity mask length must match the number of values", @@ -144,7 +144,7 @@ impl Utf8Array { /// Returns the length of this array #[inline] pub fn len(&self) -> usize { - self.offsets.len() + self.offsets.len_proxy() } /// Returns the value of the element at index `i`, ignoring the array's validity. @@ -365,7 +365,7 @@ impl Utf8Array { if validity .as_ref() - .map_or(false, |validity| validity.len() != offsets.len()) + .map_or(false, |validity| validity.len() != offsets.len_proxy()) { return Err(Error::oos( "validity mask length must match the number of values", diff --git a/src/compute/aggregate/memory.rs b/src/compute/aggregate/memory.rs index d497a60ae99..7e3218a828a 100644 --- a/src/compute/aggregate/memory.rs +++ b/src/compute/aggregate/memory.rs @@ -63,7 +63,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize { List => { let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap(); estimated_bytes_size(array.values().as_ref()) - + array.offsets().len() * std::mem::size_of::<i32>() + + array.offsets().len_proxy() * std::mem::size_of::<i32>() + validity_size(array.validity()) } FixedSizeList => { @@ -73,7 +73,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize { LargeList => { let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap(); estimated_bytes_size(array.values().as_ref()) - + array.offsets().len() * std::mem::size_of::<i64>() + + array.offsets().len_proxy() * std::mem::size_of::<i64>() + validity_size(array.validity()) } Struct => { @@ -111,7 +111,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize { }), Map => { let array = array.as_any().downcast_ref::<MapArray>().unwrap(); - let offsets = array.offsets().len() * std::mem::size_of::<i32>(); + let offsets = array.offsets().len_proxy() * 
std::mem::size_of::<i32>(); offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity()) } } diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs index 50bfdb63be9..10aea638a22 100644 --- a/src/io/parquet/write/pages.rs +++ b/src/io/parquet/write/pages.rs @@ -50,8 +50,8 @@ impl Nested { pub fn len(&self) -> usize { match self { Nested::Primitive(_, _, length) => *length, - Nested::List(nested) => nested.offsets.len(), - Nested::LargeList(nested) => nested.offsets.len(), + Nested::List(nested) => nested.offsets.len_proxy(), + Nested::LargeList(nested) => nested.offsets.len_proxy(), Nested::Struct(_, _, len) => *len, } } diff --git a/src/offset.rs b/src/offset.rs index 4ab062ef6da..744fb31966c 100644 --- a/src/offset.rs +++ b/src/offset.rs @@ -377,12 +377,18 @@ impl OffsetsBuffer { &self.0 } - /// Returns the length of this container + /// Returns the length an array with these offsets would be. #[inline] - pub fn len(&self) -> usize { + pub fn len_proxy(&self) -> usize { self.0.len() - 1 } + /// Returns the number of offsets in this container. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + /// Returns the byte slice stored in this buffer #[inline] pub fn as_slice(&self) -> &[O] { @@ -419,7 +425,7 @@ impl OffsetsBuffer { #[inline] pub fn start_end(&self, index: usize) -> (usize, usize) { // soundness: the invariant of the function - assert!(index < self.len()); + assert!(index < self.len_proxy()); unsafe { self.start_end_unchecked(index) } }