diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 01503d2b42d..11b76c37367 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -155,6 +155,12 @@ impl BinaryArray { self.values.get_unchecked(start..end) } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the offsets that slice `.values()` to return valid values. #[inline] pub fn offsets(&self) -> &Buffer { diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 96200361d98..065eb199775 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -87,6 +87,21 @@ impl BooleanArray { } } + /// Sets the validity bitmap on this [`BooleanArray`]. + /// # Panic + /// This function panics iff `validity.len() != self.len()`. + pub fn with_validity(&self, validity: Option) -> Self { + if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { + panic!("validity should be as least as large as the array") + } + let mut arr = self.clone(); + arr.validity = validity; + arr + } +} + +// accessors +impl BooleanArray { /// Returns the value at index `i` /// # Panic /// This function panics iff `i >= self.len()`. @@ -103,23 +118,17 @@ impl BooleanArray { self.values.get_bit_unchecked(i) } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the values of this [`BooleanArray`]. #[inline] pub fn values(&self) -> &Bitmap { &self.values } - - /// Sets the validity bitmap on this [`BooleanArray`]. - /// # Panic - /// This function panics iff `validity.len() != self.len()`. 
- pub fn with_validity(&self, validity: Option) -> Self { - if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { - panic!("validity should be as least as large as the array") - } - let mut arr = self.clone(); - arr.validity = validity; - arr - } } impl Array for BooleanArray { diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 71862ebf83b..3b33cb3e17b 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -109,6 +109,12 @@ impl DictionaryArray { arr } + /// The optional validity. Equivalent to `self.keys().validity()`. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.keys.validity() + } + /// Returns the keys of the [`DictionaryArray`]. These keys can be used to fetch values /// from `values`. #[inline] diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index d9d76e3ca27..7d0d12f364e 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -83,6 +83,12 @@ impl FixedSizeBinaryArray { } } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the values allocated on this [`FixedSizeBinaryArray`]. pub fn values(&self) -> &Buffer { &self.values diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index e11a7036f72..8bf04f6ad2c 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -97,6 +97,12 @@ impl FixedSizeListArray { } } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the inner array. pub fn values(&self) -> &Arc { &self.values diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 371f9ee22c8..7070a2d61eb 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -69,6 +69,50 @@ impl ListArray { } } + /// Returns a slice of this [`ListArray`]. 
+ /// # Panics + /// panics iff `offset + length > self.len()` + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + /// Returns a slice of this [`ListArray`]. + /// # Safety + /// The caller must ensure that `offset + length <= self.len()`. + pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { + let validity = self + .validity + .clone() + .map(|x| x.slice_unchecked(offset, length)); + let offsets = self.offsets.clone().slice_unchecked(offset, length + 1); + Self { + data_type: self.data_type.clone(), + offsets, + values: self.values.clone(), + validity, + offset: self.offset + offset, + } + } + + /// Sets the validity bitmap on this [`ListArray`]. + /// # Panic + /// This function panics iff `validity.len() != self.len()`. + pub fn with_validity(&self, validity: Option) -> Self { + if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { + panic!("validity should be as least as large as the array") + } + let mut arr = self.clone(); + arr.validity = validity; + arr + } +} + +// Accessors +impl ListArray { /// Returns the element at index `i` #[inline] pub fn value(&self, i: usize) -> Box { @@ -99,33 +143,10 @@ impl ListArray { self.values.slice_unchecked(offset.to_usize(), length) } - /// Returns a slice of this [`ListArray`]. - /// # Panics - /// panics iff `offset + length >= self.len()` - pub fn slice(&self, offset: usize, length: usize) -> Self { - assert!( - offset + length <= self.len(), - "the offset of the new Buffer cannot exceed the existing length" - ); - unsafe { self.slice_unchecked(offset, length) } - } - - /// Returns a slice of this [`ListArray`]. - /// # Safety - /// The caller must ensure that `offset + length < self.len()`. 
- pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self { - let validity = self - .validity - .clone() - .map(|x| x.slice_unchecked(offset, length)); - let offsets = self.offsets.clone().slice_unchecked(offset, length + 1); - Self { - data_type: self.data_type.clone(), - offsets, - values: self.values.clone(), - validity, - offset: self.offset + offset, - } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() } #[inline] @@ -137,18 +158,6 @@ impl ListArray { pub fn values(&self) -> &Arc { &self.values } - - /// Sets the validity bitmap on this [`ListArray`]. - /// # Panic - /// This function panics iff `validity.len() != self.len()`. - pub fn with_validity(&self, validity: Option) -> Self { - if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { - panic!("validity should be as least as large as the array") - } - let mut arr = self.clone(); - arr.validity = validity; - arr - } } impl ListArray { diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 5def0a5a5cb..f75ffd9a2b0 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -124,7 +124,13 @@ impl PrimitiveArray { arr } - /// The values. + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + + /// The values [`Buffer`]. /// Values on null slots are undetermined (they can be anything). #[inline] pub fn values(&self) -> &Buffer { diff --git a/src/array/struct_.rs b/src/array/struct_.rs index 10cc8791be3..8c772b857cf 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -144,6 +144,15 @@ impl StructArray { arr.validity = validity; arr } +} + +// Accessors +impl StructArray { + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } /// Returns the values of this [`StructArray`]. 
pub fn values(&self) -> &[Arc] { diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index ab7db86c0c4..1d6b75faec3 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -132,33 +132,6 @@ impl Utf8Array { } } - /// Returns the element at index `i` as &str - /// # Safety - /// This function is safe iff `i < self.len`. - pub unsafe fn value_unchecked(&self, i: usize) -> &str { - // soundness: the invariant of the function - let start = self.offsets.get_unchecked(i).to_usize(); - let end = self.offsets.get_unchecked(i + 1).to_usize(); - - // soundness: the invariant of the struct - let slice = self.values.get_unchecked(start..end); - - // soundness: the invariant of the struct - std::str::from_utf8_unchecked(slice) - } - - /// Returns the element at index `i` - pub fn value(&self, i: usize) -> &str { - let start = self.offsets[i].to_usize(); - let end = self.offsets[i + 1].to_usize(); - - // soundness: the invariant of the struct - let slice = unsafe { self.values.get_unchecked(start..end) }; - - // soundness: we always check for utf8 soundness on constructors. - unsafe { std::str::from_utf8_unchecked(slice) } - } - /// Returns a slice of this [`Utf8Array`]. /// # Implementation /// This operation is `O(1)` as it amounts to essentially increase two ref counts. @@ -203,6 +176,42 @@ impl Utf8Array { arr.validity = validity; arr } +} + +// Accessors +impl Utf8Array { + /// Returns the element at index `i` as &str + /// # Safety + /// This function is safe iff `i < self.len`. 
+ pub unsafe fn value_unchecked(&self, i: usize) -> &str { + // soundness: the invariant of the function + let start = self.offsets.get_unchecked(i).to_usize(); + let end = self.offsets.get_unchecked(i + 1).to_usize(); + + // soundness: the invariant of the struct + let slice = self.values.get_unchecked(start..end); + + // soundness: the invariant of the struct + std::str::from_utf8_unchecked(slice) + } + + /// Returns the element at index `i` + pub fn value(&self, i: usize) -> &str { + let start = self.offsets[i].to_usize(); + let end = self.offsets[i + 1].to_usize(); + + // soundness: the invariant of the struct + let slice = unsafe { self.values.get_unchecked(start..end) }; + + // soundness: we always check for utf8 soundness on constructors. + unsafe { std::str::from_utf8_unchecked(slice) } + } + + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } /// Returns the offsets of this [`Utf8Array`]. #[inline] diff --git a/src/compute/utils.rs b/src/compute/utils.rs index 2187d8aa2d4..87f1372aeb7 100644 --- a/src/compute/utils.rs +++ b/src/compute/utils.rs @@ -16,7 +16,7 @@ // under the License. use crate::{ - array::{Array, BooleanArray, Offset, Utf8Array}, + array::{BooleanArray, Offset, Utf8Array}, bitmap::Bitmap, datatypes::DataType, };