diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs
index 67c6c9c3a39..21b7d50ed32 100644
--- a/src/array/utf8/mod.rs
+++ b/src/array/utf8/mod.rs
@@ -21,8 +21,19 @@ pub(super) mod fmt;
mod from;
mod iterator;
mod mutable;
+mod mutable_values;
pub use iterator::*;
pub use mutable::*;
+pub use mutable_values::MutableUtf8ValuesArray;
+
+// Newtype adapter that exposes a string-like value as `[u8]` so it can be fed to byte-oriented generic functions
+pub(super) struct StrAsBytes
(P);
+impl> AsRef<[u8]> for StrAsBytes {
+ #[inline(always)]
+ fn as_ref(&self) -> &[u8] {
+ self.0.as_ref().as_bytes() // borrow the wrapped value as `str` and view its bytes; nothing is copied
+ }
+}
/// A [`Utf8Array`] is arrow's semantic equivalent of an immutable `Vec<Option<String>>`.
/// Cloning and slicing this struct is `O(1)`.
diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs
index 0b87b97c25c..61a7b65a0f5 100644
--- a/src/array/utf8/mutable.rs
+++ b/src/array/utf8/mutable.rs
@@ -1,59 +1,32 @@
use std::{iter::FromIterator, sync::Arc};
+use crate::array::physical_binary::*;
use crate::{
- array::{
- specification::{check_offsets_minimal, try_check_offsets_and_utf8},
- Array, MutableArray, Offset, TryExtend, TryPush,
- },
- bitmap::MutableBitmap,
+ array::{Array, MutableArray, Offset, TryExtend, TryPush},
+ bitmap::{Bitmap, MutableBitmap},
datatypes::DataType,
error::{Error, Result},
trusted_len::TrustedLen,
};
-use super::Utf8Array;
-use crate::array::physical_binary::*;
-use crate::bitmap::Bitmap;
+use super::{MutableUtf8ValuesArray, StrAsBytes, Utf8Array};
-struct StrAsBytes(P);
-impl> AsRef<[u8]> for StrAsBytes {
- #[inline]
- fn as_ref(&self) -> &[u8] {
- self.0.as_ref().as_bytes()
- }
-}
-
-/// The mutable version of [`Utf8Array`]. See [`MutableArray`] for more details.
+/// A [`MutableArray`] that builds a [`Utf8Array`]. It differs
+/// from [`MutableUtf8ValuesArray`] in that it can build nullable [`Utf8Array`]s.
#[derive(Debug)]
pub struct MutableUtf8Array {
- data_type: DataType,
- offsets: Vec,
- values: Vec,
+ values: MutableUtf8ValuesArray,
validity: Option,
}
impl From> for Utf8Array {
fn from(other: MutableUtf8Array) -> Self {
- // Safety:
- // `MutableUtf8Array` has the same invariants as `Utf8Array` and thus
- // `Utf8Array` can be safely created from `MutableUtf8Array` without checks.
let validity = other.validity.and_then(|x| {
- let bitmap: Bitmap = x.into();
- if bitmap.unset_bits() == 0 {
- None
- } else {
- Some(bitmap)
- }
+ let validity: Option = x.into();
+ validity
});
-
- unsafe {
- Utf8Array::::from_data_unchecked(
- other.data_type,
- other.offsets.into(),
- other.values.into(),
- validity,
- )
- }
+ let array: Utf8Array = other.values.into();
+ array.with_validity(validity)
}
}
@@ -67,9 +40,7 @@ impl MutableUtf8Array {
/// Initializes a new empty [`MutableUtf8Array`].
pub fn new() -> Self {
Self {
- data_type: Self::default_data_type(),
- offsets: vec![O::default()],
- values: Vec::::new(),
+ values: Default::default(),
validity: None,
}
}
@@ -91,71 +62,65 @@ impl MutableUtf8Array {
values: Vec,
validity: Option,
) -> Result {
- try_check_offsets_and_utf8(&offsets, &values)?;
+ let values = MutableUtf8ValuesArray::try_new(data_type, offsets, values)?;
+
if validity
.as_ref()
- .map_or(false, |validity| validity.len() != offsets.len() - 1)
+ .map_or(false, |validity| validity.len() != values.len())
{
return Err(Error::oos(
"validity's length must be equal to the number of values",
));
}
- if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
- return Err(Error::oos(
- "MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8",
- ));
- }
-
- Ok(Self {
- data_type,
- offsets,
- values,
- validity,
- })
+ Ok(Self { values, validity })
}
- /// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs.
+ /// Create a [`MutableUtf8Array`] out of low-end APIs.
+ /// # Safety
+ /// The caller must ensure that every value between offsets is a valid utf8.
/// # Panics
/// This function panics iff:
/// * The `offsets` and `values` are inconsistent
- /// * The `values` between `offsets` are not utf8 encoded
/// * The validity is not `None` and its length is different from `offsets`'s length minus one.
- pub fn from_data(
+ pub unsafe fn new_unchecked(
data_type: DataType,
offsets: Vec,
values: Vec,
validity: Option,
) -> Self {
- Self::try_new(data_type, offsets, values, validity).unwrap()
+ Self::from_data_unchecked(data_type, offsets, values, validity)
}
- /// Create a [`MutableUtf8Array`] out of low-end APIs.
+ /// Alias of [`Self::new_unchecked`]; it panics under the same conditions.
/// # Safety
/// The caller must ensure that every value between offsets is a valid utf8.
+ pub unsafe fn from_data_unchecked(
+ data_type: DataType,
+ offsets: Vec,
+ values: Vec,
+ validity: Option,
+ ) -> Self {
+ let values = MutableUtf8ValuesArray::new_unchecked(data_type, offsets, values);
+ if let Some(ref validity) = validity {
+ assert_eq!(values.len(), validity.len());
+ }
+ Self { values, validity }
+ }
+
+ /// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs.
/// # Panics
/// This function panics iff:
/// * The `offsets` and `values` are inconsistent
+ /// * The `values` between `offsets` are not utf8 encoded
/// * The validity is not `None` and its length is different from `offsets`'s length minus one.
- pub unsafe fn from_data_unchecked(
+ pub fn from_data(
data_type: DataType,
offsets: Vec,
values: Vec,
validity: Option,
) -> Self {
- check_offsets_minimal(&offsets, values.len());
- if let Some(ref validity) = validity {
- assert_eq!(offsets.len() - 1, validity.len());
- }
- if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
- panic!("MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
- }
- Self {
- data_type,
- offsets,
- values,
- validity,
- }
+ Self::try_new(data_type, offsets, values, validity).unwrap()
}
fn default_data_type() -> DataType {
@@ -169,29 +134,23 @@ impl MutableUtf8Array {
/// Initializes a new [`MutableUtf8Array`] with a pre-allocated capacity of slots and values.
pub fn with_capacities(capacity: usize, values: usize) -> Self {
- let mut offsets = Vec::::with_capacity(capacity + 1);
- offsets.push(O::default());
-
Self {
- data_type: Self::default_data_type(),
- offsets,
- values: Vec::::with_capacity(values),
+ values: MutableUtf8ValuesArray::with_capacities(capacity, values),
validity: None,
}
}
/// Reserves `additional` elements and `additional_values` on the values buffer.
pub fn reserve(&mut self, additional: usize, additional_values: usize) {
- self.offsets.reserve(additional);
+ self.values.reserve(additional, additional_values);
if let Some(x) = self.validity.as_mut() {
x.reserve(additional)
}
- self.values.reserve(additional_values);
}
- #[inline]
- fn last_offset(&self) -> O {
- *self.offsets.last().unwrap()
+ /// Returns the capacity in number of items.
+ pub fn capacity(&self) -> usize {
+ self.values.capacity()
}
/// Pushes a new element to the array.
@@ -205,23 +164,16 @@ impl MutableUtf8Array {
/// Pop the last entry from [`MutableUtf8Array`].
/// This function returns `None` iff this array is empty.
pub fn pop(&mut self) -> Option {
- if self.offsets.len() < 2 {
- return None;
- }
- self.offsets.pop()?;
- let value_start = self.offsets.iter().last().cloned()?.to_usize();
- let value = self.values.split_off(value_start);
+ let value = self.values.pop()?;
self.validity
.as_mut()
.map(|x| x.pop()?.then(|| ()))
.unwrap_or_else(|| Some(()))
- .map(|_|
- // soundness: we always check for utf8 soundness on constructors.
- unsafe { String::from_utf8_unchecked(value) })
+ .map(|_| value)
}
fn init_validity(&mut self) {
- let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
+ let mut validity = MutableBitmap::with_capacity(self.values.capacity());
validity.extend_constant(self.len(), true);
validity.set(self.len() - 1, false);
self.validity = Some(validity);
@@ -236,7 +188,6 @@ impl MutableUtf8Array {
/// Shrinks the capacity of the [`MutableUtf8Array`] to fit its current length.
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
- self.offsets.shrink_to_fit();
if let Some(validity) = &mut self.validity {
validity.shrink_to_fit()
}
@@ -244,25 +195,26 @@ impl MutableUtf8Array {
/// Extract the low-end APIs from the [`MutableUtf8Array`].
pub fn into_data(self) -> (DataType, Vec, Vec, Option) {
- (self.data_type, self.offsets, self.values, self.validity)
+ let (data_type, offsets, values) = self.values.into_inner();
+ (data_type, offsets, values, self.validity)
}
}
impl MutableUtf8Array {
/// returns its values.
pub fn values(&self) -> &Vec {
- &self.values
+ self.values.values()
}
/// returns its offsets.
pub fn offsets(&self) -> &Vec {
- &self.offsets
+ self.values.offsets()
}
}
impl MutableArray for MutableUtf8Array {
fn len(&self) -> usize {
- self.offsets.len() - 1
+ self.values.len()
}
fn validity(&self) -> Option<&MutableBitmap> {
@@ -273,28 +225,32 @@ impl MutableArray for MutableUtf8Array {
// Safety:
// `MutableUtf8Array` has the same invariants as `Utf8Array` and thus
// `Utf8Array` can be safely created from `MutableUtf8Array` without checks.
- Box::new(unsafe {
+ let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner();
+ unsafe {
Utf8Array::from_data_unchecked(
- self.data_type.clone(),
- std::mem::take(&mut self.offsets).into(),
- std::mem::take(&mut self.values).into(),
+ data_type,
+ offsets.into(),
+ values.into(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
- })
+ }
+ .boxed()
}
fn as_arc(&mut self) -> Arc {
// Safety:
// `MutableUtf8Array` has the same invariants as `Utf8Array` and thus
// `Utf8Array` can be safely created from `MutableUtf8Array` without checks.
- Arc::new(unsafe {
+ let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner();
+ unsafe {
Utf8Array::from_data_unchecked(
- self.data_type.clone(),
- std::mem::take(&mut self.offsets).into(),
- std::mem::take(&mut self.values).into(),
+ data_type,
+ offsets.into(),
+ values.into(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
- })
+ }
+ .arced()
}
fn data_type(&self) -> &DataType {
@@ -353,8 +309,9 @@ impl MutableUtf8Array {
P: AsRef,
I: Iterator- ,
{
- let iterator = iterator.map(StrAsBytes);
- let additional = extend_from_values_iter(&mut self.offsets, &mut self.values, iterator);
+ let length = self.values.len();
+ self.values.extend(iterator);
+ let additional = self.values.len() - length;
if let Some(validity) = self.validity.as_mut() {
validity.extend_constant(additional, true);
@@ -372,11 +329,9 @@ impl
MutableUtf8Array {
P: AsRef,
I: Iterator- ,
{
- let (_, upper) = iterator.size_hint();
- let additional = upper.expect("extend_trusted_len_values requires an upper limit");
-
- let iterator = iterator.map(StrAsBytes);
- extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
+ let length = self.values.len();
+ self.values.extend_trusted_len_unchecked(iterator);
+ let additional = self.values.len() - length;
if let Some(validity) = self.validity.as_mut() {
validity.extend_constant(additional, true);
@@ -408,13 +363,8 @@ impl
MutableUtf8Array {
self.validity = Some(validity);
}
- let iterator = iterator.map(|x| x.map(StrAsBytes));
- extend_from_trusted_len_iter(
- &mut self.offsets,
- &mut self.values,
- self.validity.as_mut().unwrap(),
- iterator,
- );
+ self.values
+ .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
}
/// Creates a [`MutableUtf8Array`] from an iterator of trusted length.
@@ -453,10 +403,7 @@ impl MutableUtf8Array {
pub unsafe fn from_trusted_len_values_iter_unchecked, I: Iterator- >(
iterator: I,
) -> Self {
- let iterator = iterator.map(StrAsBytes);
- let (offsets, values) = unsafe { trusted_len_values_iter(iterator) };
- // soundness: T is AsRef
- Self::from_data_unchecked(Self::default_data_type(), offsets, values, None)
+ MutableUtf8ValuesArray::from_trusted_len_iter_unchecked(iterator).into()
}
/// Creates a new [`MutableUtf8Array`] from a [`TrustedLen`] of `&str`.
@@ -521,10 +468,7 @@ impl MutableUtf8Array {
/// Creates a new [`MutableUtf8Array`] from a [`Iterator`] of `&str`.
pub fn from_iter_values, I: Iterator- >(iterator: I) -> Self {
- let iterator = iterator.map(StrAsBytes);
- let (offsets, values) = values_iter(iterator);
- // soundness: T: AsRef
- unsafe { Self::from_data_unchecked(Self::default_data_type(), offsets, values, None) }
+ MutableUtf8ValuesArray::from_iter(iterator).into()
}
}
@@ -547,12 +491,7 @@ impl> TryPush> for MutableUtf8Array {
fn try_push(&mut self, value: Option) -> Result<()> {
match value {
Some(value) => {
- let bytes = value.as_ref().as_bytes();
- self.values.extend_from_slice(bytes);
-
- let size = O::from_usize(self.values.len()).ok_or(Error::Overflow)?;
-
- self.offsets.push(size);
+ self.values.try_push(value.as_ref())?;
match &mut self.validity {
Some(validity) => validity.push(true),
@@ -560,7 +499,7 @@ impl> TryPush> for MutableUtf8Array {
}
}
None => {
- self.offsets.push(self.last_offset());
+ self.values.push("");
match &mut self.validity {
Some(validity) => validity.push(false),
None => self.init_validity(),
diff --git a/src/array/utf8/mutable_values.rs b/src/array/utf8/mutable_values.rs
new file mode 100644
index 00000000000..a7524a9f3a6
--- /dev/null
+++ b/src/array/utf8/mutable_values.rs
@@ -0,0 +1,383 @@
+use std::{iter::FromIterator, sync::Arc};
+
+use crate::{
+ array::{
+ specification::{check_offsets_minimal, try_check_offsets_and_utf8},
+ Array, MutableArray, Offset, TryExtend, TryPush,
+ },
+ bitmap::MutableBitmap,
+ datatypes::DataType,
+ error::{Error, Result},
+ trusted_len::TrustedLen,
+};
+
+use super::{MutableUtf8Array, StrAsBytes, Utf8Array};
+use crate::array::physical_binary::*;
+
+/// A [`MutableArray`] that builds a [`Utf8Array`] without a validity bitmap. It differs
+/// from [`MutableUtf8Array`] in that every item it holds is non-null.
+#[derive(Debug)]
+pub struct MutableUtf8ValuesArray {
+ data_type: DataType, // logical type; the constructors only accept a `Utf8`/`LargeUtf8` physical type
+ offsets: Vec, // end offset of every item, preceded by one initial `O::default()` entry
+ values: Vec, // bytes of all items, appended back to back
+}
+
+impl From> for Utf8Array {
+ fn from(other: MutableUtf8ValuesArray) -> Self { // moves the three fields into the array; nothing is re-validated
+ // SAFETY:
+ // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus
+ // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks.
+ unsafe {
+ Utf8Array::::from_data_unchecked(
+ other.data_type,
+ other.offsets.into(),
+ other.values.into(),
+ None, // a values-only array never carries a validity bitmap
+ )
+ }
+ }
+}
+
+impl From> for MutableUtf8Array {
+ fn from(other: MutableUtf8ValuesArray) -> Self { // wraps the values into the nullable builder, with no validity yet
+ // SAFETY:
+ // `MutableUtf8ValuesArray` has the same invariants as `MutableUtf8Array`
+ unsafe {
+ MutableUtf8Array::::from_data_unchecked(
+ other.data_type,
+ other.offsets,
+ other.values,
+ None, // no bitmap is passed: all items coming from a values array are valid
+ )
+ }
+ }
+}
+
+impl Default for MutableUtf8ValuesArray {
+ fn default() -> Self {
+ Self::new() // an empty array: one initial offset and no values
+ }
+}
+
+impl MutableUtf8ValuesArray {
+ /// Returns an empty [`MutableUtf8ValuesArray`].
+ pub fn new() -> Self {
+ Self {
+ data_type: Self::default_data_type(),
+ offsets: vec![O::default()], // the initial offset is always present, even when there are no items
+ values: Vec::::new(),
+ }
+ }
+
+ /// Returns a [`MutableUtf8ValuesArray`] created from its internal representation.
+ ///
+ /// # Errors
+ /// This function returns an error iff:
+ /// * the offsets are not monotonically increasing
+ /// * The last offset is not equal to the values' length.
+ /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`.
+ /// * The `values` between two consecutive `offsets` are not valid utf8
+ /// # Implementation
+ /// This function is `O(N)` - checking monotonicity and utf8 is `O(N)`
+ pub fn try_new(data_type: DataType, offsets: Vec, values: Vec) -> Result {
+ try_check_offsets_and_utf8(&offsets, &values)?;
+ if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
+ return Err(Error::oos(
+ "MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8",
+ ));
+ }
+
+ Ok(Self {
+ data_type,
+ offsets,
+ values,
+ })
+ }
+
+ /// Returns a [`MutableUtf8ValuesArray`] created from its internal representation.
+ ///
+ /// # Panics
+ /// This function panics iff:
+ /// * The last offset is not equal to the values' length.
+ /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`.
+ /// # Safety
+ /// This function is safe iff:
+ /// * the offsets are monotonically increasing
+ /// * The `values` between two consecutive `offsets` are valid utf8
+ /// # Implementation
+ /// This function is `O(1)`
+ pub unsafe fn new_unchecked(data_type: DataType, offsets: Vec, values: Vec) -> Self {
+ check_offsets_minimal(&offsets, values.len());
+
+ if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
+ panic!("MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
+ }
+
+ Self {
+ data_type,
+ offsets,
+ values,
+ }
+ }
+
+ /// Returns the default [`DataType`] of this container: [`DataType::Utf8`] or [`DataType::LargeUtf8`]
+ /// depending on the generic [`Offset`].
+ pub fn default_data_type() -> DataType {
+ Utf8Array::::default_data_type()
+ }
+
+ /// Initializes a new [`MutableUtf8ValuesArray`] with a pre-allocated capacity of items.
+ pub fn with_capacity(capacity: usize) -> Self {
+ Self::with_capacities(capacity, 0)
+ }
+
+ /// Initializes a new [`MutableUtf8ValuesArray`] with a pre-allocated capacity of items and values.
+ pub fn with_capacities(capacity: usize, values: usize) -> Self {
+ let mut offsets = Vec::::with_capacity(capacity + 1); // one extra slot for the initial offset
+ offsets.push(O::default());
+
+ Self {
+ data_type: Self::default_data_type(),
+ offsets,
+ values: Vec::::with_capacity(values),
+ }
+ }
+
+ /// Returns the values (the bytes of all items).
+ #[inline]
+ pub fn values(&self) -> &Vec {
+ &self.values
+ }
+
+ /// Returns the offsets, including the initial one.
+ #[inline]
+ pub fn offsets(&self) -> &Vec {
+ &self.offsets
+ }
+
+ /// Reserves `additional` elements and `additional_values` on the values.
+ #[inline]
+ pub fn reserve(&mut self, additional: usize, additional_values: usize) {
+ self.offsets.reserve(additional + 1); // NOTE(review): the initial offset is already stored, so the `+ 1` only over-reserves by one slot
+ self.values.reserve(additional_values);
+ }
+
+ /// Returns the capacity in number of items
+ pub fn capacity(&self) -> usize {
+ self.offsets.capacity() - 1 // one slot is always taken by the initial offset
+ }
+
+ /// Pushes a new item to the array.
+ /// # Panics
+ /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
+ #[inline]
+ pub fn push>(&mut self, value: T) {
+ self.try_push(value).unwrap()
+ }
+
+ /// Pop the last entry from [`MutableUtf8ValuesArray`].
+ /// This function returns `None` iff this array is empty.
+ pub fn pop(&mut self) -> Option {
+ if self.len() == 0 {
+ return None;
+ }
+ self.offsets.pop()?; // drop the end offset of the last item
+ let start = self.offsets.last()?.to_usize(); // the new last offset is where the popped item started
+ let value = self.values.split_off(start);
+ // SAFETY: utf8 is validated on initialization
+ Some(unsafe { String::from_utf8_unchecked(value) })
+ }
+
+ /// Shrinks the capacity of the [`MutableUtf8ValuesArray`] to fit its current length.
+ pub fn shrink_to_fit(&mut self) {
+ self.values.shrink_to_fit();
+ self.offsets.shrink_to_fit();
+ }
+
+ /// Extract the low-end APIs from the [`MutableUtf8ValuesArray`]: its data type, offsets and values.
+ pub fn into_inner(self) -> (DataType, Vec, Vec) {
+ (self.data_type, self.offsets, self.values)
+ }
+}
+
+impl<O: Offset> MutableArray for MutableUtf8ValuesArray<O> {
+    /// Number of items, i.e. the number of offsets minus the initial one.
+    fn len(&self) -> usize {
+        self.offsets.len() - 1
+    }
+
+    /// Always `None`: this container holds no validity bitmap.
+    fn validity(&self) -> Option<&MutableBitmap> {
+        None
+    }
+
+    /// Moves the accumulated items into a boxed [`Utf8Array`].
+    ///
+    /// `self` is left empty (its `data_type` is kept) and can be reused.
+    fn as_box(&mut self) -> Box<dyn Array> {
+        // Swap in a fresh initial offset instead of `mem::take`: an empty `offsets`
+        // would break the "at least one offset" invariant that `len()`,
+        // `capacity()` and `try_push` rely on.
+        let offsets = std::mem::replace(&mut self.offsets, vec![O::default()]);
+        let values = std::mem::take(&mut self.values);
+        let data_type = self.data_type.clone();
+        // SAFETY:
+        // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus
+        // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks.
+        let array = unsafe {
+            Utf8Array::<O>::from_data_unchecked(data_type, offsets.into(), values.into(), None)
+        };
+        Box::new(array)
+    }
+
+    /// Moves the accumulated items into a shared [`Utf8Array`].
+    ///
+    /// `self` is left empty (its `data_type` is kept) and can be reused.
+    fn as_arc(&mut self) -> Arc<dyn Array> {
+        // See `as_box`: the initial offset must survive the move.
+        let offsets = std::mem::replace(&mut self.offsets, vec![O::default()]);
+        let values = std::mem::take(&mut self.values);
+        let data_type = self.data_type.clone();
+        // SAFETY:
+        // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus
+        // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks.
+        let array = unsafe {
+            Utf8Array::<O>::from_data_unchecked(data_type, offsets.into(), values.into(), None)
+        };
+        Arc::new(array)
+    }
+
+    /// The logical [`DataType`] of the array being built.
+    fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
+        self
+    }
+
+    /// There is no validity bitmap here, so a "null" is stored as an empty string.
+    #[inline]
+    fn push_null(&mut self) {
+        self.push::<&str>("")
+    }
+
+    /// Reserves room for `additional` items; no bytes are reserved for their values.
+    fn reserve(&mut self, additional: usize) {
+        // Two arguments: this is the inherent `reserve`, not a recursive call.
+        self.reserve(additional, 0)
+    }
+
+    /// Shrinks both buffers to fit the current length.
+    fn shrink_to_fit(&mut self) {
+        // Inherent methods take precedence over trait methods, so this calls the
+        // inherent `shrink_to_fit` and does not recurse.
+        self.shrink_to_fit()
+    }
+}
+
+impl> FromIterator for MutableUtf8ValuesArray {
+ fn from_iter>(iter: I) -> Self {
+ let (offsets, values) = values_iter(iter.into_iter().map(StrAsBytes)); // items are presented as bytes to the shared helper
+ // SAFETY: every item is `AsRef<str>` (hence valid utf8) and offsets are monotonically increasing
+ unsafe { Self::new_unchecked(Self::default_data_type(), offsets, values) }
+ }
+}
+
+impl MutableUtf8ValuesArray {
+ pub(crate) unsafe fn extend_from_trusted_len_iter( // NOTE(review): safety contract is inherited from the `extend_from_trusted_len_iter` helper - presumably an exact length must be reported; confirm
+ &mut self,
+ validity: &mut MutableBitmap,
+ iterator: I,
+ ) where
+ P: AsRef,
+ I: Iterator- >,
+ {
+ let iterator = iterator.map(|x| x.map(StrAsBytes));
+ extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
+ }
+
+ /// Extends the [`MutableUtf8ValuesArray`] from a [`TrustedLen`]
+ #[inline]
+ pub fn extend_trusted_len
(&mut self, iterator: I)
+ where
+ P: AsRef,
+ I: TrustedLen- ,
+ {
+ unsafe { self.extend_trusted_len_unchecked(iterator) } // SAFETY: `I` is `TrustedLen`
+ }
+
+ /// Extends [`MutableUtf8ValuesArray`] from an iterator of trusted len.
+ /// # Safety
+ /// The iterator must be trusted len.
+ #[inline]
+ pub unsafe fn extend_trusted_len_unchecked
(&mut self, iterator: I)
+ where
+ P: AsRef,
+ I: Iterator- ,
+ {
+ let iterator = iterator.map(StrAsBytes);
+ extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
+ }
+
+ /// Creates a [`MutableUtf8ValuesArray`] from a [`TrustedLen`]
+ #[inline]
+ pub fn from_trusted_len_iter
(iterator: I) -> Self
+ where
+ P: AsRef,
+ I: TrustedLen- ,
+ {
+ // SAFETY: I is `TrustedLen`
+ unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
+ }
+
+ /// Returns a new [`MutableUtf8ValuesArray`] from an iterator of trusted length.
+ /// # Safety
+ /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
+ /// I.e. that `size_hint().1` correctly reports its length.
+ #[inline]
+ pub unsafe fn from_trusted_len_iter_unchecked
(iterator: I) -> Self
+ where
+ P: AsRef,
+ I: Iterator- ,
+ {
+ let iterator = iterator.map(StrAsBytes);
+ let (offsets, values) = trusted_len_values_iter(iterator);
+
+ // SAFETY: P is `str` and offsets are monotonically increasing
+ Self::new_unchecked(Self::default_data_type(), offsets, values)
+ }
+
+ /// Returns a new [`MutableUtf8ValuesArray`] from an iterator.
+ /// # Errors
+ /// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value.
+ /// (`i32::MAX` or `i64::MAX` respectively).
+ pub fn try_from_iter
, I: IntoIterator- >(iter: I) -> Result
{
+ let iterator = iter.into_iter();
+ let (lower, _) = iterator.size_hint();
+ let mut array = Self::with_capacity(lower); // only the lower bound of the size hint is pre-allocated
+ for item in iterator {
+ array.try_push(item)?; // stop at the first item whose end offset does not fit in `O`
+ }
+ Ok(array)
+ }
+}
+
+impl> Extend for MutableUtf8ValuesArray {
+ fn extend>(&mut self, iter: I) { // appends every item of `iter` through the shared byte-oriented helper
+ extend_from_values_iter(
+ &mut self.offsets,
+ &mut self.values,
+ iter.into_iter().map(StrAsBytes), // present each string as bytes
+ );
+ }
+}
+
+impl> TryExtend for MutableUtf8ValuesArray {
+ fn try_extend>(&mut self, iter: I) -> Result<()> { // fallible extend: items pushed before a failure stay in the array
+ let mut iter = iter.into_iter();
+ self.reserve(iter.size_hint().0, 0); // reserve offsets for the lower bound only; value bytes grow on demand
+ iter.try_for_each(|x| self.try_push(x))
+ }
+}
+
+impl<O: Offset, T: AsRef<str>> TryPush<T> for MutableUtf8ValuesArray<O> {
+    /// Appends `value` as a new item.
+    ///
+    /// # Errors
+    /// Returns [`Error::Overflow`] iff the total length of the values (in bytes)
+    /// would no longer fit in `O`. The array is left unchanged in that case.
+    #[inline]
+    fn try_push(&mut self, value: T) -> Result<()> {
+        let bytes = value.as_ref().as_bytes();
+
+        // Compute the new end offset before mutating anything: extending `values`
+        // first would leave the bytes behind on overflow, so the last offset would
+        // no longer equal `values.len()`.
+        let size = self
+            .values
+            .len()
+            .checked_add(bytes.len())
+            .and_then(O::from_usize)
+            .ok_or(Error::Overflow)?;
+
+        self.values.extend_from_slice(bytes);
+        self.offsets.push(size);
+        Ok(())
+    }
+}