diff --git a/src/compute/aggregate/sum.rs b/src/compute/aggregate/sum.rs index 49e3f392e35..a08841cc54d 100644 --- a/src/compute/aggregate/sum.rs +++ b/src/compute/aggregate/sum.rs @@ -118,6 +118,32 @@ macro_rules! dyn_sum { }}; } +pub fn can_sum(data_type: &DataType) -> bool { + use DataType::*; + matches!( + data_type, + Int8 | Int16 | Int32 // Int32 is summable too: `sum` is exercised on an Int32Array in the tests + | Date32 + | Time32(_) + | Interval(IntervalUnit::YearMonth) + | Int64 + | Date64 + | Time64(_) + | Timestamp(_, _) + | Duration(_) + | UInt8 + | UInt16 + | UInt32 + | UInt64 + | Float32 + | Float64 + ) +} + +/// Returns the sum of all elements in `array` as a [`Scalar`] of the same physical +/// and logical types as `array`. +/// # Errors +/// Errors iff the operation is not supported. pub fn sum(array: &dyn Array) -> Result> { Ok(match array.data_type() { DataType::Int8 => dyn_sum!(i8, array), @@ -158,7 +184,16 @@ mod tests { #[test] fn test_primitive_array_sum() { let a = Int32Array::from_slice(&[1, 2, 3, 4, 5]); - assert_eq!(15, sum(&a).unwrap()); + assert_eq!( + &PrimitiveScalar::::from(Some(15)) as &dyn Scalar, + sum(&a).unwrap().as_ref() + ); + + let a = a.to(DataType::Date32); + assert_eq!( + &PrimitiveScalar::::from(Some(15)).to(DataType::Date32) as &dyn Scalar, + sum(&a).unwrap().as_ref() + ); } #[test] diff --git a/src/lib.rs b/src/lib.rs index f0b43d91fa0..7a5b27346db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod bitmap; pub mod buffer; mod endianess; pub mod error; +#[cfg(feature = "compute")] pub mod scalar; pub mod trusted_len; pub mod types; diff --git a/src/scalar/README.md b/src/scalar/README.md index 0948317004b..2bac790873b 100644 --- a/src/scalar/README.md +++ b/src/scalar/README.md @@ -10,7 +10,7 @@ There are three reasons: * forward-compatibility: a new entry on an `enum` is backward-incompatible * do not expose implementation details to users (reduce the surface of the public API) -### `Scalar` should contain nullability information +### `Scalar` MUST contain nullability information This is 
to be aligned with the general notion of arrow's `Array`. diff --git a/src/scalar/binary.rs b/src/scalar/binary.rs new file mode 100644 index 00000000000..546b2751f67 --- /dev/null +++ b/src/scalar/binary.rs @@ -0,0 +1,66 @@ +use crate::{array::*, buffer::Buffer, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct BinaryScalar { + value: Buffer, + is_valid: bool, + phantom: std::marker::PhantomData, +} + +impl BinaryScalar { + #[inline] + pub fn new(v: Option<&[u8]>) -> Self { + let is_valid = v.is_some(); + O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); + let value = Buffer::from(v.unwrap_or(&[])); + Self { + value, + is_valid, + phantom: std::marker::PhantomData, + } + } + + #[inline] + pub fn value(&self) -> &[u8] { + self.value.as_slice() + } +} + +impl Scalar for BinaryScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + if O::is_large() { + &DataType::LargeBinary + } else { + &DataType::Binary + } + } + + fn to_boxed_array(&self, length: usize) -> Box { + if self.is_valid { + let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` + let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); + let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; + let values = std::iter::repeat(self.value.as_slice()) + .take(length) + .flatten() + .copied() + .collect(); + Box::new(BinaryArray::::from_data(offsets, values, None)) + } else { + Box::new(BinaryArray::::new_null(length)) + } + } +} diff --git a/src/scalar/boolean.rs b/src/scalar/boolean.rs new file mode 100644 index 00000000000..29d8ef3c97d --- /dev/null +++ b/src/scalar/boolean.rs @@ -0,0 +1,51 @@ +use crate::{array::*, bitmap::Bitmap, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct BooleanScalar { 
+ value: bool, + is_valid: bool, +} + +impl BooleanScalar { + #[inline] + pub fn new(v: Option) -> Self { + let is_valid = v.is_some(); + Self { + value: v.unwrap_or_default(), + is_valid, + } + } + + #[inline] + pub fn value(&self) -> bool { + self.value + } +} + +impl Scalar for BooleanScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + &DataType::Boolean + } + + fn to_boxed_array(&self, length: usize) -> Box { + if self.is_valid { + let values = Bitmap::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); + Box::new(BooleanArray::from_data(values, None)) + } else { + Box::new(BooleanArray::new_null(length)) + } + } +} diff --git a/src/scalar/equal.rs b/src/scalar/equal.rs new file mode 100644 index 00000000000..503f9fc6856 --- /dev/null +++ b/src/scalar/equal.rs @@ -0,0 +1,115 @@ +use super::*; + +impl PartialEq for dyn Scalar { + fn eq(&self, other: &Self) -> bool { + equal(self, other) + } +} + +macro_rules! 
dyn_eq { + ($ty:ty, $lhs:expr, $rhs:expr) => {{ + let lhs = $lhs + .as_any() + .downcast_ref::>() + .unwrap(); + let rhs = $rhs + .as_any() + .downcast_ref::>() + .unwrap(); + lhs == rhs + }}; +} + +fn equal(lhs: &dyn Scalar, rhs: &dyn Scalar) -> bool { + if lhs.data_type() != rhs.data_type() { + return false; + } + + match lhs.data_type() { + DataType::Null => { + let lhs = lhs.as_any().downcast_ref::().unwrap(); + let rhs = rhs.as_any().downcast_ref::().unwrap(); + lhs == rhs + } + DataType::Boolean => { + let lhs = lhs.as_any().downcast_ref::().unwrap(); + let rhs = rhs.as_any().downcast_ref::().unwrap(); + lhs == rhs + } + DataType::UInt8 => { + dyn_eq!(u8, lhs, rhs) + } + DataType::UInt16 => { + dyn_eq!(u16, lhs, rhs) + } + DataType::UInt32 => { + dyn_eq!(u32, lhs, rhs) + } + DataType::UInt64 => { + dyn_eq!(u64, lhs, rhs) + } + DataType::Int8 => { + dyn_eq!(i8, lhs, rhs) + } + DataType::Int16 => { + dyn_eq!(i16, lhs, rhs) + } + DataType::Int32 + | DataType::Date32 + | DataType::Time32(_) + | DataType::Interval(IntervalUnit::YearMonth) => { + dyn_eq!(i32, lhs, rhs) + } + DataType::Int64 + | DataType::Date64 + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Duration(_) => { + dyn_eq!(i64, lhs, rhs) + } + DataType::Decimal(_, _) => { + dyn_eq!(i128, lhs, rhs) + } + DataType::Interval(IntervalUnit::DayTime) => { + dyn_eq!(days_ms, lhs, rhs) + } + DataType::Float16 => unreachable!(), + DataType::Float32 => { + dyn_eq!(f32, lhs, rhs) + } + DataType::Float64 => { + dyn_eq!(f64, lhs, rhs) + } + DataType::Utf8 => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeUtf8 => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::Binary => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeBinary 
=> { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::List(_) => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeList(_) => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + _ => unimplemented!(), + } +} diff --git a/src/scalar/list.rs b/src/scalar/list.rs new file mode 100644 index 00000000000..24c81764256 --- /dev/null +++ b/src/scalar/list.rs @@ -0,0 +1,88 @@ +use std::any::Any; +use std::sync::Arc; + +use crate::{ + array::*, + buffer::Buffer, + datatypes::{DataType, Field}, +}; + +use super::Scalar; + +/// The scalar equivalent of [`ListArray`]. Like [`ListArray`], this struct holds a dynamically-typed +/// [`Array`]. The only difference is that this has only one element. +#[derive(Debug, Clone)] +pub struct ListScalar { + values: Arc, + is_valid: bool, + phantom: std::marker::PhantomData, + data_type: DataType, +} + +impl PartialEq for ListScalar { + fn eq(&self, other: &Self) -> bool { + (self.data_type == other.data_type) + && (self.is_valid == other.is_valid) + && (!self.is_valid || (self.values.as_ref() == other.values.as_ref())) // two null scalars are equal; `values` only matters when valid + } +} + +pub enum ListScalarNew { + Array(Arc), + DataType(DataType), +} + +impl ListScalar { + #[inline] + pub fn new(v: ListScalarNew) -> Self { + let (data_type, values, is_valid) = match v { + ListScalarNew::Array(a) => (a.data_type().clone(), a, true), + ListScalarNew::DataType(d) => (d.clone(), new_empty_array(d).into(), false), + }; + let field = Field::new("item", data_type, true); + let data_type = if O::is_large() { + DataType::LargeList(Box::new(field)) + } else { + DataType::List(Box::new(field)) + }; + Self { + values, + is_valid, + phantom: std::marker::PhantomData, + data_type, + } + } +} + +impl Scalar for ListScalar { + fn as_any(&self) -> &dyn Any { + self + } 
+ + fn is_valid(&self) -> bool { + self.is_valid + } + + fn data_type(&self) -> &DataType { + &self.data_type + } + + fn to_boxed_array(&self, length: usize) -> Box { + if self.is_valid { + let offsets = (0..=length).map(|i| O::from_usize(i * self.values.len()).unwrap()); // slot i spans [i * len, (i + 1) * len), starting at 0 + let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; + let values = std::iter::repeat(self.values.as_ref()) + .take(length) + .collect::>(); + let values = crate::compute::concat::concatenate(&values).unwrap(); + Box::new(ListArray::::from_data( + self.data_type.clone(), + offsets, + values.into(), + None, + )) + } else { + Box::new(ListArray::::new_null(self.data_type.clone(), length)) + } + } +} diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index b91be027d2a..d5f8753b6f4 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -1,6 +1,20 @@ use std::any::Any; -use crate::{array::*, bitmap::Bitmap, buffer::Buffer, datatypes::DataType, types::NativeType}; +use crate::{array::*, datatypes::*, types::days_ms}; + +mod equal; +mod primitive; +pub use primitive::*; +mod utf8; +pub use utf8::*; +mod binary; +pub use binary::*; +mod boolean; +pub use boolean::*; +mod list; +pub use list::*; +mod null; +pub use null::*; pub trait Scalar: std::fmt::Debug { fn as_any(&self) -> &dyn Any; @@ -12,234 +26,102 @@ pub trait Scalar: std::fmt::Debug { fn to_boxed_array(&self, length: usize) -> Box; } -#[derive(Debug, Clone)] -pub struct PrimitiveScalar { - // Not Option because this offers a stabler pointer offset on the struct - value: T, - is_valid: bool, - data_type: DataType, -} - -impl PrimitiveScalar { - #[inline] - pub fn new(data_type: DataType, v: Option) -> Self { - let is_valid = v.is_some(); - Self { - value: v.unwrap_or_default(), - is_valid, - data_type, - } - } - - #[inline] - pub fn value(&self) -> T { - self.value - } -} - -impl Scalar for PrimitiveScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - 
self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - &self.data_type - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let values = Buffer::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); - Box::new(PrimitiveArray::from_data( - self.data_type.clone(), - values, - None, - )) +macro_rules! dyn_new { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(PrimitiveArray::::new_null( - self.data_type.clone(), - length, - )) - } - } + None + }; + Box::new(PrimitiveScalar::new(array.data_type().clone(), value)) + }}; } -#[derive(Debug, Clone)] -pub struct BooleanScalar { - value: bool, - is_valid: bool, -} - -impl BooleanScalar { - #[inline] - pub fn new(v: Option) -> Self { - let is_valid = v.is_some(); - Self { - value: v.unwrap_or_default(), - is_valid, - } - } - - #[inline] - pub fn value(&self) -> bool { - self.value - } -} - -impl Scalar for BooleanScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - &DataType::Boolean - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let values = Bitmap::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); - Box::new(BooleanArray::from_data(values, None)) +macro_rules! 
dyn_new_utf8 { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array.as_any().downcast_ref::>().unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(BooleanArray::new_null(length)) - } - } + None + }; + Box::new(Utf8Scalar::<$type>::new(value)) + }}; } -#[derive(Debug, Clone)] -pub struct Utf8Scalar { - value: Buffer, - is_valid: bool, - phantom: std::marker::PhantomData, -} - -impl Utf8Scalar { - #[inline] - pub fn new(v: Option<&str>) -> Self { - let is_valid = v.is_some(); - O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); - let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); - Self { - value, - is_valid, - phantom: std::marker::PhantomData, - } - } - - #[inline] - pub fn value(&self) -> &str { - unsafe { std::str::from_utf8_unchecked(self.value.as_slice()) } - } -} - -impl Scalar for Utf8Scalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - if O::is_large() { - &DataType::LargeUtf8 - } else { - &DataType::Utf8 - } - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` - let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); - let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; - let values = std::iter::repeat(self.value.as_slice()) - .take(length) - .flatten() - .copied() - .collect(); - Box::new(Utf8Array::::from_data(offsets, values, None)) +macro_rules! 
dyn_new_binary { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(Utf8Array::::new_null(length)) - } - } -} - -#[derive(Debug, Clone)] -pub struct BinaryScalar { - value: Buffer, - is_valid: bool, - phantom: std::marker::PhantomData, + None + }; + Box::new(BinaryScalar::<$type>::new(value)) + }}; } -impl BinaryScalar { - #[inline] - pub fn new(v: Option<&str>) -> Self { - let is_valid = v.is_some(); - O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); - let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); - Self { - value, - is_valid, - phantom: std::marker::PhantomData, - } - } - - #[inline] - pub fn value(&self) -> &[u8] { - self.value.as_slice() - } +macro_rules! dyn_new_list { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array.as_any().downcast_ref::>().unwrap(); + let value = if array.is_valid($index) { + ListScalarNew::Array(array.value($index).into()) + } else { + ListScalarNew::DataType(array.data_type().clone()) + }; + Box::new(ListScalar::<$type>::new(value)) + }}; } -impl Scalar for BinaryScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - if O::is_large() { - &DataType::LargeBinary - } else { - &DataType::Binary +/// creates a new [`Scalar`] from an [`Array`]. 
+pub fn new_scalar(array: &dyn Array, index: usize) -> Box { + use DataType::*; + match array.data_type() { + Null => Box::new(NullScalar::new()), + Boolean => { + let array = array.as_any().downcast_ref::().unwrap(); + let value = if array.is_valid(index) { + Some(array.value(index)) + } else { + None + }; + Box::new(BooleanScalar::new(value)) } - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` - let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); - let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; - let values = std::iter::repeat(self.value.as_slice()) - .take(length) - .flatten() - .copied() - .collect(); - Box::new(BinaryArray::::from_data(offsets, values, None)) - } else { - Box::new(BinaryArray::::new_null(length)) + Int8 => dyn_new!(array, index, i8), + Int16 => dyn_new!(array, index, i16), + Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => { + dyn_new!(array, index, i32) } + Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => dyn_new!(array, index, i64), + Interval(IntervalUnit::DayTime) => dyn_new!(array, index, days_ms), + UInt8 => dyn_new!(array, index, u8), + UInt16 => dyn_new!(array, index, u16), + UInt32 => dyn_new!(array, index, u32), + UInt64 => dyn_new!(array, index, u64), + Decimal(_, _) => dyn_new!(array, index, i128), + Float16 => unreachable!(), + Float32 => dyn_new!(array, index, f32), + Float64 => dyn_new!(array, index, f64), + Utf8 => dyn_new_utf8!(array, index, i32), + LargeUtf8 => dyn_new_utf8!(array, index, i64), + Binary => dyn_new_binary!(array, index, i32), + LargeBinary => dyn_new_binary!(array, index, i64), + List(_) => dyn_new_list!(array, index, i32), + LargeList(_) => dyn_new_list!(array, index, i64), + /* + FixedSizeBinary(_) => {} + FixedSizeList(_, _) => {} + Struct(_) => {} + Union(_) => {} + Dictionary(_, _) => {} + */ + _ => todo!(), } } 
diff --git a/src/scalar/null.rs b/src/scalar/null.rs new file mode 100644 index 00000000000..0eae937504b --- /dev/null +++ b/src/scalar/null.rs @@ -0,0 +1,41 @@ +use crate::{array::*, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct NullScalar {} + +impl NullScalar { + #[inline] + pub fn new() -> Self { + Self {} + } +} + +impl Default for NullScalar { + fn default() -> Self { + Self::new() + } +} + +impl Scalar for NullScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + false + } + + #[inline] + fn data_type(&self) -> &DataType { + &DataType::Null + } + + #[inline] + fn to_boxed_array(&self, length: usize) -> Box { + Box::new(NullArray::from_data(length)) + } +} diff --git a/src/scalar/primitive.rs b/src/scalar/primitive.rs new file mode 100644 index 00000000000..58038040d58 --- /dev/null +++ b/src/scalar/primitive.rs @@ -0,0 +1,86 @@ +use crate::{ + array::*, + buffer::Buffer, + datatypes::DataType, + types::{NativeType, NaturalDataType}, +}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct PrimitiveScalar { + // Not Option because this offers a stabler pointer offset on the struct + value: T, + is_valid: bool, + data_type: DataType, +} + +impl PrimitiveScalar { + #[inline] + pub fn new(data_type: DataType, v: Option) -> Self { + let is_valid = v.is_some(); + Self { + value: v.unwrap_or_default(), + is_valid, + data_type, + } + } + + #[inline] + pub fn value(&self) -> T { + self.value + } + + /// Returns a new `PrimitiveScalar` with the same value but different [`DataType`] + /// # Panic + /// This function panics if the `data_type` is not valid for self's physical type `T`. 
+ pub fn to(self, data_type: DataType) -> Self { + let v = if self.is_valid { + Some(self.value) + } else { + None + }; + Self::new(data_type, v) + } +} + +impl From> for PrimitiveScalar { + #[inline] + fn from(v: Option) -> Self { + Self::new(T::DATA_TYPE, v) + } +} + +impl Scalar for PrimitiveScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + &self.data_type + } + + #[inline] + fn to_boxed_array(&self, length: usize) -> Box { + if self.is_valid { + let values = Buffer::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); + Box::new(PrimitiveArray::from_data( + self.data_type.clone(), + values, + None, + )) + } else { + Box::new(PrimitiveArray::::new_null( + self.data_type.clone(), + length, + )) + } + } +} diff --git a/src/scalar/utf8.rs b/src/scalar/utf8.rs new file mode 100644 index 00000000000..f20587fba87 --- /dev/null +++ b/src/scalar/utf8.rs @@ -0,0 +1,66 @@ +use crate::{array::*, buffer::Buffer, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct Utf8Scalar { + value: Buffer, + is_valid: bool, + phantom: std::marker::PhantomData, +} + +impl Utf8Scalar { + #[inline] + pub fn new(v: Option<&str>) -> Self { + let is_valid = v.is_some(); + O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); + let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); + Self { + value, + is_valid, + phantom: std::marker::PhantomData, + } + } + + #[inline] + pub fn value(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(self.value.as_slice()) } + } +} + +impl Scalar for Utf8Scalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + if O::is_large() { + &DataType::LargeUtf8 + } else { + &DataType::Utf8 + } + } + + fn 
to_boxed_array(&self, length: usize) -> Box { + if self.is_valid { + let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` + let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); + let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; + let values = std::iter::repeat(self.value.as_slice()) + .take(length) + .flatten() + .copied() + .collect(); + Box::new(Utf8Array::::from_data(offsets, values, None)) + } else { + Box::new(Utf8Array::::new_null(length)) + } + } +}