diff --git a/src/compute/lower.rs b/src/compute/lower.rs
index cd903bba992..5a9978179b4 100644
--- a/src/compute/lower.rs
+++ b/src/compute/lower.rs
@@ -17,30 +17,26 @@
 
 //! Defines kernel to extract a lower case of a \[Large\]StringArray
 
+use super::utils::utf8_apply;
 use crate::array::*;
 use crate::{
     datatypes::DataType,
     error::{ArrowError, Result},
 };
 
-fn utf8_lower<O: Offset>(array: &Utf8Array<O>) -> Utf8Array<O> {
-    let iter = array.values_iter().map(str::to_lowercase);
-
-    let new = Utf8Array::<O>::from_trusted_len_values_iter(iter);
-    new.with_validity(array.validity().cloned())
-}
-
 /// Returns a new `Array` where each of each of the elements is lower-cased.
 /// this function errors when the passed array is not a \[Large\]String array.
 pub fn lower(array: &dyn Array) -> Result<Box<dyn Array>> {
     match array.data_type() {
-        DataType::LargeUtf8 => Ok(Box::new(utf8_lower(
+        DataType::LargeUtf8 => Ok(Box::new(utf8_apply(
+            str::to_lowercase,
             array
                 .as_any()
                 .downcast_ref::<Utf8Array<i64>>()
                 .expect("A large string is expected"),
         ))),
-        DataType::Utf8 => Ok(Box::new(utf8_lower(
+        DataType::Utf8 => Ok(Box::new(utf8_apply(
+            str::to_lowercase,
             array
                 .as_any()
                 .downcast_ref::<Utf8Array<i32>>()
diff --git a/src/compute/utils.rs b/src/compute/utils.rs
index 864eb27d40d..69ed5b7a5a8 100644
--- a/src/compute/utils.rs
+++ b/src/compute/utils.rs
@@ -30,6 +30,14 @@ pub fn unary_utf8_boolean<O: Offset, F: Fn(&str) -> bool>(
     BooleanArray::from_data(DataType::Boolean, values, validity)
 }
 
+/// utf8_apply will apply `Fn(&str) -> String` to every value in Utf8Array.
+pub fn utf8_apply<O: Offset, F: Fn(&str) -> String>(f: F, array: &Utf8Array<O>) -> Utf8Array<O> {
+    let iter = array.values_iter().map(f);
+
+    let new = Utf8Array::<O>::from_trusted_len_values_iter(iter);
+    new.with_validity(array.validity().cloned())
+}
+
 // Errors iff the two arrays have a different length.
 #[inline]
 pub fn check_same_len(lhs: &dyn Array, rhs: &dyn Array) -> Result<()> {