Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Implement generic over the function applied to the utf8 array
Browse files Browse the repository at this point in the history
Signed-off-by: Xuanwo <[email protected]>
  • Loading branch information
Xuanwo committed Dec 6, 2021
1 parent f6347b5 commit fa9693b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
14 changes: 5 additions & 9 deletions src/compute/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,26 @@

//! Defines kernel to extract a lower case of a \[Large\]StringArray
use super::utils::utf8_apply;
use crate::array::*;
use crate::{
datatypes::DataType,
error::{ArrowError, Result},
};

/// Builds a new [`Utf8Array`] by lower-casing every value yielded by
/// `array.values_iter()`; the validity of `array` is cloned onto the result.
fn utf8_lower<O: Offset>(array: &Utf8Array<O>) -> Utf8Array<O> {
    // Take the validity first so the final expression reads as one chain.
    let validity = array.validity().cloned();
    let lowered = array.values_iter().map(str::to_lowercase);

    Utf8Array::<O>::from_trusted_len_values_iter(lowered).with_validity(validity)
}

/// Returns a new `Array` where each of the elements is lower-cased.
/// This function errors when the passed array is not a \[Large\]String array.
pub fn lower(array: &dyn Array) -> Result<Box<dyn Array>> {
match array.data_type() {
DataType::LargeUtf8 => Ok(Box::new(utf8_lower(
DataType::LargeUtf8 => Ok(Box::new(utf8_apply(
str::to_lowercase,
array
.as_any()
.downcast_ref::<Utf8Array<i64>>()
.expect("A large string is expected"),
))),
DataType::Utf8 => Ok(Box::new(utf8_lower(
DataType::Utf8 => Ok(Box::new(utf8_apply(
str::to_lowercase,
array
.as_any()
.downcast_ref::<Utf8Array<i32>>()
Expand Down
8 changes: 8 additions & 0 deletions src/compute/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ pub fn unary_utf8_boolean<O: Offset, F: Fn(&str) -> bool>(
BooleanArray::from_data(DataType::Boolean, values, validity)
}

/// Applies `f` (a `Fn(&str) -> String`) to every value yielded by
/// `array.values_iter()` and collects the results into a new [`Utf8Array`].
///
/// The validity of `array` is cloned onto the returned array.
pub fn utf8_apply<O: Offset, F: Fn(&str) -> String>(f: F, array: &Utf8Array<O>) -> Utf8Array<O> {
    let validity = array.validity().cloned();
    let mapped = array.values_iter().map(f);

    Utf8Array::<O>::from_trusted_len_values_iter(mapped).with_validity(validity)
}

// Errors iff the two arrays have a different length.
#[inline]
pub fn check_same_len(lhs: &dyn Array, rhs: &dyn Array) -> Result<()> {
Expand Down

0 comments on commit fa9693b

Please sign in to comment.