Skip to content

Commit

Permalink
feat: cast string to decimal (#13)
Browse files Browse the repository at this point in the history
Can drop this after rebase on commit 2ad0705 "Support casting from String to Decimal (apache#3281)", first released in 30.0.0
  • Loading branch information
gandronchik authored and mcheshkov committed Aug 21, 2024
1 parent b8743b8 commit aec2597
Showing 1 changed file with 69 additions and 0 deletions.
69 changes: 69 additions & 0 deletions arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
Null,
) => true,
(Decimal(_, _), _) => false,
(Utf8 | LargeUtf8, Decimal(_, _)) => true,
(_, Decimal(_, _)) => false,
(Struct(_), _) => false,
(_, Struct(_)) => false,
Expand Down Expand Up @@ -378,6 +379,41 @@ macro_rules! cast_decimal_to_integer {
}};
}

// Cast a string array to a decimal array with the requested precision and scale.
//
// Arguments:
// * `$ARRAY`      - array expression; it is downcast to `$ARRAY_TYPE` and the
//                   downcast is unwrapped, so a type mismatch panics.
// * `$ARRAY_TYPE` - concrete string array type (the visible call sites pass
//                   `StringArray` and `LargeStringArray`).
// * `$PRECISION`  - identifier of a reference to the target precision.
// * `$SCALE`      - identifier of a reference to the target scale.
//
// Per-element behaviour:
// * null input                              -> null output
// * parser reports `EmptyMantissa`          -> null output (e.g. the text "NULL")
// * text parses to a number whose scaled
//   value is finite                         -> `round(value * 10^scale)` as `i128`
// * anything else (other parse errors, NaN,
//   infinities, overflow to infinity)       -> `ArrowError::CastError`
//
// The macro evaluates to a `Result` wrapping an `Arc` of the `DecimalArray`, and
// uses `?` internally, so it must be expanded inside a function returning a
// compatible `Result`.
macro_rules! cast_string_to_decimal {
    ($ARRAY: expr, $ARRAY_TYPE: ident, $PRECISION : ident, $SCALE : ident) => {{
        // Factor that shifts `scale` fractional digits into the integer part.
        let mul = 10_f64.powi(*$SCALE as i32);
        let decimal_array = $ARRAY
            .as_any()
            .downcast_ref::<$ARRAY_TYPE>()
            .unwrap()
            .iter()
            .map(|val| match val {
                Some(val) => {
                    // TODO: f64 is smaller than decimal, so values with more
                    // than ~15-17 significant digits still lose precision here.
                    let parsed: lexical_core::Result<f64> =
                        lexical_core::parse(val.as_bytes());
                    // Round to the nearest integer instead of truncating:
                    // binary floating point error would otherwise turn e.g.
                    // "0.29" at scale 2 (28.999999999999996) into 28.
                    match parsed.map(|number| (number * mul).round()) {
                        // A float-to-int `as` cast maps NaN to 0 and saturates
                        // infinities, so only finite scaled values are accepted.
                        Ok(scaled) if scaled.is_finite() => Ok(Some(scaled as i128)),
                        Err(lexical_core::Error::EmptyMantissa(_)) => Ok(None),
                        _ => Err(ArrowError::CastError(
                            "Cannot cast from string to decimal".to_string(),
                        )),
                    }
                }
                None => Ok(None),
            })
            .collect::<Result<DecimalArray>>()?;

        Ok(Arc::new(
            decimal_array.with_precision_and_scale(*$PRECISION, *$SCALE)?,
        ))
    }};
}

// cast the List array to Utf8 array
macro_rules! cast_list_to_string {
($ARRAY:expr, $SIZE:ident) => {{
Expand Down Expand Up @@ -497,6 +533,12 @@ pub fn cast_with_options(
Float64 => {
cast_floating_point_to_decimal!(array, Float64Array, precision, scale)
}
LargeUtf8 => {
cast_string_to_decimal!(array, LargeStringArray, precision, scale)
}
Utf8 => {
cast_string_to_decimal!(array, StringArray, precision, scale)
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type
Expand Down Expand Up @@ -4982,4 +5024,31 @@ mod tests {
.collect::<Vec<_>>();
assert_eq!(&out, &vec!["[0, 1, 2]", "[3, 4, 5]", "[6, 7]"]);
}

#[test]
fn test_string_to_decimal() {
    // Shared fixture: a decimal with a fraction, a non-numeric text that is
    // expected to become null, an actual null, and a plain integer.
    let inputs = vec![Some("10.1"), Some("NULL"), None, Some("10")];

    // Casts `input` to `target` and reports the resulting array length together
    // with its non-null values.
    let cast_to_decimal = |input: ArrayRef, target: DataType| {
        let casted = cast(&input, &target).unwrap();
        let decimals = casted.as_any().downcast_ref::<DecimalArray>().unwrap();
        let values = decimals.into_iter().flatten().collect::<Vec<_>>();
        (decimals.len(), values)
    };

    // Utf8 -> Decimal(38, 10): values are scaled by 10^10.
    let utf8 = Arc::new(StringArray::from(inputs.clone())) as ArrayRef;
    let (len, values) = cast_to_decimal(utf8, DataType::Decimal(38, 10));
    assert_eq!(len, 4);
    assert_eq!(&values, &vec![101000000000, 100000000000]);

    // LargeUtf8 -> Decimal(38, 0): the fractional part is dropped.
    let large_utf8 = Arc::new(LargeStringArray::from(inputs)) as ArrayRef;
    let (len, values) = cast_to_decimal(large_utf8, DataType::Decimal(38, 0));
    assert_eq!(len, 4);
    assert_eq!(&values, &vec![10, 10]);
}
}

0 comments on commit aec2597

Please sign in to comment.