From 5cc49c82a4a7eff19c3734a3ba3e2aaed83e3bc2 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Wed, 15 Feb 2023 21:13:56 +0100
Subject: [PATCH] Added cast for FixedSizeBinary to (Large)Binary (#1403)

---
 src/compute/cast/binary_to.rs | 28 +++++++++++++++++++++++++++-
 src/compute/cast/mod.rs       | 17 ++++++++++++++++-
 tests/it/compute/cast.rs      | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/compute/cast/binary_to.rs b/src/compute/cast/binary_to.rs
index 98cf4105b4b..d84c7dd1bd4 100644
--- a/src/compute/cast/binary_to.rs
+++ b/src/compute/cast/binary_to.rs
@@ -1,5 +1,5 @@
 use crate::error::Result;
-use crate::offset::Offset;
+use crate::offset::{Offset, Offsets};
 use crate::{array::*, datatypes::DataType, types::NativeType};
 
 use super::CastOptions;
@@ -118,3 +118,29 @@ pub(super) fn binary_to_dictionary_dyn<O: Offset, K: DictionaryKey>(
     let values = from.as_any().downcast_ref().unwrap();
     binary_to_dictionary::<O, K>(values).map(|x| Box::new(x) as Box<dyn Array>)
 }
+
+fn fixed_size_to_offsets<O: Offset>(values_len: usize, fixed_size: usize) -> Offsets<O> {
+    let offsets = (0..(values_len + 1))
+        .step_by(fixed_size)
+        .map(|v| O::from_usize(v).unwrap())
+        .collect();
+    // Safety
+    // * every element is `>= 0`
+    // * element at position `i` is >= than element at position `i-1`.
+    unsafe { Offsets::new_unchecked(offsets) }
+}
+
+/// Conversion of `FixedSizeBinary` to `Binary`.
+pub fn fixed_size_binary_binary<O: Offset>(
+    from: &FixedSizeBinaryArray,
+    to_data_type: DataType,
+) -> BinaryArray<O> {
+    let values = from.values().clone();
+    let offsets = fixed_size_to_offsets(values.len(), from.size());
+    BinaryArray::<O>::new(
+        to_data_type,
+        offsets.into(),
+        values,
+        from.validity().cloned(),
+    )
+}
diff --git a/src/compute/cast/mod.rs b/src/compute/cast/mod.rs
index 24f5689f38f..e42f769e7e5 100644
--- a/src/compute/cast/mod.rs
+++ b/src/compute/cast/mod.rs
@@ -132,7 +132,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
             is_numeric(to_type) || matches!(to_type, LargeBinary | Utf8 | LargeUtf8)
         }
         (LargeBinary, to_type) => is_numeric(to_type) || matches!(to_type, Binary | LargeUtf8),
-
+        (FixedSizeBinary(_), to_type) => matches!(to_type, Binary | LargeBinary),
         (Timestamp(_, _), Utf8) => true,
         (Timestamp(_, _), LargeUtf8) => true,
         (_, Utf8) => is_numeric(from_type) || from_type == &Binary,
@@ -686,6 +686,21 @@ pub fn cast(array: &dyn Array, to_type: &DataType, options: CastOptions) -> Resu
                 "Casting from {from_type:?} to {to_type:?} not supported",
             ))),
         },
+        (FixedSizeBinary(_), _) => match to_type {
+            Binary => Ok(fixed_size_binary_binary::<i32>(
+                array.as_any().downcast_ref().unwrap(),
+                to_type.clone(),
+            )
+            .boxed()),
+            LargeBinary => Ok(fixed_size_binary_binary::<i64>(
+                array.as_any().downcast_ref().unwrap(),
+                to_type.clone(),
+            )
+            .boxed()),
+            _ => Err(Error::NotYetImplemented(format!(
+                "Casting from {from_type:?} to {to_type:?} not supported",
+            ))),
+        },
 
         (_, Binary) => match from_type {
             UInt8 => primitive_to_binary_dyn::<u8, i32>(array),
diff --git a/tests/it/compute/cast.rs b/tests/it/compute/cast.rs
index 0631ba20baf..01cb31d2f24 100644
--- a/tests/it/compute/cast.rs
+++ b/tests/it/compute/cast.rs
@@ -227,6 +227,38 @@ fn binary_to_i32_partial() {
     assert_eq!(c, &expected);
 }
 
+#[test]
+fn fixed_size_binary_to_binary() {
+    let slice = [[0, 1], [2, 3]];
+    let array = FixedSizeBinaryArray::from_slice(slice);
+
+    // large-binary
+    let b = cast(
+        &array,
+        &DataType::LargeBinary,
+        CastOptions {
+            ..Default::default()
+        },
+    )
+    .unwrap();
+    let c = b.as_any().downcast_ref::<BinaryArray<i64>>().unwrap();
+    let expected = BinaryArray::<i64>::from_slice(slice);
+    assert_eq!(c, &expected);
+
+    // binary
+    let b = cast(
+        &array,
+        &DataType::Binary,
+        CastOptions {
+            ..Default::default()
+        },
+    )
+    .unwrap();
+    let c = b.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
+    let expected = BinaryArray::<i32>::from_slice(slice);
+    assert_eq!(c, &expected);
+}
+
 #[test]
 fn utf8_to_i32() {
     let array = Utf8Array::<i32>::from_slice(["5", "6", "seven", "8", "9.1"]);