From f510012eb375093c24d596e09f299b81972ff0c4 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Tue, 14 Feb 2023 12:50:33 +0100 Subject: [PATCH 1/2] feat: add cast for FixedSizeBinary to (Large)Binary --- src/array/fixed_size_binary/mod.rs | 6 +++++ src/compute/cast/binary_to.rs | 43 +++++++++++++++++++++++++++++- src/compute/cast/mod.rs | 17 +++++++++++- tests/it/compute/cast.rs | 32 ++++++++++++++++++++++ 4 files changed, 96 insertions(+), 2 deletions(-) diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 5419f30fc54..97ee4303c6e 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -87,6 +87,12 @@ impl FixedSizeBinaryArray { // must use impl FixedSizeBinaryArray { + /// Retrieve the size of the binary elements + /// in this ['FixedSizeBinaryArray'] + pub fn fixed_size(&self) -> usize { + self.size + } + /// Slices this [`FixedSizeBinaryArray`]. /// # Implementation /// This operation is `O(1)`. diff --git a/src/compute/cast/binary_to.rs b/src/compute/cast/binary_to.rs index 98cf4105b4b..37f4f8649aa 100644 --- a/src/compute/cast/binary_to.rs +++ b/src/compute/cast/binary_to.rs @@ -1,5 +1,5 @@ use crate::error::Result; -use crate::offset::Offset; +use crate::offset::{Offset, Offsets}; use crate::{array::*, datatypes::DataType, types::NativeType}; use super::CastOptions; @@ -118,3 +118,44 @@ pub(super) fn binary_to_dictionary_dyn( let values = from.as_any().downcast_ref().unwrap(); binary_to_dictionary::(values).map(|x| Box::new(x) as Box) } + +fn fixed_size_to_offsets(values_len: usize, fixed_size: usize) -> Offsets { + let offsets = (0..(values_len + 1)) + .step_by(fixed_size) + .map(|v| O::from_usize(v).unwrap()) + .collect(); + // Safety + // * every element is `>= 0` + // * element at position `i` is >= than element at position `i-1`. + unsafe { Offsets::new_unchecked(offsets) } +} + +/// Conversion of large-binary +pub fn fixed_size_binary_to_large_binary( + from: &FixedSizeBinaryArray, + to_data_type: DataType, +) -> BinaryArray { + let values = from.values().clone(); + let offsets = fixed_size_to_offsets(values.len(), from.fixed_size()); + BinaryArray::::new( + to_data_type, + offsets.into(), + values, + from.validity().cloned(), + ) +} + +/// Conversion of binary +pub fn fixed_size_binary_to_binary( + from: &FixedSizeBinaryArray, + to_data_type: DataType, +) -> BinaryArray { + let values = from.values().clone(); + let offsets = fixed_size_to_offsets(values.len(), from.fixed_size()); + BinaryArray::::new( + to_data_type, + offsets.into(), + values, + from.validity().cloned(), + ) +} diff --git a/src/compute/cast/mod.rs b/src/compute/cast/mod.rs index 24f5689f38f..681602196e3 100644 --- a/src/compute/cast/mod.rs +++ b/src/compute/cast/mod.rs @@ -132,7 +132,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { is_numeric(to_type) || matches!(to_type, LargeBinary | Utf8 | LargeUtf8) } (LargeBinary, to_type) => is_numeric(to_type) || matches!(to_type, Binary | LargeUtf8), - + (FixedSizeBinary(_), to_type) => matches!(to_type, Binary | LargeBinary), (Timestamp(_, _), Utf8) => true, (Timestamp(_, _), LargeUtf8) => true, (_, Utf8) => is_numeric(from_type) || from_type == &Binary, @@ -686,6 +686,21 @@ pub fn cast(array: &dyn Array, to_type: &DataType, options: CastOptions) -> Resu "Casting from {from_type:?} to {to_type:?} not supported", ))), }, + (FixedSizeBinary(_), _) => match to_type { + Binary => Ok(fixed_size_binary_to_binary( + array.as_any().downcast_ref().unwrap(), + to_type.clone(), + ) + .boxed()), + LargeBinary => Ok(fixed_size_binary_to_large_binary( + array.as_any().downcast_ref().unwrap(), + to_type.clone(), + ) + .boxed()), + _ => Err(Error::NotYetImplemented(format!( + "Casting from {from_type:?} to {to_type:?} not supported", + ))), + }, (_, Binary) => match from_type { UInt8 => primitive_to_binary_dyn::(array), diff --git a/tests/it/compute/cast.rs b/tests/it/compute/cast.rs index 0631ba20baf..01cb31d2f24 100644 --- a/tests/it/compute/cast.rs +++ b/tests/it/compute/cast.rs @@ -227,6 +227,38 @@ fn binary_to_i32_partial() { assert_eq!(c, &expected); } +#[test] +fn fixed_size_binary_to_binary() { + let slice = [[0, 1], [2, 3]]; + let array = FixedSizeBinaryArray::from_slice(slice); + + // large-binary + let b = cast( + &array, + &DataType::LargeBinary, + CastOptions { + ..Default::default() + }, + ) + .unwrap(); + let c = b.as_any().downcast_ref::>().unwrap(); + let expected = BinaryArray::::from_slice(slice); + assert_eq!(c, &expected); + + // binary + let b = cast( + &array, + &DataType::Binary, + CastOptions { + ..Default::default() + }, + ) + .unwrap(); + let c = b.as_any().downcast_ref::>().unwrap(); + let expected = BinaryArray::::from_slice(slice); + assert_eq!(c, &expected); +} + #[test] fn utf8_to_i32() { let array = Utf8Array::::from_slice(["5", "6", "seven", "8", "9.1"]); From 85eb6bea658cd0a9ef538617a1e4430aec8f007f Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 15 Feb 2023 08:26:27 +0100 Subject: [PATCH 2/2] apply suggestions --- src/array/fixed_size_binary/mod.rs | 6 ------ src/compute/cast/binary_to.rs | 25 +++++-------------------- src/compute/cast/mod.rs | 4 ++-- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 97ee4303c6e..5419f30fc54 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -87,12 +87,6 @@ impl FixedSizeBinaryArray { // must use impl FixedSizeBinaryArray { - /// Retrieve the size of the binary elements - /// in this ['FixedSizeBinaryArray'] - pub fn fixed_size(&self) -> usize { - self.size - } - /// Slices this [`FixedSizeBinaryArray`]. /// # Implementation /// This operation is `O(1)`. diff --git a/src/compute/cast/binary_to.rs b/src/compute/cast/binary_to.rs index 37f4f8649aa..d84c7dd1bd4 100644 --- a/src/compute/cast/binary_to.rs +++ b/src/compute/cast/binary_to.rs @@ -130,29 +130,14 @@ fn fixed_size_to_offsets(values_len: usize, fixed_size: usize) -> Off unsafe { Offsets::new_unchecked(offsets) } } -/// Conversion of large-binary -pub fn fixed_size_binary_to_large_binary( +/// Conversion of `FixedSizeBinary` to `Binary`. +pub fn fixed_size_binary_binary( from: &FixedSizeBinaryArray, to_data_type: DataType, -) -> BinaryArray { +) -> BinaryArray { let values = from.values().clone(); - let offsets = fixed_size_to_offsets(values.len(), from.fixed_size()); - BinaryArray::::new( - to_data_type, - offsets.into(), - values, - from.validity().cloned(), - ) -} - -/// Conversion of binary -pub fn fixed_size_binary_to_binary( - from: &FixedSizeBinaryArray, - to_data_type: DataType, -) -> BinaryArray { - let values = from.values().clone(); - let offsets = fixed_size_to_offsets(values.len(), from.fixed_size()); - BinaryArray::::new( + let offsets = fixed_size_to_offsets(values.len(), from.size()); + BinaryArray::::new( to_data_type, offsets.into(), values, diff --git a/src/compute/cast/mod.rs b/src/compute/cast/mod.rs index 681602196e3..e42f769e7e5 100644 --- a/src/compute/cast/mod.rs +++ b/src/compute/cast/mod.rs @@ -687,12 +687,12 @@ pub fn cast(array: &dyn Array, to_type: &DataType, options: CastOptions) -> Resu ))), }, (FixedSizeBinary(_), _) => match to_type { - Binary => Ok(fixed_size_binary_to_binary( + Binary => Ok(fixed_size_binary_binary::( array.as_any().downcast_ref().unwrap(), to_type.clone(), ) .boxed()), - LargeBinary => Ok(fixed_size_binary_to_large_binary( + LargeBinary => Ok(fixed_size_binary_binary::( array.as_any().downcast_ref().unwrap(), to_type.clone(), )