From 1cc85635bd921ccd1e84b04ab0b8a00e100c26b3 Mon Sep 17 00:00:00 2001 From: Remzi Yang <59198230+HaoYang670@users.noreply.github.com> Date: Wed, 3 Aug 2022 18:28:30 +0800 Subject: [PATCH] Replace the `fn get_data_type` by `const DATA_TYPE` in BinaryArray and StringArray (#2289) * const fn get_data_type for binary and string Signed-off-by: remzi <13716567376yh@gmail.com> * const value instead of const fn Signed-off-by: remzi <13716567376yh@gmail.com> * deprecate Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/array_binary.rs | 28 +++++++++-------- arrow/src/array/array_string.rs | 31 +++++++++---------- .../array/builder/generic_binary_builder.rs | 15 +++------ arrow/src/compute/kernels/concat_elements.rs | 4 +-- arrow/src/compute/kernels/substring.rs | 12 +++---- arrow/src/compute/kernels/take.rs | 11 +++---- 6 files changed, 47 insertions(+), 54 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 3e49f60318ce..dd21e0d51763 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -37,15 +37,17 @@ pub struct GenericBinaryArray { } impl GenericBinaryArray { + /// Data type of the array. + pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE { + DataType::LargeBinary + } else { + DataType::Binary + }; + /// Get the data type of the array. - // Declare this function as `pub const fn` after - // https://github.com/rust-lang/rust/issues/93706 is merged. - pub fn get_data_type() -> DataType { - if OffsetSize::IS_LARGE { - DataType::LargeBinary - } else { - DataType::Binary - } + #[deprecated(note = "please use `Self::DATA_TYPE` instead")] + pub const fn get_data_type() -> DataType { + Self::DATA_TYPE } /// Returns the length for value at index `i`. @@ -158,7 +160,7 @@ impl GenericBinaryArray { "The child array cannot contain null values." ); - let builder = ArrayData::builder(Self::get_data_type()) + let builder = ArrayData::builder(Self::DATA_TYPE) .len(v.len()) .offset(v.offset()) .add_buffer(v.data_ref().buffers()[0].clone()) @@ -197,7 +199,7 @@ impl GenericBinaryArray { assert!(!offsets.is_empty()); // wrote at least one let actual_len = (offsets.len() / std::mem::size_of::()) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(actual_len) .add_buffer(offsets.into()) .add_buffer(values.into()); @@ -276,7 +278,7 @@ impl From for GenericBinaryArray Self { assert_eq!( data.data_type(), - &Self::get_data_type(), + &Self::DATA_TYPE, "[Large]BinaryArray expects Datatype::[Large]Binary" ); assert_eq!( @@ -353,7 +355,7 @@ where // calculate actual data_len, which may be different from the iterator's upper bound let data_len = offsets.len() - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(data_len) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_buffer(Buffer::from_slice_ref(&values)) @@ -598,7 +600,7 @@ mod tests { let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap()); // Array data: ["hello", "", "parquet"] - let array_data1 = ArrayData::builder(GenericBinaryArray::::get_data_type()) + let array_data1 = ArrayData::builder(GenericBinaryArray::::DATA_TYPE) .len(3) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_buffer(Buffer::from_slice_ref(&values)) diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index c332aa197688..1bb99fce7eda 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -39,15 +39,17 @@ pub struct GenericStringArray { } impl GenericStringArray { + /// Data type of the array. + pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE { + DataType::LargeUtf8 + } else { + DataType::Utf8 + }; + /// Get the data type of the array. - // Declare this function as `pub const fn` after - // https://github.com/rust-lang/rust/issues/93706 is merged. - pub fn get_data_type() -> DataType { - if OffsetSize::IS_LARGE { - DataType::LargeUtf8 - } else { - DataType::Utf8 - } + #[deprecated(note = "please use `Self::DATA_TYPE` instead")] + pub const fn get_data_type() -> DataType { + Self::DATA_TYPE } /// Returns the length for the element at index `i`. @@ -148,7 +150,7 @@ impl GenericStringArray { "The child array cannot contain null values." ); - let builder = ArrayData::builder(Self::get_data_type()) + let builder = ArrayData::builder(Self::DATA_TYPE) .len(v.len()) .offset(v.offset()) .add_buffer(v.data().buffers()[0].clone()) @@ -187,7 +189,7 @@ impl GenericStringArray { assert!(!offsets.is_empty()); // wrote at least one let actual_len = (offsets.len() / std::mem::size_of::()) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(actual_len) .add_buffer(offsets.into()) .add_buffer(values.into()); @@ -264,7 +266,7 @@ where // calculate actual data_len, which may be different from the iterator's upper bound let data_len = (offsets.len() / offset_size) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(data_len) .add_buffer(offsets.into()) .add_buffer(values.into()) @@ -342,10 +344,7 @@ impl From> for GenericStringArray { fn from(v: GenericBinaryArray) -> Self { - let builder = v - .into_data() - .into_builder() - .data_type(Self::get_data_type()); + let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE); let data = unsafe { builder.build_unchecked() }; Self::from(data) } @@ -355,7 +354,7 @@ impl From for GenericStringArray Self { assert_eq!( data.data_type(), - &Self::get_data_type(), + &Self::DATA_TYPE, "[Large]StringArray expects Datatype::[Large]Utf8" ); assert_eq!( diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs index 8f242243cd7c..aca2e1d9694e 100644 --- a/arrow/src/array/builder/generic_binary_builder.rs +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -15,12 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::{ - array::{ - ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait, - UInt8BufferBuilder, - }, - datatypes::DataType, +use crate::array::{ + ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait, + UInt8BufferBuilder, }; use std::any::Any; use std::sync::Arc; @@ -80,11 +77,7 @@ impl GenericBinaryBuilder { /// Builds the [`GenericBinaryArray`] and reset this builder. pub fn finish(&mut self) -> GenericBinaryArray { - let array_type = if OffsetSize::IS_LARGE { - DataType::LargeBinary - } else { - DataType::Binary - }; + let array_type = GenericBinaryArray::::DATA_TYPE; let array_builder = ArrayDataBuilder::new(array_type) .len(self.len()) .add_buffer(self.offsets_builder.finish()) diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 7d460b21cb0d..ac365a0968ec 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -75,7 +75,7 @@ pub fn concat_elements_utf8( output_offsets.append(Offset::from_usize(output_values.len()).unwrap()); } - let builder = ArrayDataBuilder::new(GenericStringArray::::get_data_type()) + let builder = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) .len(left.len()) .add_buffer(output_offsets.finish()) .add_buffer(output_values.finish()) @@ -155,7 +155,7 @@ pub fn concat_elements_utf8_many( output_offsets.append(Offset::from_usize(output_values.len()).unwrap()); } - let builder = ArrayDataBuilder::new(GenericStringArray::::get_data_type()) + let builder = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) .len(size) .add_buffer(output_offsets.finish()) .add_buffer(output_values.finish()) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index 024f5633fef4..5190d0bf0b67 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -205,7 +205,7 @@ pub fn substring_by_char( }); let data = unsafe { ArrayData::new_unchecked( - GenericStringArray::::get_data_type(), + GenericStringArray::::DATA_TYPE, array.len(), None, array @@ -294,7 +294,7 @@ fn binary_substring( let data = unsafe { ArrayData::new_unchecked( - GenericBinaryArray::::get_data_type(), + GenericBinaryArray::::DATA_TYPE, array.len(), None, array @@ -425,7 +425,7 @@ fn utf8_substring( let data = unsafe { ArrayData::new_unchecked( - GenericStringArray::::get_data_type(), + GenericStringArray::::DATA_TYPE, array.len(), None, array @@ -587,7 +587,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericBinaryArray::::get_data_type()) + let data = ArrayData::builder(GenericBinaryArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from_iter(values)) @@ -814,7 +814,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericStringArray::::get_data_type()) + let data = ArrayData::builder(GenericStringArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from(values)) @@ -939,7 +939,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericStringArray::::get_data_type()) + let data = ArrayData::builder(GenericStringArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from(values)) diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 5bfd257fcf46..ab99acd2c04b 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -771,12 +771,11 @@ where }; } - let array_data = - ArrayData::builder(GenericStringArray::::get_data_type()) - .len(data_len) - .add_buffer(offsets_buffer.into()) - .add_buffer(values.into()) - .null_bit_buffer(nulls); + let array_data = ArrayData::builder(GenericStringArray::::DATA_TYPE) + .len(data_len) + .add_buffer(offsets_buffer.into()) + .add_buffer(values.into()) + .null_bit_buffer(nulls); let array_data = unsafe { array_data.build_unchecked() };