Skip to content

Commit

Permalink
Replace the fn get_data_type by const DATA_TYPE in BinaryArray an…
Browse files Browse the repository at this point in the history
…d StringArray (apache#2289)

* const fn get_data_type for binary and string

Signed-off-by: remzi <[email protected]>

* const value instead of const fn

Signed-off-by: remzi <[email protected]>

* deprecate

Signed-off-by: remzi <[email protected]>
  • Loading branch information
HaoYang670 authored Aug 3, 2022
1 parent 299908e commit 1cc8563
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 54 deletions.
28 changes: 15 additions & 13 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ pub struct GenericBinaryArray<OffsetSize: OffsetSizeTrait> {
}

impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
/// Data type of the array.
pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE {
DataType::LargeBinary
} else {
DataType::Binary
};

/// Get the data type of the array.
// Declare this function as `pub const fn` after
// https://github.com/rust-lang/rust/issues/93706 is merged.
pub fn get_data_type() -> DataType {
if OffsetSize::IS_LARGE {
DataType::LargeBinary
} else {
DataType::Binary
}
#[deprecated(note = "please use `Self::DATA_TYPE` instead")]
pub const fn get_data_type() -> DataType {
Self::DATA_TYPE
}

/// Returns the length for value at index `i`.
Expand Down Expand Up @@ -158,7 +160,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
"The child array cannot contain null values."
);

let builder = ArrayData::builder(Self::get_data_type())
let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data_ref().buffers()[0].clone())
Expand Down Expand Up @@ -197,7 +199,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;

let array_data = ArrayData::builder(Self::get_data_type())
let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
Expand Down Expand Up @@ -276,7 +278,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericBinaryArray<OffsetS
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
&Self::get_data_type(),
&Self::DATA_TYPE,
"[Large]BinaryArray expects Datatype::[Large]Binary"
);
assert_eq!(
Expand Down Expand Up @@ -353,7 +355,7 @@ where

// calculate actual data_len, which may be different from the iterator's upper bound
let data_len = offsets.len() - 1;
let array_data = ArrayData::builder(Self::get_data_type())
let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(data_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -598,7 +600,7 @@ mod tests {
let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap());

// Array data: ["hello", "", "parquet"]
let array_data1 = ArrayData::builder(GenericBinaryArray::<O>::get_data_type())
let array_data1 = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down
31 changes: 15 additions & 16 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@ pub struct GenericStringArray<OffsetSize: OffsetSizeTrait> {
}

impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
/// Data type of the array.
pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE {
DataType::LargeUtf8
} else {
DataType::Utf8
};

/// Get the data type of the array.
// Declare this function as `pub const fn` after
// https://github.com/rust-lang/rust/issues/93706 is merged.
pub fn get_data_type() -> DataType {
if OffsetSize::IS_LARGE {
DataType::LargeUtf8
} else {
DataType::Utf8
}
#[deprecated(note = "please use `Self::DATA_TYPE` instead")]
pub const fn get_data_type() -> DataType {
Self::DATA_TYPE
}

/// Returns the length for the element at index `i`.
Expand Down Expand Up @@ -148,7 +150,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
"The child array cannot contain null values."
);

let builder = ArrayData::builder(Self::get_data_type())
let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data().buffers()[0].clone())
Expand Down Expand Up @@ -187,7 +189,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;

let array_data = ArrayData::builder(Self::get_data_type())
let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
Expand Down Expand Up @@ -264,7 +266,7 @@ where

// calculate actual data_len, which may be different from the iterator's upper bound
let data_len = (offsets.len() / offset_size) - 1;
let array_data = ArrayData::builder(Self::get_data_type())
let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(data_len)
.add_buffer(offsets.into())
.add_buffer(values.into())
Expand Down Expand Up @@ -342,10 +344,7 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
for GenericStringArray<OffsetSize>
{
fn from(v: GenericBinaryArray<OffsetSize>) -> Self {
let builder = v
.into_data()
.into_builder()
.data_type(Self::get_data_type());
let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE);
let data = unsafe { builder.build_unchecked() };
Self::from(data)
}
Expand All @@ -355,7 +354,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericStringArray<OffsetS
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
&Self::get_data_type(),
&Self::DATA_TYPE,
"[Large]StringArray expects Datatype::[Large]Utf8"
);
assert_eq!(
Expand Down
15 changes: 4 additions & 11 deletions arrow/src/array/builder/generic_binary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use crate::{
array::{
ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait,
UInt8BufferBuilder,
},
datatypes::DataType,
use crate::array::{
ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait,
UInt8BufferBuilder,
};
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -80,11 +77,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {

/// Builds the [`GenericBinaryArray`] and reset this builder.
pub fn finish(&mut self) -> GenericBinaryArray<OffsetSize> {
let array_type = if OffsetSize::IS_LARGE {
DataType::LargeBinary
} else {
DataType::Binary
};
let array_type = GenericBinaryArray::<OffsetSize>::DATA_TYPE;
let array_builder = ArrayDataBuilder::new(array_type)
.len(self.len())
.add_buffer(self.offsets_builder.finish())
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/compute/kernels/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
}

let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
.len(left.len())
.add_buffer(output_offsets.finish())
.add_buffer(output_values.finish())
Expand Down Expand Up @@ -155,7 +155,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
}

let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
.len(size)
.add_buffer(output_offsets.finish())
.add_buffer(output_values.finish())
Expand Down
12 changes: 6 additions & 6 deletions arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ pub fn substring_by_char<OffsetSize: OffsetSizeTrait>(
});
let data = unsafe {
ArrayData::new_unchecked(
GenericStringArray::<OffsetSize>::get_data_type(),
GenericStringArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
Expand Down Expand Up @@ -294,7 +294,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(

let data = unsafe {
ArrayData::new_unchecked(
GenericBinaryArray::<OffsetSize>::get_data_type(),
GenericBinaryArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
Expand Down Expand Up @@ -425,7 +425,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(

let data = unsafe {
ArrayData::new_unchecked(
GenericStringArray::<OffsetSize>::get_data_type(),
GenericStringArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
Expand Down Expand Up @@ -587,7 +587,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];

let data = ArrayData::builder(GenericBinaryArray::<O>::get_data_type())
let data = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from_iter(values))
Expand Down Expand Up @@ -814,7 +814,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];

let data = ArrayData::builder(GenericStringArray::<O>::get_data_type())
let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
Expand Down Expand Up @@ -939,7 +939,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];

let data = ArrayData::builder(GenericStringArray::<O>::get_data_type())
let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
Expand Down
11 changes: 5 additions & 6 deletions arrow/src/compute/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -771,12 +771,11 @@ where
};
}

let array_data =
ArrayData::builder(GenericStringArray::<OffsetSize>::get_data_type())
.len(data_len)
.add_buffer(offsets_buffer.into())
.add_buffer(values.into())
.null_bit_buffer(nulls);
let array_data = ArrayData::builder(GenericStringArray::<OffsetSize>::DATA_TYPE)
.len(data_len)
.add_buffer(offsets_buffer.into())
.add_buffer(values.into())
.null_bit_buffer(nulls);

let array_data = unsafe { array_data.build_unchecked() };

Expand Down

0 comments on commit 1cc8563

Please sign in to comment.