From 5776c81330c60ce8895357dbf89a9168eb424f8c Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sat, 30 Oct 2021 21:48:40 +0200 Subject: [PATCH] Changed DataType::FixedSize*(i32) to DataType::FixedSize*(usize) (#556) Fixes #525 --- src/array/fixed_size_binary/mod.rs | 6 +++--- src/array/fixed_size_binary/mutable.rs | 8 ++++---- src/array/fixed_size_list/mod.rs | 2 +- src/array/growable/fixed_binary.rs | 2 +- src/datatypes/mod.rs | 4 ++-- src/io/avro/read/schema.rs | 2 +- src/io/ipc/convert.rs | 4 ++-- src/io/ipc/read/array/fixed_size_binary.rs | 3 +-- src/io/json_integration/schema.rs | 4 ++-- src/io/parquet/read/fixed_size_binary.rs | 4 ++-- src/io/parquet/read/mod.rs | 2 +- src/io/parquet/read/schema/convert.rs | 2 +- src/io/parquet/read/statistics/fixlen.rs | 2 +- src/io/parquet/write/mod.rs | 4 ++-- src/io/parquet/write/schema.rs | 2 +- 15 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 3091aae3b62..78db064ced5 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -34,7 +34,7 @@ impl FixedSizeBinaryArray { /// Returns a new [`FixedSizeBinaryArray`]. pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { - let size = *Self::get_size(&data_type) as usize; + let size = Self::get_size(&data_type); assert_eq!(values.len() % size, 0); @@ -135,9 +135,9 @@ impl FixedSizeBinaryArray { } impl FixedSizeBinaryArray { - pub(crate) fn get_size(data_type: &DataType) -> &i32 { + pub(crate) fn get_size(data_type: &DataType) -> usize { match data_type.to_logical_type() { - DataType::FixedSizeBinary(size) => size, + DataType::FixedSizeBinary(size) => *size, _ => panic!("Wrong DataType"), } } diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index 2e0d218015e..46a4bcc143a 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -39,7 +39,7 @@ impl MutableFixedSizeBinaryArray { values: MutableBuffer, validity: Option, ) -> Self { - let size = *FixedSizeBinaryArray::get_size(&data_type) as usize; + let size = FixedSizeBinaryArray::get_size(&data_type); assert_eq!( values.len() % size, 0, @@ -68,7 +68,7 @@ impl MutableFixedSizeBinaryArray { /// Creates a new [`MutableFixedSizeBinaryArray`] with capacity for `capacity` entries. pub fn with_capacity(size: usize, capacity: usize) -> Self { Self::from_data( - DataType::FixedSizeBinary(size as i32), + DataType::FixedSizeBinary(size), MutableBuffer::::with_capacity(capacity * size), None, ) @@ -189,7 +189,7 @@ impl MutableArray for MutableFixedSizeBinaryArray { fn as_box(&mut self) -> Box { Box::new(FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(self.size as i32), + DataType::FixedSizeBinary(self.size), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), )) @@ -197,7 +197,7 @@ impl MutableArray for MutableFixedSizeBinaryArray { fn as_arc(&mut self) -> Arc { Arc::new(FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(self.size as i32), + DataType::FixedSizeBinary(self.size), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), )) diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index 5b78569eed4..a291fd38e9b 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -154,7 +154,7 @@ impl FixedSizeListArray { /// Returns a [`DataType`] consistent with [`FixedSizeListArray`]. pub fn default_datatype(data_type: DataType, size: usize) -> DataType { let field = Box::new(Field::new("item", data_type, true)); - DataType::FixedSizeList(field, size as i32) + DataType::FixedSizeList(field, size) } } diff --git a/src/array/growable/fixed_binary.rs b/src/array/growable/fixed_binary.rs index b9472a972b3..3abdc67fdbd 100644 --- a/src/array/growable/fixed_binary.rs +++ b/src/array/growable/fixed_binary.rs @@ -40,7 +40,7 @@ impl<'a> GrowableFixedSizeBinary<'a> { .map(|array| build_extend_null_bits(*array, use_validity)) .collect(); - let size = *FixedSizeBinaryArray::get_size(arrays[0].data_type()) as usize; + let size = FixedSizeBinaryArray::get_size(arrays[0].data_type()); Self { arrays, values: MutableBuffer::with_capacity(0), diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index b1c01d83570..9386f9c7c25 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -74,7 +74,7 @@ pub enum DataType { Binary, /// Opaque binary data of fixed size. /// Enum parameter specifies the number of bytes per value. - FixedSizeBinary(i32), + FixedSizeBinary(usize), /// Opaque binary data of variable length and 64-bit offsets. LargeBinary, /// A variable-length string in Unicode with UTF-8 encoding. @@ -84,7 +84,7 @@ pub enum DataType { /// A list of some logical data type with variable length. List(Box), /// A list of some logical data type with fixed length. - FixedSizeList(Box, i32), + FixedSizeList(Box, usize), /// A list of some logical data type with variable length and 64-bit offsets. LargeList(Box), /// A nested datatype that contains a number of sub-fields. diff --git a/src/io/avro/read/schema.rs b/src/io/avro/read/schema.rs index 8b9d648cf5f..ba7e2b494f1 100644 --- a/src/io/avro/read/schema.rs +++ b/src/io/avro/read/schema.rs @@ -173,7 +173,7 @@ fn schema_to_field( false, )) } - AvroSchema::Fixed { size, .. } => DataType::FixedSizeBinary(*size as i32), + AvroSchema::Fixed { size, .. } => DataType::FixedSizeBinary(*size), AvroSchema::Decimal { precision, scale, .. } => DataType::Decimal(*precision, *scale), diff --git a/src/io/ipc/convert.rs b/src/io/ipc/convert.rs index b69cc83bad6..69e81f1d46a 100644 --- a/src/io/ipc/convert.rs +++ b/src/io/ipc/convert.rs @@ -191,7 +191,7 @@ fn get_data_type(field: ipc::Field, extension: Extension, may_be_dictionary: boo ipc::Type::LargeUtf8 => DataType::LargeUtf8, ipc::Type::FixedSizeBinary => { let fsb = field.type_as_fixed_size_binary().unwrap(); - DataType::FixedSizeBinary(fsb.byteWidth()) + DataType::FixedSizeBinary(fsb.byteWidth() as usize) } ipc::Type::FloatingPoint => { let float = field.type_as_floating_point().unwrap(); @@ -273,7 +273,7 @@ fn get_data_type(field: ipc::Field, extension: Extension, may_be_dictionary: boo panic!("expect a list to have one child") } let fsl = field.type_as_fixed_size_list().unwrap(); - DataType::FixedSizeList(Box::new(children.get(0).into()), fsl.listSize()) + DataType::FixedSizeList(Box::new(children.get(0).into()), fsl.listSize() as usize) } ipc::Type::Struct_ => { let mut fields = vec![]; diff --git a/src/io/ipc/read/array/fixed_size_binary.rs b/src/io/ipc/read/array/fixed_size_binary.rs index 5df424737ec..094b34d2f78 100644 --- a/src/io/ipc/read/array/fixed_size_binary.rs +++ b/src/io/ipc/read/array/fixed_size_binary.rs @@ -30,8 +30,7 @@ pub fn read_fixed_size_binary( compression, )?; - let length = - field_node.length() as usize * (*FixedSizeBinaryArray::get_size(&data_type) as usize); + let length = field_node.length() as usize * FixedSizeBinaryArray::get_size(&data_type); let values = read_buffer( buffers, length, diff --git a/src/io/json_integration/schema.rs b/src/io/json_integration/schema.rs index 3b3d6b7c599..b57b04ef9dc 100644 --- a/src/io/json_integration/schema.rs +++ b/src/io/json_integration/schema.rs @@ -256,7 +256,7 @@ fn to_data_type(item: &Value, mut children: Vec) -> Result { "fixedsizebinary" => { // return a list with any type as its child isn't defined in the map if let Some(Value::Number(size)) = item.get("byteWidth") { - DataType::FixedSizeBinary(size.as_i64().unwrap() as i32) + DataType::FixedSizeBinary(size.as_i64().unwrap() as usize) } else { return Err(ArrowError::Schema( "Expecting a byteWidth for fixedsizebinary".to_string(), @@ -385,7 +385,7 @@ fn to_data_type(item: &Value, mut children: Vec) -> Result { if let Some(Value::Number(size)) = item.get("listSize") { DataType::FixedSizeList( Box::new(children.pop().unwrap()), - size.as_i64().unwrap() as i32, + size.as_i64().unwrap() as usize, ) } else { return Err(ArrowError::Schema( diff --git a/src/io/parquet/read/fixed_size_binary.rs b/src/io/parquet/read/fixed_size_binary.rs index be5b0a988c8..ddd6e02beb9 100644 --- a/src/io/parquet/read/fixed_size_binary.rs +++ b/src/io/parquet/read/fixed_size_binary.rs @@ -136,7 +136,7 @@ where ArrowError: From, I: FallibleStreamingIterator, { - let size = *FixedSizeBinaryArray::get_size(&data_type) as usize; + let size = FixedSizeBinaryArray::get_size(&data_type); let capacity = metadata.num_values() as usize; let mut values = MutableBuffer::::with_capacity(capacity * size); @@ -168,7 +168,7 @@ where E: Clone, I: Stream>, { - let size = *FixedSizeBinaryArray::get_size(&data_type) as usize; + let size = FixedSizeBinaryArray::get_size(&data_type); let capacity = metadata.num_values() as usize; let mut values = MutableBuffer::::with_capacity(capacity * size); diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index e85f8c63637..3e421c1f493 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -291,7 +291,7 @@ pub fn page_iter_to_array>(); fixed_size_binary::iter_to_array( iter, - DataType::FixedSizeBinary(*n), + DataType::FixedSizeBinary(*n as usize), metadata, ) .map(|e| { diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index 3b7502f1b0d..56ac40d0452 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -218,7 +218,7 @@ pub fn from_fixed_len_byte_array( // would be incorrect if all 12 bytes of the interval are populated DataType::Interval(IntervalUnit::DayTime) } - _ => DataType::FixedSizeBinary(*length), + _ => DataType::FixedSizeBinary(*length as usize), } } diff --git a/src/io/parquet/read/statistics/fixlen.rs b/src/io/parquet/read/statistics/fixlen.rs index 62413533c79..8ef9a7de197 100644 --- a/src/io/parquet/read/statistics/fixlen.rs +++ b/src/io/parquet/read/statistics/fixlen.rs @@ -40,7 +40,7 @@ impl From<&ParquetFixedLenStatistics> for FixedLenStatistics { distinct_count: stats.distinct_count, min_value: stats.min_value.clone(), max_value: stats.max_value.clone(), - data_type: DataType::FixedSizeBinary(byte_lens), + data_type: DataType::FixedSizeBinary(byte_lens as usize), } } } diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index ab1f07c5e76..5048ea2d3f2 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -308,7 +308,7 @@ pub fn array_to_page( values.extend_from_slice(bytes) }); let array = FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(size as i32), + DataType::FixedSizeBinary(size), values.into(), array.validity().cloned(), ); @@ -449,7 +449,7 @@ fn nested_array_to_page( DataType::FixedSizeList(_, size) => { let array = array.as_any().downcast_ref::().unwrap(); let offsets = (0..array.len()) - .map(|x| size * x as i32) + .map(|x| (*size * x) as i32) .collect::>(); list_array_to_page( &offsets, diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs index 40c91242143..595cf6b63c2 100644 --- a/src/io/parquet/write/schema.rs +++ b/src/io/parquet/write/schema.rs @@ -283,7 +283,7 @@ pub fn to_parquet_type(field: &Field) -> Result { } DataType::FixedSizeBinary(size) => Ok(ParquetType::try_from_primitive( name, - PhysicalType::FixedLenByteArray(*size), + PhysicalType::FixedLenByteArray(*size as i32), repetition, None, None,