diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index c9f84612757..b9d60c68cf0 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -87,5 +87,5 @@ fn convert_i256(value: &[u8], n: usize) -> i256 { let mut bytes = [0u8; 32]; bytes[..n].copy_from_slice(value); - i256::from_be_bytes(bytes) + i256(i256::from_be_bytes(bytes).0 >> (8 * (32 - n))) } diff --git a/src/io/parquet/read/row_group.rs b/src/io/parquet/read/row_group.rs index 176c6e83182..e475010e606 100644 --- a/src/io/parquet/read/row_group.rs +++ b/src/io/parquet/read/row_group.rs @@ -225,6 +225,9 @@ pub fn to_deserializer<'a>( (columns, types) } else { + for (meta, chunk) in columns.clone() { + println!("the meta is {:?},\nthe chunk is {:?}", meta, chunk); + } let (columns, types): (Vec<_>, Vec<_>) = columns .into_iter() .map(|(column_meta, chunk)| { diff --git a/src/io/parquet/read/statistics/fixlen.rs b/src/io/parquet/read/statistics/fixlen.rs index 62b41956b00..6ad5aab9ff6 100644 --- a/src/io/parquet/read/statistics/fixlen.rs +++ b/src/io/parquet/read/statistics/fixlen.rs @@ -2,10 +2,9 @@ use parquet2::statistics::{FixedLenStatistics, Statistics as ParquetStatistics}; use crate::array::*; use crate::error::Result; -use crate::io::parquet::read::convert_i256; use crate::types::{days_ms, i256}; -use super::super::{convert_days_ms, convert_i128}; +use super::super::{convert_days_ms, convert_i128, convert_i256}; pub(super) fn push_i128( from: Option<&dyn ParquetStatistics>, diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs index db871bb817e..357c14f2bb4 100644 --- a/src/io/parquet/write/fixed_len_bytes.rs +++ b/src/io/parquet/write/fixed_len_bytes.rs @@ -6,6 +6,7 @@ use parquet2::{ }; use super::{binary::ord_binary, utils, WriteOptions}; +use crate::types::i256; use crate::{ array::{Array, FixedSizeBinaryArray, PrimitiveArray}, error::Result, @@ -103,3 +104,25 @@ pub(super) fn build_statistics_decimal( .map(|x| x.to_be_bytes()[16 - size..].to_vec()), } } + +pub(super) fn build_statistics_decimal256( + array: &PrimitiveArray, + primitive_type: PrimitiveType, + size: usize, +) -> FixedLenStatistics { + FixedLenStatistics { + primitive_type, + null_count: Some(array.null_count() as i64), + distinct_count: None, + max_value: array + .iter() + .flatten() + .max() + .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()), + min_value: array + .iter() + .flatten() + .min() + .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()), + } +} diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index f16e6acc9d4..44e7b8e9138 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -464,13 +464,20 @@ pub fn array_to_page_simple( fixed_len_bytes::array_to_page(array, options, type_, statistics) } - DataType::Decimal256(_, _) => { + DataType::Decimal256(precision, _) => { let type_ = type_; - let size = 32; + let size = decimal_length_from_precision(*precision); let array = array .as_any() .downcast_ref::>() .unwrap(); + let statistics = if options.write_statistics { + let stats = + fixed_len_bytes::build_statistics_decimal256(array, type_.clone(), size); + Some(stats) + } else { + None + }; let mut values = Vec::::with_capacity(size * array.len()); array.values().iter().for_each(|x| { let bytes = &x.to_be_bytes()[32 - size..]; @@ -481,12 +488,6 @@ pub fn array_to_page_simple( values.into(), array.validity().cloned(), ); - let statistics = if options.write_statistics { - let stats = fixed_len_bytes::build_statistics(&array, type_.clone()); - Some(stats) - } else { - None - }; fixed_len_bytes::array_to_page(&array, options, type_, statistics) } diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs index cbf85fa6d3f..ac4bbd396be 100644 --- a/src/io/parquet/write/schema.rs +++ b/src/io/parquet/write/schema.rs @@ -295,9 +295,9 @@ pub fn to_parquet_type(field: &Field) -> Result { None, )?) } - DataType::Decimal256(_, _) => Ok(ParquetType::try_from_primitive( + DataType::Decimal256(precision, _) => Ok(ParquetType::try_from_primitive( name, - PhysicalType::FixedLenByteArray(32), + PhysicalType::FixedLenByteArray(decimal_length_from_precision(*precision)), repetition, None, None, diff --git a/src/types/native.rs b/src/types/native.rs index 19c1697d1cf..40b79bcf1a2 100644 --- a/src/types/native.rs +++ b/src/types/native.rs @@ -513,7 +513,7 @@ impl NativeType for f16 { } /// Physical representation of a decimal -#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] #[allow(non_camel_case_types)] #[repr(C)] pub struct i256(pub ethnum::I256);