From 6661c1223ec5da0ba83f78ec87cef3bdd6902c53 Mon Sep 17 00:00:00 2001 From: taichong Date: Wed, 22 Feb 2023 21:15:39 +0800 Subject: [PATCH] decimal256 to FixedLenByteArray(32) --- src/io/parquet/read/deserialize/simple.rs | 6 +++--- src/io/parquet/read/statistics/mod.rs | 2 +- src/io/parquet/write/mod.rs | 4 ++-- src/io/parquet/write/schema.rs | 2 +- tests/it/io/parquet/write.rs | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/io/parquet/read/deserialize/simple.rs b/src/io/parquet/read/deserialize/simple.rs index b9fedd8528b..370dd1c6a2b 100644 --- a/src/io/parquet/read/deserialize/simple.rs +++ b/src/io/parquet/read/deserialize/simple.rs @@ -229,7 +229,7 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( Box::new(arrays) as _ } - (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n > 16 => { + (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n > 32 => { return Err(Error::NotYetImplemented(format!( "Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}" ))) @@ -239,7 +239,7 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( let pages = fixed_size_binary::Iter::new( pages, - DataType::FixedSizeBinary(16), + DataType::FixedSizeBinary(n), num_rows, chunk_size, ); @@ -248,7 +248,7 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( let array = maybe_array?; let values = array .values() - .chunks_exact(16) + .chunks_exact(n) .map(|value: &[u8]| super::super::convert_i256(value, n)) .collect::<Vec<_>>(); let validity = array.validity().cloned(); diff --git a/src/io/parquet/read/statistics/mod.rs b/src/io/parquet/read/statistics/mod.rs index a8c9ded8cc4..d32d7167e01 100644 --- a/src/io/parquet/read/statistics/mod.rs +++ b/src/io/parquet/read/statistics/mod.rs @@ -516,7 +516,7 @@ fn push( _ => unreachable!(), }, Decimal256(_, _) => match physical_type { - ParquetPhysicalType::FixedLenByteArray(n) if *n > 16 => Err(Error::NotYetImplemented( + ParquetPhysicalType::FixedLenByteArray(n) if *n > 32 => Err(Error::NotYetImplemented( 
format!("Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}"), )), ParquetPhysicalType::FixedLenByteArray(n) => fixlen::push_i256(from, *n, min, max), diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 8b482a1a4db..f16e6acc9d4 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -466,14 +466,14 @@ pub fn array_to_page_simple( } DataType::Decimal256(_, _) => { let type_ = type_; - let size = 16; + let size = 32; let array = array .as_any() .downcast_ref::<PrimitiveArray<i256>>() .unwrap(); let mut values = Vec::<u8>::with_capacity(size * array.len()); array.values().iter().for_each(|x| { - let bytes = &x.to_be_bytes()[16 - size..]; + let bytes = &x.to_be_bytes()[32 - size..]; values.extend_from_slice(bytes) }); let array = FixedSizeBinaryArray::new( diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs index 4465bf74000..cbf85fa6d3f 100644 --- a/src/io/parquet/write/schema.rs +++ b/src/io/parquet/write/schema.rs @@ -297,7 +297,7 @@ pub fn to_parquet_type(field: &Field) -> Result<ParquetType> { } DataType::Decimal256(_, _) => Ok(ParquetType::try_from_primitive( name, - PhysicalType::FixedLenByteArray(16), + PhysicalType::FixedLenByteArray(32), repetition, None, None, diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs index 7238762328f..52a67f22c6e 100644 --- a/tests/it/io/parquet/write.rs +++ b/tests/it/io/parquet/write.rs @@ -70,6 +70,7 @@ fn round_trip_opt_stats( let data = writer.into_inner().into_inner(); let (result, stats) = read_column(&mut Cursor::new(data), "a1")?; + assert_eq!(array.as_ref(), result.as_ref()); if check_stats { assert_eq!(statistics, stats);