From dfa251d8ac0ab7be34f4a8bb4d742335a1ba7b81 Mon Sep 17 00:00:00 2001 From: taichong Date: Tue, 28 Feb 2023 21:39:17 +0800 Subject: [PATCH] support decimal256(9,_), decimal256(18,_), decimal256(38,_) --- parquet_integration/write_parquet.py | 17 ++-- src/io/parquet/read/deserialize/simple.rs | 51 +++++++++-- src/io/parquet/read/indexes/mod.rs | 15 ++++ src/io/parquet/read/indexes/primitive.rs | 15 +++- src/io/parquet/read/schema/convert.rs | 1 + src/io/parquet/read/schema/mod.rs | 1 + src/io/parquet/read/statistics/fixlen.rs | 31 +++++++ src/io/parquet/read/statistics/mod.rs | 18 +++- src/io/parquet/write/fixed_len_bytes.rs | 22 +++++ src/io/parquet/write/mod.rs | 89 ++++++++++++++---- src/io/parquet/write/schema.rs | 53 +++++++++-- tests/it/io/parquet/mod.rs | 62 +++++++++++-- tests/it/io/parquet/read.rs | 70 ++++++++++++--- tests/it/io/parquet/write.rs | 104 ++++++++++++++++++++-- 14 files changed, 486 insertions(+), 63 deletions(-) diff --git a/parquet_integration/write_parquet.py b/parquet_integration/write_parquet.py index b486981f8bb..ede8e9de591 100644 --- a/parquet_integration/write_parquet.py +++ b/parquet_integration/write_parquet.py @@ -32,14 +32,15 @@ def case_basic_nullable() -> Tuple[dict, pa.Schema, str]: pa.field("decimal_9", pa.decimal128(9, 0)), pa.field("decimal_18", pa.decimal128(18, 0)), pa.field("decimal_26", pa.decimal128(26, 0)), - pa.field("decimal_39", pa.decimal256(39, 0)), + pa.field("decimal256_9", pa.decimal256(9, 0)), + pa.field("decimal256_18", pa.decimal256(18, 0)), + pa.field("decimal256_26", pa.decimal256(26, 0)), pa.field("timestamp_us", pa.timestamp("us")), pa.field("timestamp_s", pa.timestamp("s")), pa.field("emoji", pa.utf8()), pa.field("timestamp_s_utc", pa.timestamp("s", "UTC")), ] schema = pa.schema(fields) - return ( { "int64": int64, @@ -52,7 +53,9 @@ def case_basic_nullable() -> Tuple[dict, pa.Schema, str]: "decimal_9": decimal, "decimal_18": decimal, "decimal_26": decimal, - "decimal_39": decimal, + "decimal256_9": decimal, + "decimal256_18": decimal, + "decimal256_26": decimal, "timestamp_us": int64, "timestamp_s": int64, "emoji": emoji, @@ -85,7 +88,9 @@ def case_basic_required() -> Tuple[dict, pa.Schema, str]: pa.field("decimal_9", pa.decimal128(9, 0), nullable=False), pa.field("decimal_18", pa.decimal128(18, 0), nullable=False), pa.field("decimal_26", pa.decimal128(26, 0), nullable=False), - pa.field("decimal_39", pa.decimal256(39, 0), nullable=False), + pa.field("decimal256_9", pa.decimal256(9, 0), nullable=False), + pa.field("decimal256_18", pa.decimal256(18, 0), nullable=False), + pa.field("decimal256_26", pa.decimal256(26, 0), nullable=False), ] schema = pa.schema(fields) @@ -100,7 +105,9 @@ def case_basic_required() -> Tuple[dict, pa.Schema, str]: "decimal_9": decimal, "decimal_18": decimal, "decimal_26": decimal, - "decimal_39": decimal, + "decimal256_9": decimal, + "decimal256_18": decimal, + "decimal256_26": decimal, }, schema, f"basic_required_10.parquet", diff --git a/src/io/parquet/read/deserialize/simple.rs b/src/io/parquet/read/deserialize/simple.rs index 370dd1c6a2b..0b1e3926239 100644 --- a/src/io/parquet/read/deserialize/simple.rs +++ b/src/io/parquet/read/deserialize/simple.rs @@ -1,3 +1,4 @@ +use ethnum::I256; use parquet2::{ schema::types::{ PhysicalType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, @@ -229,12 +230,47 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( Box::new(arrays) as _ } - (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n > 32 => { - return Err(Error::NotYetImplemented(format!( - "Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}" - ))) + (PhysicalType::Int32, Decimal256(_, _)) => dyn_iter(iden(primitive::IntegerIter::new( + pages, + data_type, + num_rows, + chunk_size, + |x: i32| i256(I256::new(x as i128)), + ))), + (PhysicalType::Int64, Decimal256(_, _)) => dyn_iter(iden(primitive::IntegerIter::new( + pages, + data_type, + num_rows, + chunk_size, + |x: i64| i256(I256::new(x as i128)), + ))), + (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n <= 16 => { + let n = *n; + + let pages = fixed_size_binary::Iter::new( + pages, + DataType::FixedSizeBinary(n), + num_rows, + chunk_size, + ); + + let pages = pages.map(move |maybe_array| { + let array = maybe_array?; + let values = array + .values() + .chunks_exact(n) + .map(|value: &[u8]| i256(I256::new(super::super::convert_i128(value, n)))) + .collect::>(); + let validity = array.validity().cloned(); + + PrimitiveArray::::try_new(data_type.clone(), values.into(), validity) + }); + + let arrays = pages.map(|x| x.map(|x| x.boxed())); + + Box::new(arrays) as _ } - (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) => { + (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n <= 32 => { let n = *n; let pages = fixed_size_binary::Iter::new( @@ -260,6 +296,11 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( Box::new(arrays) as _ } + (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n > 32 => { + return Err(Error::NotYetImplemented(format!( + "Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}" + ))) + } (PhysicalType::Int32, Date64) => dyn_iter(iden(primitive::IntegerIter::new( pages, data_type, diff --git a/src/io/parquet/read/indexes/mod.rs b/src/io/parquet/read/indexes/mod.rs index b52620bc92d..e149564eb4e 100644 --- a/src/io/parquet/read/indexes/mod.rs +++ b/src/io/parquet/read/indexes/mod.rs @@ -111,6 +111,21 @@ fn deserialize( PhysicalType::Primitive(PrimitiveType::Int256) => { let index = indexes.pop_front().unwrap(); match index.physical_type() { + ParquetPhysicalType::Int32 => { + let index = index.as_any().downcast_ref::>().unwrap(); + Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) + } + parquet2::schema::types::PhysicalType::Int64 => { + let index = index.as_any().downcast_ref::>().unwrap(); + Ok( + primitive::deserialize_i64( + &index.indexes, + &index.primitive_type, + data_type, + ) + .into(), + ) + } parquet2::schema::types::PhysicalType::FixedLenByteArray(_) => { let index = index.as_any().downcast_ref::().unwrap(); Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) diff --git a/src/io/parquet/read/indexes/primitive.rs b/src/io/parquet/read/indexes/primitive.rs index 636661f5223..96fdc0b7b5a 100644 --- a/src/io/parquet/read/indexes/primitive.rs +++ b/src/io/parquet/read/indexes/primitive.rs @@ -1,3 +1,4 @@ +use ethnum::I256; use parquet2::indexes::PageIndex; use parquet2::schema::types::{PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit}; use parquet2::types::int96_to_i64_ns; @@ -5,7 +6,7 @@ use parquet2::types::int96_to_i64_ns; use crate::array::{Array, MutablePrimitiveArray, PrimitiveArray}; use crate::datatypes::{DataType, TimeUnit}; use crate::trusted_len::TrustedLen; -use crate::types::NativeType; +use crate::types::{i256, NativeType}; use super::ColumnPageStatistics; @@ -32,6 +33,12 @@ fn deserialize_int32>>( PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128))) .to(data_type), ), + Decimal256(_, _) => Box::new( + PrimitiveArray::::from_trusted_len_iter( + iter.map(|x| x.map(|x| i256(I256::new(x.into())))), + ) + .to(data_type), + ) as _, _ => Box::new(PrimitiveArray::::from_trusted_len_iter(iter).to(data_type)), } } @@ -110,6 +117,12 @@ fn deserialize_int64>>( PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128))) .to(data_type), ) as _, + Decimal256(_, _) => Box::new( + PrimitiveArray::::from_trusted_len_iter( + iter.map(|x| x.map(|x| i256(I256::new(x.into())))), + ) + .to(data_type), + ) as _, Timestamp(time_unit, _) => { let mut array = MutablePrimitiveArray::::from_trusted_len_iter(iter).to(data_type.clone()); diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index 821d5107649..20fc0d50206 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -12,6 +12,7 @@ use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; /// Converts [`ParquetType`]s to a [`Field`], ignoring parquet fields that do not contain /// any physical column. pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec { + println!("in parquet_to_arrow_schema"); fields.iter().filter_map(to_field).collect::>() } diff --git a/src/io/parquet/read/schema/mod.rs b/src/io/parquet/read/schema/mod.rs index d47055ef6aa..cb1661760bb 100644 --- a/src/io/parquet/read/schema/mod.rs +++ b/src/io/parquet/read/schema/mod.rs @@ -21,6 +21,7 @@ use self::metadata::parse_key_value_metadata; /// This function errors iff the key `"ARROW:schema"` exists but is not correctly encoded, /// indicating that that the file's arrow metadata was incorrectly written. pub fn infer_schema(file_metadata: &FileMetaData) -> Result { + println!("in infer_schema"); let mut metadata = parse_key_value_metadata(file_metadata.key_value_metadata()); let schema = read_schema_from_metadata(&mut metadata)?; diff --git a/src/io/parquet/read/statistics/fixlen.rs b/src/io/parquet/read/statistics/fixlen.rs index 6ad5aab9ff6..1362d22bb97 100644 --- a/src/io/parquet/read/statistics/fixlen.rs +++ b/src/io/parquet/read/statistics/fixlen.rs @@ -1,3 +1,4 @@ +use ethnum::I256; use parquet2::statistics::{FixedLenStatistics, Statistics as ParquetStatistics}; use crate::array::*; @@ -28,6 +29,36 @@ pub(super) fn push_i128( Ok(()) } +pub(super) fn push_i256_with_i128( + from: Option<&dyn ParquetStatistics>, + n: usize, + min: &mut dyn MutableArray, + max: &mut dyn MutableArray, +) -> Result<()> { + let min = min + .as_mut_any() + .downcast_mut::>() + .unwrap(); + let max = max + .as_mut_any() + .downcast_mut::>() + .unwrap(); + let from = from.map(|s| s.as_any().downcast_ref::().unwrap()); + + min.push(from.and_then(|s| { + s.min_value + .as_deref() + .map(|x| i256(I256::new(convert_i128(x, n)))) + })); + max.push(from.and_then(|s| { + s.max_value + .as_deref() + .map(|x| i256(I256::new(convert_i128(x, n)))) + })); + + Ok(()) +} + pub(super) fn push_i256( from: Option<&dyn ParquetStatistics>, n: usize, diff --git a/src/io/parquet/read/statistics/mod.rs b/src/io/parquet/read/statistics/mod.rs index d32d7167e01..8a81074e8f4 100644 --- a/src/io/parquet/read/statistics/mod.rs +++ b/src/io/parquet/read/statistics/mod.rs @@ -1,4 +1,5 @@ //! APIs exposing `parquet2`'s statistics as arrow's statistics. +use ethnum::I256; use std::collections::VecDeque; use std::sync::Arc; @@ -17,6 +18,7 @@ use crate::datatypes::IntervalUnit; use crate::datatypes::{DataType, Field, PhysicalType}; use crate::error::Error; use crate::error::Result; +use crate::types::i256; mod binary; mod boolean; @@ -516,10 +518,24 @@ fn push( _ => unreachable!(), }, Decimal256(_, _) => match physical_type { + ParquetPhysicalType::Int32 => { + primitive::push(from, min, max, |x: i32| Ok(i256(I256::new(x.into())))) + } + ParquetPhysicalType::Int64 => { + println!("in push static Decimal256"); + primitive::push(from, min, max, |x: i64| Ok(i256(I256::new(x.into())))) + } + ParquetPhysicalType::FixedLenByteArray(n) if *n <= 16 => { + println!("in push FixedLenByteArray n {:?}", n); + fixlen::push_i256_with_i128(from, *n, min, max) + } ParquetPhysicalType::FixedLenByteArray(n) if *n > 32 => Err(Error::NotYetImplemented( format!("Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}"), )), - ParquetPhysicalType::FixedLenByteArray(n) => fixlen::push_i256(from, *n, min, max), + ParquetPhysicalType::FixedLenByteArray(n) => { + println!("in push FixedLenByteArray n {:?}", n); + fixlen::push_i256(from, *n, min, max) + } _ => unreachable!(), }, Binary => binary::push::(from, min, max), diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs index 357c14f2bb4..91b641da17f 100644 --- a/src/io/parquet/write/fixed_len_bytes.rs +++ b/src/io/parquet/write/fixed_len_bytes.rs @@ -105,6 +105,28 @@ pub(super) fn build_statistics_decimal( } } +pub(super) fn build_statistics_decimal256_with_i128( + array: &PrimitiveArray, + primitive_type: PrimitiveType, + size: usize, +) -> FixedLenStatistics { + FixedLenStatistics { + primitive_type, + null_count: Some(array.null_count() as i64), + distinct_count: None, + max_value: array + .iter() + .flatten() + .max() + .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()), + min_value: array + .iter() + .flatten() + .min() + .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()), + } +} + pub(super) fn build_statistics_decimal256( array: &PrimitiveArray, primitive_type: PrimitiveType, diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 44e7b8e9138..34c07a535b6 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -466,30 +466,83 @@ pub fn array_to_page_simple( } DataType::Decimal256(precision, _) => { let type_ = type_; - let size = decimal_length_from_precision(*precision); + let precision = *precision; let array = array .as_any() .downcast_ref::>() .unwrap(); - let statistics = if options.write_statistics { - let stats = - fixed_len_bytes::build_statistics_decimal256(array, type_.clone(), size); - Some(stats) + if precision <= 9 { + let values = array + .values() + .iter() + .map(|x| x.0.as_i32()) + .collect::>() + .into(); + + let array = + PrimitiveArray::::new(DataType::Int32, values, array.validity().cloned()); + primitive::array_to_page_integer::(&array, options, type_, encoding) + } else if precision <= 18 { + let values = array + .values() + .iter() + .map(|x| x.0.as_i64()) + .collect::>() + .into(); + + let array = + PrimitiveArray::::new(DataType::Int64, values, array.validity().cloned()); + primitive::array_to_page_integer::(&array, options, type_, encoding) + } else if precision <= 38 { + let size = decimal_length_from_precision(precision); + let statistics = if options.write_statistics { + let stats = fixed_len_bytes::build_statistics_decimal256_with_i128( + array, + type_.clone(), + size, + ); + Some(stats) + } else { + None + }; + + let mut values = Vec::::with_capacity(size * array.len()); + array.values().iter().for_each(|x| { + let bytes = &x.0.low().to_be_bytes()[16 - size..]; + values.extend_from_slice(bytes) + }); + let array = FixedSizeBinaryArray::new( + DataType::FixedSizeBinary(size), + values.into(), + array.validity().cloned(), + ); + fixed_len_bytes::array_to_page(&array, options, type_, statistics) } else { - None - }; - let mut values = Vec::::with_capacity(size * array.len()); - array.values().iter().for_each(|x| { - let bytes = &x.to_be_bytes()[32 - size..]; - values.extend_from_slice(bytes) - }); - let array = FixedSizeBinaryArray::new( - DataType::FixedSizeBinary(size), - values.into(), - array.validity().cloned(), - ); + let size = decimal_length_from_precision(precision); + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + let statistics = if options.write_statistics { + let stats = + fixed_len_bytes::build_statistics_decimal256(array, type_.clone(), size); + Some(stats) + } else { + None + }; + let mut values = Vec::::with_capacity(size * array.len()); + array.values().iter().for_each(|x| { + let bytes = &x.to_be_bytes()[32 - size..]; + values.extend_from_slice(bytes) + }); + let array = FixedSizeBinaryArray::new( + DataType::FixedSizeBinary(size), + values.into(), + array.validity().cloned(), + ); - fixed_len_bytes::array_to_page(&array, options, type_, statistics) + fixed_len_bytes::array_to_page(&array, options, type_, statistics) + } } DataType::Decimal(precision, _) => { let type_ = type_; diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs index ac4bbd396be..02d7040f29d 100644 --- a/src/io/parquet/write/schema.rs +++ b/src/io/parquet/write/schema.rs @@ -295,14 +295,51 @@ pub fn to_parquet_type(field: &Field) -> Result { None, )?) } - DataType::Decimal256(precision, _) => Ok(ParquetType::try_from_primitive( - name, - PhysicalType::FixedLenByteArray(decimal_length_from_precision(*precision)), - repetition, - None, - None, - None, - )?), + DataType::Decimal256(precision, scale) => { + let precision = *precision; + let scale = *scale; + let logical_type = Some(PrimitiveLogicalType::Decimal(precision, scale)); + + if precision <= 9 { + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::Int32, + repetition, + Some(PrimitiveConvertedType::Decimal(precision, scale)), + logical_type, + None, + )?) + } else if precision <= 18 { + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::Int64, + repetition, + Some(PrimitiveConvertedType::Decimal(precision, scale)), + logical_type, + None, + )?) + } else if precision <= 38 { + let len = decimal_length_from_precision(precision); + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::FixedLenByteArray(len), + repetition, + Some(PrimitiveConvertedType::Decimal(precision, scale)), + logical_type, + None, + )?) + } else { + let len = decimal_length_from_precision(precision); + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::FixedLenByteArray(len), + repetition, + None, + logical_type, + None, + )?) + } + } DataType::Interval(_) => Ok(ParquetType::try_from_primitive( name, PhysicalType::FixedLenByteArray(12), diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index b1a61205bf8..c6bfca40559 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -530,12 +530,26 @@ pub fn pyarrow_nullable(column: &str) -> Box { .collect::>(); Box::new(PrimitiveArray::::from(values).to(DataType::Decimal(26, 0))) } - "decimal_39" => { + "decimal256_9" => { let values = i64_values .iter() .map(|x| x.map(|x| i256(x.as_i256()))) .collect::>(); - Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(39, 0))) + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(9, 0))) + } + "decimal256_18" => { + let values = i64_values + .iter() + .map(|x| x.map(|x| i256(x.as_i256()))) + .collect::>(); + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(18, 0))) + } + "decimal256_26" => { + let values = i64_values + .iter() + .map(|x| x.map(|x| i256(x.as_i256()))) + .collect::>(); + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(26, 0))) } "timestamp_us" => Box::new( PrimitiveArray::::from(i64_values) @@ -623,14 +637,34 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { min_value: Box::new(Int128Array::from_slice([-256]).to(DataType::Decimal(26, 0))), max_value: Box::new(Int128Array::from_slice([9]).to(DataType::Decimal(26, 0))), }, - "decimal_39" => Statistics { + "decimal256_9" => Statistics { + distinct_count: UInt64Array::from([None]).boxed(), + null_count: UInt64Array::from([Some(3)]).boxed(), + min_value: Box::new( + Int256Array::from_slice([i256(-(256.as_i256()))]).to(DataType::Decimal256(9, 0)), + ), + max_value: Box::new( + Int256Array::from_slice([i256(9.as_i256())]).to(DataType::Decimal256(9, 0)), + ), + }, + "decimal256_18" => Statistics { distinct_count: UInt64Array::from([None]).boxed(), null_count: UInt64Array::from([Some(3)]).boxed(), min_value: Box::new( - Int256Array::from_slice([i256(-(256.as_i256()))]).to(DataType::Decimal256(39, 0)), + Int256Array::from_slice([i256(-(256.as_i256()))]).to(DataType::Decimal256(18, 0)), ), max_value: Box::new( - Int256Array::from_slice([i256(9.as_i256())]).to(DataType::Decimal256(39, 0)), + Int256Array::from_slice([i256(9.as_i256())]).to(DataType::Decimal256(18, 0)), + ), + }, + "decimal256_26" => Statistics { + distinct_count: UInt64Array::from([None]).boxed(), + null_count: UInt64Array::from([Some(3)]).boxed(), + min_value: Box::new( + Int256Array::from_slice([i256(-(256.as_i256()))]).to(DataType::Decimal256(26, 0)), + ), + max_value: Box::new( + Int256Array::from_slice([i256(9.as_i256())]).to(DataType::Decimal256(26, 0)), ), }, "timestamp_us" => Statistics { @@ -713,12 +747,26 @@ pub fn pyarrow_required(column: &str) -> Box { .collect::>(); Box::new(PrimitiveArray::::from(values).to(DataType::Decimal(26, 0))) } - "decimal_39" => { + "decimal256_9" => { + let values = i64_values + .iter() + .map(|x| x.map(|x| i256(x.as_i256()))) + .collect::>(); + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(9, 0))) + } + "decimal256_18" => { + let values = i64_values + .iter() + .map(|x| x.map(|x| i256(x.as_i256()))) + .collect::>(); + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(18, 0))) + } + "decimal256_26" => { let values = i64_values .iter() .map(|x| x.map(|x| i256(x.as_i256()))) .collect::>(); - Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(39, 0))) + Box::new(PrimitiveArray::::from(values).to(DataType::Decimal256(26, 0))) } _ => unreachable!(), } diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs index bb10778e74c..17bdd2b9f6f 100644 --- a/tests/it/io/parquet/read.rs +++ b/tests/it/io/parquet/read.rs @@ -402,13 +402,33 @@ fn v1_decimal_26_required() -> Result<()> { } #[test] -fn v1_decimal_39_nullable() -> Result<()> { - test_pyarrow_integration("decimal_39", 1, "basic", false, false, None) +fn v1_decimal256_9_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_9", 1, "basic", false, false, None) } #[test] -fn v1_decimal_39_required() -> Result<()> { - test_pyarrow_integration("decimal_39", 1, "basic", false, true, None) +fn v1_decimal256_9_required() -> Result<()> { + test_pyarrow_integration("decimal256_9", 1, "basic", false, true, None) +} + +#[test] +fn v1_decimal256_18_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_18", 1, "basic", false, false, None) +} + +#[test] +fn v1_decimal256_18_required() -> Result<()> { + test_pyarrow_integration("decimal256_18", 1, "basic", false, true, None) +} + +#[test] +fn v1_decimal256_26_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_26", 1, "basic", false, false, None) +} + +#[test] +fn v1_decimal256_26_required() -> Result<()> { + test_pyarrow_integration("decimal256_26", 1, "basic", false, true, None) } #[test] @@ -447,8 +467,18 @@ fn v2_decimal_26_nullable() -> Result<()> { } #[test] -fn v2_decimal_39_nullable() -> Result<()> { - test_pyarrow_integration("decimal_39", 2, "basic", false, false, None) +fn v2_decimal256_9_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_9", 2, "basic", false, false, None) +} + +#[test] +fn v2_decimal256_18_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_18", 2, "basic", false, false, None) +} + +#[test] +fn v2_decimal256_26_nullable() -> Result<()> { + test_pyarrow_integration("decimal256_26", 2, "basic", false, false, None) } #[test] @@ -482,13 +512,33 @@ fn v2_decimal_26_required_dict() -> Result<()> { } #[test] -fn v2_decimal_39_required() -> Result<()> { - test_pyarrow_integration("decimal_39", 2, "basic", false, true, None) +fn v2_decimal256_9_required() -> Result<()> { + test_pyarrow_integration("decimal256_9", 2, "basic", false, true, None) +} + +#[test] +fn v2_decimal256_9_required_dict() -> Result<()> { + test_pyarrow_integration("decimal256_9", 2, "basic", true, true, None) +} + +#[test] +fn v2_decimal256_18_required() -> Result<()> { + test_pyarrow_integration("decimal256_18", 2, "basic", false, true, None) +} + +#[test] +fn v2_decimal256_18_required_dict() -> Result<()> { + test_pyarrow_integration("decimal256_18", 2, "basic", true, true, None) +} + +#[test] +fn v2_decimal256_26_required() -> Result<()> { + test_pyarrow_integration("decimal256_26", 2, "basic", false, true, None) } #[test] -fn v2_decimal_39_required_dict() -> Result<()> { - test_pyarrow_integration("decimal_39", 2, "basic", true, true, None) +fn v2_decimal256_26_required_dict() -> Result<()> { + test_pyarrow_integration("decimal256_26", 2, "basic", true, true, None) } #[test] diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs index 52a67f22c6e..25a7839068b 100644 --- a/tests/it/io/parquet/write.rs +++ b/tests/it/io/parquet/write.rs @@ -540,9 +540,9 @@ fn decimal_26_required_v1() -> Result<()> { } #[test] -fn decimal_39_optional_v1() -> Result<()> { +fn decimal256_9_optional_v1() -> Result<()> { round_trip( - "decimal_39", + "decimal256_9", "nullable", Version::V1, CompressionOptions::Uncompressed, @@ -551,9 +551,53 @@ fn decimal_39_optional_v1() -> Result<()> { } #[test] -fn decimal_39_required_v1() -> Result<()> { +fn decimal256_9_required_v1() -> Result<()> { round_trip( - "decimal_39", + "decimal256_9", + "required", + Version::V1, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_18_optional_v1() -> Result<()> { + round_trip( + "decimal256_18", + "nullable", + Version::V1, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_18_required_v1() -> Result<()> { + round_trip( + "decimal256_18", + "required", + Version::V1, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_26_optional_v1() -> Result<()> { + round_trip( + "decimal256_26", + "nullable", + Version::V1, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_26_required_v1() -> Result<()> { + round_trip( + "decimal256_26", "required", Version::V1, CompressionOptions::Uncompressed, @@ -628,9 +672,53 @@ fn decimal_26_required_v2() -> Result<()> { } #[test] -fn decimal_39_optional_v2() -> Result<()> { +fn decimal256_9_optional_v2() -> Result<()> { + round_trip( + "decimal256_9", + "nullable", + Version::V2, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_9_required_v2() -> Result<()> { + round_trip( + "decimal256_9", + "required", + Version::V2, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_18_optional_v2() -> Result<()> { + round_trip( + "decimal256_18", + "nullable", + Version::V2, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_18_required_v2() -> Result<()> { + round_trip( + "decimal256_18", + "required", + Version::V2, + CompressionOptions::Uncompressed, + vec![Encoding::Plain], + ) +} + +#[test] +fn decimal256_26_optional_v2() -> Result<()> { round_trip( - "decimal_39", + "decimal256_26", "nullable", Version::V2, CompressionOptions::Uncompressed, @@ -639,9 +727,9 @@ fn decimal_39_optional_v2() -> Result<()> { } #[test] -fn decimal_39_required_v2() -> Result<()> { +fn decimal256_26_required_v2() -> Result<()> { round_trip( - "decimal_39", + "decimal256_26", "required", Version::V2, CompressionOptions::Uncompressed,