From 2605412a0543e4b5f72c7b1f75ab211216492c7c Mon Sep 17 00:00:00 2001
From: taichong
Date: Sat, 18 Feb 2023 09:59:38 +0800
Subject: [PATCH] feat(query): support desc table with decimal type and support order by decimal type col

Summary of the change set:

* schema.rs: convert DataType::Decimal (128/256) into the matching Arrow
  decimal data type instead of falling through to unreachable!().
* types.rs: DataType::default_value() returns a zero DecimalScalar that
  carries the column's precision/scale.
* types/decimal.rs: add the Decimal128Type / Decimal256Type aliases,
  implement ArgType for DecimalType, and extend the Decimal trait with
  data_type(), MIN and MAX.  The i256 implementation reports
  DecimalDataType::Decimal256; pairing MAX_DECIMAL256_PRECISION with the
  Decimal128 variant would describe an invalid 128-bit decimal.
* utils/column_from.rs: FromData constructors for both decimal widths.
* values.rs: Column::partial_cmp compares two decimal columns.
* tests: block sort on a Decimal128 column and sqllogictest coverage for
  ORDER BY on decimal columns.

---
 src/query/expression/src/schema.rs            |  6 ++
 src/query/expression/src/types.rs             |  8 ++
 src/query/expression/src/types/decimal.rs     | 61 ++++++++++++++
 src/query/expression/src/utils/column_from.rs | 21 +++++
 src/query/expression/src/values.rs            |  1 +
 src/query/expression/tests/it/sort.rs         | 82 +++++++++++++++++++
 .../11_data_type/11_0006_data_type_decimal    | 27 ++++++
 7 files changed, 206 insertions(+)

diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs
index 091ba8bb8b0cc..6f6210f505164 100644
--- a/src/query/expression/src/schema.rs
+++ b/src/query/expression/src/schema.rs
@@ -1366,6 +1366,12 @@ impl From<&DataType> for ArrowDataType {
                 Box::new(ArrowDataType::LargeBinary),
                 None,
             ),
+            DataType::Decimal(DecimalDataType::Decimal128(s)) => {
+                ArrowDataType::Decimal(s.precision as usize, s.scale as usize)
+            }
+            DataType::Decimal(DecimalDataType::Decimal256(s)) => {
+                ArrowDataType::Decimal256(s.precision as usize, s.scale as usize)
+            }
             _ => unreachable!(),
         }
 
diff --git a/src/query/expression/src/types.rs b/src/query/expression/src/types.rs
index c2a947b568857..d5af194a22cc1 100755
--- a/src/query/expression/src/types.rs
+++ b/src/query/expression/src/types.rs
@@ -61,6 +61,7 @@ use crate::deserializations::TimestampDeserializer;
 use crate::deserializations::TupleDeserializer;
 use crate::deserializations::VariantDeserializer;
 use crate::property::Domain;
+use crate::types::decimal::DecimalScalar;
 use crate::values::Column;
 use crate::values::Scalar;
 use crate::ColumnBuilder;
@@ -288,6 +289,13 @@ impl DataType {
                 Scalar::Tuple(tys.iter().map(|ty| ty.default_value()).collect())
             }
             DataType::Variant => Scalar::Variant(vec![]),
+            DataType::Decimal(DecimalDataType::Decimal128(s)) => {
+                Scalar::Decimal(DecimalScalar::Decimal128(0.into(), *s))
+            }
+            DataType::Decimal(DecimalDataType::Decimal256(s)) => {
+                Scalar::Decimal(DecimalScalar::Decimal256(0.into(), *s))
+            }
+
             _ => unimplemented!(),
         }
     }
diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs
index 457f9ff867976..e1e3d1783142f 100644
--- a/src/query/expression/src/types/decimal.rs
+++ b/src/query/expression/src/types/decimal.rs
@@ -27,6 +27,9 @@ use serde::Deserialize;
 use serde::Serialize;
 
 use super::SimpleDomain;
+use crate::types::ArgType;
+use crate::types::DataType;
+use crate::types::GenericMap;
 use crate::types::ValueType;
 use crate::utils::arrow::buffer_into_mut;
 use crate::Column;
@@ -38,6 +41,9 @@ use crate::ScalarRef;
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct DecimalType<T: Decimal>(PhantomData<T>);
 
+pub type Decimal128Type = DecimalType<i128>;
+pub type Decimal256Type = DecimalType<i256>;
+
 impl<Num: Decimal> ValueType for DecimalType<Num> {
     type Scalar = Num;
     type ScalarRef<'a> = Num;
@@ -146,6 +152,38 @@ impl<Num: Decimal> ValueType for DecimalType<Num> {
     }
 }
 
+impl<Num: Decimal> ArgType for DecimalType<Num> {
+    fn data_type() -> DataType {
+        Num::data_type()
+    }
+
+    fn full_domain() -> Self::Domain {
+        SimpleDomain {
+            min: Num::MIN,
+            max: Num::MAX,
+        }
+    }
+
+    fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder {
+        Vec::with_capacity(capacity)
+    }
+
+    fn column_from_vec(vec: Vec<Self::Scalar>, _generics: &GenericMap) -> Self::Column {
+        vec.into()
+    }
+
+    fn column_from_iter(iter: impl Iterator<Item = Self::Scalar>, _: &GenericMap) -> Self::Column {
+        iter.collect()
+    }
+
+    fn column_from_ref_iter<'a>(
+        iter: impl Iterator<Item = Self::ScalarRef<'a>>,
+        _: &GenericMap,
+    ) -> Self::Column {
+        iter.collect()
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, EnumAsInner)]
 pub enum DecimalDataType {
     Decimal128(DecimalSize),
@@ -227,6 +265,10 @@ pub trait Decimal:
     fn to_column(value: Vec<Self>, size: DecimalSize) -> DecimalColumn {
         Self::to_column_from_buffer(value.into(), size)
     }
+
+    fn data_type() -> DataType;
+    const MIN: Self;
+    const MAX: Self;
 }
 
 impl Decimal for i128 {
@@ -326,6 +368,16 @@ impl Decimal for i128 {
     fn to_column_from_buffer(value: Buffer<Self>, size: DecimalSize) -> DecimalColumn {
         DecimalColumn::Decimal128(value, size)
     }
+
+    fn data_type() -> DataType {
+        DataType::Decimal(DecimalDataType::Decimal128(DecimalSize {
+            precision: MAX_DECIMAL128_PRECISION,
+            scale: 0,
+        }))
+    }
+
+    const MIN: i128 = i128::MIN;
+    const MAX: i128 = i128::MAX;
 }
 
 impl Decimal for i256 {
@@ -424,6 +476,15 @@ impl Decimal for i256 {
     fn to_column_from_buffer(value: Buffer<Self>, size: DecimalSize) -> DecimalColumn {
         DecimalColumn::Decimal256(value, size)
     }
+
+    fn data_type() -> DataType {
+        DataType::Decimal(DecimalDataType::Decimal256(DecimalSize {
+            precision: MAX_DECIMAL256_PRECISION,
+            scale: 0,
+        }))
+    }
+    const MIN: i256 = i256::MIN;
+    const MAX: i256 = i256::MAX;
 }
 
 pub static MAX_DECIMAL128_PRECISION: u8 = 38;
diff --git a/src/query/expression/src/utils/column_from.rs b/src/query/expression/src/utils/column_from.rs
index c4f98c5130b40..4a3404658b0f2 100755
--- a/src/query/expression/src/utils/column_from.rs
+++ b/src/query/expression/src/utils/column_from.rs
@@ -14,6 +14,9 @@
 
 use std::iter::Iterator;
 
+use ethnum::i256;
+
+use crate::types::decimal::*;
 use crate::types::nullable::NullableColumn;
 use crate::types::number::*;
 use crate::types::*;
@@ -129,6 +132,24 @@ impl<D: AsRef<[f64]>> FromData<D, [Vec<f64>; 0]> for Float64Type {
     }
 }
 
+impl<D: AsRef<[i128]>> FromData<D, [Vec<i128>; 0]> for Decimal128Type {
+    fn from_data(d: D) -> Column {
+        Decimal128Type::upcast_column(Decimal128Type::column_from_iter(
+            d.as_ref().iter().copied(),
+            &[],
+        ))
+    }
+}
+
+impl<D: AsRef<[i256]>> FromData<D, [Vec<i256>; 0]> for Decimal256Type {
+    fn from_data(d: D) -> Column {
+        Decimal256Type::upcast_column(Decimal256Type::column_from_iter(
+            d.as_ref().iter().copied(),
+            &[],
+        ))
+    }
+}
+
 #[cfg(test)]
 mod test {
 
diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs
index 33c05d35bbabb..53e7de1e46ad4 100755
--- a/src/query/expression/src/values.rs
+++ b/src/query/expression/src/values.rs
@@ -489,6 +489,7 @@ impl PartialOrd for Column {
                 col1.partial_cmp(col2)
             }
             (Column::Number(col1), Column::Number(col2)) => col1.partial_cmp(col2),
+            (Column::Decimal(col1), Column::Decimal(col2)) => col1.partial_cmp(col2),
             (Column::Boolean(col1), Column::Boolean(col2)) => col1.iter().partial_cmp(col2.iter()),
             (Column::String(col1), Column::String(col2)) => col1.iter().partial_cmp(col2.iter()),
             (Column::Timestamp(col1), Column::Timestamp(col2)) => {
diff --git a/src/query/expression/tests/it/sort.rs b/src/query/expression/tests/it/sort.rs
index 3293133d26242..d37763f804f14 100644
--- a/src/query/expression/tests/it/sort.rs
+++ b/src/query/expression/tests/it/sort.rs
@@ -16,6 +16,7 @@ use std::sync::Arc;
 use std::vec;
 
 use common_exception::Result;
+use common_expression::types::decimal::*;
 use common_expression::types::number::*;
 use common_expression::types::StringType;
 use common_expression::Column;
@@ -108,6 +109,87 @@ fn test_block_sort() -> Result<()> {
         }
     }
 
+    let decimal_block = new_block(&[
+        Decimal128Type::from_data(vec![6i128, 4, 3, 2, 1, 1, 7]),
+        StringType::from_data(vec!["b1", "b2", "b3", "b4", "b5", "b6", "b7"]),
+    ]);
+
+    // test cases:
+    // - sort descriptions
+    // - limit
+    // - expected cols
+    let test_cases: Vec<(Vec<SortColumnDescription>, Option<usize>, Vec<Column>)> = vec![
+        (
+            vec![SortColumnDescription {
+                offset: 0,
+                asc: true,
+                nulls_first: false,
+            }],
+            None,
+            vec![
+                Decimal128Type::from_data(vec![1_i128, 1, 2, 3, 4, 6, 7]),
+                StringType::from_data(vec!["b5", "b6", "b4", "b3", "b2", "b1", "b7"]),
+            ],
+        ),
+        (
+            vec![SortColumnDescription {
+                offset: 0,
+                asc: true,
+                nulls_first: false,
+            }],
+            Some(4),
+            vec![
+                Decimal128Type::from_data(vec![1_i128, 1, 2, 3]),
+                StringType::from_data(vec!["b5", "b6", "b4", "b3"]),
+            ],
+        ),
+        (
+            vec![SortColumnDescription {
+                offset: 1,
+                asc: false,
+                nulls_first: false,
+            }],
+            None,
+            vec![
+                Decimal128Type::from_data(vec![7_i128, 1, 1, 2, 3, 4, 6]),
+                StringType::from_data(vec!["b7", "b6", "b5", "b4", "b3", "b2", "b1"]),
+            ],
+        ),
+        (
+            vec![
+                SortColumnDescription {
+                    offset: 0,
+                    asc: true,
+                    nulls_first: false,
+                },
+                SortColumnDescription {
+                    offset: 1,
+                    asc: false,
+                    nulls_first: false,
+                },
+            ],
+            None,
+            vec![
+                Decimal128Type::from_data(vec![1_i128, 1, 2, 3, 4, 6, 7]),
+                StringType::from_data(vec!["b6", "b5", "b4", "b3", "b2", "b1", "b7"]),
+            ],
+        ),
+    ];
+
+    for (sort_descs, limit, expected) in test_cases {
+        let res = DataBlock::sort(&decimal_block, &sort_descs, limit)?;
+
+        for (entry, expect) in res.columns().iter().zip(expected.iter()) {
+            assert_eq!(
+                entry.value.as_column().unwrap(),
+                expect,
+                "the column after sort is wrong, expect: {:?}, got: {:?}",
+                expect,
+                entry.value
+            );
+        }
+    }
+
     Ok(())
 }
 
diff --git a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal
index 61348b1714089..d1e7861b46722 100644
--- a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal
+++ b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal
@@ -137,3 +137,30 @@ query I
 SELECT ANY(CAST(2.34 AS DECIMAL(6, 2)))
 ----
 2.34
+
+statement ok
+drop table if exists t
+
+statement ok
+drop table if exists t1
+
+statement ok
+create table t(c1 decimal(38,2))
+
+statement ok
+create table t1(c0 int, c1 decimal(39,2))
+
+statement ok
+select * from t order by c1
+
+## https://github.com/jorgecarleitao/parquet2/blob/main/src/schema/types/converted_type.rs#L25
+## https://github.com/jorgecarleitao/arrow2/blob/main/src/io/parquet/write/schema.rs#L323
+## parquet2 and arrow2 do not implement decimal256 yet
+statement error 1002
+select * from t1 order by c1
+
+statement ok
+drop table if exists t
+
+statement ok
+drop table if exists t1