From 296cf1471db38576a28a14329f2bf7a1c37923ad Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Sat, 21 Jan 2023 11:42:46 -0500 Subject: [PATCH 1/5] feat: add new dtype-i128 feature flag --- polars/Cargo.toml | 2 ++ polars/polars-core/Cargo.toml | 1 + polars/polars-lazy/Cargo.toml | 1 + polars/polars-lazy/polars-pipe/Cargo.toml | 1 + polars/polars-lazy/polars-plan/Cargo.toml | 1 + polars/polars-ops/Cargo.toml | 1 + 6 files changed, 7 insertions(+) diff --git a/polars/Cargo.toml b/polars/Cargo.toml index 4e6e9059f7e2..823409adcb93 100644 --- a/polars/Cargo.toml +++ b/polars/Cargo.toml @@ -177,6 +177,7 @@ dtype-full = [ "dtype-time", "dtype-i8", "dtype-i16", + "dtype-i128", "dtype-u8", "dtype-u16", "dtype-categorical", @@ -215,6 +216,7 @@ dtype-duration = [ dtype-time = ["polars-core/dtype-time", "polars-io/dtype-time", "polars-time/dtype-time", "polars-ops/dtype-time"] dtype-i8 = ["polars-core/dtype-i8", "polars-lazy/dtype-i8", "polars-ops/dtype-i8"] dtype-i16 = ["polars-core/dtype-i16", "polars-lazy/dtype-i16", "polars-ops/dtype-i16"] +dtype-i128 = ["polars-core/dtype-i128", "polars-lazy/dtype-i128", "polars-ops/dtype-i128"] dtype-u8 = ["polars-core/dtype-u8", "polars-lazy/dtype-u8", "polars-ops/dtype-u8"] dtype-u16 = ["polars-core/dtype-u16", "polars-lazy/dtype-u16", "polars-ops/dtype-u16"] dtype-categorical = [ diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml index ff6b92ddb476..fc5250f7d5c7 100644 --- a/polars/polars-core/Cargo.toml +++ b/polars/polars-core/Cargo.toml @@ -92,6 +92,7 @@ dtype-duration = ["temporal"] dtype-time = ["temporal"] dtype-i8 = [] dtype-i16 = [] +dtype-i128 = [] dtype-u8 = [] dtype-u16 = [] dtype-categorical = [] diff --git a/polars/polars-lazy/Cargo.toml b/polars/polars-lazy/Cargo.toml index 50d3d86295a9..d60f8092e370 100644 --- a/polars/polars-lazy/Cargo.toml +++ b/polars/polars-lazy/Cargo.toml @@ -51,6 +51,7 @@ dtype-u8 = ["polars-plan/dtype-u8", "polars-pipe/dtype-u8"] dtype-u16 = ["polars-plan/dtype-u16", "polars-pipe/dtype-u16"] dtype-i8 = ["polars-plan/dtype-i8", "polars-pipe/dtype-i8"] dtype-i16 = ["polars-plan/dtype-i16", "polars-pipe/dtype-i16"] +dtype-i128 = ["polars-plan/dtype-i128", "polars-pipe/dtype-i128"] dtype-date = ["polars-plan/dtype-date", "polars-time/dtype-date", "temporal"] dtype-datetime = ["polars-plan/dtype-datetime", "polars-time/dtype-datetime", "temporal"] dtype-duration = ["polars-plan/dtype-duration", "polars-time/dtype-duration", "temporal"] diff --git a/polars/polars-lazy/polars-pipe/Cargo.toml b/polars/polars-lazy/polars-pipe/Cargo.toml index 809936cf6b10..5dd262649514 100644 --- a/polars/polars-lazy/polars-pipe/Cargo.toml +++ b/polars/polars-lazy/polars-pipe/Cargo.toml @@ -34,4 +34,5 @@ dtype-u8 = ["polars-core/dtype-u8"] dtype-u16 = ["polars-core/dtype-u16"] dtype-i8 = ["polars-core/dtype-i8"] dtype-i16 = ["polars-core/dtype-i16"] +dtype-i128 = ["polars-core/dtype-i128"] dtype-categorical = ["polars-core/dtype-categorical"] diff --git a/polars/polars-lazy/polars-plan/Cargo.toml b/polars/polars-lazy/polars-plan/Cargo.toml index 29dba85b8c79..6e4cc60e6dfe 100644 --- a/polars/polars-lazy/polars-plan/Cargo.toml +++ b/polars/polars-lazy/polars-plan/Cargo.toml @@ -43,6 +43,7 @@ dtype-u8 = ["polars-core/dtype-u8"] dtype-u16 = ["polars-core/dtype-u16"] dtype-i8 = ["polars-core/dtype-i8"] dtype-i16 = ["polars-core/dtype-i16"] +dtype-i128 = ["polars-core/dtype-i128"] dtype-date = ["polars-core/dtype-date", "polars-time/dtype-date", "temporal"] dtype-datetime = ["polars-core/dtype-datetime", "polars-time/dtype-datetime", "temporal"] dtype-duration = ["polars-core/dtype-duration", "polars-time/dtype-duration", "temporal"] diff --git a/polars/polars-ops/Cargo.toml b/polars/polars-ops/Cargo.toml index dd93b9f3ab27..adcf2d1766d3 100644 --- a/polars/polars-ops/Cargo.toml +++ b/polars/polars-ops/Cargo.toml @@ -32,6 +32,7 @@ dtype-u8 = ["polars-core/dtype-u8"] dtype-u16 = ["polars-core/dtype-u16"] dtype-i8 = ["polars-core/dtype-i8"] dtype-i16 = ["polars-core/dtype-i16"] +dtype-i128 = ["polars-core/dtype-i128"] object = ["polars-core/object"] propagate_nans = [] performant = ["polars-core/performant"] From 6c874de98ccae641a0444325c775d8ccb3dccfe7 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Sat, 21 Jan 2023 11:53:21 -0500 Subject: [PATCH 2/5] feat: add DataType::Int128 --- polars/polars-core/src/datatypes/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/polars/polars-core/src/datatypes/mod.rs b/polars/polars-core/src/datatypes/mod.rs index 3888dc1a5a43..32c005023b30 100644 --- a/polars/polars-core/src/datatypes/mod.rs +++ b/polars/polars-core/src/datatypes/mod.rs @@ -82,6 +82,8 @@ impl_polars_datatype!(Int8Type, Int8, i8); impl_polars_datatype!(Int16Type, Int16, i16); impl_polars_datatype!(Int32Type, Int32, i32); impl_polars_datatype!(Int64Type, Int64, i64); +#[cfg(feature = "dtype-i128")] +impl_polars_datatype!(Int128Type, Unknown, i128); impl_polars_datatype!(Float32Type, Float32, f32); impl_polars_datatype!(Float64Type, Float64, f64); impl_polars_datatype!(DateType, Date, i32); @@ -150,6 +152,8 @@ pub type Int8Chunked = ChunkedArray; pub type Int16Chunked = ChunkedArray; pub type Int32Chunked = ChunkedArray; pub type Int64Chunked = ChunkedArray; +#[cfg(feature = "dtype-i128")] +pub type Int128Chunked = ChunkedArray; pub type Float32Chunked = ChunkedArray; pub type Float64Chunked = ChunkedArray; pub type Utf8Chunked = ChunkedArray; From e910c2267e1b5f025aea09aa8bddfcb87360a8f7 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Sun, 22 Jan 2023 00:43:48 -0500 Subject: [PATCH 3/5] feat: define ArrayArithmetics to abstract over basic vs decimal Decimal arithmetics require manipulating the DataType when doing some operations, i.e.: changing precision/scale --- .../src/chunked_array/arithmetic.rs | 121 ++++++++++++++++-- polars/polars-core/src/datatypes/mod.rs | 4 +- 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/polars/polars-core/src/chunked_array/arithmetic.rs b/polars/polars-core/src/chunked_array/arithmetic.rs index 60e7ec69601f..a9b867c1c8e9 100644 --- a/polars/polars-core/src/chunked_array/arithmetic.rs +++ b/polars/polars-core/src/chunked_array/arithmetic.rs @@ -3,14 +3,94 @@ use std::borrow::Cow; use std::ops::{Add, Div, Mul, Rem, Sub}; use arrow::array::PrimitiveArray; -use arrow::compute::arithmetics::basic; +use arrow::compute::arithmetics::{basic, decimal}; use arrow::compute::arity_assign; +use arrow::types::NativeType; use num::{Num, NumCast, ToPrimitive}; use crate::prelude::*; use crate::series::IsSorted; use crate::utils::{align_chunks_binary, align_chunks_binary_owned}; +pub trait ArrayArithmetics +where + Self: NativeType, +{ + fn add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray; + fn sub(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray; + fn mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray; + fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray; + fn div_scalar(lhs: &PrimitiveArray, rhs: &Self) -> PrimitiveArray; + fn rem(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray; + fn rem_scalar(lhs: &PrimitiveArray, rhs: &Self) -> PrimitiveArray; +} + +macro_rules! native_array_arithmetics { + ($ty: ty) => { + impl ArrayArithmetics for $ty + { + fn add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + basic::add(lhs, rhs) + } + fn sub(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + basic::sub(lhs, rhs) + } + fn mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + basic::mul(lhs, rhs) + } + fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + basic::div(lhs, rhs) + } + fn div_scalar(lhs: &PrimitiveArray, rhs: &Self) -> PrimitiveArray { + basic::div_scalar(lhs, rhs) + } + fn rem(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + basic::rem(lhs, rhs) + } + fn rem_scalar(lhs: &PrimitiveArray, rhs: &Self) -> PrimitiveArray { + basic::rem_scalar(lhs, rhs) + } + } + }; + ($($ty:ty),*) => { + $(native_array_arithmetics!($ty);)* + } +} + +native_array_arithmetics!(u8, u16, u32, u64, i8, i16, i32, i64, f32, f64); + +#[cfg(feature = "dtype-i128")] +impl ArrayArithmetics for i128 { + fn add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + decimal::add(lhs, rhs) + } + + fn sub(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + decimal::sub(lhs, rhs) + } + + fn mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + decimal::mul(lhs, rhs) + } + + fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray { + decimal::div(lhs, rhs) + } + + fn div_scalar(_lhs: &PrimitiveArray, _rhs: &Self) -> PrimitiveArray { + // decimal::div_scalar(lhs, rhs) + todo!("decimal::div_scalar exists, but takes &PrimitiveScalar, not &i128"); + } + + fn rem(_lhs: &PrimitiveArray, _rhs: &PrimitiveArray) -> PrimitiveArray { + unimplemented!("requires support in arrow2 crate") + } + + fn rem_scalar(_lhs: &PrimitiveArray, _rhs: &Self) -> PrimitiveArray { + unimplemented!("requires support in arrow2 crate") + } +} + macro_rules! apply_operand_on_chunkedarray_by_iter { ($self:ident, $rhs:ident, $operand:tt) => { @@ -157,7 +237,12 @@ where type Output = ChunkedArray; fn add(self, rhs: Self) -> Self::Output { - arithmetic_helper(self, rhs, basic::add, |lhs, rhs| lhs + rhs) + arithmetic_helper( + self, + rhs, + ::add, + |lhs, rhs| lhs + rhs, + ) } } @@ -168,7 +253,12 @@ where type Output = ChunkedArray; fn div(self, rhs: Self) -> Self::Output { - arithmetic_helper(self, rhs, basic::div, |lhs, rhs| lhs / rhs) + arithmetic_helper( + self, + rhs, + ::div, + |lhs, rhs| lhs / rhs, + ) } } @@ -179,7 +269,12 @@ where type Output = ChunkedArray; fn mul(self, rhs: Self) -> Self::Output { - arithmetic_helper(self, rhs, basic::mul, |lhs, rhs| lhs * rhs) + arithmetic_helper( + self, + rhs, + ::mul, + |lhs, rhs| lhs * rhs, + ) } } @@ -190,7 +285,12 @@ where type Output = ChunkedArray; fn rem(self, rhs: Self) -> Self::Output { - arithmetic_helper(self, rhs, basic::rem, |lhs, rhs| lhs % rhs) + arithmetic_helper( + self, + rhs, + ::rem, + |lhs, rhs| lhs % rhs, + ) } } @@ -201,7 +301,12 @@ where type Output = ChunkedArray; fn sub(self, rhs: Self) -> Self::Output { - arithmetic_helper(self, rhs, basic::sub, |lhs, rhs| lhs - rhs) + arithmetic_helper( + self, + rhs, + ::sub, + |lhs, rhs| lhs - rhs, + ) } } @@ -317,7 +422,7 @@ where fn div(self, rhs: N) -> Self::Output { let rhs: T::Native = NumCast::from(rhs).expect("could not cast"); - self.apply_kernel(&|arr| Box::new(basic::div_scalar(arr, &rhs))) + self.apply_kernel(&|arr| Box::new(::div_scalar(arr, &rhs))) } } @@ -343,7 +448,7 @@ where fn rem(self, rhs: N) -> Self::Output { let rhs: T::Native = NumCast::from(rhs).expect("could not cast"); - self.apply_kernel(&|arr| Box::new(basic::rem_scalar(arr, &rhs))) + self.apply_kernel(&|arr| Box::new(::rem_scalar(arr, &rhs))) } } diff --git a/polars/polars-core/src/datatypes/mod.rs b/polars/polars-core/src/datatypes/mod.rs index 32c005023b30..96dded32e0f4 100644 --- a/polars/polars-core/src/datatypes/mod.rs +++ b/polars/polars-core/src/datatypes/mod.rs @@ -22,7 +22,6 @@ use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign}; use ahash::RandomState; pub use aliases::*; pub use any_value::*; -use arrow::compute::arithmetics::basic::NativeArithmetics; use arrow::compute::comparison::Simd8; #[cfg(feature = "dtype-categorical")] use arrow::datatypes::IntegerType; @@ -41,6 +40,7 @@ use serde::{Deserialize, Serialize}; use serde::{Deserializer, Serializer}; pub use time_unit::*; +use crate::chunked_array::arithmetic::ArrayArithmetics; pub use crate::chunked_array::logical::*; #[cfg(feature = "object")] use crate::chunked_array::object::PolarsObjectSafe; @@ -179,7 +179,7 @@ pub trait NumericNative: + Bounded + FromPrimitive + IsFloat - + NativeArithmetics + + ArrayArithmetics { type POLARSTYPE: PolarsNumericType; } From b803b6db5fdcdf8497f5f51ea47c64ac00508531 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 31 Jan 2023 09:18:37 -0500 Subject: [PATCH 4/5] tidy: fix clippy warning --- polars/polars-core/src/chunked_array/arithmetic.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/polars/polars-core/src/chunked_array/arithmetic.rs b/polars/polars-core/src/chunked_array/arithmetic.rs index a9b867c1c8e9..181d4bdd0942 100644 --- a/polars/polars-core/src/chunked_array/arithmetic.rs +++ b/polars/polars-core/src/chunked_array/arithmetic.rs @@ -3,7 +3,9 @@ use std::borrow::Cow; use std::ops::{Add, Div, Mul, Rem, Sub}; use arrow::array::PrimitiveArray; -use arrow::compute::arithmetics::{basic, decimal}; +use arrow::compute::arithmetics::basic; +#[cfg(feature = "dtype-i128")] +use arrow::compute::arithmetics::decimal; use arrow::compute::arity_assign; use arrow::types::NativeType; use num::{Num, NumCast, ToPrimitive}; From 0d0a89825891aa88948d30ec1be29bdde9277451 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 31 Jan 2023 09:26:28 -0500 Subject: [PATCH 5/5] feat: add DataType::Decimal128 --- polars/polars-core/src/datatypes/dtype.rs | 8 ++++++++ polars/polars-core/src/datatypes/mod.rs | 13 +++++++++++-- py-polars/src/conversion.rs | 1 + py-polars/src/datatypes.rs | 1 + py-polars/src/series.rs | 1 + 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/polars/polars-core/src/datatypes/dtype.rs b/polars/polars-core/src/datatypes/dtype.rs index f229301d752f..977a1c7697ff 100644 --- a/polars/polars-core/src/datatypes/dtype.rs +++ b/polars/polars-core/src/datatypes/dtype.rs @@ -15,6 +15,10 @@ pub enum DataType { Int64, Float32, Float64, + #[cfg(feature = "dtype-i128")] + /// Fixed point decimal type with precision and scale. + /// This is backed by 128 bits which allows for 38 significant digits. + Decimal128(Option<(usize, usize)>), /// String data Utf8, #[cfg(feature = "dtype-binary")] @@ -208,6 +212,8 @@ impl DataType { Int64 => ArrowDataType::Int64, Float32 => ArrowDataType::Float32, Float64 => ArrowDataType::Float64, + #[cfg(feature = "dtype-i128")] + Decimal128(_) => todo!(), Utf8 => ArrowDataType::LargeUtf8, #[cfg(feature = "dtype-binary")] Binary => ArrowDataType::LargeBinary, @@ -261,6 +267,8 @@ impl Display for DataType { DataType::Int64 => "i64", DataType::Float32 => "f32", DataType::Float64 => "f64", + #[cfg(feature = "dtype-i128")] + DataType::Decimal128(_) => "i128", DataType::Utf8 => "str", #[cfg(feature = "dtype-binary")] DataType::Binary => "binary", diff --git a/polars/polars-core/src/datatypes/mod.rs b/polars/polars-core/src/datatypes/mod.rs index 96dded32e0f4..e7ac00a0faf5 100644 --- a/polars/polars-core/src/datatypes/mod.rs +++ b/polars/polars-core/src/datatypes/mod.rs @@ -82,8 +82,6 @@ impl_polars_datatype!(Int8Type, Int8, i8); impl_polars_datatype!(Int16Type, Int16, i16); impl_polars_datatype!(Int32Type, Int32, i32); impl_polars_datatype!(Int64Type, Int64, i64); -#[cfg(feature = "dtype-i128")] -impl_polars_datatype!(Int128Type, Unknown, i128); impl_polars_datatype!(Float32Type, Float32, f32); impl_polars_datatype!(Float64Type, Float64, f64); impl_polars_datatype!(DateType, Date, i32); @@ -120,6 +118,17 @@ impl PolarsDataType for ListType { } } +#[cfg(feature = "dtype-i128")] +pub struct Int128Type {} + +#[cfg(feature = "dtype-i128")] +impl PolarsDataType for Int128Type { + fn get_dtype() -> DataType { + // we cannot know precision/scale statically + DataType::Decimal128(None) + } +} + #[cfg(feature = "object")] pub struct ObjectType(T); #[cfg(feature = "object")] diff --git a/py-polars/src/conversion.rs b/py-polars/src/conversion.rs index 575d69fbd14e..56eebe940bb9 100644 --- a/py-polars/src/conversion.rs +++ b/py-polars/src/conversion.rs @@ -281,6 +281,7 @@ impl ToPyObject for Wrap { DataType::UInt64 => pl.getattr("UInt64").unwrap().into(), DataType::Float32 => pl.getattr("Float32").unwrap().into(), DataType::Float64 => pl.getattr("Float64").unwrap().into(), + DataType::Decimal128(_) => todo!(), DataType::Boolean => pl.getattr("Boolean").unwrap().into(), DataType::Utf8 => pl.getattr("Utf8").unwrap().into(), DataType::Binary => pl.getattr("Binary").unwrap().into(), diff --git a/py-polars/src/datatypes.rs b/py-polars/src/datatypes.rs index 3e40787ce3e9..26a1c1b2f871 100644 --- a/py-polars/src/datatypes.rs +++ b/py-polars/src/datatypes.rs @@ -46,6 +46,7 @@ impl From<&DataType> for PyDataType { DataType::UInt64 => UInt64, DataType::Float32 => Float32, DataType::Float64 => Float64, + DataType::Decimal128(_) => todo!(), DataType::Boolean => Bool, DataType::Utf8 => Utf8, DataType::Binary => Binary, diff --git a/py-polars/src/series.rs b/py-polars/src/series.rs index 51758e0d0539..1c75a3eed603 100644 --- a/py-polars/src/series.rs +++ b/py-polars/src/series.rs @@ -666,6 +666,7 @@ impl PySeries { DataType::Int64 => PyList::new(py, series.i64().unwrap()), DataType::Float32 => PyList::new(py, series.f32().unwrap()), DataType::Float64 => PyList::new(py, series.f64().unwrap()), + DataType::Decimal128(_) => todo!(), DataType::Categorical(_) => { PyList::new(py, series.categorical().unwrap().iter_str()) }