Skip to content

Commit

Permalink
Add dtype-i128 feature flag and Int128Type (#6374)
Browse files Browse the repository at this point in the history
  • Loading branch information
plaflamme authored Feb 2, 2023
1 parent a08c7ba commit 8ff0dcf
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 9 deletions.
2 changes: 2 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ dtype-full = [
"dtype-time",
"dtype-i8",
"dtype-i16",
"dtype-i128",
"dtype-u8",
"dtype-u16",
"dtype-categorical",
Expand Down Expand Up @@ -216,6 +217,7 @@ dtype-duration = [
dtype-time = ["polars-core/dtype-time", "polars-io/dtype-time", "polars-time/dtype-time", "polars-ops/dtype-time"]
dtype-i8 = ["polars-core/dtype-i8", "polars-lazy/dtype-i8", "polars-ops/dtype-i8"]
dtype-i16 = ["polars-core/dtype-i16", "polars-lazy/dtype-i16", "polars-ops/dtype-i16"]
dtype-i128 = ["polars-core/dtype-i128", "polars-lazy/dtype-i128", "polars-ops/dtype-i128"]
dtype-u8 = ["polars-core/dtype-u8", "polars-lazy/dtype-u8", "polars-ops/dtype-u8"]
dtype-u16 = ["polars-core/dtype-u16", "polars-lazy/dtype-u16", "polars-ops/dtype-u16"]
dtype-categorical = [
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ dtype-duration = ["temporal"]
dtype-time = ["temporal"]
dtype-i8 = []
dtype-i16 = []
dtype-i128 = []
dtype-u8 = []
dtype-u16 = []
dtype-categorical = []
Expand Down
121 changes: 114 additions & 7 deletions polars/polars-core/src/chunked_array/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,95 @@ use std::ops::{Add, Div, Mul, Rem, Sub};

use arrow::array::PrimitiveArray;
use arrow::compute::arithmetics::basic;
#[cfg(feature = "dtype-i128")]
use arrow::compute::arithmetics::decimal;
use arrow::compute::arity_assign;
use arrow::types::NativeType;
use num::{Num, NumCast, ToPrimitive};

use crate::prelude::*;
use crate::series::IsSorted;
use crate::utils::{align_chunks_binary, align_chunks_binary_owned};

/// Arithmetic kernels over arrow [`PrimitiveArray`]s, chosen by the native
/// element type.
///
/// Generic operator code dispatches through this trait
/// (`<T::Native as ArrayArithmetics>::add` and friends) instead of calling one
/// fixed arrow kernel module, so each native type decides which kernels back
/// its arithmetic.
///
/// Every method is an associated function (no `self`): operands are passed
/// explicitly and a freshly built array is returned.
pub trait ArrayArithmetics: NativeType {
    /// Array-by-array addition: `lhs + rhs`.
    fn add(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self>;

    /// Array-by-array subtraction: `lhs - rhs`.
    fn sub(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self>;

    /// Array-by-array multiplication: `lhs * rhs`.
    fn mul(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self>;

    /// Array-by-array division: `lhs / rhs`.
    fn div(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self>;

    /// Division of the array `lhs` by the single value `rhs`.
    fn div_scalar(lhs: &PrimitiveArray<Self>, rhs: &Self) -> PrimitiveArray<Self>;

    /// Array-by-array remainder: `lhs % rhs`.
    fn rem(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self>;

    /// Remainder of the array `lhs` by the single value `rhs`.
    fn rem_scalar(lhs: &PrimitiveArray<Self>, rhs: &Self) -> PrimitiveArray<Self>;
}

macro_rules! native_array_arithmetics {
($ty: ty) => {
impl ArrayArithmetics for $ty
{
fn add(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
basic::add(lhs, rhs)
}
fn sub(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
basic::sub(lhs, rhs)
}
fn mul(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
basic::mul(lhs, rhs)
}
fn div(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
basic::div(lhs, rhs)
}
fn div_scalar(lhs: &PrimitiveArray<Self>, rhs: &Self) -> PrimitiveArray<Self> {
basic::div_scalar(lhs, rhs)
}
fn rem(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
basic::rem(lhs, rhs)
}
fn rem_scalar(lhs: &PrimitiveArray<Self>, rhs: &Self) -> PrimitiveArray<Self> {
basic::rem_scalar(lhs, rhs)
}
}
};
($($ty:ty),*) => {
$(native_array_arithmetics!($ty);)*
}
}

native_array_arithmetics!(u8, u16, u32, u64, i8, i16, i32, i64, f32, f64);

// `i128` arithmetic is backed by arrow's `decimal` kernels rather than the
// `basic` ones. Only `add`, `sub`, `mul` and array-by-array `div` are wired up;
// `div_scalar`, `rem` and `rem_scalar` are placeholders that panic when called.
//
// NOTE(review): arrow2 documents extra preconditions for its decimal kernels
// (operands sharing precision and scale) — confirm callers uphold them.
#[cfg(feature = "dtype-i128")]
impl ArrayArithmetics for i128 {
    /// Forwards to `decimal::add`.
    fn add(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
        decimal::add(lhs, rhs)
    }

    /// Forwards to `decimal::sub`.
    fn sub(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
        decimal::sub(lhs, rhs)
    }

    /// Forwards to `decimal::mul`.
    fn mul(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
        decimal::mul(lhs, rhs)
    }

    /// Forwards to `decimal::div`.
    fn div(lhs: &PrimitiveArray<Self>, rhs: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
        decimal::div(lhs, rhs)
    }

    /// Placeholder.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`.
    fn div_scalar(_: &PrimitiveArray<Self>, _: &Self) -> PrimitiveArray<Self> {
        // A direct `decimal::div_scalar(lhs, rhs)` call does not fit this
        // signature: as the message says, that kernel wants a
        // `&PrimitiveScalar<i128>` where this trait hands over a plain `&i128`.
        todo!("decimal::div_scalar exists, but takes &PrimitiveScalar<i128>, not &i128")
    }

    /// Placeholder.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!`.
    fn rem(_: &PrimitiveArray<Self>, _: &PrimitiveArray<Self>) -> PrimitiveArray<Self> {
        unimplemented!("requires support in arrow2 crate")
    }

    /// Placeholder.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!`.
    fn rem_scalar(_: &PrimitiveArray<Self>, _: &Self) -> PrimitiveArray<Self> {
        unimplemented!("requires support in arrow2 crate")
    }
}

macro_rules! apply_operand_on_chunkedarray_by_iter {

($self:ident, $rhs:ident, $operand:tt) => {
Expand Down Expand Up @@ -157,7 +239,12 @@ where
type Output = ChunkedArray<T>;

/// `+` between two operands of the same type (the enclosing impl header is
/// outside this hunk).
///
/// Delegates to `arithmetic_helper`, handing it the array kernel selected
/// through `ArrayArithmetics` for `T::Native` together with a closure that
/// applies `+` to its two arguments. How the helper chooses between the two is
/// defined elsewhere in this file.
fn add(self, rhs: Self) -> Self::Output {
    // Only the trait-dispatched call is kept: the earlier `basic::add` call was
    // superseded by it and the two cannot coexist as tail expressions.
    arithmetic_helper(
        self,
        rhs,
        <T::Native as ArrayArithmetics>::add,
        |lhs, rhs| lhs + rhs,
    )
}
}

Expand All @@ -168,7 +255,12 @@ where
type Output = ChunkedArray<T>;

/// `/` between two operands of the same type (the enclosing impl header is
/// outside this hunk).
///
/// Delegates to `arithmetic_helper`, handing it the array kernel selected
/// through `ArrayArithmetics` for `T::Native` together with a closure that
/// applies `/` to its two arguments. How the helper chooses between the two is
/// defined elsewhere in this file.
fn div(self, rhs: Self) -> Self::Output {
    // Only the trait-dispatched call is kept: the earlier `basic::div` call was
    // superseded by it and the two cannot coexist as tail expressions.
    arithmetic_helper(
        self,
        rhs,
        <T::Native as ArrayArithmetics>::div,
        |lhs, rhs| lhs / rhs,
    )
}
}

Expand All @@ -179,7 +271,12 @@ where
type Output = ChunkedArray<T>;

/// `*` between two operands of the same type (the enclosing impl header is
/// outside this hunk).
///
/// Delegates to `arithmetic_helper`, handing it the array kernel selected
/// through `ArrayArithmetics` for `T::Native` together with a closure that
/// applies `*` to its two arguments. How the helper chooses between the two is
/// defined elsewhere in this file.
fn mul(self, rhs: Self) -> Self::Output {
    // Only the trait-dispatched call is kept: the earlier `basic::mul` call was
    // superseded by it and the two cannot coexist as tail expressions.
    arithmetic_helper(
        self,
        rhs,
        <T::Native as ArrayArithmetics>::mul,
        |lhs, rhs| lhs * rhs,
    )
}
}

Expand All @@ -190,7 +287,12 @@ where
type Output = ChunkedArray<T>;

/// `%` between two operands of the same type (the enclosing impl header is
/// outside this hunk).
///
/// Delegates to `arithmetic_helper`, handing it the array kernel selected
/// through `ArrayArithmetics` for `T::Native` together with a closure that
/// applies `%` to its two arguments. How the helper chooses between the two is
/// defined elsewhere in this file.
///
/// Note that the `i128` implementation of `ArrayArithmetics::rem` (feature
/// `dtype-i128`) is currently `unimplemented!`, so this operator panics for
/// that native type whenever the helper invokes the array kernel.
fn rem(self, rhs: Self) -> Self::Output {
    // Only the trait-dispatched call is kept: the earlier `basic::rem` call was
    // superseded by it and the two cannot coexist as tail expressions.
    arithmetic_helper(
        self,
        rhs,
        <T::Native as ArrayArithmetics>::rem,
        |lhs, rhs| lhs % rhs,
    )
}
}

Expand All @@ -201,7 +303,12 @@ where
type Output = ChunkedArray<T>;

/// `-` between two operands of the same type (the enclosing impl header is
/// outside this hunk).
///
/// Delegates to `arithmetic_helper`, handing it the array kernel selected
/// through `ArrayArithmetics` for `T::Native` together with a closure that
/// applies `-` to its two arguments. How the helper chooses between the two is
/// defined elsewhere in this file.
fn sub(self, rhs: Self) -> Self::Output {
    // Only the trait-dispatched call is kept: the earlier `basic::sub` call was
    // superseded by it and the two cannot coexist as tail expressions.
    arithmetic_helper(
        self,
        rhs,
        <T::Native as ArrayArithmetics>::sub,
        |lhs, rhs| lhs - rhs,
    )
}
}

Expand Down Expand Up @@ -317,7 +424,7 @@ where

/// `/` with a plain number on the right-hand side (the enclosing impl header
/// is outside this hunk).
///
/// `rhs` is first converted to `T::Native`; the `div_scalar` kernel selected
/// through `ArrayArithmetics` is then run via `apply_kernel`.
///
/// # Panics
///
/// Panics with "could not cast" when `rhs` cannot be represented as
/// `T::Native`. For `i128` (feature `dtype-i128`) the selected kernel is
/// currently a `todo!` placeholder and panics when invoked.
fn div(self, rhs: N) -> Self::Output {
    let rhs: T::Native = NumCast::from(rhs).expect("could not cast");
    // Only the trait-dispatched kernel is kept: the earlier `basic::div_scalar`
    // call was superseded by it and the two cannot coexist as tail expressions.
    self.apply_kernel(&|arr| Box::new(<T::Native as ArrayArithmetics>::div_scalar(arr, &rhs)))
}
}

Expand All @@ -343,7 +450,7 @@ where

/// `%` with a plain number on the right-hand side (the enclosing impl header
/// is outside this hunk).
///
/// `rhs` is first converted to `T::Native`; the `rem_scalar` kernel selected
/// through `ArrayArithmetics` is then run via `apply_kernel`.
///
/// # Panics
///
/// Panics with "could not cast" when `rhs` cannot be represented as
/// `T::Native`. For `i128` (feature `dtype-i128`) the selected kernel is
/// currently `unimplemented!` and panics when invoked.
fn rem(self, rhs: N) -> Self::Output {
    let rhs: T::Native = NumCast::from(rhs).expect("could not cast");
    // Only the trait-dispatched kernel is kept: the earlier `basic::rem_scalar`
    // call was superseded by it and the two cannot coexist as tail expressions.
    self.apply_kernel(&|arr| Box::new(<T::Native as ArrayArithmetics>::rem_scalar(arr, &rhs)))
}
}

Expand Down
8 changes: 8 additions & 0 deletions polars/polars-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ pub enum DataType {
Int64,
Float32,
Float64,
#[cfg(feature = "dtype-i128")]
/// Fixed point decimal type with precision and scale.
/// This is backed by 128 bits which allows for 38 significant digits.
Decimal128(Option<(usize, usize)>),
/// String data
Utf8,
#[cfg(feature = "dtype-binary")]
Expand Down Expand Up @@ -208,6 +212,8 @@ impl DataType {
Int64 => ArrowDataType::Int64,
Float32 => ArrowDataType::Float32,
Float64 => ArrowDataType::Float64,
#[cfg(feature = "dtype-i128")]
Decimal128(_) => todo!(),
Utf8 => ArrowDataType::LargeUtf8,
#[cfg(feature = "dtype-binary")]
Binary => ArrowDataType::LargeBinary,
Expand Down Expand Up @@ -261,6 +267,8 @@ impl Display for DataType {
DataType::Int64 => "i64",
DataType::Float32 => "f32",
DataType::Float64 => "f64",
#[cfg(feature = "dtype-i128")]
DataType::Decimal128(_) => "i128",
DataType::Utf8 => "str",
#[cfg(feature = "dtype-binary")]
DataType::Binary => "binary",
Expand Down
17 changes: 15 additions & 2 deletions polars/polars-core/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign};
use ahash::RandomState;
pub use aliases::*;
pub use any_value::*;
use arrow::compute::arithmetics::basic::NativeArithmetics;
use arrow::compute::comparison::Simd8;
#[cfg(feature = "dtype-categorical")]
use arrow::datatypes::IntegerType;
Expand All @@ -41,6 +40,7 @@ use serde::{Deserialize, Serialize};
use serde::{Deserializer, Serializer};
pub use time_unit::*;

use crate::chunked_array::arithmetic::ArrayArithmetics;
pub use crate::chunked_array::logical::*;
#[cfg(feature = "object")]
use crate::chunked_array::object::PolarsObjectSafe;
Expand Down Expand Up @@ -118,6 +118,17 @@ impl PolarsDataType for ListType {
}
}

/// Field-less marker type used as the type parameter of `Int128Chunked`.
/// Only compiled with the `dtype-i128` feature.
#[cfg(feature = "dtype-i128")]
pub struct Int128Type {}

#[cfg(feature = "dtype-i128")]
impl PolarsDataType for Int128Type {
    /// Reports `DataType::Decimal128` with no precision/scale attached.
    fn get_dtype() -> DataType {
        // This is a type-level lookup with no array at hand, and precision and
        // scale are not known statically, so the payload stays empty.
        let precision_and_scale = None;
        DataType::Decimal128(precision_and_scale)
    }
}

#[cfg(feature = "object")]
pub struct ObjectType<T>(T);
#[cfg(feature = "object")]
Expand Down Expand Up @@ -150,6 +161,8 @@ pub type Int8Chunked = ChunkedArray<Int8Type>;
pub type Int16Chunked = ChunkedArray<Int16Type>;
pub type Int32Chunked = ChunkedArray<Int32Type>;
pub type Int64Chunked = ChunkedArray<Int64Type>;
#[cfg(feature = "dtype-i128")]
pub type Int128Chunked = ChunkedArray<Int128Type>;
pub type Float32Chunked = ChunkedArray<Float32Type>;
pub type Float64Chunked = ChunkedArray<Float64Type>;
pub type Utf8Chunked = ChunkedArray<Utf8Type>;
Expand All @@ -175,7 +188,7 @@ pub trait NumericNative:
+ Bounded
+ FromPrimitive
+ IsFloat
+ NativeArithmetics
+ ArrayArithmetics
{
type POLARSTYPE: PolarsNumericType;
}
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dtype-u8 = ["polars-plan/dtype-u8", "polars-pipe/dtype-u8"]
dtype-u16 = ["polars-plan/dtype-u16", "polars-pipe/dtype-u16"]
dtype-i8 = ["polars-plan/dtype-i8", "polars-pipe/dtype-i8"]
dtype-i16 = ["polars-plan/dtype-i16", "polars-pipe/dtype-i16"]
dtype-i128 = ["polars-plan/dtype-i128", "polars-pipe/dtype-i128"]
dtype-date = ["polars-plan/dtype-date", "polars-time/dtype-date", "temporal"]
dtype-datetime = ["polars-plan/dtype-datetime", "polars-time/dtype-datetime", "temporal"]
dtype-duration = ["polars-plan/dtype-duration", "polars-time/dtype-duration", "temporal"]
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/polars-pipe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ dtype-u8 = ["polars-core/dtype-u8"]
dtype-u16 = ["polars-core/dtype-u16"]
dtype-i8 = ["polars-core/dtype-i8"]
dtype-i16 = ["polars-core/dtype-i16"]
dtype-i128 = ["polars-core/dtype-i128"]
dtype-categorical = ["polars-core/dtype-categorical"]
1 change: 1 addition & 0 deletions polars/polars-lazy/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dtype-u8 = ["polars-core/dtype-u8"]
dtype-u16 = ["polars-core/dtype-u16"]
dtype-i8 = ["polars-core/dtype-i8"]
dtype-i16 = ["polars-core/dtype-i16"]
dtype-i128 = ["polars-core/dtype-i128"]
dtype-date = ["polars-core/dtype-date", "polars-time/dtype-date", "temporal"]
dtype-datetime = ["polars-core/dtype-datetime", "polars-time/dtype-datetime", "temporal"]
dtype-duration = ["polars-core/dtype-duration", "polars-time/dtype-duration", "temporal"]
Expand Down
1 change: 1 addition & 0 deletions polars/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dtype-u8 = ["polars-core/dtype-u8"]
dtype-u16 = ["polars-core/dtype-u16"]
dtype-i8 = ["polars-core/dtype-i8"]
dtype-i16 = ["polars-core/dtype-i16"]
dtype-i128 = ["polars-core/dtype-i128"]
object = ["polars-core/object"]
propagate_nans = []
performant = ["polars-core/performant"]
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ impl ToPyObject for Wrap<DataType> {
DataType::UInt64 => pl.getattr("UInt64").unwrap().into(),
DataType::Float32 => pl.getattr("Float32").unwrap().into(),
DataType::Float64 => pl.getattr("Float64").unwrap().into(),
DataType::Decimal128(_) => todo!(),
DataType::Boolean => pl.getattr("Boolean").unwrap().into(),
DataType::Utf8 => pl.getattr("Utf8").unwrap().into(),
DataType::Binary => pl.getattr("Binary").unwrap().into(),
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ impl From<&DataType> for PyDataType {
DataType::UInt64 => UInt64,
DataType::Float32 => Float32,
DataType::Float64 => Float64,
DataType::Decimal128(_) => todo!(),
DataType::Boolean => Bool,
DataType::Utf8 => Utf8,
DataType::Binary => Binary,
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ impl PySeries {
DataType::Int64 => PyList::new(py, series.i64().unwrap()),
DataType::Float32 => PyList::new(py, series.f32().unwrap()),
DataType::Float64 => PyList::new(py, series.f64().unwrap()),
DataType::Decimal128(_) => todo!(),
DataType::Categorical(_) => {
PyList::new(py, series.categorical().unwrap().iter_str())
}
Expand Down

0 comments on commit 8ff0dcf

Please sign in to comment.