Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support for decimal 256 (#1194)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 11, 2022
1 parent aa86aa9 commit a1ab5a7
Show file tree
Hide file tree
Showing 20 changed files with 254 additions and 39 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ dyn-clone = "1"
bytemuck = { version = "1", features = ["derive"] }
chrono = { version = "0.4", default_features = false, features = ["std"] }

# for decimal i256
ethnum = "1"

# We need to Hash values before sending them to an hasher. This
# crate provides HashMap that assumes pre-hashed values.
hash_hasher = "^2.0.3"
Expand Down
13 changes: 13 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ def test_decimal_roundtrip(self):
b = arrow_pyarrow_integration_testing.round_trip_array(a)
self.assertEqual(a, b)

def test_decimal256_roundtrip(self):
"""
Python -> Rust -> Python
"""
data = [
round(decimal.Decimal(722.82), 2),
round(decimal.Decimal(-934.11), 2),
None,
]
a = pyarrow.array(data, pyarrow.decimal256(5, 2))
b = arrow_pyarrow_integration_testing.round_trip_array(a)
self.assertEqual(a, b)

def test_list_array(self):
"""
Python -> Rust -> Python
Expand Down
3 changes: 2 additions & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,14 @@ macro_rules! with_match_primitive_type {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns, f16};
use crate::types::{days_ms, months_days_ns, f16, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
Expand Down
19 changes: 15 additions & 4 deletions src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fmt::{Debug, Formatter, Result, Write};

use crate::array::Array;
use crate::datatypes::{IntervalUnit, TimeUnit};
use crate::types::{days_ms, months_days_ns};
use crate::types::{days_ms, i256, months_days_ns};

use super::PrimitiveArray;
use crate::array::fmt::write_vec;
Expand Down Expand Up @@ -118,13 +118,24 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Decimal(_, scale) => {
// The number 999.99 has a precision of 5 and scale of 2
let scale = *scale as u32;
let display = move |x| {
let base = x / 10i128.pow(scale);
let decimals = x - base * 10i128.pow(scale);
let factor = 10i128.pow(scale);
let display = move |x: i128| {
let base = x / factor;
let decimals = (x - base * factor).abs();
format!("{}.{}", base, decimals)
};
dyn_primitive!(array, i128, display)
}
Decimal256(_, scale) => {
let scale = *scale as u32;
let factor = (ethnum::I256::ONE * 10).pow(scale);
let display = move |x: i256| {
let base = x.0 / factor;
let decimals = (x.0 - base * factor).abs();
format!("{}.{}", base, decimals)
};
dyn_primitive!(array, i256, display)
}
_ => unreachable!(),
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
datatypes::*,
error::Error,
trusted_len::TrustedLen,
types::{days_ms, f16, months_days_ns, NativeType},
types::{days_ms, f16, i256, months_days_ns, NativeType},
};

use super::Array;
Expand Down Expand Up @@ -475,6 +475,8 @@ pub type Int32Array = PrimitiveArray<i32>;
pub type Int64Array = PrimitiveArray<i64>;
/// A type definition [`PrimitiveArray`] for `i128`
pub type Int128Array = PrimitiveArray<i128>;
/// A type definition [`PrimitiveArray`] for `i256`
pub type Int256Array = PrimitiveArray<i256>;
/// A type definition [`PrimitiveArray`] for [`days_ms`]
pub type DaysMsArray = PrimitiveArray<days_ms>;
/// A type definition [`PrimitiveArray`] for [`months_days_ns`]
Expand Down Expand Up @@ -504,6 +506,8 @@ pub type Int32Vec = MutablePrimitiveArray<i32>;
pub type Int64Vec = MutablePrimitiveArray<i64>;
/// A type definition [`MutablePrimitiveArray`] for `i128`
pub type Int128Vec = MutablePrimitiveArray<i128>;
/// A type definition [`MutablePrimitiveArray`] for `i256`
pub type Int256Vec = MutablePrimitiveArray<i256>;
/// A type definition [`MutablePrimitiveArray`] for [`days_ms`]
pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
/// A type definition [`MutablePrimitiveArray`] for [`months_days_ns`]
Expand Down
3 changes: 2 additions & 1 deletion src/compute/arithmetics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,13 +402,14 @@ macro_rules! with_match_negatable {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns};
use crate::types::{days_ms, months_days_ns, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 | UInt16 | UInt32 | UInt64 | Float16 => todo!(),
Expand Down
5 changes: 4 additions & 1 deletion src/compute/comparison/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,14 @@ macro_rules! match_eq_ord {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::i256;
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => todo!(),
MonthDayNano => todo!(),
UInt8 => __with_ty__! { u8 },
Expand All @@ -92,13 +94,14 @@ macro_rules! match_eq {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns, f16};
use crate::types::{days_ms, months_days_ns, f16, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
Expand Down
3 changes: 2 additions & 1 deletion src/compute/comparison/simd/native.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::convert::TryInto;

use super::{set, Simd8, Simd8Lanes, Simd8PartialEq, Simd8PartialOrd};
use crate::types::{days_ms, f16, months_days_ns};
use crate::types::{days_ms, f16, i256, months_days_ns};

simd8_native_all!(u8);
simd8_native_all!(u16);
Expand All @@ -11,6 +11,7 @@ simd8_native_all!(i8);
simd8_native_all!(i16);
simd8_native_all!(i32);
simd8_native_all!(i128);
simd8_native_all!(i256);
simd8_native_all!(i64);
simd8_native!(f16);
simd8_native_partial_eq!(f16);
Expand Down
3 changes: 2 additions & 1 deletion src/compute/comparison/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::convert::TryInto;
use std::simd::{SimdPartialEq, SimdPartialOrd, ToBitMask};

use crate::types::simd::*;
use crate::types::{days_ms, f16, months_days_ns};
use crate::types::{days_ms, f16, i256, months_days_ns};

use super::*;

Expand Down Expand Up @@ -71,6 +71,7 @@ simd8!(i16, i16x8);
simd8!(i32, i32x8);
simd8!(i64, i64x8);
simd8_native_all!(i128);
simd8_native_all!(i256);
simd8_native!(f16);
simd8_native_partial_eq!(f16);
simd8!(f32, f32x8);
Expand Down
4 changes: 4 additions & 0 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ pub enum DataType {
/// scale is the number of decimal places.
/// The number 999.99 has a precision of 5 and scale of 2.
Decimal(usize, usize),
/// Decimal backed by 256 bits
Decimal256(usize, usize),
/// Extension type.
Extension(String, Box<DataType>, Option<String>),
}
Expand Down Expand Up @@ -233,6 +235,7 @@ impl DataType {
PhysicalType::Primitive(PrimitiveType::Int64)
}
Decimal(_, _) => PhysicalType::Primitive(PrimitiveType::Int128),
Decimal256(_, _) => PhysicalType::Primitive(PrimitiveType::Int256),
UInt8 => PhysicalType::Primitive(PrimitiveType::UInt8),
UInt16 => PhysicalType::Primitive(PrimitiveType::UInt16),
UInt32 => PhysicalType::Primitive(PrimitiveType::UInt32),
Expand Down Expand Up @@ -299,6 +302,7 @@ impl From<PrimitiveType> for DataType {
PrimitiveType::UInt32 => DataType::UInt32,
PrimitiveType::UInt64 => DataType::UInt64,
PrimitiveType::Int128 => DataType::Decimal(32, 32),
PrimitiveType::Int256 => DataType::Decimal256(32, 32),
PrimitiveType::Float16 => DataType::Float16,
PrimitiveType::Float32 => DataType::Float32,
PrimitiveType::Float64 => DataType::Float64,
Expand Down
16 changes: 13 additions & 3 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,18 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result<DataType> {
"Decimal bit width is not a valid integer".to_string(),
)
})?;
if bit_width != 128 {
return Err(Error::OutOfSpec(
"Decimal256 is not supported".to_string(),
if bit_width == 256 {
return Ok(DataType::Decimal256(
precision_raw.parse::<usize>().map_err(|_| {
Error::OutOfSpec(
"Decimal precision is not a valid integer".to_string(),
)
})?,
scale_raw.parse::<usize>().map_err(|_| {
Error::OutOfSpec(
"Decimal scale is not a valid integer".to_string(),
)
})?,
));
}
(precision_raw, scale_raw)
Expand Down Expand Up @@ -438,6 +447,7 @@ fn to_format(data_type: &DataType) -> String {
)
}
DataType::Decimal(precision, scale) => format!("d:{},{}", precision, scale),
DataType::Decimal256(precision, scale) => format!("d:{},{},256", precision, scale),
DataType::List(_) => "+l".to_string(),
DataType::LargeList(_) => "+L".to_string(),
DataType::Struct(_) => "+s".to_string(),
Expand Down
29 changes: 19 additions & 10 deletions src/io/ipc/read/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,16 +327,25 @@ fn get_data_type(
(DataType::Duration(time_unit), IpcField::default())
}
Decimal(decimal) => {
let data_type = DataType::Decimal(
decimal
.precision()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?,
decimal
.scale()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?,
);
let bit_width: usize = decimal
.bit_width()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;
let precision: usize = decimal
.precision()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;
let scale: usize = decimal
.scale()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;

let data_type = match bit_width {
128 => DataType::Decimal(precision, scale),
256 => DataType::Decimal256(precision, scale),
_ => return Err(Error::from(OutOfSpecKind::NegativeFooterLength)),
};

(data_type, IpcField::default())
}
List(_) => deserialize_list(field)?,
Expand Down
8 changes: 7 additions & 1 deletion src/io/ipc/write/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ fn serialize_type(data_type: &DataType) -> arrow_format::ipc::Type {
scale: *scale as i32,
bit_width: 128,
})),
Decimal256(precision, scale) => ipc::Type::Decimal(Box::new(ipc::Decimal {
precision: *precision as i32,
scale: *scale as i32,
bit_width: 256,
})),
Binary => ipc::Type::Binary(Box::new(ipc::Binary {})),
LargeBinary => ipc::Type::LargeBinary(Box::new(ipc::LargeBinary {})),
Utf8 => ipc::Type::Utf8(Box::new(ipc::Utf8 {})),
Expand Down Expand Up @@ -281,7 +286,8 @@ fn serialize_children(data_type: &DataType, ipc_field: &IpcField) -> Vec<arrow_f
| LargeBinary
| Utf8
| LargeUtf8
| Decimal(_, _) => vec![],
| Decimal(_, _)
| Decimal256(_, _) => vec![],
FixedSizeList(inner, _) | LargeList(inner) | List(inner) | Map(inner, _) => {
vec![serialize_field(inner, &ipc_field.fields[0])]
}
Expand Down
21 changes: 20 additions & 1 deletion src/io/json_integration/read/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
datatypes::{DataType, PhysicalType, PrimitiveType, Schema},
error::{Error, Result},
io::ipc::IpcField,
types::{days_ms, months_days_ns, NativeType},
types::{days_ms, i256, months_days_ns, NativeType},
};

use super::super::{ArrowJsonBatch, ArrowJsonColumn, ArrowJsonDictionaryBatch};
Expand Down Expand Up @@ -125,6 +125,24 @@ fn to_decimal(json_col: &ArrowJsonColumn, data_type: DataType) -> PrimitiveArray
PrimitiveArray::<i128>::new(data_type, values, validity)
}

fn to_decimal256(json_col: &ArrowJsonColumn, data_type: DataType) -> PrimitiveArray<i256> {
let validity = to_validity(&json_col.validity);
let values = json_col
.data
.as_ref()
.unwrap()
.iter()
.map(|value| match value {
Value::String(x) => i256(x.parse::<ethnum::I256>().unwrap()),
_ => {
panic!()
}
})
.collect();

PrimitiveArray::<i256>::new(data_type, values, validity)
}

fn to_primitive<T: NativeType + NumCast>(
json_col: &ArrowJsonColumn,
data_type: DataType,
Expand Down Expand Up @@ -280,6 +298,7 @@ pub fn to_array(
Primitive(PrimitiveType::Int32) => Ok(Box::new(to_primitive::<i32>(json_col, data_type))),
Primitive(PrimitiveType::Int64) => Ok(Box::new(to_primitive::<i64>(json_col, data_type))),
Primitive(PrimitiveType::Int128) => Ok(Box::new(to_decimal(json_col, data_type))),
Primitive(PrimitiveType::Int256) => Ok(Box::new(to_decimal256(json_col, data_type))),
Primitive(PrimitiveType::DaysMs) => Ok(Box::new(to_primitive_days_ms(json_col, data_type))),
Primitive(PrimitiveType::MonthDayNano) => {
Ok(Box::new(to_primitive_months_days_ns(json_col, data_type)))
Expand Down
32 changes: 20 additions & 12 deletions src/io/json_integration/read/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,20 +170,28 @@ fn to_data_type(item: &Value, mut children: Vec<Field>) -> Result<DataType> {
"largeutf8" => LargeUtf8,
"decimal" => {
// return a list with any type as its child isn't defined in the map
let precision = match item.get("precision") {
Some(p) => Ok(p.as_u64().unwrap() as usize),
None => Err(Error::OutOfSpec(
"Expecting a precision for decimal".to_string(),
)),
};
let scale = match item.get("scale") {
Some(s) => Ok(s.as_u64().unwrap() as usize),
_ => Err(Error::OutOfSpec(
"Expecting a scale for decimal".to_string(),
)),
let precision = item
.get("precision")
.ok_or_else(|| Error::OutOfSpec("Expecting a precision for decimal".to_string()))?
.as_u64()
.unwrap() as usize;

let scale = item
.get("scale")
.ok_or_else(|| Error::OutOfSpec("Expecting a scale for decimal".to_string()))?
.as_u64()
.unwrap() as usize;

let bit_width = match item.get("bitWidth") {
Some(s) => s.as_u64().unwrap() as usize,
None => 128,
};

DataType::Decimal(precision?, scale?)
match bit_width {
128 => DataType::Decimal(precision, scale),
256 => DataType::Decimal256(precision, scale),
_ => todo!(),
}
}
"floatingpoint" => match item.get("precision") {
Some(p) if p == "HALF" => DataType::Float16,
Expand Down
3 changes: 3 additions & 0 deletions src/io/json_integration/write/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ fn serialize_data_type(data_type: &DataType) -> Value {
DataType::Decimal(precision, scale) => {
json!({"name": "decimal", "precision": precision, "scale": scale})
}
DataType::Decimal256(precision, scale) => {
json!({"name": "decimal", "precision": precision, "scale": scale, "bit_width": 256})
}
DataType::Extension(_, inner_data_type, _) => serialize_data_type(inner_data_type),
}
}
Expand Down
Loading

0 comments on commit a1ab5a7

Please sign in to comment.