Improved performance of decimal arithmetics (#605)

jorgecarleitao · Nov 14, 2021 · 7f973ee · 7f973ee
1 parent a09834e
commit 7f973ee
Show file tree

Hide file tree

Showing 5 changed files with 284 additions and 492 deletions.
diff --git a/src/compute/arithmetics/decimal/add.rs b/src/compute/arithmetics/decimal/add.rs
@@ -1,21 +1,4 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the addition arithmetic kernels for Decimal `PrimitiveArrays`.
+//! Defines the addition arithmetic kernels for [`PrimitiveArray`] representing decimals.
 use crate::compute::arithmetics::basic::check_same_len;
 use crate::{
     array::{Array, PrimitiveArray},
@@ -31,12 +14,14 @@ use crate::{
     error::{ArrowError, Result},
 };
 
-use super::{adjusted_precision_scale, max_value, number_digits};
+use super::{adjusted_precision_scale, get_parameters, max_value, number_digits};
 
-/// Adds two decimal primitive arrays with the same precision and scale. If the
-/// precision and scale is different, then an InvalidArgumentError is returned.
-/// This function panics if the added numbers result in a number larger than
-/// the possible number for the selected precision.
+/// Adds two decimal [`PrimitiveArray`] with the same precision and scale.
+/// # Error
+/// Errors if the precision and scale are different.
+/// # Panic
+/// This function panics iff the added numbers result in a number larger than
+/// the possible number for the precision.
 ///
 /// # Examples
 /// ```
@@ -53,38 +38,22 @@ use super::{adjusted_precision_scale, max_value, number_digits};
 /// assert_eq!(result, expected);
 /// ```
 pub fn add(lhs: &PrimitiveArray<i128>, rhs: &PrimitiveArray<i128>) -> Result<PrimitiveArray<i128>> {
-    // Matching on both data types from both arrays
-    // This match will be true only when precision and scale from both
-    // arrays are the same, otherwise it will return and ArrowError
-    match (lhs.data_type(), rhs.data_type()) {
-        (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) => {
-            if lhs_p == rhs_p && lhs_s == rhs_s {
-                // Closure for the binary operation. This closure will panic if
-                // the sum of the values is larger than the max value possible
-                // for the decimal precision
-                let op = move |a, b| {
-                    let res: i128 = a + b;
+    let (precision, _) = get_parameters(lhs.data_type(), rhs.data_type())?;
 
-                    assert!(
-                        !(res.abs() > max_value(*lhs_p)),
-                        "Overflow in addition presented for precision {}",
-                        lhs_p
-                    );
+    let max = max_value(precision);
+    let op = move |a, b| {
+        let res: i128 = a + b;
 
-                    res
-                };
+        assert!(
+            res.abs() <= max,
+            "Overflow in addition presented for precision {}",
+            precision
+        );
 
-                binary(lhs, rhs, lhs.data_type().clone(), op)
-            } else {
-                Err(ArrowError::InvalidArgumentError(
-                    "Arrays must have the same precision and scale".to_string(),
-                ))
-            }
-        }
-        _ => Err(ArrowError::InvalidArgumentError(
-            "Incorrect data type for the array".to_string(),
-        )),
-    }
+        res
+    };
+
+    binary(lhs, rhs, lhs.data_type().clone(), op)
 }
 
 /// Saturated addition of two decimal primitive arrays with the same precision
@@ -111,40 +80,24 @@ pub fn saturating_add(
     lhs: &PrimitiveArray<i128>,
     rhs: &PrimitiveArray<i128>,
 ) -> Result<PrimitiveArray<i128>> {
-    // Matching on both data types from both arrays. This match will be true
-    // only when precision and scale from both arrays are the same, otherwise
-    // it will return and ArrowError
-    match (lhs.data_type(), rhs.data_type()) {
-        (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) => {
-            if lhs_p == rhs_p && lhs_s == rhs_s {
-                // Closure for the binary operation.
-                let op = move |a, b| {
-                    let res: i128 = a + b;
-                    let max = max_value(*lhs_p);
+    let (precision, _) = get_parameters(lhs.data_type(), rhs.data_type())?;
 
-                    match res {
-                        res if res.abs() > max => {
-                            if res > 0 {
-                                max
-                            } else {
-                                -max
-                            }
-                        }
-                        _ => res,
-                    }
-                };
+    let max = max_value(precision);
+    let op = move |a, b| {
+        let res: i128 = a + b;
 
-                binary(lhs, rhs, lhs.data_type().clone(), op)
+        if res.abs() > max {
+            if res > 0 {
+                max
             } else {
-                Err(ArrowError::InvalidArgumentError(
-                    "Arrays must have the same precision and scale".to_string(),
-                ))
+                -max
             }
+        } else {
+            res
         }
-        _ => Err(ArrowError::InvalidArgumentError(
-            "Incorrect data type for the array".to_string(),
-        )),
-    }
+    };
+
+    binary(lhs, rhs, lhs.data_type().clone(), op)
 }
 
 /// Checked addition of two decimal primitive arrays with the same precision
@@ -171,33 +124,20 @@ pub fn checked_add(
     lhs: &PrimitiveArray<i128>,
     rhs: &PrimitiveArray<i128>,
 ) -> Result<PrimitiveArray<i128>> {
-    // Matching on both data types from both arrays. This match will be true
-    // only when precision and scale from both arrays are the same, otherwise
-    // it will return and ArrowError
-    match (lhs.data_type(), rhs.data_type()) {
-        (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) => {
-            if lhs_p == rhs_p && lhs_s == rhs_s {
-                // Closure for the binary operation.
-                let op = move |a, b| {
-                    let res: i128 = a + b;
+    let (precision, _) = get_parameters(lhs.data_type(), rhs.data_type())?;
 
-                    match res {
-                        res if res.abs() > max_value(*lhs_p) => None,
-                        _ => Some(res),
-                    }
-                };
+    let max = max_value(precision);
+    let op = move |a, b| {
+        let result: i128 = a + b;
 
-                binary_checked(lhs, rhs, lhs.data_type().clone(), op)
-            } else {
-                Err(ArrowError::InvalidArgumentError(
-                    "Arrays must have the same precision and scale".to_string(),
-                ))
-            }
+        if result.abs() > max {
+            None
+        } else {
+            Some(result)
         }
-        _ => Err(ArrowError::InvalidArgumentError(
-            "Incorrect data type for the array".to_string(),
-        )),
-    }
+    };
+
+    binary_checked(lhs, rhs, lhs.data_type().clone(), op)
 }
 
 // Implementation of ArrayAdd trait for PrimitiveArrays
@@ -259,14 +199,16 @@ pub fn adaptive_add(
         // looping through the iterator
         let (mut res_p, res_s, diff) = adjusted_precision_scale(*lhs_p, *lhs_s, *rhs_p, *rhs_s);
 
-        let mut result = Vec::new();
-        for (l, r) in lhs.values().iter().zip(rhs.values().iter()) {
+        let shift = 10i128.pow(diff as u32);
+        let mut max = max_value(res_p);
+
+        let iter = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| {
             // Based on the array's scales one of the arguments in the sum has to be shifted
             // to the left to match the final scale
             let res = if lhs_s > rhs_s {
-                l + r * 10i128.pow(diff as u32)
+                l + r * shift
             } else {
-                l * 10i128.pow(diff as u32) + r
+                l * shift + r
             };
 
             // The precision of the resulting array will change if one of the
@@ -277,15 +219,15 @@ pub fn adaptive_add(
             //  00.0001 -> 6, 4
             // -----------------
             // 100.0000 -> 7, 4
-            if res.abs() > max_value(res_p) {
+            if res.abs() > max {
                 res_p = number_digits(res);
+                max = max_value(res_p);
             }
-
-            result.push(res);
-        }
+            res
+        });
+        let values = Buffer::from_trusted_len_iter(iter);
 
         let validity = combine_validities(lhs.validity(), rhs.validity());
-        let values = Buffer::from(result);
 
         Ok(PrimitiveArray::<i128>::from_data(
             DataType::Decimal(res_p, res_s),