-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implementing math power function for SQL #2324
Changes from 11 commits
5b90d3a
e4e9d0e
2484973
6687009
07e7f59
d8c73c1
afe40c4
5c80524
e2fd339
f177e39
6e0cf10
caf2413
1d50bb6
167cabd
8faa7f2
84e6c58
d60d217
ef3040c
e3b234f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -217,6 +217,11 @@ pub fn return_type( | |
} | ||
}), | ||
|
||
BuiltinScalarFunction::Power => match &input_expr_types[0] { | ||
DataType::Int32 | DataType::Int64 => Ok(DataType::Int64), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about Int8,Int16, UInt.... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now its changed to be like in https://www.postgresql.org/docs/14/functions-math.html So the signature is either i64 or f64. |
||
_ => Ok(DataType::Float64), | ||
}, | ||
|
||
BuiltinScalarFunction::Abs | ||
| BuiltinScalarFunction::Acos | ||
| BuiltinScalarFunction::Asin | ||
|
@@ -505,6 +510,13 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { | |
fun.volatility(), | ||
), | ||
BuiltinScalarFunction::Random => Signature::exact(vec![], fun.volatility()), | ||
BuiltinScalarFunction::Power => Signature::one_of( | ||
vec![ | ||
TypeSignature::Exact(vec![DataType::Int64, DataType::Int64]), | ||
TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]), | ||
], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It appears that postgres supports https://www.postgresql.org/docs/12/functions-math.html I think for the initial implementation it would be fine to support |
||
fun.volatility(), | ||
), | ||
// math expressions expect 1 argument of type f64 or f32 | ||
// priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we | ||
// return the best approximation for it (in f64). | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -17,12 +17,14 @@ | |||||
|
||||||
//! Math expressions | ||||||
|
||||||
use arrow::array::{Float32Array, Float64Array}; | ||||||
use arrow::array::ArrayRef; | ||||||
use arrow::array::{Float32Array, Float64Array, Int64Array}; | ||||||
use arrow::datatypes::DataType; | ||||||
use datafusion_common::ScalarValue; | ||||||
use datafusion_common::{DataFusionError, Result}; | ||||||
use datafusion_expr::ColumnarValue; | ||||||
use rand::{thread_rng, Rng}; | ||||||
use std::any::type_name; | ||||||
use std::iter; | ||||||
use std::sync::Arc; | ||||||
|
||||||
|
@@ -86,6 +88,33 @@ macro_rules! math_unary_function { | |||||
}; | ||||||
} | ||||||
|
||||||
macro_rules! downcast_arg { | ||||||
($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{ | ||||||
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { | ||||||
DataFusionError::Internal(format!( | ||||||
"could not cast {} to {}", | ||||||
$NAME, | ||||||
type_name::<$ARRAY_TYPE>() | ||||||
)) | ||||||
})? | ||||||
}}; | ||||||
} | ||||||
|
||||||
macro_rules! make_function_inputs2 { | ||||||
($ARG1: expr, $ARG2: expr, $NAME1:expr, $NAME2: expr, $ARRAY_TYPE:ident, $FUNC: block) => {{ | ||||||
let arg1 = downcast_arg!($ARG1, $NAME1, $ARRAY_TYPE); | ||||||
let arg2 = downcast_arg!($ARG2, $NAME2, $ARRAY_TYPE); | ||||||
|
||||||
arg1.iter() | ||||||
.zip(arg2.iter()) | ||||||
.map(|(a1, a2)| match (a1, a2) { | ||||||
(Some(a1), Some(a2)) => Some($FUNC(a1, a2.try_into().unwrap())), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if Does it work if you do
Suggested change
Instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I came up with |
||||||
_ => None, | ||||||
}) | ||||||
.collect::<$ARRAY_TYPE>() | ||||||
}}; | ||||||
} | ||||||
|
||||||
math_unary_function!("sqrt", sqrt); | ||||||
math_unary_function!("sin", sin); | ||||||
math_unary_function!("cos", cos); | ||||||
|
@@ -120,6 +149,33 @@ pub fn random(args: &[ColumnarValue]) -> Result<ColumnarValue> { | |||||
Ok(ColumnarValue::Array(Arc::new(array))) | ||||||
} | ||||||
|
||||||
pub fn power(args: &[ArrayRef]) -> Result<ArrayRef> { | ||||||
match args[0].data_type() { | ||||||
DataType::Float32 | DataType::Float64 => Ok(Arc::new(make_function_inputs2!( | ||||||
&args[0], | ||||||
&args[1], | ||||||
"base", | ||||||
"exponent", | ||||||
Float64Array, | ||||||
{ f64::powf } | ||||||
)) as ArrayRef), | ||||||
|
||||||
DataType::Int32 | DataType::Int64 => Ok(Arc::new(make_function_inputs2!( | ||||||
&args[0], | ||||||
&args[1], | ||||||
"base", | ||||||
"exponent", | ||||||
Int64Array, | ||||||
{ i64::pow } | ||||||
)) as ArrayRef), | ||||||
|
||||||
other => Err(DataFusionError::Internal(format!( | ||||||
"Unsupported data type {:?} for function power", | ||||||
other | ||||||
))), | ||||||
} | ||||||
} | ||||||
|
||||||
#[cfg(test)] | ||||||
mod tests { | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -184,6 +184,7 @@ enum ScalarFunction { | |
Trim=61; | ||
Upper=62; | ||
Coalesce=63; | ||
Power=64; | ||
} | ||
|
||
message ScalarFunctionNode { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A test that takes two columns in addition to a column and a scalar would also be good -- like
power(i64, i64)
perhapsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done