Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dyn compare for binary array #1238

Merged
merged 9 commits into from
Jan 26, 2022
243 changes: 234 additions & 9 deletions arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,117 @@ where
}
}

/// Perform `left == right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            eq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left != right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn neq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            neq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            neq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "neq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left < right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn lt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            lt_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            lt_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "lt_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left <= right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn lt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            lt_eq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            lt_eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "lt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left > right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn gt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            gt_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            gt_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "gt_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left >= right` operation on a binary array and a binary scalar
/// value (a byte slice). Supports BinaryArray and LargeBinaryArray.
///
/// # Errors
///
/// Returns an `ArrowError::ComputeError` when the data type of `left` is
/// neither `DataType::Binary` nor `DataType::LargeBinary`.
pub fn gt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            gt_eq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            gt_eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "gt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}

/// Perform `left == right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
Expand Down Expand Up @@ -1770,7 +1881,7 @@ macro_rules! typed_cmp {
}

macro_rules! typed_compares {
($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR: ident) => {{
($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR: ident, $OP_BINARY: ident) => {{
match ($LEFT.data_type(), $RIGHT.data_type()) {
(DataType::Boolean, DataType::Boolean) => {
typed_cmp!($LEFT, $RIGHT, BooleanArray, $OP_BOOL)
Expand Down Expand Up @@ -1811,6 +1922,12 @@ macro_rules! typed_compares {
(DataType::LargeUtf8, DataType::LargeUtf8) => {
typed_cmp!($LEFT, $RIGHT, LargeStringArray, $OP_STR, i64)
}
(DataType::Binary, DataType::Binary) => {
typed_cmp!($LEFT, $RIGHT, BinaryArray, $OP_BINARY, i32)
}
(DataType::LargeBinary, DataType::LargeBinary) => {
typed_cmp!($LEFT, $RIGHT, LargeBinaryArray, $OP_BINARY, i64)
}
(
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
Expand Down Expand Up @@ -1918,47 +2035,47 @@ macro_rules! typed_compares {
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `eq_binary` as the binary-array operator.
typed_compares!(left, right, eq_bool, eq, eq_utf8)
typed_compares!(left, right, eq_bool, eq, eq_utf8, eq_binary)
}

/// Perform `left != right` operation on two (dynamic) [`Array`]s.
///
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `neq_binary` as the binary-array operator.
typed_compares!(left, right, neq_bool, neq, neq_utf8)
typed_compares!(left, right, neq_bool, neq, neq_utf8, neq_binary)
}

/// Perform `left < right` operation on two (dynamic) [`Array`]s.
///
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `lt_binary` as the binary-array operator.
typed_compares!(left, right, lt_bool, lt, lt_utf8)
typed_compares!(left, right, lt_bool, lt, lt_utf8, lt_binary)
}

/// Perform `left <= right` operation on two (dynamic) [`Array`]s.
///
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn lt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `lt_eq_binary` as the binary-array operator.
typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8)
typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8, lt_eq_binary)
}

/// Perform `left > right` operation on two (dynamic) [`Array`]s.
///
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `gt_binary` as the binary-array operator.
typed_compares!(left, right, gt_bool, gt, gt_utf8)
typed_compares!(left, right, gt_bool, gt, gt_utf8, gt_binary)
}

/// Perform `left >= right` operation on two (dynamic) [`Array`]s.
///
/// The comparison only happens when both arrays have the same type; otherwise this returns
/// a casting error.
pub fn gt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
// NOTE(review): these two invocations appear to be the old and new sides of a diff hunk;
// the six-argument form additionally passes `gt_eq_binary` as the binary-array operator.
typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8)
typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8, gt_eq_binary)
}

/// Perform `left == right` operation on two [`PrimitiveArray`]s.
Expand Down Expand Up @@ -3055,10 +3172,10 @@ mod tests {
);
// Invokes `test_binary_scalar!` for `gt_eq_binary_scalar` with the scalar "flight".
// NOTE(review): the paired input lines and paired expected lines appear to be the old and
// new sides of a diff hunk; the longer pair adds the element `[0xff, 0xf8]`, which is not
// valid UTF-8, and a matching fifth expected value.
test_binary_scalar!(
test_binary_array_gt_eq_scalar,
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
gt_eq_binary_scalar,
vec![false, false, true, true]
vec![false, false, true, true, true]
);

// Expected behaviour:
Expand Down Expand Up @@ -3843,6 +3960,114 @@ mod tests {
assert_eq!(neq_dyn_scalar(&array, 8).unwrap(), expected);
}

#[test]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could get a test with nulls?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I will add them

fn test_eq_dyn_binary_scalar() {
    // The same five values in both array types; the last element is not valid UTF-8.
    let small = BinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let large = LargeBinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let needle: &[u8] = b"flight";
    // Only index 2 holds the scalar itself.
    let want = BooleanArray::from(vec![false, false, true, false, false]);

    let got = eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got, want);
    let got = eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got, want);
}

#[test]
fn test_neq_dyn_binary_scalar() {
    // The same five values in both array types; the last element is not valid UTF-8.
    let small = BinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let large = LargeBinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let needle: &[u8] = b"flight";
    // Every index except 2 (the scalar itself) is expected to differ.
    let want = BooleanArray::from(vec![true, true, false, true, true]);

    let got = neq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got, want);
    let got = neq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got, want);
}

#[test]
fn test_lt_dyn_binary_scalar() {
    // The same five values in both array types; the last element is not valid UTF-8.
    let small = BinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let large = LargeBinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let needle: &[u8] = b"flight";
    // "arrow" and "datafusion" are expected to be strictly below the scalar.
    let want = BooleanArray::from(vec![true, true, false, false, false]);

    let got = lt_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got, want);
    let got = lt_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got, want);
}

#[test]
fn test_lt_eq_dyn_binary_scalar() {
    // The same five values in both array types; the last element is not valid UTF-8.
    let small = BinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let large = LargeBinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let needle: &[u8] = b"flight";
    // The first three values are expected to be at or below the scalar.
    let want = BooleanArray::from(vec![true, true, true, false, false]);

    let got = lt_eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got, want);
    let got = lt_eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got, want);
}

#[test]
fn test_gt_dyn_binary_scalar() {
let array = BinaryArray::from_vec(
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
);
let large_array = LargeBinaryArray::from_vec(
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
);
let scalar = "flight".as_bytes();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps one of these tests could use a non-UTF8 string as the scalar.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

let expected = BooleanArray::from(vec![false, false, false, true, true]);

assert_eq!(gt_dyn_binary_scalar(&array, scalar).unwrap(), expected);
assert_eq!(
gt_dyn_binary_scalar(&large_array, scalar).unwrap(),
expected
);
}

#[test]
fn test_gt_eq_dyn_binary_scalar() {
    // The same five values in both array types; the last element is not valid UTF-8.
    let small = BinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let large = LargeBinaryArray::from_vec(vec![
        b"arrow",
        b"datafusion",
        b"flight",
        b"parquet",
        &[0xff, 0xf8],
    ]);
    let needle: &[u8] = b"flight";
    // The last three values are expected to be at or above the scalar.
    let want = BooleanArray::from(vec![false, false, true, true, true]);

    let got = gt_eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got, want);
    let got = gt_eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got, want);
}

#[test]
fn test_eq_dyn_utf8_scalar() {
let array = StringArray::from(vec!["abc", "def", "xyz"]);
Expand Down