-
Notifications
You must be signed in to change notification settings - Fork 853
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dyn compare for binary array #1238
Merged
alamb
merged 9 commits into
apache:master
from
HaoYang670:issue1108_part2_dyn_compare_binary
Jan 26, 2022
Merged
Changes from 6 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
67566a1
dyn compare two binary array
HaoYang670 61b6c86
add dyn comparison for binary array
HaoYang670 de5f958
Merge branch 'master' into issue1108_part2_dyn_compare_binary
HaoYang670 06a9dee
add tests for dyn compare binary array and scalar
HaoYang670 5e90a84
remove DictionaryArray from dyn compare, because not find an easy way…
HaoYang670 fd97848
fix mistakes in test code
HaoYang670 a17011f
add Nones into the test cases
HaoYang670 c4fbcc5
add non utf8 scalar
HaoYang670 35b8234
correct the code format
HaoYang670 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1261,6 +1261,117 @@ where | |
} | ||
} | ||
|
||
/// Perform `left == right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
eq_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
eq_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"eq_dyn_binary_scalar only supports Binary or LargeBinary arrays".to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left != right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn neq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
neq_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
neq_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"neq_dyn_binary_scalar only supports Binary or LargeBinary arrays" | ||
.to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left < right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn lt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
lt_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
lt_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"lt_dyn_binary_scalar only supports Binary or LargeBinary arrays".to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left <= right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn lt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
lt_eq_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
lt_eq_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"lt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays" | ||
.to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left > right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn gt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
gt_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
gt_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"gt_dyn_binary_scalar only supports Binary or LargeBinary arrays".to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left >= right` operation on an array and a binary scalar | ||
/// value. Supports BinaryArray and LargeBinaryArray | ||
/// | ||
/// Returns an `ArrowError::ComputeError` if `left` has any other data type. | ||
pub fn gt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> { | ||
match left.data_type() { | ||
DataType::Binary => { | ||
let left = as_generic_binary_array::<i32>(left); | ||
gt_eq_binary_scalar(left, right) | ||
} | ||
DataType::LargeBinary => { | ||
let left = as_generic_binary_array::<i64>(left); | ||
gt_eq_binary_scalar(left, right) | ||
} | ||
_ => Err(ArrowError::ComputeError( | ||
"gt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays" | ||
.to_string(), | ||
)), | ||
} | ||
} | ||
|
||
/// Perform `left == right` operation on an array and a string scalar | ||
/// value. Supports StringArrays, and DictionaryArrays that have string values | ||
pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> { | ||
|
@@ -1770,7 +1881,7 @@ macro_rules! typed_cmp { | |
} | ||
|
||
macro_rules! typed_compares { | ||
($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR: ident) => {{ | ||
($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR: ident, $OP_BINARY: ident) => {{ | ||
match ($LEFT.data_type(), $RIGHT.data_type()) { | ||
(DataType::Boolean, DataType::Boolean) => { | ||
typed_cmp!($LEFT, $RIGHT, BooleanArray, $OP_BOOL) | ||
|
@@ -1811,6 +1922,12 @@ macro_rules! typed_compares { | |
(DataType::LargeUtf8, DataType::LargeUtf8) => { | ||
typed_cmp!($LEFT, $RIGHT, LargeStringArray, $OP_STR, i64) | ||
} | ||
(DataType::Binary, DataType::Binary) => { | ||
typed_cmp!($LEFT, $RIGHT, BinaryArray, $OP_BINARY, i32) | ||
} | ||
(DataType::LargeBinary, DataType::LargeBinary) => { | ||
typed_cmp!($LEFT, $RIGHT, LargeBinaryArray, $OP_BINARY, i64) | ||
} | ||
( | ||
DataType::Timestamp(TimeUnit::Nanosecond, _), | ||
DataType::Timestamp(TimeUnit::Nanosecond, _), | ||
|
@@ -1918,47 +2035,47 @@ macro_rules! typed_compares { | |
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, eq_bool, eq, eq_utf8) | ||
typed_compares!(left, right, eq_bool, eq, eq_utf8, eq_binary) | ||
} | ||
|
||
/// Perform `left != right` operation on two (dynamic) [`Array`]s. | ||
/// | ||
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, neq_bool, neq, neq_utf8) | ||
typed_compares!(left, right, neq_bool, neq, neq_utf8, neq_binary) | ||
} | ||
|
||
/// Perform `left < right` operation on two (dynamic) [`Array`]s. | ||
/// | ||
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, lt_bool, lt, lt_utf8) | ||
typed_compares!(left, right, lt_bool, lt, lt_utf8, lt_binary) | ||
} | ||
|
||
/// Perform `left <= right` operation on two (dynamic) [`Array`]s. | ||
/// | ||
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn lt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8) | ||
typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8, lt_eq_binary) | ||
} | ||
|
||
/// Perform `left > right` operation on two (dynamic) [`Array`]s. | ||
/// | ||
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, gt_bool, gt, gt_utf8) | ||
typed_compares!(left, right, gt_bool, gt, gt_utf8, gt_binary) | ||
} | ||
|
||
/// Perform `left >= right` operation on two (dynamic) [`Array`]s. | ||
/// | ||
/// Only when two arrays are of the same type the comparison will happen otherwise it will err | ||
/// with a casting error. | ||
pub fn gt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> { | ||
typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8) | ||
typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8, gt_eq_binary) | ||
} | ||
|
||
/// Perform `left == right` operation on two [`PrimitiveArray`]s. | ||
|
@@ -3055,10 +3172,10 @@ mod tests { | |
); | ||
test_binary_scalar!( | ||
test_binary_array_gt_eq_scalar, | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet"], | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
"flight".as_bytes(), | ||
gt_eq_binary_scalar, | ||
vec![false, false, true, true] | ||
vec![false, false, true, true, true] | ||
); | ||
|
||
// Expected behaviour: | ||
|
@@ -3843,6 +3960,114 @@ mod tests { | |
assert_eq!(neq_dyn_scalar(&array, 8).unwrap(), expected); | ||
} | ||
|
||
/// Checks `eq_dyn_binary_scalar` on a `BinaryArray` and a `LargeBinaryArray` | ||
/// holding the same values (one of them not valid UTF-8) against `flight`. | ||
#[test] | ||
fn test_eq_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
let expected = BooleanArray::from(vec![false, false, true, false, false]); | ||
|
||
assert_eq!(eq_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
eq_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
/// Checks `neq_dyn_binary_scalar` on a `BinaryArray` and a `LargeBinaryArray` | ||
/// holding the same values (one of them not valid UTF-8) against `flight`. | ||
#[test] | ||
fn test_neq_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
let expected = BooleanArray::from(vec![true, true, false, true, true]); | ||
|
||
assert_eq!(neq_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
neq_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
/// Checks `lt_dyn_binary_scalar` on a `BinaryArray` and a `LargeBinaryArray` | ||
/// holding the same values (one of them not valid UTF-8) against `flight`. | ||
#[test] | ||
fn test_lt_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
let expected = BooleanArray::from(vec![true, true, false, false, false]); | ||
|
||
assert_eq!(lt_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
lt_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
/// Checks `lt_eq_dyn_binary_scalar` on a `BinaryArray` and a `LargeBinaryArray` | ||
/// holding the same values (one of them not valid UTF-8) against `flight`. | ||
#[test] | ||
fn test_lt_eq_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
let expected = BooleanArray::from(vec![true, true, true, false, false]); | ||
|
||
assert_eq!(lt_eq_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
lt_eq_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
#[test] | ||
fn test_gt_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps one of these tests could use a non-UTF8 string as the scalar. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! |
||
let expected = BooleanArray::from(vec![false, false, false, true, true]); | ||
|
||
assert_eq!(gt_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
gt_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
/// Checks `gt_eq_dyn_binary_scalar` on a `BinaryArray` and a `LargeBinaryArray` | ||
/// holding the same values (one of them not valid UTF-8) against `flight`. | ||
#[test] | ||
fn test_gt_eq_dyn_binary_scalar() { | ||
let array = BinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let large_array = LargeBinaryArray::from_vec( | ||
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]], | ||
); | ||
let scalar = "flight".as_bytes(); | ||
let expected = BooleanArray::from(vec![false, false, true, true, true]); | ||
|
||
assert_eq!(gt_eq_dyn_binary_scalar(&array, scalar).unwrap(), expected); | ||
assert_eq!( | ||
gt_eq_dyn_binary_scalar(&large_array, scalar).unwrap(), | ||
expected | ||
); | ||
} | ||
|
||
#[test] | ||
fn test_eq_dyn_utf8_scalar() { | ||
let array = StringArray::from(vec!["abc", "def", "xyz"]); | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps we could get a test with nulls?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, I will add them