Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support for scalar comparison of dictionary.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jan 19, 2022
1 parent 84a4911 commit 98ff7f1
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 49 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ compute_bitwise = []
compute_boolean = []
compute_boolean_kleene = []
compute_cast = ["lexical-core", "compute_take"]
compute_comparison = []
compute_comparison = ["compute_take"]
compute_concatenate = []
compute_contains = []
compute_filter = []
Expand Down
13 changes: 13 additions & 0 deletions src/compute/comparison/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pub mod utf8;
mod simd;
pub use simd::{Simd8, Simd8Lanes, Simd8PartialEq, Simd8PartialOrd};

use super::take::take_boolean;
pub(crate) use primitive::{
compare_values_op as primitive_compare_values_op,
compare_values_op_scalar as primitive_compare_values_op_scalar,
Expand Down Expand Up @@ -266,6 +267,14 @@ macro_rules! compare_scalar {
let rhs = rhs.as_any().downcast_ref::<BinaryScalar<i64>>().unwrap();
binary::$op::<i64>(lhs, rhs.value().unwrap())
}
Dictionary(key_type) => {
match_integer_type!(key_type, |$T| {
let lhs = lhs.as_any().downcast_ref::<DictionaryArray<$T>>().unwrap();
let values = $op(lhs.values().as_ref(), rhs);

take_boolean(&values, lhs.keys())
})
}
_ => todo!("Comparisons of {:?} are not yet supported", lhs.data_type()),
}
}};
Expand Down Expand Up @@ -363,6 +372,10 @@ pub fn can_gt_eq(data_type: &DataType) -> bool {

// Whether `data_type` is one of the types for which equality and ordering comparisons are currently supported.
fn can_partial_eq_and_ord(data_type: &DataType) -> bool {
if let DataType::Dictionary(_, values, _) = data_type.to_logical_type() {
return can_partial_eq_and_ord(values.as_ref());
}

matches!(
data_type,
DataType::Boolean
Expand Down
2 changes: 2 additions & 0 deletions src/compute/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ mod primitive;
mod structure;
mod utf8;

pub(crate) use boolean::take as take_boolean;

/// Returns a new [`Array`] containing only the elements of `values` at `indices`. Null indices are taken as nulls.
/// The returned array has a length equal to `indices.len()`.
pub fn take<O: Index>(values: &dyn Array, indices: &PrimitiveArray<O>) -> Result<Box<dyn Array>> {
Expand Down
71 changes: 24 additions & 47 deletions src/types/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ pub trait Index:
+ std::ops::AddAssign
+ std::ops::Sub<Output = Self>
+ num_traits::One
+ PartialOrd
+ num_traits::Num
+ Ord
+ num_traits::CheckedAdd
+ PartialOrd
+ Ord
{
/// Convert itself to [`usize`].
fn to_usize(&self) -> usize;
Expand All @@ -32,53 +32,30 @@ pub trait Index:
}
}

/// [`Index`] implementation for `i32`.
impl Index for i32 {
    /// Converts the value to `usize` with a plain `as` cast (no range check is performed).
    #[inline]
    fn to_usize(&self) -> usize {
        let raw = *self;
        raw as usize
    }

    /// Converts `value` to `i32`, yielding `None` when the checked conversion fails.
    #[inline]
    fn from_usize(value: usize) -> Option<Self> {
        i32::try_from(value).ok()
    }
}

/// [`Index`] implementation for `i64`.
impl Index for i64 {
    /// Converts the value to `usize` with a plain `as` cast (no range check is performed).
    #[inline]
    fn to_usize(&self) -> usize {
        let raw = *self;
        raw as usize
    }

    /// Converts `value` to `i64`, yielding `None` when the checked conversion fails.
    #[inline]
    fn from_usize(value: usize) -> Option<Self> {
        i64::try_from(value).ok()
    }
}

impl Index for u32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
/// Generates an [`Index`] implementation for the integer type `$int`.
///
/// `to_usize` is a plain `as` cast, while `from_usize` goes through the
/// checked `try_from` conversion and maps a failure to `None`.
macro_rules! index {
    ($int:ty) => {
        impl Index for $int {
            #[inline]
            fn to_usize(&self) -> usize {
                let raw = *self;
                raw as usize
            }

            #[inline]
            fn from_usize(value: usize) -> Option<Self> {
                <$int>::try_from(value).ok()
            }
        }
    };
}

/// [`Index`] implementation for `u64`.
impl Index for u64 {
    /// Converts the value to `usize` with a plain `as` cast (no range check is performed).
    #[inline]
    fn to_usize(&self) -> usize {
        let raw = *self;
        raw as usize
    }

    /// Converts `value` to `u64`, yielding `None` when the checked conversion fails.
    #[inline]
    fn from_usize(value: usize) -> Option<Self> {
        u64::try_from(value).ok()
    }
}
// Implement `Index` for the signed and unsigned 8-, 16-, 32- and 64-bit integer types
// by invoking the `index!` macro once per type.
index!(i8);
index!(i16);
index!(i32);
index!(i64);
index!(u8);
index!(u16);
index!(u32);
index!(u64);

/// Range of [`Index`], equivalent to `(a..b)`.
/// `Step` is unstable in Rust, which does not allow us to implement (a..b) for [`Index`].
Expand Down
3 changes: 2 additions & 1 deletion tests/it/compute/comparison.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use arrow2::array::*;
use arrow2::compute::comparison::boolean::*;
use arrow2::datatypes::TimeUnit;
use arrow2::datatypes::{DataType::*, IntervalUnit};
use arrow2::datatypes::{IntegerType, TimeUnit};
use arrow2::scalar::new_scalar;

#[test]
Expand Down Expand Up @@ -41,6 +41,7 @@ fn consistency() {
Duration(TimeUnit::Millisecond),
Duration(TimeUnit::Microsecond),
Duration(TimeUnit::Nanosecond),
Dictionary(IntegerType::Int32, Box::new(LargeBinary), false),
];

// array <> array
Expand Down

0 comments on commit 98ff7f1

Please sign in to comment.