Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Add simdutf8 feature
Browse files (browse the repository at this point in the history)
  • Loading branch information
Dandandan committed Sep 19, 2021
1 parent 55ff79c commit 859ee78
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 16 deletions.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,13 @@ avro-rs = { version = "0.13", optional = true, default_features = false }

# for division/remainder optimization at runtime
strength_reduce = { version = "0.2", optional = true }

# For instruction multiversioning
multiversion = { version = "0.6.1", optional = true }

# For SIMD utf8 validation
simdutf8 = { version = "0.1.3", optional = true }

[dev-dependencies]
rand = "0.8"
criterion = "0.3"
Expand Down
71 changes: 55 additions & 16 deletions src/array/ord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,51 +48,90 @@ where
}

/// Builds a boxed comparator over two arrays of primitive type `T`.
///
/// Both inputs are downcast to `PrimitiveArray<T>` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `PrimitiveArray<T>`. The closure compares `left.value(i)` with `right.value(j)`
/// by delegating to `total_cmp`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so each
/// binding appears twice. The single-line forms are the lines the commit removed;
/// the one-call-per-line forms are the lines it added. Both spell the same expression.
fn compare_primitives<T: NativeType + Ord>(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line bindings.
let left = left.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap().clone();
let right = right.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap().clone();
// Added by the commit: the same bindings, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<PrimitiveArray<T>>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<PrimitiveArray<T>>()
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| total_cmp(&left.value(i), &right.value(j)))
}

/// Builds a boxed comparator over two boolean arrays.
///
/// Both inputs are downcast to `BooleanArray` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `BooleanArray`. The closure orders `left.value(i)` against `right.value(j)`
/// with `cmp`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so each
/// binding appears twice. The single-line forms are the lines the commit removed;
/// the one-call-per-line forms are the lines it added. Both spell the same expression.
fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line bindings.
let left = left.as_any().downcast_ref::<BooleanArray>().unwrap().clone();
let right = right.as_any().downcast_ref::<BooleanArray>().unwrap().clone();
// Added by the commit: the same bindings, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
}

/// Builds a boxed comparator over two `f32` arrays.
///
/// Both inputs are downcast to `PrimitiveArray<f32>` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `PrimitiveArray<f32>`. The closure compares `left.value(i)` with `right.value(j)`
/// by delegating to `total_cmp_f32`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so removed
/// and added lines sit side by side. Read literally, the `right` chain below is not
/// valid Rust; drop the lines marked "removed" to obtain the post-commit function.
fn compare_f32(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line form of the `left` binding.
let left = left.as_any().downcast_ref::<PrimitiveArray<f32>>().unwrap().clone();
// Added by the commit: the same binding, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<PrimitiveArray<f32>>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<PrimitiveArray<f32>>()
// Removed by the commit: combined tail of the chain.
.unwrap().clone();
// Added by the commit: the same tail split across two lines.
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| total_cmp_f32(&left.value(i), &right.value(j)))
}

/// Builds a boxed comparator over two `f64` arrays.
///
/// Both inputs are downcast to `PrimitiveArray<f64>` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `PrimitiveArray<f64>`. The closure compares `left.value(i)` with `right.value(j)`
/// by delegating to `total_cmp_f64`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so removed
/// and added lines sit side by side. Read literally, the `right` chain below is not
/// valid Rust; drop the lines marked "removed" to obtain the post-commit function.
fn compare_f64(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line form of the `left` binding.
let left = left.as_any().downcast_ref::<PrimitiveArray<f64>>().unwrap().clone();
// Added by the commit: the same binding, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<PrimitiveArray<f64>>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<PrimitiveArray<f64>>()
// Removed by the commit: combined tail of the chain.
.unwrap().clone();
// Added by the commit: the same tail split across two lines.
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| total_cmp_f64(&left.value(i), &right.value(j)))
}

/// Builds a boxed comparator over two UTF-8 string arrays with offset type `O`.
///
/// Both inputs are downcast to `Utf8Array<O>` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `Utf8Array<O>`. The closure orders `left.value(i)` against `right.value(j)`
/// with `cmp`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so each
/// binding appears twice. The single-line forms are the lines the commit removed;
/// the one-call-per-line forms are the lines it added. Both spell the same expression.
fn compare_string<O: Offset>(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line bindings.
let left = left.as_any().downcast_ref::<Utf8Array<O>>().unwrap().clone();
let right = right.as_any().downcast_ref::<Utf8Array<O>>().unwrap().clone();
// Added by the commit: the same bindings, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<Utf8Array<O>>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<Utf8Array<O>>()
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| left.value(i).cmp(right.value(j)))
}

/// Builds a boxed comparator over two binary arrays with offset type `O`.
///
/// Both inputs are downcast to `BinaryArray<O>` and cloned so that the returned
/// `move` closure owns its own copies. The `unwrap` panics if an input is not a
/// `BinaryArray<O>`. The closure orders `left.value(i)` against `right.value(j)`
/// with `cmp`.
///
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so each
/// binding appears twice. The single-line forms are the lines the commit removed;
/// the one-call-per-line forms are the lines it added. Both spell the same expression.
fn compare_binary<O: Offset>(left: &dyn Array, right: &dyn Array) -> DynComparator {
// Removed by the commit: single-line bindings.
let left = left.as_any().downcast_ref::<BinaryArray<O>>().unwrap().clone();
let right = right.as_any().downcast_ref::<BinaryArray<O>>().unwrap().clone();
// Added by the commit: the same bindings, reformatted one call per line.
let left = left
.as_any()
.downcast_ref::<BinaryArray<O>>()
.unwrap()
.clone();
let right = right
.as_any()
.downcast_ref::<BinaryArray<O>>()
.unwrap()
.clone();
// `i` indexes into `left`, `j` into `right`.
Box::new(move |i, j| left.value(i).cmp(right.value(j)))
}

fn compare_dict<K>(
left: &DictionaryArray<K>,
right: &DictionaryArray<K>,
) -> Result<DynComparator>
fn compare_dict<K>(left: &DictionaryArray<K>, right: &DictionaryArray<K>) -> Result<DynComparator>
where
K: DictionaryKey,
{
Expand Down
3 changes: 3 additions & 0 deletions src/array/specification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ pub fn check_offsets_and_utf8<O: Offset>(offsets: &[O], values: &[u8]) -> usize
let end = window[1].to_usize();
assert!(end <= values.len());
let slice = unsafe { std::slice::from_raw_parts(values.as_ptr().add(start), end - start) };
#[cfg(feature = "simdutf8")]
simdutf8::basic::from_utf8(slice).expect("A non-utf8 string was passed.");
#[cfg(not(feature = "simdutf8"))]
std::str::from_utf8(slice).expect("A non-utf8 string was passed.");
});
len
Expand Down

0 comments on commit 859ee78

Please sign in to comment.