From 859ee78f02da11f3e52691c56852b73fe97017ec Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sun, 19 Sep 2021 14:21:12 +0200 Subject: [PATCH] Add simdutf8 feature --- Cargo.toml | 5 +++ src/array/ord.rs | 71 +++++++++++++++++++++++++++++--------- src/array/specification.rs | 3 ++ 3 files changed, 63 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 218ed07b33b..551c24bacf3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,8 +64,13 @@ avro-rs = { version = "0.13", optional = true, default_features = false } # for division/remainder optimization at runtime strength_reduce = { version = "0.2", optional = true } + +# For instruction multiversioning multiversion = { version = "0.6.1", optional = true } +# For SIMD utf8 validation +simdutf8 = { version = "0.1.3", optional = true } + [dev-dependencies] rand = "0.8" criterion = "0.3" diff --git a/src/array/ord.rs b/src/array/ord.rs index 17dd0bc4653..319af374ab1 100644 --- a/src/array/ord.rs +++ b/src/array/ord.rs @@ -48,51 +48,90 @@ where } fn compare_primitives(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::>().unwrap().clone(); - let right = right.as_any().downcast_ref::>().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); + let right = right + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); Box::new(move |i, j| total_cmp(&left.value(i), &right.value(j))) } fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::().unwrap().clone(); - let right = right.as_any().downcast_ref::().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + let right = right + .as_any() + .downcast_ref::() + .unwrap() + .clone(); Box::new(move |i, j| left.value(i).cmp(&right.value(j))) } fn compare_f32(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::>().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); let right = right .as_any() .downcast_ref::>() - .unwrap().clone(); + .unwrap() + .clone(); Box::new(move |i, j| total_cmp_f32(&left.value(i), &right.value(j))) } fn compare_f64(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::>().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); let right = right .as_any() .downcast_ref::>() - .unwrap().clone(); + .unwrap() + .clone(); Box::new(move |i, j| total_cmp_f64(&left.value(i), &right.value(j))) } fn compare_string(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::>().unwrap().clone(); - let right = right.as_any().downcast_ref::>().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); + let right = right + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); Box::new(move |i, j| left.value(i).cmp(right.value(j))) } fn compare_binary(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left = left.as_any().downcast_ref::>().unwrap().clone(); - let right = right.as_any().downcast_ref::>().unwrap().clone(); + let left = left + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); + let right = right + .as_any() + .downcast_ref::>() + .unwrap() + .clone(); Box::new(move |i, j| left.value(i).cmp(right.value(j))) } -fn compare_dict( - left: &DictionaryArray, - right: &DictionaryArray, -) -> Result +fn compare_dict(left: &DictionaryArray, right: &DictionaryArray) -> Result where K: DictionaryKey, { diff --git a/src/array/specification.rs b/src/array/specification.rs index dd1e8b77ad6..e38e425e732 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -79,6 +79,9 @@ pub fn check_offsets_and_utf8(offsets: &[O], values: &[u8]) -> usize let end = window[1].to_usize(); assert!(end <= values.len()); let slice = unsafe { std::slice::from_raw_parts(values.as_ptr().add(start), end - start) }; + #[cfg(feature = "simdutf8")] + simdutf8::basic::from_utf8(slice).expect("A non-utf8 string was passed."); + #[cfg(not(feature = "simdutf8"))] std::str::from_utf8(slice).expect("A non-utf8 string was passed."); }); len