From 972429220077d12b56209bb5df6561191088c3eb Mon Sep 17 00:00:00 2001 From: Usamoi Date: Thu, 27 Oct 2022 12:15:07 +0800 Subject: [PATCH] fix(hashtable): fix clippy warnings --- .../src/mem_allocator/global_allocator.rs | 27 +++---- .../base/src/mem_allocator/mmap_allocator.rs | 2 +- src/common/hashtable/src/container.rs | 3 + src/common/hashtable/src/hashtable.rs | 12 +++- src/common/hashtable/src/stack_hashtable.rs | 12 +++- src/common/hashtable/src/table0.rs | 10 ++- src/common/hashtable/src/table1.rs | 11 ++- src/common/hashtable/src/traits.rs | 13 +++- .../hashtable/src/twolevel_hashtable.rs | 72 +++++++++++++------ src/common/hashtable/src/unsized_hashtable.rs | 10 +++ src/common/hashtable/tests/it/main.rs | 2 +- .../aggregates/aggregate_distinct_state.rs | 6 +- .../aggregates/aggregate_distinct_state.rs | 12 ++-- .../group_by/aggregator_groups_builder.rs | 2 +- .../group_by/aggregator_polymorphic_keys.rs | 2 +- .../transforms/group_by/aggregator_state.rs | 2 +- 16 files changed, 135 insertions(+), 63 deletions(-) diff --git a/src/common/base/src/mem_allocator/global_allocator.rs b/src/common/base/src/mem_allocator/global_allocator.rs index cfa1f50ae91ad..01eab7ce51c59 100644 --- a/src/common/base/src/mem_allocator/global_allocator.rs +++ b/src/common/base/src/mem_allocator/global_allocator.rs @@ -104,22 +104,25 @@ unsafe impl GlobalAlloc for GlobalAllocator { #[inline] unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + use std::cmp::Ordering::*; let ptr = NonNull::new(ptr).unwrap_unchecked(); let new_layout = Layout::from_size_align(new_size, layout.align()).unwrap(); - if layout.size() < new_size { - if let Ok(ptr) = GlobalAllocator.grow(ptr, layout, new_layout) { - ptr.as_ptr() as *mut u8 - } else { - null_mut() + match layout.size().cmp(&new_size) { + Less => { + if let Ok(ptr) = GlobalAllocator.grow(ptr, layout, new_layout) { + ptr.as_ptr() as *mut u8 + } else { + null_mut() + } } - } else if layout.size() > new_size { - if let Ok(ptr) = GlobalAllocator.shrink(ptr, layout, new_layout) { - ptr.as_ptr() as *mut u8 - } else { - null_mut() + Greater => { + if let Ok(ptr) = GlobalAllocator.shrink(ptr, layout, new_layout) { + ptr.as_ptr() as *mut u8 + } else { + null_mut() + } } - } else { - ptr.as_ptr() as *mut u8 + Equal => ptr.as_ptr() as *mut u8, } } } diff --git a/src/common/base/src/mem_allocator/mmap_allocator.rs b/src/common/base/src/mem_allocator/mmap_allocator.rs index 02e21ac2ce64f..7830d5129ad13 100644 --- a/src/common/base/src/mem_allocator/mmap_allocator.rs +++ b/src/common/base/src/mem_allocator/mmap_allocator.rs @@ -255,7 +255,7 @@ pub mod linux { while length < uname.release.len() && uname.release[length] != 0 { length += 1; } - let slice = unsafe { &*(&uname.release[..length] as *const [i8] as *const [u8]) }; + let slice = unsafe { &*(&uname.release[..length] as *const _ as *const [u8]) }; let ver = std::str::from_utf8(slice).unwrap(); let semver = semver::Version::parse(ver).unwrap(); let result = (semver.major.min(65535) as u32) << 16 diff --git a/src/common/hashtable/src/container.rs b/src/common/hashtable/src/container.rs index b7656d02263e7..da5326e82e26b 100644 --- a/src/common/hashtable/src/container.rs +++ b/src/common/hashtable/src/container.rs @@ -20,6 +20,9 @@ use std::ops::DerefMut; use std::ptr::null_mut; use std::ptr::NonNull; +/// # Safety +/// +/// Any foreign type shouldn't implement this trait. pub unsafe trait Container where Self: Deref + DerefMut { diff --git a/src/common/hashtable/src/hashtable.rs b/src/common/hashtable/src/hashtable.rs index e1f603bd4bd6a..ddfcb5e1a8bc3 100644 --- a/src/common/hashtable/src/hashtable.rs +++ b/src/common/hashtable/src/hashtable.rs @@ -53,6 +53,16 @@ where } } +impl Default for Hashtable +where + K: Keyable, + A: Allocator + Clone + Default, +{ + fn default() -> Self { + Self::new() + } +} + impl Hashtable where K: Keyable, @@ -177,7 +187,7 @@ where pub fn set_merge(&mut self, other: &Self) { if let Some(entry) = other.zero.0.as_ref() { self.zero = ZeroEntry(Some(Entry { - key: entry.key.clone(), + key: entry.key, val: MaybeUninit::uninit(), _alignment: [0; 0], })); diff --git a/src/common/hashtable/src/stack_hashtable.rs b/src/common/hashtable/src/stack_hashtable.rs index 25cb7c58f479d..738b63c5a81a7 100644 --- a/src/common/hashtable/src/stack_hashtable.rs +++ b/src/common/hashtable/src/stack_hashtable.rs @@ -59,6 +59,16 @@ where } } +impl Default for StackHashtable +where + K: Keyable, + A: Allocator + Clone + Default, +{ + fn default() -> Self { + Self::new() + } +} + impl StackHashtable where K: Keyable, @@ -183,7 +193,7 @@ where pub fn set_merge(&mut self, other: &Self) { if let Some(entry) = other.zero.0.as_ref() { self.zero = ZeroEntry(Some(Entry { - key: entry.key.clone(), + key: entry.key, val: MaybeUninit::uninit(), _alignment: [0; 0], })); diff --git a/src/common/hashtable/src/table0.rs b/src/common/hashtable/src/table0.rs index 07f6c9b5720b8..8f79f24f98ad4 100644 --- a/src/common/hashtable/src/table0.rs +++ b/src/common/hashtable/src/table0.rs @@ -37,6 +37,14 @@ impl Entry { unsafe { self.key.assume_init_ref() } } // this function can only be used in external crates + /// # Safety + /// + /// The new key should be equals the old key. + #[inline(always)] + pub unsafe fn set_key(&mut self, key: K) { + self.key.write(key); + } + // this function can only be used in external crates #[inline(always)] pub fn get(&self) -> &V { unsafe { self.val.assume_init_ref() } @@ -267,7 +275,7 @@ where pub unsafe fn set_merge(&mut self, other: &Self) { assert!(self.capacity() >= self.len() + other.len()); for entry in other.iter() { - let key = *(*entry).key.assume_init_ref(); + let key = entry.key.assume_init(); let _ = self.insert(key); } } diff --git a/src/common/hashtable/src/table1.rs b/src/common/hashtable/src/table1.rs index 8ca098c38c523..617a36d0eccd8 100644 --- a/src/common/hashtable/src/table1.rs +++ b/src/common/hashtable/src/table1.rs @@ -16,6 +16,8 @@ use std::alloc::Allocator; use super::table0::Entry; +type Ent = Entry<[u8; 2], V>; + pub struct Table1 { pub(crate) data: Box<[Entry<[u8; 2], V>; 65536], A>, pub(crate) len: usize, @@ -39,7 +41,7 @@ impl Table1 { pub fn len(&self) -> usize { self.len } - pub fn get(&self, key: [u8; 2]) -> Option<&Entry<[u8; 2], V>> { + pub fn get(&self, key: [u8; 2]) -> Option<&Ent> { let e = &self.data[key[1] as usize * 256 + key[0] as usize]; if unsafe { e.key.assume_init() } == key { Some(e) @@ -47,7 +49,7 @@ impl Table1 { None } } - pub fn get_mut(&mut self, key: [u8; 2]) -> Option<&mut Entry<[u8; 2], V>> { + pub fn get_mut(&mut self, key: [u8; 2]) -> Option<&mut Ent> { let e = &mut self.data[key[1] as usize * 256 + key[0] as usize]; if unsafe { e.key.assume_init() } == key { Some(e) @@ -58,10 +60,7 @@ impl Table1 { /// # Safety /// /// The resulted `MaybeUninit` should be initialized immedidately. - pub fn insert( - &mut self, - key: [u8; 2], - ) -> Result<&mut Entry<[u8; 2], V>, &mut Entry<[u8; 2], V>> { + pub fn insert(&mut self, key: [u8; 2]) -> Result<&mut Ent, &mut Ent> { let e = &mut self.data[key[1] as usize * 256 + key[0] as usize]; if unsafe { e.key.assume_init() } == key { Err(e) diff --git a/src/common/hashtable/src/traits.rs b/src/common/hashtable/src/traits.rs index 3001629db1102..c952702b20d57 100644 --- a/src/common/hashtable/src/traits.rs +++ b/src/common/hashtable/src/traits.rs @@ -30,9 +30,16 @@ pub unsafe trait Keyable: Sized + Copy + Eq { fn hash(&self) -> u64; } -pub trait UnsizedKeyable { +/// # Safety +/// +/// There shouldn't be any uninitialized bytes or interior mutability in `Self`. +/// The implementation promises `from_bytes(as_bytes(self))` is `self`. +/// `as_bytes` should returns a slice that is valid for reads. +pub unsafe trait UnsizedKeyable { fn as_bytes(&self) -> &[u8]; + /// # Safety + /// The argument should be a return value of `as_bytes` in the same implementation. unsafe fn from_bytes(bytes: &[u8]) -> &Self; } @@ -153,7 +160,7 @@ unsafe impl Keyable for [u8; N] { } } -impl UnsizedKeyable for [u8] { +unsafe impl UnsizedKeyable for [u8] { fn as_bytes(&self) -> &[u8] { self } @@ -163,7 +170,7 @@ impl UnsizedKeyable for [u8] { } } -impl UnsizedKeyable for str { +unsafe impl UnsizedKeyable for str { fn as_bytes(&self) -> &[u8] { self.as_bytes() } diff --git a/src/common/hashtable/src/twolevel_hashtable.rs b/src/common/hashtable/src/twolevel_hashtable.rs index 4288b5cfd16aa..ad21a6d826f7a 100644 --- a/src/common/hashtable/src/twolevel_hashtable.rs +++ b/src/common/hashtable/src/twolevel_hashtable.rs @@ -33,13 +33,15 @@ use super::utils::ZeroEntry; const BUCKETS: usize = 256; const BUCKETS_LG2: u32 = 8; +type Tables = [Table0, A>, A>; BUCKETS]; + pub struct TwolevelHashtable> where K: Keyable, A: Allocator + Clone, { zero: ZeroEntry, - tables: [Table0, A>, A>; BUCKETS], + tables: Tables, } unsafe impl Send @@ -62,6 +64,16 @@ where } } +impl Default for TwolevelHashtable +where + K: Keyable, + A: Allocator + Clone + Default, +{ + fn default() -> Self { + Self::new() + } +} + impl TwolevelHashtable where K: Keyable, @@ -197,8 +209,8 @@ where src.table.dropped = true; res.zero = ZeroEntry(src.zero.take()); for entry in src.table.iter() { - let key = *(*entry).key.assume_init_ref(); - let val = std::ptr::read((*entry).val.assume_init_ref()); + let key = entry.key.assume_init(); + let val = std::ptr::read(entry.val.assume_init_ref()); let hash = K::hash(&key); let index = hash as usize >> (64u32 - BUCKETS_LG2); if unlikely((res.tables[index].len() + 1) * 2 > res.tables[index].capacity()) { @@ -232,7 +244,7 @@ where pub fn set_merge(&mut self, other: &Self) { if let Some(entry) = other.zero.0.as_ref() { self.zero = ZeroEntry(Some(Entry { - key: entry.key.clone(), + key: entry.key, val: MaybeUninit::uninit(), _alignment: [0; 0], })); @@ -252,19 +264,21 @@ where } } +type TwolevelHashtableIterInner<'a, K, V, A> = std::iter::Chain< + std::option::Iter<'a, Entry>, + std::iter::FlatMap< + std::slice::Iter<'a, Table0, A>, A>>, + Table0Iter<'a, K, V>, + fn(&'a Table0, A>, A>) -> Table0Iter<'a, K, V>, + >, +>; + pub struct TwolevelHashtableIter<'a, K, V, A = MmapAllocator> where K: Keyable, A: Allocator + Clone, { - inner: std::iter::Chain< - std::option::Iter<'a, Entry>, - std::iter::FlatMap< - std::slice::Iter<'a, Table0, A>, A>>, - Table0Iter<'a, K, V>, - fn(&'a Table0, A>, A>) -> Table0Iter<'a, K, V>, - >, - >, + inner: TwolevelHashtableIterInner<'a, K, V, A>, } impl<'a, K, V, A> Iterator for TwolevelHashtableIter<'a, K, V, A> @@ -279,19 +293,21 @@ where } } +type TwolevelHashtableIterMutInner<'a, K, V, A> = std::iter::Chain< + std::option::IterMut<'a, Entry>, + std::iter::FlatMap< + std::slice::IterMut<'a, Table0, A>, A>>, + Table0IterMut<'a, K, V>, + fn(&'a mut Table0, A>, A>) -> Table0IterMut<'a, K, V>, + >, +>; + pub struct TwolevelHashtableIterMut<'a, K, V, A = MmapAllocator> where K: Keyable, A: Allocator + Clone, { - inner: std::iter::Chain< - std::option::IterMut<'a, Entry>, - std::iter::FlatMap< - std::slice::IterMut<'a, Table0, A>, A>>, - Table0IterMut<'a, K, V>, - fn(&'a mut Table0, A>, A>) -> Table0IterMut<'a, K, V>, - >, - >, + inner: TwolevelHashtableIterMutInner<'a, K, V, A>, } impl<'a, K, V, A> Iterator for TwolevelHashtableIterMut<'a, K, V, A> @@ -312,7 +328,7 @@ where A: Allocator + Clone, { Onelevel(Hashtable), - Twolevel(TwolevelHashtable), + Twolevel(Box>), } impl HashtableKind @@ -328,6 +344,16 @@ where } } +impl Default for HashtableKind +where + K: Keyable, + A: Allocator + Clone + Default, +{ + fn default() -> Self { + Self::new() + } +} + impl HashtableKind where K: Keyable, @@ -337,7 +363,7 @@ where Self::Onelevel(Hashtable::new_in(allocator)) } pub fn new_twolevel_in(allocator: A) -> Self { - Self::Twolevel(TwolevelHashtable::new_in(allocator)) + Self::Twolevel(Box::new(TwolevelHashtable::new_in(allocator))) } #[inline(always)] pub fn is_empty(&self) -> bool { @@ -437,7 +463,7 @@ where unsafe { if let Onelevel(x) = self { let onelevel = std::ptr::read(x); - let twolevel = TwolevelHashtable::::from(onelevel); + let twolevel = Box::new(TwolevelHashtable::::from(onelevel)); std::ptr::write(self, Twolevel(twolevel)); } } diff --git a/src/common/hashtable/src/unsized_hashtable.rs b/src/common/hashtable/src/unsized_hashtable.rs index a44fa3b59960d..17655e5271423 100644 --- a/src/common/hashtable/src/unsized_hashtable.rs +++ b/src/common/hashtable/src/unsized_hashtable.rs @@ -70,6 +70,16 @@ where } } +impl Default for UnsizedHashtable +where + K: UnsizedKeyable + ?Sized, + A: Allocator + Clone + Default, +{ + fn default() -> Self { + Self::new() + } +} + impl UnsizedHashtable where K: UnsizedKeyable + ?Sized, diff --git a/src/common/hashtable/tests/it/main.rs b/src/common/hashtable/tests/it/main.rs index 453f236d9370f..27dc316ecc6d2 100644 --- a/src/common/hashtable/tests/it/main.rs +++ b/src/common/hashtable/tests/it/main.rs @@ -120,7 +120,7 @@ fn test_unsized_hash_map() { } let mut hashtable = UnsizedHashMap::<[u8], U64>::new(); for s in sequence.iter() { - match unsafe { hashtable.insert_and_entry(&s) } { + match unsafe { hashtable.insert_and_entry(s) } { Ok(mut e) => { e.write(U64::new(1u64)); } diff --git a/src/query/functions-v2/src/aggregates/aggregate_distinct_state.rs b/src/query/functions-v2/src/aggregates/aggregate_distinct_state.rs index 0d4697e85ea24..d16e22da5ebfd 100644 --- a/src/query/functions-v2/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions-v2/src/aggregates/aggregate_distinct_state.rs @@ -174,8 +174,7 @@ impl AggregateDistinctStringState { holder.commit_row(); let value = unsafe { holder.index_unchecked(holder.len() - 1) }; unsafe { - *(entity.key() as *const _ as *mut _) = - KeysRef::create(value.as_ptr() as usize, value.len()); + entity.set_key(KeysRef::create(value.as_ptr() as usize, value.len())); } self.holders.push(holder); } else { @@ -183,8 +182,7 @@ impl AggregateDistinctStringState { holder.commit_row(); let value = unsafe { holder.index_unchecked(holder.len() - 1) }; unsafe { - *(entity.key() as *const _ as *mut _) = - KeysRef::create(value.as_ptr() as usize, value.len()); + entity.set_key(KeysRef::create(value.as_ptr() as usize, value.len())); } } } diff --git a/src/query/functions/src/aggregates/aggregate_distinct_state.rs b/src/query/functions/src/aggregates/aggregate_distinct_state.rs index 01be055934e20..112a2a7eb9e98 100644 --- a/src/query/functions/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions/src/aggregates/aggregate_distinct_state.rs @@ -198,16 +198,14 @@ impl AggregateDistinctStringState { holder.push(data); let value = unsafe { holder.value_unchecked(holder.len() - 1) }; unsafe { - *(entity.key() as *const _ as *mut _) = - KeysRef::create(value.as_ptr() as usize, value.len()); + entity.set_key(KeysRef::create(value.as_ptr() as usize, value.len())); } self.holders.push(holder); } else { holder.push(data); let value = unsafe { holder.value_unchecked(holder.len() - 1) }; unsafe { - *(entity.key() as *const _ as *mut _) = - KeysRef::create(value.as_ptr() as usize, value.len()); + entity.set_key(KeysRef::create(value.as_ptr() as usize, value.len())); } } } @@ -343,7 +341,7 @@ where fn serialize(&self, writer: &mut BytesMut) -> Result<()> { writer.write_uvarint(self.set.len() as u64)?; for value in self.set.iter() { - let t: T = value.key().clone().into(); + let t: T = (*value.key()).into(); serialize_into_buf(writer, &t)? } Ok(()) @@ -402,13 +400,13 @@ where fn merge(&mut self, rhs: &Self) -> Result<()> { for x in rhs.set.iter() { - let _ = self.set.set_insert(x.key().clone()); + let _ = self.set.set_insert(*x.key()); } Ok(()) } fn build_columns(&mut self, _fields: &[DataField]) -> Result> { - let values: Vec = self.set.iter().map(|e| e.key().clone().into()).collect(); + let values: Vec = self.set.iter().map(|e| (*e.key()).into()).collect(); let result = PrimitiveColumn::::new_from_vec(values); Ok(vec![result.arc()]) } diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs index ce4d6921e4382..95a02f54238fd 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs @@ -104,7 +104,7 @@ impl<'a> GroupColumnsBuilder for SerializedKeysGroupColumnsBuilder<'a> { if self.group_data_types.len() == 1 && self.group_data_types[0].data_type_id() == TypeID::String { - let col = StringColumn::from_slice(&keys); + let col = StringColumn::from_slice(keys); return Ok(vec![col.arc()]); } diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs index 6fbb713361412..c90435391f854 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs @@ -91,7 +91,7 @@ pub trait PolymorphicKeysHelper { Self: 'a, Self::State: 'a; - fn keys_column_builder<'a>(&'a self, capacity: usize) -> Self::ColumnBuilder<'a>; + fn keys_column_builder(&self, capacity: usize) -> Self::ColumnBuilder<'_>; type KeysColumnIter<'a>: KeysColumnIter<>::KeyRef<'a>> where diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs index fa64e5cbc1586..62ee7ae4bd668 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs @@ -362,7 +362,7 @@ impl AggregatorState for SerializedKeysAggregatorState { inserted: &mut bool, ) -> Self::EntityMutRef<'_> { unsafe { - match self.data_state_map.insert_and_entry(&*key_ref) { + match self.data_state_map.insert_and_entry(key_ref) { Ok(e) => { *inserted = true; e