Skip to content

Commit

Permalink
Move RandomState and DefaultHasher into std::hash, but in a sneaky way
Browse files Browse the repository at this point in the history
  • Loading branch information
clarfonthey committed Sep 9, 2023
1 parent b0b8c52 commit 26d14ac
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 155 deletions.
150 changes: 1 addition & 149 deletions library/std/src/collections/hash/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,14 @@ use self::Entry::*;
use hashbrown::hash_map as base;

use crate::borrow::Borrow;
use crate::cell::Cell;
use crate::collections::TryReserveError;
use crate::collections::TryReserveErrorKind;
use crate::error::Error;
use crate::fmt::{self, Debug};
#[allow(deprecated)]
use crate::hash::{BuildHasher, Hash, Hasher, SipHasher13};
use crate::hash::{private::RandomState, BuildHasher, Hash};
use crate::iter::FusedIterator;
use crate::ops::Index;
use crate::sys;

/// A [hash map] implemented with quadratic probing and SIMD lookup.
///
Expand Down Expand Up @@ -3072,152 +3070,6 @@ where
}
}

/// `RandomState` is the default state for [`HashMap`] types.
///
/// A particular instance of `RandomState` will create the same instances of
/// [`Hasher`], but the hashers created by two different `RandomState`
/// instances are unlikely to produce the same result for the same values.
///
/// # Examples
///
/// ```
/// use std::collections::HashMap;
/// use std::collections::hash_map::RandomState;
///
/// let s = RandomState::new();
/// let mut map = HashMap::with_hasher(s);
/// map.insert(1, 2);
/// ```
#[derive(Clone)]
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub struct RandomState {
// The pair of 64-bit keys that `build_hasher` passes to
// `SipHasher13::new_with_keys`. Both fields are private, so the keys are
// only ever chosen by `RandomState::new`.
k0: u64,
k1: u64,
}

impl RandomState {
    /// Creates a `RandomState` whose keys are seeded randomly.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::collections::hash_map::RandomState;
    ///
    /// let s = RandomState::new();
    /// ```
    #[inline]
    #[allow(deprecated)]
    #[must_use]
    #[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
    pub fn new() -> RandomState {
        // Why the keys are cached per thread:
        //
        // Originally every call fetched two fresh values via
        // `rand::thread_rng().gen()`. #31356 showed that, because the
        // thread-local RNG is periodically re-seeded from the OS, creating
        // many hash maps on one thread became excessively slow. The first
        // randomly generated key pair is therefore cached per thread.
        //
        // Why one key is bumped on every call:
        //
        // #36481 showed that a deterministic iteration order opens the door
        // to a form of DOS attack. Incrementing one seed for each
        // `RandomState` created gives every corresponding `HashMap` a
        // different iteration order.
        thread_local! {
            static KEYS: Cell<(u64, u64)> = Cell::new(sys::hashmap_random_keys());
        }

        KEYS.with(|cached| {
            // Hand out the currently cached pair ...
            let (k0, k1) = cached.get();
            // ... and store a pair with `k0` advanced (wrapping) for the next caller.
            cached.set((k0.wrapping_add(1), k1));
            Self { k0, k1 }
        })
    }
}

#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl BuildHasher for RandomState {
    type Hasher = DefaultHasher;

    /// Builds a [`DefaultHasher`] keyed with the two keys stored in this state.
    #[inline]
    #[allow(deprecated)]
    fn build_hasher(&self) -> DefaultHasher {
        let (k0, k1) = (self.k0, self.k1);
        let keyed = SipHasher13::new_with_keys(k0, k1);
        DefaultHasher(keyed)
    }
}

/// The default [`Hasher`] used by [`RandomState`].
///
/// The internal algorithm is not specified, and so it and its hashes should
/// not be relied upon over releases.
// Newtype over `SipHasher13`; the field is private so the concrete algorithm
// is not part of the public interface.
// NOTE(review): `#[allow(deprecated)]` is presumably required because the
// wrapped hasher type is marked deprecated — confirm against `core::hash`.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
#[allow(deprecated)]
#[derive(Clone, Debug)]
pub struct DefaultHasher(SipHasher13);

impl DefaultHasher {
/// Creates a new `DefaultHasher`.
///
/// This hasher is not guaranteed to be the same as all other
/// `DefaultHasher` instances, but is the same as all other `DefaultHasher`
/// instances created through `new` or `default`.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
#[inline]
#[allow(deprecated)]
#[rustc_const_unstable(feature = "const_hash", issue = "104061")]
#[must_use]
pub const fn new() -> DefaultHasher {
DefaultHasher(SipHasher13::new_with_keys(0, 0))
}
}

#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Default for DefaultHasher {
    /// Equivalent to calling [`new`]; refer to that constructor's
    /// documentation for details.
    ///
    /// [`new`]: DefaultHasher::new
    #[inline]
    fn default() -> DefaultHasher {
        Self::new()
    }
}

#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Hasher for DefaultHasher {
// The underlying `SipHasher13` doesn't override the other
// `write_*` methods, so it's ok not to forward them here.

#[inline]
fn write(&mut self, msg: &[u8]) {
self.0.write(msg)
}

#[inline]
fn write_str(&mut self, s: &str) {
self.0.write_str(s);
}

#[inline]
fn finish(&self) -> u64 {
self.0.finish()
}
}

#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl Default for RandomState {
    /// Constructs a new `RandomState`; identical to calling `RandomState::new`.
    #[inline]
    fn default() -> RandomState {
        Self::new()
    }
}

#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for RandomState {
    /// Formats the state as an opaque struct: only the type name is printed
    /// and the key fields are deliberately left out of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut opaque = f.debug_struct("RandomState");
        opaque.finish_non_exhaustive()
    }
}

#[inline]
fn map_entry<'a, K: 'a, V: 'a>(raw: base::RustcEntry<'a, K, V>) -> Entry<'a, K, V> {
match raw {
Expand Down
2 changes: 1 addition & 1 deletion library/std/src/collections/hash/map/tests.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use super::Entry::{Occupied, Vacant};
use super::HashMap;
use super::RandomState;
use crate::assert_matches::assert_matches;
use crate::cell::RefCell;
use crate::hash::private::RandomState;
use crate::test_helpers::test_rng;
use rand::Rng;
use realstd::collections::TryReserveErrorKind::*;
Expand Down
4 changes: 2 additions & 2 deletions library/std/src/collections/hash/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ use hashbrown::hash_set as base;
use crate::borrow::Borrow;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::hash::{BuildHasher, Hash};
use crate::hash::{private::RandomState, BuildHasher, Hash};
use crate::iter::{Chain, FusedIterator};
use crate::ops::{BitAnd, BitOr, BitXor, Sub};

use super::map::{map_try_reserve_error, RandomState};
use super::map::map_try_reserve_error;

/// A [hash set] implemented as a `HashMap` where the value is `()`.
///
Expand Down
2 changes: 1 addition & 1 deletion library/std/src/collections/hash/set/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::super::map::RandomState;
use super::HashSet;

use crate::hash::private::RandomState;
use crate::panic::{catch_unwind, AssertUnwindSafe};
use crate::sync::atomic::{AtomicU32, Ordering};
use crate::sync::Arc;
Expand Down
5 changes: 5 additions & 0 deletions library/std/src/collections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,11 @@ pub mod hash_map {
//! A hash map implemented with quadratic probing and SIMD lookup.
#[stable(feature = "rust1", since = "1.0.0")]
pub use super::hash::map::*;

// Stability metadata mirrors the attribute on the `DefaultHasher` definition
// itself (`hashmap_default_hasher`, 1.13.0); the type was stabilized later
// than `RandomState` and under its own feature name.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
pub use crate::hash::private::DefaultHasher;
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub use crate::hash::private::RandomState;
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
90 changes: 90 additions & 0 deletions library/std/src/hash/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//! Generic hashing support.
//!
//! This module provides a generic way to compute the [hash] of a value.
//! Hashes are most commonly used with [`HashMap`] and [`HashSet`].
//!
//! [hash]: https://en.wikipedia.org/wiki/Hash_function
//! [`HashMap`]: crate::collections::HashMap
//! [`HashSet`]: crate::collections::HashSet
//!
//! The simplest way to make a type hashable is to use `#[derive(Hash)]`:
//!
//! # Examples
//!
//! ```rust
//! use std::collections::hash_map::DefaultHasher;
//! use std::hash::{Hash, Hasher};
//!
//! #[derive(Hash)]
//! struct Person {
//! id: u32,
//! name: String,
//! phone: u64,
//! }
//!
//! let person1 = Person {
//! id: 5,
//! name: "Janet".to_string(),
//! phone: 555_666_7777,
//! };
//! let person2 = Person {
//! id: 5,
//! name: "Bob".to_string(),
//! phone: 555_666_7777,
//! };
//!
//! assert!(calculate_hash(&person1) != calculate_hash(&person2));
//!
//! fn calculate_hash<T: Hash>(t: &T) -> u64 {
//! let mut s = DefaultHasher::new();
//! t.hash(&mut s);
//! s.finish()
//! }
//! ```
//!
//! If you need more control over how a value is hashed, you need to implement
//! the [`Hash`] trait:
//!
//! ```rust
//! use std::collections::hash_map::DefaultHasher;
//! use std::hash::{Hash, Hasher};
//!
//! struct Person {
//! id: u32,
//! # #[allow(dead_code)]
//! name: String,
//! phone: u64,
//! }
//!
//! impl Hash for Person {
//! fn hash<H: Hasher>(&self, state: &mut H) {
//! self.id.hash(state);
//! self.phone.hash(state);
//! }
//! }
//!
//! let person1 = Person {
//! id: 5,
//! name: "Janet".to_string(),
//! phone: 555_666_7777,
//! };
//! let person2 = Person {
//! id: 5,
//! name: "Bob".to_string(),
//! phone: 555_666_7777,
//! };
//!
//! assert_eq!(calculate_hash(&person1), calculate_hash(&person2));
//!
//! fn calculate_hash<T: Hash>(t: &T) -> u64 {
//! let mut s = DefaultHasher::new();
//! t.hash(&mut s);
//! s.finish()
//! }
//! ```
#![stable(feature = "rust1", since = "1.0.0")]

pub(crate) mod private;

#[stable(feature = "rust1", since = "1.0.0")]
pub use core::hash::*;
Loading

0 comments on commit 26d14ac

Please sign in to comment.