Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stable hashing: add comments and tests concerning platform-independence #77319

Merged
merged 1 commit into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions compiler/rustc_data_structures/src/sip128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,28 @@ impl SipHasher128 {

// A specialized write function for values with size <= 8.
//
// The hashing of multi-byte integers depends on endianness. E.g.:
// The input must be zero-extended to 64-bits by the caller. This extension
// isn't hashed, but the implementation requires it for correctness.
//
// This function, given the same integer size and value, has the same effect
// on both little- and big-endian hardware. It operates on values without
// depending on their sequence in memory, so is independent of endianness.
//
// However, we want SipHasher128 to be platform-dependent, in order to be
// consistent with the platform-dependent SipHasher in libstd. In other
// words, we want:
//
// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
//
// This function does the right thing for little-endian hardware. On
// big-endian hardware `x` must be byte-swapped first to give the right
// behaviour. After any byte-swapping, the input must be zero-extended to
// 64-bits. The caller is responsible for the byte-swapping and
// zero-extension.
// Therefore, in order to produce endian-dependent results, SipHasher128's
// `write_xxx` Hasher trait methods byte-swap `x` prior to zero-extending.
//
// If clients of SipHasher128 itself want platform-independent results, they
// *also* must byte-swap integer inputs before invoking the `write_xxx`
// methods on big-endian hardware (that is, two byte-swaps must occur--one
// in the client, and one in SipHasher128). Additionally, they must extend
// `usize` and `isize` types to 64 bits on 32-bit systems.
#[inline]
fn short_write<T>(&mut self, _x: T, x: u64) {
let size = mem::size_of::<T>();
Expand Down
56 changes: 45 additions & 11 deletions compiler/rustc_data_structures/src/sip128/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use super::*;

use std::hash::{Hash, Hasher};
use std::{mem, slice};

// Hash just the bytes of the slice, without length prefix
struct Bytes<'a>(&'a [u8]);
Expand Down Expand Up @@ -399,20 +398,55 @@ fn test_hash_no_concat_alias() {
}

#[test]
fn test_write_short_works() {
let test_usize = 0xd0c0b0a0usize;
fn test_short_write_works() {
let test_u8 = 0xFF_u8;
let test_u16 = 0x1122_u16;
let test_u32 = 0x22334455_u32;
let test_u64 = 0x33445566_778899AA_u64;
let test_u128 = 0x11223344_55667788_99AABBCC_DDEEFF77_u128;
let test_usize = 0xD0C0B0A0_usize;

let test_i8 = -1_i8;
let test_i16 = -2_i16;
let test_i32 = -3_i32;
let test_i64 = -4_i64;
let test_i128 = -5_i128;
let test_isize = -6_isize;

let mut h1 = SipHasher128::new_with_keys(0, 0);
h1.write_usize(test_usize);
h1.write(b"bytes");
h1.write(b"string");
h1.write_u8(0xFFu8);
h1.write_u8(0x01u8);
h1.write_u8(test_u8);
h1.write_u16(test_u16);
h1.write_u32(test_u32);
h1.write_u64(test_u64);
h1.write_u128(test_u128);
h1.write_usize(test_usize);
h1.write_i8(test_i8);
h1.write_i16(test_i16);
h1.write_i32(test_i32);
h1.write_i64(test_i64);
h1.write_i128(test_i128);
h1.write_isize(test_isize);

let mut h2 = SipHasher128::new_with_keys(0, 0);
h2.write(unsafe {
slice::from_raw_parts(&test_usize as *const _ as *const u8, mem::size_of::<usize>())
});
h2.write(b"bytes");
h2.write(b"string");
h2.write(&[0xFFu8, 0x01u8]);
assert_eq!(h1.finish128(), h2.finish128());
h2.write(&test_u8.to_ne_bytes());
h2.write(&test_u16.to_ne_bytes());
h2.write(&test_u32.to_ne_bytes());
h2.write(&test_u64.to_ne_bytes());
h2.write(&test_u128.to_ne_bytes());
h2.write(&test_usize.to_ne_bytes());
h2.write(&test_i8.to_ne_bytes());
h2.write(&test_i16.to_ne_bytes());
h2.write(&test_i32.to_ne_bytes());
h2.write(&test_i64.to_ne_bytes());
h2.write(&test_i128.to_ne_bytes());
h2.write(&test_isize.to_ne_bytes());

let h1_hash = h1.finish128();
let h2_hash = h2.finish128();

assert_eq!(h1_hash, h2_hash);
}
6 changes: 5 additions & 1 deletion compiler/rustc_data_structures/src/stable_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ use smallvec::SmallVec;
use std::hash::{BuildHasher, Hash, Hasher};
use std::mem;

#[cfg(test)]
mod tests;

/// When hashing something that ends up affecting properties like symbol names,
/// we want these symbol names to be calculated independently of other factors
/// like what architecture you're compiling *from*.
Expand Down Expand Up @@ -129,7 +132,8 @@ impl Hasher for StableHasher {
fn write_isize(&mut self, i: isize) {
// Always treat isize as i64 so we get the same results on 32 and 64 bit
// platforms. This is important for symbol hashes when cross compiling,
// for example.
// for example. Sign extending here is preferable as it means that the
// same negative number hashes the same on both 32 and 64 bit platforms.
self.state.write_i64((i as i64).to_le());
}
}
Expand Down
73 changes: 73 additions & 0 deletions compiler/rustc_data_structures/src/stable_hasher/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use super::*;

// The tests below compare the computed hashes to particular expected values
// in order to test that we produce the same results on different platforms,
// regardless of endianness and `usize` and `isize` size differences (this
// of course assumes we run these tests on platforms that differ in those
// ways). The expected values depend on the hashing algorithm used, so they
// need to be updated whenever StableHasher changes its hashing algorithm.

#[test]
fn test_hash_integers() {
// Test that integers are handled consistently across platforms.
let test_u8 = 0xAB_u8;
let test_u16 = 0xFFEE_u16;
let test_u32 = 0x445577AA_u32;
let test_u64 = 0x01234567_13243546_u64;
let test_u128 = 0x22114433_66557788_99AACCBB_EEDDFF77_u128;
let test_usize = 0xD0C0B0A0_usize;

let test_i8 = -100_i8;
let test_i16 = -200_i16;
let test_i32 = -300_i32;
let test_i64 = -400_i64;
let test_i128 = -500_i128;
let test_isize = -600_isize;

let mut h = StableHasher::new();
test_u8.hash(&mut h);
test_u16.hash(&mut h);
test_u32.hash(&mut h);
test_u64.hash(&mut h);
test_u128.hash(&mut h);
test_usize.hash(&mut h);
test_i8.hash(&mut h);
test_i16.hash(&mut h);
test_i32.hash(&mut h);
test_i64.hash(&mut h);
test_i128.hash(&mut h);
test_isize.hash(&mut h);

// This depends on the hashing algorithm. See note at top of file.
let expected = (2736651863462566372, 8121090595289675650);

assert_eq!(h.finalize(), expected);
}

#[test]
fn test_hash_usize() {
// Test that usize specifically is handled consistently across platforms.
let test_usize = 0xABCDEF01_usize;

let mut h = StableHasher::new();
test_usize.hash(&mut h);

// This depends on the hashing algorithm. See note at top of file.
let expected = (5798740672699530587, 11186240177685111648);

assert_eq!(h.finalize(), expected);
}

#[test]
fn test_hash_isize() {
// Test that isize specifically is handled consistently across platforms.
let test_isize = -7_isize;

let mut h = StableHasher::new();
test_isize.hash(&mut h);

// This depends on the hashing algorithm. See note at top of file.
let expected = (14721296605626097289, 11385941877786388409);

assert_eq!(h.finalize(), expected);
}