Skip to content

Commit

Permalink
Rollup merge of rust-lang#127297 - the8472:path-new-hash, r=Nilstrieb
Browse files Browse the repository at this point in the history
Improve std::Path's Hash quality by avoiding prefix collisions

This adds a bit rotation to the already existing state so that the same sequence of characters chunked at different offsets into separate path components results in different hashes.

The tests are from rust-lang#127255

Closes rust-lang#127254
  • Loading branch information
matthiaskrgr authored Jul 7, 2024
2 parents 2206c6b + ed3d487 commit ee06e7d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 4 deletions.
13 changes: 9 additions & 4 deletions std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3192,15 +3192,19 @@ impl Hash for Path {
let bytes = &bytes[prefix_len..];

let mut component_start = 0;
let mut bytes_hashed = 0;
// track some extra state to avoid prefix collisions.
// ["foo", "bar"] and ["foobar"], will have the same payload bytes
// but result in different chunk_bits
let mut chunk_bits: usize = 0;

for i in 0..bytes.len() {
let is_sep = if verbatim { is_verbatim_sep(bytes[i]) } else { is_sep_byte(bytes[i]) };
if is_sep {
if i > component_start {
let to_hash = &bytes[component_start..i];
chunk_bits = chunk_bits.wrapping_add(to_hash.len());
chunk_bits = chunk_bits.rotate_right(2);
h.write(to_hash);
bytes_hashed += to_hash.len();
}

// skip over separator and optionally a following CurDir item
Expand All @@ -3221,11 +3225,12 @@ impl Hash for Path {

if component_start < bytes.len() {
let to_hash = &bytes[component_start..];
chunk_bits = chunk_bits.wrapping_add(to_hash.len());
chunk_bits = chunk_bits.rotate_right(2);
h.write(to_hash);
bytes_hashed += to_hash.len();
}

h.write_usize(bytes_hashed);
h.write_usize(chunk_bits);
}
}

Expand Down
35 changes: 35 additions & 0 deletions std/src/path/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,20 @@ pub fn test_compare() {
relative_from: Some("")
);

tc!("foo//", "foo",
eq: true,
starts_with: true,
ends_with: true,
relative_from: Some("")
);

tc!("foo///", "foo",
eq: true,
starts_with: true,
ends_with: true,
relative_from: Some("")
);

tc!("foo/.", "foo",
eq: true,
starts_with: true,
Expand All @@ -1633,13 +1647,34 @@ pub fn test_compare() {
relative_from: Some("")
);

tc!("foo/.//bar", "foo/bar",
eq: true,
starts_with: true,
ends_with: true,
relative_from: Some("")
);

tc!("foo//./bar", "foo/bar",
eq: true,
starts_with: true,
ends_with: true,
relative_from: Some("")
);

tc!("foo/bar", "foo",
eq: false,
starts_with: true,
ends_with: false,
relative_from: Some("bar")
);

tc!("foo/bar", "foobar",
eq: false,
starts_with: false,
ends_with: false,
relative_from: None
);

tc!("foo/bar/baz", "foo/bar",
eq: false,
starts_with: true,
Expand Down

0 comments on commit ee06e7d

Please sign in to comment.