-
Notifications
You must be signed in to change notification settings - Fork 4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #73937 from CyrusNajmabadi/cacheHash
- Loading branch information
Showing
2 changed files
with
177 additions
and
5 deletions.
There are no files selected for viewing
176 changes: 176 additions & 0 deletions
176
src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.CachingFilePathComparer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
// To quiet analyzers asking to change code from dotnet/runtime style to dotnet/roslyn style | ||
// <auto-generated/> | ||
|
||
#nullable enable | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Threading; | ||
|
||
namespace Microsoft.CodeAnalysis; | ||
|
||
internal sealed partial class SolutionState | ||
{ | ||
/// <summary> | ||
/// String comparer for file paths that caches the most recently computed hash code to avoid expensive rehashing of the | ||
/// same string over and over again. | ||
/// </summary> | ||
private sealed class CachingFilePathComparer : IEqualityComparer<string> | ||
{ | ||
/// <summary> | ||
/// Note: this case-insensitive comparer is busted on many systems. But we do things this way for compat with the logic | ||
/// we've had on Windows since forever. | ||
/// </summary> | ||
private static readonly StringComparer s_comparer = StringComparer.OrdinalIgnoreCase; | ||
|
||
public static readonly CachingFilePathComparer Instance = new(); | ||
|
||
/// <summary> | ||
/// Lock to protect the last string and hash code we computed. `enableThreadOwnerTracking: false` as we | ||
/// don't need that tracking, and it substantially speeds up the spin lock (removing 0.7% cpu from solution load | ||
/// scenario). | ||
/// </summary> | ||
private SpinLock _lock = new(enableThreadOwnerTracking: false); | ||
private string? _lastString; | ||
private int _lastHashCode; | ||
|
||
// Private so that instances of this comparer cannot be constructed from outside the class itself.
private CachingFilePathComparer() { }
|
||
/// <summary>
/// Determines whether <paramref name="x"/> and <paramref name="y"/> are equal by delegating to
/// <see cref="s_comparer"/>. No caching is involved in equality checks.
/// </summary>
public bool Equals(string? x, string? y)
{
    return s_comparer.Equals(x, y);
}
|
||
/// <summary>
/// Returns the hash code for <paramref name="obj"/>. If <paramref name="obj"/> is the very same string instance
/// that was hashed by the previous call, the remembered value is returned; otherwise the hash is computed and
/// then remembered for the next call.
/// </summary>
public int GetHashCode([DisallowNull] string obj)
{
    if (!TryGetCachedHashCode(obj, out var result))
    {
        // Cache miss: this is a different string than the one hashed last time, so compute its hash now.
        result = GetNonRandomizedHashCodeOrdinalIgnoreCase(obj);

        // Publish the string together with its hash while holding the lock so that readers taking the lock
        // always observe a matching pair.
        var acquired = false;
        try
        {
            _lock.Enter(ref acquired);
            _lastString = obj;
            _lastHashCode = result;
        }
        finally
        {
            if (acquired)
            {
                _lock.Exit();
            }
        }
    }

    return result;
}
|
||
/// <summary>
/// Attempts to retrieve the remembered hash code for <paramref name="obj"/>.
/// </summary>
/// <param name="obj">The string whose hash is being requested.</param>
/// <param name="hashCode">
/// The remembered hash code. Only meaningful when this method returns <see langword="true"/>.
/// </param>
/// <returns>
/// <see langword="true"/> if <paramref name="obj"/> is the same instance as the remembered string; otherwise
/// <see langword="false"/>.
/// </returns>
private bool TryGetCachedHashCode(string obj, out int hashCode)
{
    // Lock-free pre-check: only when the remembered string appears to be this exact instance is it worth
    // taking the lock. When many distinct strings are hashed in a row, this path never touches the lock.
    if (ReferenceEquals(_lastString, obj))
    {
        string? snapshot;
        var acquired = false;
        try
        {
            _lock.Enter(ref acquired);

            // Read both fields under the lock so that the pair is consistent.
            snapshot = _lastString;
            hashCode = _lastHashCode;
        }
        finally
        {
            if (acquired)
            {
                _lock.Exit();
            }
        }

        // Another thread may have replaced the remembered string between the pre-check and acquiring the
        // lock, so the identity has to be verified again against the snapshot.
        return ReferenceEquals(snapshot, obj);
    }

    hashCode = default;
    return false;
}
|
||
// From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs#L921 | ||
|
||
// We "normalize to lowercase" every char by ORing with 0x0020. This casts | ||
// a very wide net because it will change, e.g., '^' to '~'. But that should | ||
// be ok because we expect this to be very rare in practice. The constant is valid | ||
// for both big-endian and little-endian. | ||
private const uint NormalizeToLowercase = 0x0020_0020u; | ||
|
||
/// <summary>
/// Computes a case-insensitive hash code for <paramref name="obj"/>. Strings that consist solely of ASCII
/// characters are hashed inline; as soon as a non-ASCII character is encountered the computation is abandoned
/// and the result of <see cref="s_comparer"/> is returned instead.
/// </summary>
/// <remarks>
/// The mixing steps and the final cast depend on 32-bit wrap-around, so the body is explicitly
/// <c>unchecked</c> instead of relying on the overflow-checking setting the file happens to be compiled with.
/// The method reads no instance state and is therefore <c>static</c>.
/// </remarks>
/// <param name="obj">The string to hash. Must not be <see langword="null"/>.</param>
/// <returns>The hash code for <paramref name="obj"/>.</returns>
private static unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase(string obj)
{
    unchecked
    {
        uint hash1 = (5381 << 16) + 5381;
        uint hash2 = hash1;

        int length = obj.Length;
        fixed (char* src = obj)
        {
            Debug.Assert(src[obj.Length] == '\0', "src[this.Length] == '\\0'");
            Debug.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary");

            // Read the string two chars (one uint) at a time.
            uint* ptr = (uint*)src;

            // Main loop: four chars (two uints) per iteration, one uint mixed into each accumulator.
            while (length > 2)
            {
                uint p0 = ptr[0];
                uint p1 = ptr[1];
                if (!AllCharsInUInt32AreAscii(p0 | p1))
                {
                    goto NotAscii;
                }

                // When only three chars remain, the fourth char read above is the string's null terminator
                // (asserted at the top of the fixed block), so the read stays inside the string's storage.
                length -= 4;
                hash1 = (RuntimeBitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase);
                hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase);
                ptr += 2;
            }

            // Tail: one or two chars left over (a single leftover char is read together with the terminator).
            if (length > 0)
            {
                uint p0 = ptr[0];
                if (!AllCharsInUInt32AreAscii(p0))
                {
                    goto NotAscii;
                }

                hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase);
            }
        }

        return (int)(hash1 + (hash2 * 1566083941));
    }

NotAscii:
    // Uncommon case: at least one char is outside the ASCII range, so defer entirely to the shared comparer.
    return s_comparer.GetHashCode(obj);
}
|
||
// From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs#L16. | ||
|
||
/// <summary>
/// Tests whether both 16-bit halves of <paramref name="value"/> are in the ASCII range (0x0000-0x007F), i.e.
/// whether the UInt32 holds two ASCII UTF-16 characters. The check is independent of machine endianness.
/// </summary>
/// <param name="value">Two UTF-16 code units packed into a single 32-bit value.</param>
/// <returns><see langword="true"/> if no bit above the low seven is set in either half.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool AllCharsInUInt32AreAscii(uint value)
    => (value & 0xFF80_FF80u) == 0;
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters