diff --git a/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.CachingFilePathComparer.cs b/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.CachingFilePathComparer.cs
new file mode 100644
index 0000000000000..20fe46870c34c
--- /dev/null
+++ b/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.CachingFilePathComparer.cs
@@ -0,0 +1,197 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// To quiet analyzers asking to change code from dotnet/runtime style to dotnet/roslyn style
+// <auto-generated/>
+
+#nullable enable
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+namespace Microsoft.CodeAnalysis;
+
+internal sealed partial class SolutionState
+{
+    /// <summary>
+    /// String comparer for file paths that caches the last result of the comparison to avoid expensive rehashing of the
+    /// same string over and over again.
+    /// </summary>
+    private sealed class CachingFilePathComparer : IEqualityComparer<string>
+    {
+        /// <summary>
+        /// Note: this insensitive comparer is busted on many systems. But we do things this way for compat with the logic
+        /// we've had on windows since forever.
+        /// </summary>
+        private static readonly StringComparer s_comparer = StringComparer.OrdinalIgnoreCase;
+
+        /// <summary>
+        /// The only instance of this comparer; the constructor is private. Its one-entry cache is therefore shared by
+        /// every caller.
+        /// </summary>
+        public static readonly CachingFilePathComparer Instance = new();
+
+        /// <summary>
+        /// Lock to protect the last string and hash code we computed. `enableThreadOwnerTracking: false` as we
+        /// don't need that tracking, and it substantially speeds up the spin lock (removing 0.7% cpu from solution load
+        /// scenario).
+        /// </summary>
+        private SpinLock _lock = new(enableThreadOwnerTracking: false);
+        private string? _lastString;
+        private int _lastHashCode;
+
+        private CachingFilePathComparer()
+        {
+        }
+
+        /// <summary>
+        /// Compares two paths with <see cref="StringComparer.OrdinalIgnoreCase"/>.
+        /// </summary>
+        public bool Equals(string? x, string? y)
+            => s_comparer.Equals(x, y);
+
+        /// <summary>
+        /// Returns the case-insensitive hash of <paramref name="obj"/>. If <paramref name="obj"/> is the same string
+        /// instance that was hashed last, the cached value is returned instead of rehashing.
+        /// </summary>
+        public int GetHashCode([DisallowNull] string obj)
+        {
+            if (TryGetCachedHashCode(obj, out var hashCode))
+                return hashCode;
+
+            // Hashing a different string than last time. Compute the hash and cache the value.
+
+            // Specialized impl of OrdinalIgnoreCase.GetHashCode that is faster for the common case of an all-ASCII
+            // string. Falls back to normal OrdinalIgnoreCase.GetHashCode for the uncommon case.
+            hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCase(obj);
+
+            var lockTaken = false;
+            try
+            {
+                _lock.Enter(ref lockTaken);
+                _lastString = obj;
+                _lastHashCode = hashCode;
+            }
+            finally
+            {
+                if (lockTaken)
+                    _lock.Exit();
+            }
+
+            return hashCode;
+        }
+
+        /// <summary>
+        /// Tries to fetch the cached hash for <paramref name="obj"/>. Succeeds only when <paramref name="obj"/> is the
+        /// same string instance (reference equality) as the last string hashed.
+        /// </summary>
+        private bool TryGetCachedHashCode(string obj, out int hashCode)
+        {
+            var lastString = _lastString;
+
+            // Quickly check if this is definitely *not* trying to hash the same string that this comparer was just used
+            // to hash. If that's the case, we can avoid taking the lock and just return false immediately. For the
+            // case when a lot of distinct strings are being hashed (say, when a dictionary is being populated), this
+            // means we only spin-wait once.
+            if (!ReferenceEquals(lastString, obj))
+            {
+                hashCode = default;
+                return false;
+            }
+
+            // Otherwise, take the lock, and now copy both the string and hash out.
+            var lockTaken = false;
+            try
+            {
+                _lock.Enter(ref lockTaken);
+                lastString = _lastString;
+                hashCode = _lastHashCode;
+            }
+            finally
+            {
+                if (lockTaken)
+                    _lock.Exit();
+            }
+
+            // Check again, as another thread may have written into this field between the first check and taking the lock.
+            return ReferenceEquals(lastString, obj);
+        }
+
+        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs#L921
+
+        // We "normalize to lowercase" every char by ORing with 0x0020. This casts
+        // a very wide net because it will change, e.g., '^' to '~'. But that should
+        // be ok because we expect this to be very rare in practice. These are valid
+        // for both big-endian and little-endian.
+        private const uint NormalizeToLowercase = 0x0020_0020u;
+
+        /// <summary>
+        /// Computes a case-insensitive hash of <paramref name="obj"/> by reading packed pairs of UTF-16 chars, for
+        /// all-ASCII strings. As soon as a non-ASCII char is seen, defers to <see cref="s_comparer"/> instead.
+        /// </summary>
+        private static unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase(string obj)
+        {
+            uint hash1 = (5381 << 16) + 5381;
+            uint hash2 = hash1;
+
+            int length = obj.Length;
+            fixed (char* src = obj)
+            {
+                Debug.Assert(src[obj.Length] == '\0', "src[this.Length] == '\\0'");
+                Debug.Assert(unchecked((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary");
+
+                uint* ptr = (uint*)src;
+
+                // The mixing below relies on 32-bit wraparound, so it is explicitly unchecked rather than depending on
+                // the compilation's overflow-checking setting.
+                while (length > 2)
+                {
+                    uint p0 = ptr[0];
+                    uint p1 = ptr[1];
+                    if (!AllCharsInUInt32AreAscii(p0 | p1))
+                    {
+                        goto NotAscii;
+                    }
+
+                    length -= 4;
+                    hash1 = unchecked((RuntimeBitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase));
+                    hash2 = unchecked((RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase));
+                    ptr += 2;
+                }
+
+                if (length > 0)
+                {
+                    uint p0 = ptr[0];
+                    if (!AllCharsInUInt32AreAscii(p0))
+                    {
+                        goto NotAscii;
+                    }
+
+                    hash2 = unchecked((RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase));
+                }
+            }
+
+            return unchecked((int)(hash1 + (hash2 * 1566083941)));
+
+NotAscii:
+            return s_comparer.GetHashCode(obj);
+        }
+
+        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs#L16.
+
+        /// <summary>
+        /// Returns true iff the UInt32 represents two ASCII UTF-16 characters in machine endianness.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool AllCharsInUInt32AreAscii(uint value)
+        {
+            return (value & ~0x007F_007Fu) == 0;
+        }
+    }
+}
diff --git a/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.cs b/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.cs
index ec58bc415dfac..347ba082bdf54 100644
--- a/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.cs
+++ b/src/Workspaces/Core/Portable/Workspace/Solution/SolutionState.cs
@@ -33,11 +33,7 @@ internal readonly record struct StateChange(
 /// </summary>
 internal sealed partial class SolutionState
 {
-    /// <summary>
-    /// Note: this insensitive comparer is busted on many systems. But we do things this way for compat with the logic
-    /// we've had on windows since forever.
-    /// </summary>
-    public static readonly StringComparer FilePathComparer = StringComparer.OrdinalIgnoreCase;
+    public static readonly IEqualityComparer<string> FilePathComparer = CachingFilePathComparer.Instance;
 
     // the version of the workspace this solution is from
     public int WorkspaceVersion { get; }