Skip to content

Commit

Permalink
Merge pull request #73937 from CyrusNajmabadi/cacheHash
Browse files Browse the repository at this point in the history
  • Loading branch information
CyrusNajmabadi authored Jun 11, 2024
2 parents e8ca335 + dd25a6d commit f2705c0
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// To quiet analyzers asking to change code from dotnet/runtime style to dotnet/roslyn style
// <auto-generated/>

#nullable enable

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Threading;

namespace Microsoft.CodeAnalysis;

internal sealed partial class SolutionState
{
    /// <summary>
    /// String comparer for file paths that caches the most recently computed hash code to avoid expensive rehashing
    /// of the same string over and over again.
    /// </summary>
    /// <remarks>
    /// Equality is delegated to <see cref="StringComparer.OrdinalIgnoreCase"/>. Only a single entry is cached: the
    /// last string instance that was hashed (matched by reference) together with its hash code.
    /// </remarks>
    private sealed class CachingFilePathComparer : IEqualityComparer<string>
    {
        /// <summary>
        /// Note: this insensitive comparer is busted on many systems. But we do things this way for compat with the logic
        /// we've had on windows since forever.
        /// </summary>
        private static readonly StringComparer s_comparer = StringComparer.OrdinalIgnoreCase;

        public static readonly CachingFilePathComparer Instance = new();

        /// <summary>
        /// Lock to protect the last string and hash code we computed. `enableThreadOwnerTracking: false` as we
        /// don't need that tracking, and it substantially speeds up the spin lock (removing 0.7% cpu from solution load
        /// scenario).
        /// </summary>
        /// <remarks>
        /// Deliberately not <c>readonly</c>: <see cref="SpinLock"/> is a mutable struct, and calling Enter/Exit
        /// through a readonly field would operate on a copy instead of on this lock.
        /// </remarks>
        private SpinLock _lock = new(enableThreadOwnerTracking: false);

        /// <summary>The string instance most recently passed to <see cref="GetHashCode"/>, if any.</summary>
        private string? _lastString;

        /// <summary>The hash code that was computed for <see cref="_lastString"/>.</summary>
        private int _lastHashCode;

        // Private so that the only instance is the shared 'Instance' above.
        private CachingFilePathComparer()
        {
        }

        /// <summary>
        /// Compares two paths using <see cref="StringComparer.OrdinalIgnoreCase"/>.
        /// </summary>
        public bool Equals(string? x, string? y)
            => s_comparer.Equals(x, y);

        /// <summary>
        /// Returns a case-insensitive hash code for <paramref name="obj"/>, reusing the cached value when
        /// <paramref name="obj"/> is the same string instance that was hashed by the previous call.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="obj"/> is <see langword="null"/>.</exception>
        public int GetHashCode([DisallowNull] string obj)
        {
            // Reject null up front. Without this check a null argument would match the initial (null) value of
            // _lastString and be reported as a cache hit with hash 0 on a fresh comparer, yet fail with a
            // NullReferenceException once any string had been hashed. Throwing ArgumentNullException is what
            // StringComparer.OrdinalIgnoreCase.GetHashCode does for null.
            if (obj is null)
                throw new ArgumentNullException(nameof(obj));

            if (TryGetCachedHashCode(obj, out var hashCode))
                return hashCode;

            // Hashing a different string than last time. Compute the hash and cache the value.

            // Specialized impl of OrdinalIgnoreCase.GetHashCode that is faster for the common case of an all-ASCII
            // string. Falls back to normal OrdinalIgnoreCase.GetHashCode for the uncommon case.
            hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCase(obj);

            // Publish the string and its hash together under the lock so a reader never pairs one string with
            // another string's hash.
            var lockTaken = false;
            try
            {
                _lock.Enter(ref lockTaken);
                _lastString = obj;
                _lastHashCode = hashCode;
            }
            finally
            {
                if (lockTaken)
                    _lock.Exit();
            }

            return hashCode;
        }

        /// <summary>
        /// Attempts to fetch the cached hash code for <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">The string being hashed; compared to the cached string by reference.</param>
        /// <param name="hashCode">The cached hash code. Only meaningful when the method returns
        /// <see langword="true"/>.</param>
        /// <returns><see langword="true"/> if <paramref name="obj"/> is the cached string instance.</returns>
        private bool TryGetCachedHashCode(string obj, out int hashCode)
        {
            var lastString = _lastString;

            // Quickly check if this is definitely *not* trying to hash the same string that this comparer was just used
            // to hash. If that's the case, we can avoid taking the lock and just return false immediately. For the
            // case when a lot of distinct strings are being hashed (say, when a dictionary is being populated), this
            // means we only spin-wait once.
            if (!ReferenceEquals(lastString, obj))
            {
                hashCode = default;
                return false;
            }

            // Otherwise, take the lock, and now copy both the string and hash out.
            var lockTaken = false;
            try
            {
                _lock.Enter(ref lockTaken);
                lastString = _lastString;
                hashCode = _lastHashCode;
            }
            finally
            {
                if (lockTaken)
                    _lock.Exit();
            }

            // Check again, as another thread may have written into this field between the first check and taking the lock.
            return ReferenceEquals(lastString, obj);
        }

        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs#L921

        // We "normalize to lowercase" every char by ORing with 0x0020. This casts
        // a very wide net because it will change, e.g., '^' to '~'. But that should
        // be ok because we expect this to be very rare in practice. The mask is valid
        // for both big-endian and little-endian.
        private const uint NormalizeToLowercase = 0x0020_0020u;

        /// <summary>
        /// Computes a case-insensitive hash of <paramref name="obj"/>. All-ASCII strings are hashed two chars at a
        /// time after ORing each char with 0x0020; as soon as a non-ASCII char is seen the whole string is handed
        /// to <see cref="StringComparer.OrdinalIgnoreCase"/> instead.
        /// </summary>
        /// <remarks>Static: the computation uses no instance state.</remarks>
        private static unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase(string obj)
        {
            uint hash1 = (5381 << 16) + 5381;
            uint hash2 = hash1;

            int length = obj.Length;
            fixed (char* src = obj)
            {
                Debug.Assert(src[obj.Length] == '\0', "src[this.Length] == '\\0'");
                Debug.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary");

                // Each uint read covers two UTF-16 chars.
                uint* ptr = (uint*)src;

                // Consume four chars (two uints) per iteration. With exactly three chars left, the fourth char
                // read is the terminating '\0' asserted above.
                while (length > 2)
                {
                    uint p0 = ptr[0];
                    uint p1 = ptr[1];
                    if (!AllCharsInUInt32AreAscii(p0 | p1))
                    {
                        goto NotAscii;
                    }

                    length -= 4;
                    hash1 = (RuntimeBitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase);
                    hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase);
                    ptr += 2;
                }

                // One or two chars remain; a single trailing char is read together with the terminating '\0'.
                if (length > 0)
                {
                    uint p0 = ptr[0];
                    if (!AllCharsInUInt32AreAscii(p0))
                    {
                        goto NotAscii;
                    }

                    hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase);
                }
            }

            return (int)(hash1 + (hash2 * 1566083941));

        NotAscii:
            // Uncommon case: discard the partial ASCII hash and let the real comparer hash the whole string.
            return s_comparer.GetHashCode(obj);
        }

        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs#L16.

        /// <summary>
        /// Returns true iff the UInt32 represents two ASCII UTF-16 characters in machine endianness.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool AllCharsInUInt32AreAscii(uint value)
        {
            // Any bit outside the low seven bits of either 16-bit half means a char above 0x7F.
            return (value & ~0x007F_007Fu) == 0;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ internal readonly record struct StateChange(
/// </summary>
internal sealed partial class SolutionState
{
/// <summary>
/// Note: this insensitive comparer is busted on many systems. But we do things this way for compat with the logic
/// we've had on windows since forever.
/// </summary>
public static readonly StringComparer FilePathComparer = StringComparer.OrdinalIgnoreCase;
public static readonly IEqualityComparer<string> FilePathComparer = CachingFilePathComparer.Instance;

// the version of the workspace this solution is from
public int WorkspaceVersion { get; }
Expand Down

0 comments on commit f2705c0

Please sign in to comment.