Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to Xxhash128 from sha256 #70723

Merged
merged 11 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@
<SystemDrawingCommonVersion>7.0.0</SystemDrawingCommonVersion>
<SystemIOFileSystemVersion>4.3.0</SystemIOFileSystemVersion>
<SystemIOFileSystemPrimitivesVersion>4.3.0</SystemIOFileSystemPrimitivesVersion>
<SystemIOHashingVersion>8.0.0</SystemIOHashingVersion>
<SystemIOPipesAccessControlVersion>5.0.0</SystemIOPipesAccessControlVersion>
<SystemIOPipelinesVersion>7.0.0</SystemIOPipelinesVersion>
<SystemManagementVersion>7.0.0</SystemManagementVersion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
<_Dependency Remove="System.Collections.Immutable"/>
<_Dependency Remove="System.Diagnostics.DiagnosticSource"/>
<_Dependency Remove="System.Drawing.Common"/>
<_Dependency Remove="System.IO.Hashing"/>
<_Dependency Remove="System.IO.Pipelines"/>
<_Dependency Remove="System.Memory"/>
<_Dependency Remove="System.Numerics.Vectors"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<NuGetPackageToIncludeInVsix Include="System.Composition" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.Configuration.ConfigurationManager" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.Drawing.Common" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.IO.Hashing" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.IO.Pipelines" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.Memory" PkgDefEntry="BindingRedirect" />
<NuGetPackageToIncludeInVsix Include="System.Numerics.Vectors" PkgDefEntry="BindingRedirect" />
Expand Down Expand Up @@ -75,6 +76,7 @@
<PackageReference Include="System.Configuration.ConfigurationManager" Version="$(SystemConfigurationConfigurationManagerVersion)" />
<PackageReference Include="System.Drawing.Common" Version="$(SystemDrawingCommonVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
<PackageReference Include="System.IO.Hashing" Version="$(SystemIOHashingVersion)" />
<PackageReference Include="System.IO.Pipelines" Version="$(SystemIOPipelinesVersion)" />
<PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataVersion)" />
<PackageReference Include="System.Resources.Extensions" Version="$(SystemResourcesExtensionsVersion)" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<PackageReference Include="SQLitePCLRaw.bundle_green" Version="$(SQLitePCLRawbundle_greenVersion)" PrivateAssets="all" Condition="'$(DotNetBuildFromSource)' != 'true'" />
<PackageReference Include="System.Composition" Version="$(SystemCompositionVersion)" />
<PackageReference Include="Microsoft.Bcl.AsyncInterfaces" Version="$(MicrosoftBclAsyncInterfacesVersion)" />
<PackageReference Include="System.IO.Hashing" Version="$(SystemIOHashingVersion)" />
<PackageReference Include="System.IO.Pipelines" Version="$(SystemIOPipelinesVersion)" />
<PackageReference Include="System.Threading.Channels" Version="$(SystemThreadingChannelsVersion)" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ internal static class SQLitePersistentStorageConstants
// 6. Use compression in some features. Need to move to a different table since the blob
// format will be different and we don't want different VS versions (that do/don't support
// compression constantly stomping on each other.
private const string Version = "6";
// 7. Checksum size changed from 20 bytes to 16 bytes long.
private const string Version = "7";

/// <summary>
/// Inside the DB we have a table dedicated to storing strings that also provides a unique
Expand Down
15 changes: 5 additions & 10 deletions src/Workspaces/Core/Portable/Workspace/Solution/Checksum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ internal sealed partial record class Checksum(
/// <summary>
/// The intended size of the <see cref="HashData"/> structure.
/// </summary>
public const int HashSize = 20;
public const int HashSize = 16;

public static readonly Checksum Null = new(Hash: default);

Expand Down Expand Up @@ -119,29 +119,24 @@ public override int GetHashCode()
[DataContract, StructLayout(LayoutKind.Explicit, Size = HashSize)]
public readonly record struct HashData(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HashData

Can we use InlineArray feature instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no. this has to run on netfx and core.

[field: FieldOffset(0)][property: DataMember(Order = 0)] long Data1,
[field: FieldOffset(8)][property: DataMember(Order = 1)] long Data2,
[field: FieldOffset(16)][property: DataMember(Order = 2)] int Data3)
[field: FieldOffset(8)][property: DataMember(Order = 1)] long Data2)
{
public void WriteTo(ObjectWriter writer)
{
writer.WriteInt64(Data1);
writer.WriteInt64(Data2);
writer.WriteInt32(Data3);
}

public void WriteTo(Span<byte> span)
{
Contract.ThrowIfFalse(span.Length >= HashSize);
Contract.ThrowIfTrue(span.Length < HashSize);
#pragma warning disable CS9191 // The 'ref' modifier for an argument corresponding to 'in' parameter is equivalent to 'in'. Consider using 'in' instead.
Contract.ThrowIfFalse(MemoryMarshal.TryWrite(span, ref Unsafe.AsRef(in this)));
#pragma warning restore CS9191
}

public static unsafe HashData FromPointer(HashData* hash)
=> new(hash->Data1, hash->Data2, hash->Data3);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

never used.


public static HashData ReadFrom(ObjectReader reader)
=> new(reader.ReadInt64(), reader.ReadInt64(), reader.ReadInt32());
=> new(reader.ReadInt64(), reader.ReadInt64());

public override int GetHashCode()
{
Expand All @@ -152,7 +147,7 @@ public override int GetHashCode()
// Explicitly implement this method as default jit for records on netfx doesn't properly devirtualize the
// standard calls to EqualityComparer<long>.Default.Equals
public bool Equals(HashData other)
=> this.Data1 == other.Data1 && this.Data2 == other.Data2 && this.Data3 == other.Data3;
=> this.Data1 == other.Data1 && this.Data2 == other.Data2;
}
}

Expand Down
221 changes: 17 additions & 204 deletions src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.IO.Hashing;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using System.Threading;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Serialization;
Expand All @@ -21,135 +19,41 @@ namespace Microsoft.CodeAnalysis
internal partial record class Checksum
{
// https://github.com/dotnet/runtime/blob/f2db6d6093c54e5eeb9db2d8dcbe15b2db92ad8c/src/libraries/System.Security.Cryptography.Algorithms/src/System/Security/Cryptography/SHA256.cs#L18-L19
CyrusNajmabadi marked this conversation as resolved.
Show resolved Hide resolved
private const int SHA256HashSizeBytes = 256 / 8;
private const int XXHash128SizeBytes = 128 / 8;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure why we need this constant and the HashSize constant. But was keeping this roughly the same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you follow up with cleanup-refactoring?
Other than having this duplicated I'm not sure about the value of having the Create methods in a separate file.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. i'm also intending to measure why we need Checksum and HashData when it seems like we could just have the latter. The former is technically good if we expect a few actual distinct checksums, and tons of pointers to them. The latter would be good if we expect less pointers and most of the values to just be needed in the place where we're pointing at them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might have been a 32-bit process the last time this was evaluated, but the concern last time we looked was the number of pointers could exceed the number of instances.


#if NET
private static readonly ObjectPool<IncrementalHash> s_incrementalHashPool =
new(() => IncrementalHash.CreateHash(HashAlgorithmName.SHA256), size: 20);
#else
private static readonly ObjectPool<SHA256> s_incrementalHashPool =
new(SHA256.Create, size: 20);
#endif

#if !NET
// Dedicated pools for the byte[]s we use to create checksums from two or three existing checksums. Sized to
// exactly the space needed to splat the existing checksum data into the array and then hash it.

// Note: number of elements here should be computed based on what we need from our various collection-with-children objects.
private static readonly ObjectPool<byte[]>[] s_checksumByteArrayPool =
Enumerable.Range(0, 11).Select(i => new ObjectPool<byte[]>(() => new byte[HashSize * i])).ToArray();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no longer needed. the xxhash128 has all the functionality we need.


#endif
private static readonly ObjectPool<XxHash128> s_incrementalHashPool =
new(() => new(), size: 20);

public static Checksum Create(IEnumerable<string> values)
{
#if NET
using var pooledHash = s_incrementalHashPool.GetPooledObject();

foreach (var value in values)
{
pooledHash.Object.AppendData(MemoryMarshal.AsBytes(value.AsSpan()));
pooledHash.Object.AppendData(MemoryMarshal.AsBytes("\0".AsSpan()));
pooledHash.Object.Append(MemoryMarshal.AsBytes(value.AsSpan()));
pooledHash.Object.Append(MemoryMarshal.AsBytes("\0".AsSpan()));
}

Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
Span<byte> hash = stackalloc byte[XXHash128SizeBytes];
pooledHash.Object.GetHashAndReset(hash);
return From(hash);
#else
using var pooledHash = s_incrementalHashPool.GetPooledObject();
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();
var hash = pooledHash.Object;

hash.Initialize();
foreach (var value in values)
{
AppendData(hash, pooledBuffer.Object, value);
AppendData(hash, pooledBuffer.Object, "\0");
}

hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
return From(hash.Hash);
#endif
}

public static Checksum Create(string value)
{
#if NET
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
SHA256.HashData(MemoryMarshal.AsBytes(value.AsSpan()), hash);
return From(hash);
#else
using var pooledHash = s_incrementalHashPool.GetPooledObject();
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();
var hash = pooledHash.Object;
hash.Initialize();

AppendData(hash, pooledBuffer.Object, value);

hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
return From(hash.Hash);
#endif
Span<byte> destination = stackalloc byte[XXHash128SizeBytes];
XxHash128.Hash(MemoryMarshal.AsBytes(value.AsSpan()), destination);
return From(destination);
}

public static Checksum Create(Stream stream)
{
#if NET7_0_OR_GREATER
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
SHA256.HashData(stream, hash);
return From(hash);
#elif NET
using var pooledHash = s_incrementalHashPool.GetPooledObject();
Span<byte> buffer = stackalloc byte[SharedPools.ByteBufferSize];
pooledHash.Object.Append(stream);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xxhash128 already has support for appending a stream to it. woohoo.


int bytesRead;
do
{
bytesRead = stream.Read(buffer);
if (bytesRead > 0)
{
pooledHash.Object.AppendData(buffer[..bytesRead]);
}
}
while (bytesRead > 0);

Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
Span<byte> hash = stackalloc byte[XXHash128SizeBytes];
pooledHash.Object.GetHashAndReset(hash);
return From(hash);
#else
using var pooledHash = s_incrementalHashPool.GetPooledObject();
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();

var hash = pooledHash.Object;
hash.Initialize();

var buffer = pooledBuffer.Object;
var bufferLength = buffer.Length;
int bytesRead;
do
{
bytesRead = stream.Read(buffer, 0, bufferLength);
if (bytesRead > 0)
{
hash.TransformBlock(buffer, 0, bytesRead, null, 0);
}
}
while (bytesRead > 0);

hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
var bytes = hash.Hash;

// if bytes array is bigger than certain size, checksum
// will truncate it to predetermined size. for more detail,
// see the Checksum type
//
// the truncation can happen since different hash algorithm or
// same algorithm on different platform can have different hash size
// which might be bigger than the Checksum HashSize.
//
// hash algorithm used here should remain functionally correct even
// after the truncation
return From(bytes);
#endif
}

public static Checksum Create<T>(T @object, Action<T, ObjectWriter> writeObject)
Expand All @@ -166,89 +70,18 @@ public static Checksum Create<T>(T @object, Action<T, ObjectWriter> writeObject)
}

public static Checksum Create(Checksum checksum1, Checksum checksum2)
{
#if NET
return CreateUsingSpans(checksum1, checksum2);
#else
return CreateUsingByteArrays(checksum1, checksum2);
#endif
}
=> Create(stackalloc[] { checksum1.Hash, checksum2.Hash });

public static Checksum Create(Checksum checksum1, Checksum checksum2, Checksum checksum3)
{
#if NET
return CreateUsingSpans(checksum1, checksum2, checksum3);
#else
return CreateUsingByteArrays(checksum1, checksum2, checksum3);
#endif
}
=> Create(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash });

public static Checksum Create(ReadOnlySpan<Checksum.HashData> hashes)
{
#if NET
return CreateUsingSpans(hashes);
#else
return CreateUsingByteArrays(hashes);
#endif
Span<byte> destination = stackalloc byte[XXHash128SizeBytes];
XxHash128.Hash(MemoryMarshal.AsBytes(hashes), destination);
return From(destination);
}

#if !NET

private static PooledObject<byte[]> GetPooledByteArray(int checksumCount)
{
var objectPool = s_checksumByteArrayPool[checksumCount];
return objectPool.GetPooledObject();
}

private static Checksum CreateUsingByteArrays(ReadOnlySpan<Checksum.HashData> checksums)
{
using var bytes = GetPooledByteArray(checksumCount: checksums.Length);

var bytesSpan = bytes.Object.AsSpan();
var index = 0;
foreach (var checksum in checksums)
{
checksum.WriteTo(bytesSpan.Slice(HashSize * index));
index++;
}

using var hash = s_incrementalHashPool.GetPooledObject();
hash.Object.Initialize();

hash.Object.TransformBlock(bytes.Object, 0, bytes.Object.Length, null, 0);

hash.Object.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
return From(hash.Object.Hash);
}

private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2)
=> CreateUsingByteArrays(stackalloc[] { checksum1.Hash, checksum2.Hash });

private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2, Checksum checksum3)
=> CreateUsingByteArrays(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash });

#else

// Optimized helpers that do not need to allocate any arrays to combine hashes.

private static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2)
=> CreateUsingSpans(stackalloc[] { checksum1.Hash, checksum2.Hash });

private static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2, Checksum checksum3)
=> CreateUsingSpans(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash });

private static Checksum CreateUsingSpans(
ReadOnlySpan<Checksum.HashData> hashes)
{
Span<byte> hashResultSpan = stackalloc byte[SHA256HashSizeBytes];

SHA256.HashData(MemoryMarshal.AsBytes(hashes), hashResultSpan);

return From(hashResultSpan);
}

#endif

public static Checksum Create(ArrayBuilder<Checksum> checksums)
{
using var stream = SerializableBytes.CreateWritableStream();
Expand Down Expand Up @@ -317,25 +150,5 @@ public static Checksum Create(ParseOptions value, ISerializerService serializer)
stream.Position = 0;
return Create(stream);
}

#if !NET
private static void AppendData(SHA256 hash, byte[] buffer, string value)
{
var stringBytes = MemoryMarshal.AsBytes(value.AsSpan());
Debug.Assert(stringBytes.Length == value.Length * 2);

var index = 0;
while (index < stringBytes.Length)
{
var remaining = stringBytes.Length - index;
var toCopy = Math.Min(remaining, buffer.Length);

stringBytes.Slice(index, toCopy).CopyTo(buffer);
hash.TransformBlock(buffer, 0, toCopy, null, 0);

index += toCopy;
}
}
#endif
}
}
Loading
Loading