-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Switch to Xxhash128 from sha256 #70723
Changes from 7 commits
b325cf9
e20234e
1eb8cfa
eb0bcfc
d3875e0
def56fc
2f5f6ac
9639bed
ee31bdf
2c1117d
6bf626a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,7 @@ internal sealed partial record class Checksum( | |
/// <summary> | ||
/// The intended size of the <see cref="HashData"/> structure. | ||
/// </summary> | ||
public const int HashSize = 20; | ||
public const int HashSize = 16; | ||
|
||
public static readonly Checksum Null = new(Hash: default); | ||
|
||
|
@@ -119,29 +119,24 @@ public override int GetHashCode() | |
[DataContract, StructLayout(LayoutKind.Explicit, Size = HashSize)] | ||
public readonly record struct HashData( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no. this has to run on netfx and core. |
||
[field: FieldOffset(0)][property: DataMember(Order = 0)] long Data1, | ||
[field: FieldOffset(8)][property: DataMember(Order = 1)] long Data2, | ||
[field: FieldOffset(16)][property: DataMember(Order = 2)] int Data3) | ||
[field: FieldOffset(8)][property: DataMember(Order = 1)] long Data2) | ||
{ | ||
public void WriteTo(ObjectWriter writer) | ||
{ | ||
writer.WriteInt64(Data1); | ||
writer.WriteInt64(Data2); | ||
writer.WriteInt32(Data3); | ||
} | ||
|
||
public void WriteTo(Span<byte> span) | ||
{ | ||
Contract.ThrowIfFalse(span.Length >= HashSize); | ||
Contract.ThrowIfTrue(span.Length < HashSize); | ||
#pragma warning disable CS9191 // The 'ref' modifier for an argument corresponding to 'in' parameter is equivalent to 'in'. Consider using 'in' instead. | ||
Contract.ThrowIfFalse(MemoryMarshal.TryWrite(span, ref Unsafe.AsRef(in this))); | ||
#pragma warning restore CS9191 | ||
} | ||
|
||
public static unsafe HashData FromPointer(HashData* hash) | ||
=> new(hash->Data1, hash->Data2, hash->Data3); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. never used. |
||
|
||
public static HashData ReadFrom(ObjectReader reader) | ||
=> new(reader.ReadInt64(), reader.ReadInt64(), reader.ReadInt32()); | ||
=> new(reader.ReadInt64(), reader.ReadInt64()); | ||
|
||
public override int GetHashCode() | ||
{ | ||
|
@@ -152,7 +147,7 @@ public override int GetHashCode() | |
// Explicitly implement this method as default jit for records on netfx doesn't properly devirtualize the | ||
// standard calls to EqualityComparer<long>.Default.Equals | ||
public bool Equals(HashData other) | ||
=> this.Data1 == other.Data1 && this.Data2 == other.Data2 && this.Data3 == other.Data3; | ||
=> this.Data1 == other.Data1 && this.Data2 == other.Data2; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,11 +5,9 @@ | |
using System; | ||
using System.Collections.Generic; | ||
using System.Collections.Immutable; | ||
using System.Diagnostics; | ||
using System.IO; | ||
using System.Linq; | ||
using System.IO.Hashing; | ||
using System.Runtime.InteropServices; | ||
using System.Security.Cryptography; | ||
using System.Threading; | ||
using Microsoft.CodeAnalysis.PooledObjects; | ||
using Microsoft.CodeAnalysis.Serialization; | ||
|
@@ -21,135 +19,41 @@ namespace Microsoft.CodeAnalysis | |
internal partial record class Checksum | ||
{ | ||
// https://github.com/dotnet/runtime/blob/f2db6d6093c54e5eeb9db2d8dcbe15b2db92ad8c/src/libraries/System.Security.Cryptography.Algorithms/src/System/Security/Cryptography/SHA256.cs#L18-L19 | ||
CyrusNajmabadi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private const int SHA256HashSizeBytes = 256 / 8; | ||
private const int XXHash128SizeBytes = 128 / 8; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure why we need this constant and the HashSize constant. But was keeping this roughly the same. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you follow up with cleanup-refactoring? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. i'm also intending to measure why we need Checksum and HashData when it seems like we could just have the latter. The former is technically good if we expect a few actual distinct checksums, and tons of pointers to them. The latter would be good if we expect less pointers and most of the values to just be needed in the place where we're pointing at them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might have been a 32-bit process the last time this was evaluated, but the concern last time we looked was the number of pointers could exceed the number of instances. |
||
|
||
#if NET | ||
private static readonly ObjectPool<IncrementalHash> s_incrementalHashPool = | ||
new(() => IncrementalHash.CreateHash(HashAlgorithmName.SHA256), size: 20); | ||
#else | ||
private static readonly ObjectPool<SHA256> s_incrementalHashPool = | ||
new(SHA256.Create, size: 20); | ||
#endif | ||
|
||
#if !NET | ||
// Dedicated pools for the byte[]s we use to create checksums from two or three existing checksums. Sized to | ||
// exactly the space needed to splat the existing checksum data into the array and then hash it. | ||
|
||
// Note: number of elements here should be computed based on what we need from our various collection-with-children objects. | ||
private static readonly ObjectPool<byte[]>[] s_checksumByteArrayPool = | ||
Enumerable.Range(0, 11).Select(i => new ObjectPool<byte[]>(() => new byte[HashSize * i])).ToArray(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no longer needed. the xxhash128 has all the functionality we need. |
||
|
||
#endif | ||
private static readonly ObjectPool<XxHash128> s_incrementalHashPool = | ||
new(() => new(), size: 20); | ||
|
||
public static Checksum Create(IEnumerable<string> values) | ||
{ | ||
#if NET | ||
using var pooledHash = s_incrementalHashPool.GetPooledObject(); | ||
|
||
foreach (var value in values) | ||
{ | ||
pooledHash.Object.AppendData(MemoryMarshal.AsBytes(value.AsSpan())); | ||
pooledHash.Object.AppendData(MemoryMarshal.AsBytes("\0".AsSpan())); | ||
pooledHash.Object.Append(MemoryMarshal.AsBytes(value.AsSpan())); | ||
pooledHash.Object.Append(MemoryMarshal.AsBytes("\0".AsSpan())); | ||
} | ||
|
||
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes]; | ||
Span<byte> hash = stackalloc byte[XXHash128SizeBytes]; | ||
pooledHash.Object.GetHashAndReset(hash); | ||
return From(hash); | ||
#else | ||
using var pooledHash = s_incrementalHashPool.GetPooledObject(); | ||
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject(); | ||
var hash = pooledHash.Object; | ||
|
||
hash.Initialize(); | ||
foreach (var value in values) | ||
{ | ||
AppendData(hash, pooledBuffer.Object, value); | ||
AppendData(hash, pooledBuffer.Object, "\0"); | ||
} | ||
|
||
hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0); | ||
return From(hash.Hash); | ||
#endif | ||
} | ||
|
||
public static Checksum Create(string value) | ||
{ | ||
#if NET | ||
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes]; | ||
SHA256.HashData(MemoryMarshal.AsBytes(value.AsSpan()), hash); | ||
return From(hash); | ||
#else | ||
using var pooledHash = s_incrementalHashPool.GetPooledObject(); | ||
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject(); | ||
var hash = pooledHash.Object; | ||
hash.Initialize(); | ||
|
||
AppendData(hash, pooledBuffer.Object, value); | ||
|
||
hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0); | ||
return From(hash.Hash); | ||
#endif | ||
Span<byte> destination = stackalloc byte[XXHash128SizeBytes]; | ||
XxHash128.Hash(MemoryMarshal.AsBytes(value.AsSpan()), destination); | ||
return From(destination); | ||
} | ||
|
||
public static Checksum Create(Stream stream) | ||
{ | ||
#if NET7_0_OR_GREATER | ||
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes]; | ||
SHA256.HashData(stream, hash); | ||
return From(hash); | ||
#elif NET | ||
using var pooledHash = s_incrementalHashPool.GetPooledObject(); | ||
Span<byte> buffer = stackalloc byte[SharedPools.ByteBufferSize]; | ||
pooledHash.Object.Append(stream); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. xxhash128 already has support for appending a stream to it. woohoo. |
||
|
||
int bytesRead; | ||
do | ||
{ | ||
bytesRead = stream.Read(buffer); | ||
if (bytesRead > 0) | ||
{ | ||
pooledHash.Object.AppendData(buffer[..bytesRead]); | ||
} | ||
} | ||
while (bytesRead > 0); | ||
|
||
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes]; | ||
Span<byte> hash = stackalloc byte[XXHash128SizeBytes]; | ||
pooledHash.Object.GetHashAndReset(hash); | ||
return From(hash); | ||
#else | ||
using var pooledHash = s_incrementalHashPool.GetPooledObject(); | ||
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject(); | ||
|
||
var hash = pooledHash.Object; | ||
hash.Initialize(); | ||
|
||
var buffer = pooledBuffer.Object; | ||
var bufferLength = buffer.Length; | ||
int bytesRead; | ||
do | ||
{ | ||
bytesRead = stream.Read(buffer, 0, bufferLength); | ||
if (bytesRead > 0) | ||
{ | ||
hash.TransformBlock(buffer, 0, bytesRead, null, 0); | ||
} | ||
} | ||
while (bytesRead > 0); | ||
|
||
hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0); | ||
var bytes = hash.Hash; | ||
|
||
// if bytes array is bigger than certain size, checksum | ||
// will truncate it to predetermined size. for more detail, | ||
// see the Checksum type | ||
// | ||
// the truncation can happen since different hash algorithm or | ||
// same algorithm on different platform can have different hash size | ||
// which might be bigger than the Checksum HashSize. | ||
// | ||
// hash algorithm used here should remain functionally correct even | ||
// after the truncation | ||
return From(bytes); | ||
#endif | ||
} | ||
|
||
public static Checksum Create<T>(T @object, Action<T, ObjectWriter> writeObject) | ||
|
@@ -166,89 +70,18 @@ public static Checksum Create<T>(T @object, Action<T, ObjectWriter> writeObject) | |
} | ||
|
||
public static Checksum Create(Checksum checksum1, Checksum checksum2) | ||
{ | ||
#if NET | ||
return CreateUsingSpans(checksum1, checksum2); | ||
#else | ||
return CreateUsingByteArrays(checksum1, checksum2); | ||
#endif | ||
} | ||
=> Create(stackalloc[] { checksum1.Hash, checksum2.Hash }); | ||
|
||
public static Checksum Create(Checksum checksum1, Checksum checksum2, Checksum checksum3) | ||
{ | ||
#if NET | ||
return CreateUsingSpans(checksum1, checksum2, checksum3); | ||
#else | ||
return CreateUsingByteArrays(checksum1, checksum2, checksum3); | ||
#endif | ||
} | ||
=> Create(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash }); | ||
|
||
public static Checksum Create(ReadOnlySpan<Checksum.HashData> hashes) | ||
{ | ||
#if NET | ||
return CreateUsingSpans(hashes); | ||
#else | ||
return CreateUsingByteArrays(hashes); | ||
#endif | ||
Span<byte> destination = stackalloc byte[XXHash128SizeBytes]; | ||
XxHash128.Hash(MemoryMarshal.AsBytes(hashes), destination); | ||
return From(destination); | ||
} | ||
|
||
#if !NET | ||
|
||
private static PooledObject<byte[]> GetPooledByteArray(int checksumCount) | ||
{ | ||
var objectPool = s_checksumByteArrayPool[checksumCount]; | ||
return objectPool.GetPooledObject(); | ||
} | ||
|
||
private static Checksum CreateUsingByteArrays(ReadOnlySpan<Checksum.HashData> checksums) | ||
{ | ||
using var bytes = GetPooledByteArray(checksumCount: checksums.Length); | ||
|
||
var bytesSpan = bytes.Object.AsSpan(); | ||
var index = 0; | ||
foreach (var checksum in checksums) | ||
{ | ||
checksum.WriteTo(bytesSpan.Slice(HashSize * index)); | ||
index++; | ||
} | ||
|
||
using var hash = s_incrementalHashPool.GetPooledObject(); | ||
hash.Object.Initialize(); | ||
|
||
hash.Object.TransformBlock(bytes.Object, 0, bytes.Object.Length, null, 0); | ||
|
||
hash.Object.TransformFinalBlock(Array.Empty<byte>(), 0, 0); | ||
return From(hash.Object.Hash); | ||
} | ||
|
||
private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2) | ||
=> CreateUsingByteArrays(stackalloc[] { checksum1.Hash, checksum2.Hash }); | ||
|
||
private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2, Checksum checksum3) | ||
=> CreateUsingByteArrays(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash }); | ||
|
||
#else | ||
|
||
// Optimized helpers that do not need to allocate any arrays to combine hashes. | ||
|
||
private static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2) | ||
=> CreateUsingSpans(stackalloc[] { checksum1.Hash, checksum2.Hash }); | ||
|
||
private static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2, Checksum checksum3) | ||
=> CreateUsingSpans(stackalloc[] { checksum1.Hash, checksum2.Hash, checksum3.Hash }); | ||
|
||
private static Checksum CreateUsingSpans( | ||
ReadOnlySpan<Checksum.HashData> hashes) | ||
{ | ||
Span<byte> hashResultSpan = stackalloc byte[SHA256HashSizeBytes]; | ||
|
||
SHA256.HashData(MemoryMarshal.AsBytes(hashes), hashResultSpan); | ||
|
||
return From(hashResultSpan); | ||
} | ||
|
||
#endif | ||
|
||
public static Checksum Create(ArrayBuilder<Checksum> checksums) | ||
{ | ||
using var stream = SerializableBytes.CreateWritableStream(); | ||
|
@@ -317,25 +150,5 @@ public static Checksum Create(ParseOptions value, ISerializerService serializer) | |
stream.Position = 0; | ||
return Create(stream); | ||
} | ||
|
||
#if !NET | ||
private static void AppendData(SHA256 hash, byte[] buffer, string value) | ||
{ | ||
var stringBytes = MemoryMarshal.AsBytes(value.AsSpan()); | ||
Debug.Assert(stringBytes.Length == value.Length * 2); | ||
|
||
var index = 0; | ||
while (index < stringBytes.Length) | ||
{ | ||
var remaining = stringBytes.Length - index; | ||
var toCopy = Math.Min(remaining, buffer.Length); | ||
|
||
stringBytes.Slice(index, toCopy).CopyTo(buffer); | ||
hash.TransformBlock(buffer, 0, toCopy, null, 0); | ||
|
||
index += toCopy; | ||
} | ||
} | ||
#endif | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably won't want to ship a prerelease of this dependency with VS. Can you use the latest stable version instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we cannot. the latest stable does not include xxhash128 (the actual hashing function we're trying to use).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. In that case, we'll just ask that you help maintain the version on the VS repo side to switch to the stable 8.0 version when it's available.