Skip to content

Commit

Permalink
Merge pull request #67999 from sharwell/incremental-hash-2
Browse files Browse the repository at this point in the history
Use optimized hash creation methods on .NET 5+
  • Loading branch information
sharwell authored Apr 28, 2023
2 parents ae24476 + 48e9cc5 commit ce2b2c7
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 53 deletions.
115 changes: 81 additions & 34 deletions src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Security.Cryptography;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using System.Threading;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Serialization;
using Roslyn.Utilities;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace Microsoft.CodeAnalysis
{
Expand All @@ -23,47 +22,102 @@ internal partial class Checksum
// https://github.com/dotnet/runtime/blob/f2db6d6093c54e5eeb9db2d8dcbe15b2db92ad8c/src/libraries/System.Security.Cryptography.Algorithms/src/System/Security/Cryptography/SHA256.cs#L18-L19
private const int SHA256HashSizeBytes = 256 / 8;

// Pool of reusable SHA-256 hashers shared by the Create overloads in this file. The pooled type depends on
// the target framework: IncrementalHash (created for HashAlgorithmName.SHA256) when NET5_0_OR_GREATER is
// defined, and a SHA256 instance otherwise. Both pools are constructed with size: 20.
#if NET5_0_OR_GREATER
private static readonly ObjectPool<IncrementalHash> s_incrementalHashPool =
new(() => IncrementalHash.CreateHash(HashAlgorithmName.SHA256), size: 20);
#else
private static readonly ObjectPool<SHA256> s_incrementalHashPool =
new(SHA256.Create, size: 20);
#endif

#if !NET5_0_OR_GREATER
// Dedicated pools for the byte[]s we use to create checksums from two or three existing checksums. Sized to
// exactly the space needed to splat the existing checksum data into the array and then hash it.

private static readonly ObjectPool<byte[]> s_twoChecksumByteArrayPool = new(() => new byte[HashSize * 2]);
private static readonly ObjectPool<byte[]> s_threeChecksumByteArrayPool = new(() => new byte[HashSize * 3]);
#endif

/// <summary>
/// Creates a checksum over a sequence of strings. Each string contributes its characters,
/// reinterpreted as raw bytes, followed by the bytes of a "\0" terminator, in enumeration order.
/// </summary>
/// <param name="values">The strings to hash, in the order they should contribute to the checksum.</param>
/// <returns>The <see cref="Checksum"/> built (via <c>From</c>) from the SHA-256 hash of that byte sequence.</returns>
public static Checksum Create(IEnumerable<string> values)
{
#if NET5_0_OR_GREATER
    using var pooledHash = s_incrementalHashPool.GetPooledObject();

    foreach (var value in values)
    {
        pooledHash.Object.AppendData(MemoryMarshal.AsBytes(value.AsSpan()));

        // The terminator keeps element boundaries in the hashed bytes, so { "ab", "c" } and
        // { "a", "bc" } do not feed identical input to the hash.
        pooledHash.Object.AppendData(MemoryMarshal.AsBytes("\0".AsSpan()));
    }

    Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
    pooledHash.Object.GetHashAndReset(hash);
    return From(hash);
#else
    using var pooledHash = s_incrementalHashPool.GetPooledObject();
    using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();
    var hash = pooledHash.Object;

    // The SHA256 instance comes from a pool; reset it before feeding new data.
    hash.Initialize();
    foreach (var value in values)
    {
        // AppendData copies the string's bytes through the pooled buffer into hash.TransformBlock.
        AppendData(hash, pooledBuffer.Object, value);
        AppendData(hash, pooledBuffer.Object, "\0");
    }

    // An empty final block completes the computation so that hash.Hash can be read.
    hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
    return From(hash.Hash);
#endif
}

/// <summary>
/// Creates a checksum from a single string by hashing its characters reinterpreted as raw bytes.
/// Unlike the <c>IEnumerable&lt;string&gt;</c> overload, no "\0" terminator is appended.
/// </summary>
/// <param name="value">The string to hash.</param>
/// <returns>The <see cref="Checksum"/> built (via <c>From</c>) from the SHA-256 hash of the string's bytes.</returns>
public static Checksum Create(string value)
{
#if NET5_0_OR_GREATER
    // One-shot static hashing: no pooled hasher or intermediate array is needed.
    Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
    SHA256.HashData(MemoryMarshal.AsBytes(value.AsSpan()), hash);
    return From(hash);
#else
    using var pooledHash = s_incrementalHashPool.GetPooledObject();
    using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();
    var hash = pooledHash.Object;

    // The SHA256 instance comes from a pool; reset it before feeding new data.
    hash.Initialize();

    // AppendData copies the string's bytes through the pooled buffer into hash.TransformBlock.
    AppendData(hash, pooledBuffer.Object, value);

    // An empty final block completes the computation so that hash.Hash can be read.
    hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
    return From(hash.Hash);
#endif
}

public static Checksum Create(Stream stream)
{
#if NET7_0_OR_GREATER
Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
SHA256.HashData(stream, hash);
return From(hash);
#elif NET5_0_OR_GREATER
using var pooledHash = s_incrementalHashPool.GetPooledObject();
Span<byte> buffer = stackalloc byte[SharedPools.ByteBufferSize];

int bytesRead;
do
{
bytesRead = stream.Read(buffer);
if (bytesRead > 0)
{
pooledHash.Object.AppendData(buffer[..bytesRead]);
}
}
while (bytesRead > 0);

Span<byte> hash = stackalloc byte[SHA256HashSizeBytes];
pooledHash.Object.GetHashAndReset(hash);
return From(hash);
#else
using var pooledHash = s_incrementalHashPool.GetPooledObject();
using var pooledBuffer = SharedPools.ByteArray.GetPooledObject();

var hash = pooledHash.Object;
hash.Initialize();

var buffer = pooledBuffer.Object;
var bufferLength = buffer.Length;
Expand All @@ -73,12 +127,13 @@ public static Checksum Create(Stream stream)
bytesRead = stream.Read(buffer, 0, bufferLength);
if (bytesRead > 0)
{
hash.AppendData(buffer, 0, bytesRead);
hash.TransformBlock(buffer, 0, bytesRead, null, 0);
}
}
while (bytesRead > 0);

var bytes = hash.GetHashAndReset();
hash.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
var bytes = hash.Hash;

// if bytes array is bigger than certain size, checksum
// will truncate it to predetermined size. for more detail,
Expand All @@ -91,6 +146,7 @@ public static Checksum Create(Stream stream)
// hash algorithm used here should remain functionally correct even
// after the truncation
return From(bytes);
#endif
}

public static Checksum Create(IObjectWritable @object)
Expand Down Expand Up @@ -124,36 +180,44 @@ public static Checksum Create(Checksum checksum1, Checksum checksum2, Checksum c
#endif
}

#if !NET5_0_OR_GREATER

/// <summary>
/// Combines two checksums into one by writing them back to back into a pooled byte array
/// (allocated as <c>HashSize * 2</c> bytes) and hashing that array with the pooled SHA256 instance.
/// </summary>
/// <param name="checksum1">Checksum written at offset 0 of the buffer.</param>
/// <param name="checksum2">Checksum written at offset <c>HashSize</c> of the buffer.</param>
/// <returns>The <see cref="Checksum"/> built (via <c>From</c>) from the hash of the combined bytes.</returns>
private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2)
{
    using var bytes = s_twoChecksumByteArrayPool.GetPooledObject();

    var bytesSpan = bytes.Object.AsSpan();
    checksum1.WriteTo(bytesSpan);
    checksum2.WriteTo(bytesSpan.Slice(HashSize));

    using var hash = s_incrementalHashPool.GetPooledObject();

    // The SHA256 instance comes from a pool; reset it before feeding new data.
    hash.Object.Initialize();

    hash.Object.TransformBlock(bytes.Object, 0, bytes.Object.Length, null, 0);

    // An empty final block completes the computation so that Hash can be read.
    hash.Object.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
    return From(hash.Object.Hash);
}

/// <summary>
/// Combines three checksums into one by writing them back to back into a pooled byte array
/// (allocated as <c>HashSize * 3</c> bytes) and hashing that array with the pooled SHA256 instance.
/// </summary>
/// <param name="checksum1">Checksum written at offset 0 of the buffer.</param>
/// <param name="checksum2">Checksum written at offset <c>HashSize</c> of the buffer.</param>
/// <param name="checksum3">Checksum written at offset <c>2 * HashSize</c> of the buffer.</param>
/// <returns>The <see cref="Checksum"/> built (via <c>From</c>) from the hash of the combined bytes.</returns>
private static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2, Checksum checksum3)
{
    using var bytes = s_threeChecksumByteArrayPool.GetPooledObject();

    var bytesSpan = bytes.Object.AsSpan();
    checksum1.WriteTo(bytesSpan);
    checksum2.WriteTo(bytesSpan.Slice(HashSize));
    checksum3.WriteTo(bytesSpan.Slice(2 * HashSize));

    using var hash = s_incrementalHashPool.GetPooledObject();

    // The SHA256 instance comes from a pool; reset it before feeding new data.
    hash.Object.Initialize();

    hash.Object.TransformBlock(bytes.Object, 0, bytes.Object.Length, null, 0);

    // An empty final block completes the computation so that Hash can be read.
    hash.Object.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
    return From(hash.Object.Hash);
}

#if NET
#else

// Optimized helpers that do not need to allocate any arrays to combine hashes.

Expand Down Expand Up @@ -234,7 +298,8 @@ public static Checksum Create(ParseOptions value, ISerializerService serializer)
return Create(stream);
}

private static void AppendData(IncrementalHash hash, byte[] buffer, string value)
#if !NET5_0_OR_GREATER
private static void AppendData(SHA256 hash, byte[] buffer, string value)
{
var stringBytes = MemoryMarshal.AsBytes(value.AsSpan());
Debug.Assert(stringBytes.Length == value.Length * 2);
Expand All @@ -246,29 +311,11 @@ private static void AppendData(IncrementalHash hash, byte[] buffer, string value
var toCopy = Math.Min(remaining, buffer.Length);

stringBytes.Slice(index, toCopy).CopyTo(buffer);
hash.AppendData(buffer, 0, toCopy);
hash.TransformBlock(buffer, 0, toCopy, null, 0);

index += toCopy;
}
}

// Nested accessor type that exposes the checksum-combining helpers of Checksum through public
// forwarding methods. Each member simply delegates to the same-named method on Checksum.
// NOTE(review): the name suggests this exists only for unit tests — confirm against callers.
public static class TestAccessor
{
// Forwards to the private two-checksum byte-array combiner.
public static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2)
=> Checksum.CreateUsingByteArrays(checksum1, checksum2);

// Forwards to the private three-checksum byte-array combiner.
public static Checksum CreateUsingByteArrays(Checksum checksum1, Checksum checksum2, Checksum checksum3)
=> Checksum.CreateUsingByteArrays(checksum1, checksum2, checksum3);

// The span-based forwarders below are compiled only when the NET symbol is defined.
#if NET

// Forwards to Checksum.CreateUsingSpans for two checksums.
public static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2)
=> Checksum.CreateUsingSpans(checksum1, checksum2);

// Forwards to Checksum.CreateUsingSpans for three checksums.
public static Checksum CreateUsingSpans(Checksum checksum1, Checksum checksum2, Checksum checksum3)
=> Checksum.CreateUsingSpans(checksum1, checksum2, checksum3);

#endif
}
}
}
25 changes: 6 additions & 19 deletions src/Workspaces/CoreTest/ChecksumTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,26 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Xunit;

namespace Microsoft.CodeAnalysis.UnitTests
{
public class ChecksumTests
{
#if NET
// Combines two string checksums and pins the result to a fixed Base64 value, then checks that the
// inputs and the combined checksum are all distinct from one another.
[Fact]
public void ValidateChecksumFromSpanSameAsChecksumFromBytes1()
{
    var checksum1 = Checksum.Create("Goo");
    var checksum2 = Checksum.Create("Bar");

    var checksumA = Checksum.Create(checksum1, checksum2);

    // Running this test on multiple target frameworks with the same expectation ensures the results match
    Assert.Equal(Checksum.FromBase64String("N30m5jwVeMZzWpy9cbQbtSYHoXU="), checksumA);

    Assert.NotEqual(checksum1, checksum2);

    Assert.NotEqual(checksum1, checksumA);
    Assert.NotEqual(checksum2, checksumA);
}

[Fact]
Expand All @@ -40,22 +31,18 @@ public void ValidateChecksumFromSpanSameAsChecksumFromBytes2()
var checksum2 = Checksum.Create("Bar");
var checksum3 = Checksum.Create("Baz");

var checksumA = Checksum.TestAccessor.CreateUsingByteArrays(checksum1, checksum2, checksum3);
var checksumB = Checksum.TestAccessor.CreateUsingSpans(checksum1, checksum2, checksum3);
var checksumA = Checksum.Create(checksum1, checksum2, checksum3);

Assert.Equal(checksumA, checksumB);
// Running this test on multiple target frameworks with the same expectation ensures the results match
Assert.Equal(Checksum.FromBase64String("NEfIznmqkIqi4VJl12KxycWt7uo="), checksumA);

Assert.NotEqual(checksum1, checksum2);
Assert.NotEqual(checksum2, checksum3);
Assert.NotEqual(checksum3, checksum1);

Assert.NotEqual(checksum1, checksumA);
Assert.NotEqual(checksum1, checksumB);
Assert.NotEqual(checksum2, checksumA);
Assert.NotEqual(checksum2, checksumB);
Assert.NotEqual(checksum3, checksumA);
Assert.NotEqual(checksum3, checksumB);
}
#endif
}
}

0 comments on commit ce2b2c7

Please sign in to comment.