From d9d7f5f7dbe2a24c62d4358b06cea769ecf22e37 Mon Sep 17 00:00:00 2001 From: scooletz Date: Mon, 20 Jan 2025 11:17:30 +0100 Subject: [PATCH 1/6] log builder --- src/Nethermind/Directory.Packages.props | 3 +- .../BinaryEncodingTests.cs | 26 ++ .../Nethermind.Logs.Test/LogBuilderTests.cs | 124 ++++++++ .../Nethermind.Logs.Test.csproj | 29 ++ .../Nethermind.Logs/BinaryEncoding.cs | 54 ++++ .../Nethermind.Logs/CountingBufferWriter.cs | 23 ++ src/Nethermind/Nethermind.Logs/LogsBuilder.cs | 296 ++++++++++++++++++ .../Nethermind.Logs/Nethermind.Logs.csproj | 18 ++ .../Nethermind.Logs/PrimitiveExtensions.cs | 24 ++ src/Nethermind/Nethermind.sln | 13 + src/Nethermind/Nethermind.sln.DotSettings | 2 + 11 files changed, 611 insertions(+), 1 deletion(-) create mode 100644 src/Nethermind/Nethermind.Logs.Test/BinaryEncodingTests.cs create mode 100644 src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs create mode 100644 src/Nethermind/Nethermind.Logs.Test/Nethermind.Logs.Test.csproj create mode 100644 src/Nethermind/Nethermind.Logs/BinaryEncoding.cs create mode 100644 src/Nethermind/Nethermind.Logs/CountingBufferWriter.cs create mode 100644 src/Nethermind/Nethermind.Logs/LogsBuilder.cs create mode 100644 src/Nethermind/Nethermind.Logs/Nethermind.Logs.csproj create mode 100644 src/Nethermind/Nethermind.Logs/PrimitiveExtensions.cs create mode 100644 src/Nethermind/Nethermind.sln.DotSettings diff --git a/src/Nethermind/Directory.Packages.props b/src/Nethermind/Directory.Packages.props index 5b18572cdae..f91b9d1b274 100644 --- a/src/Nethermind/Directory.Packages.props +++ b/src/Nethermind/Directory.Packages.props @@ -78,6 +78,7 @@ + @@ -85,4 +86,4 @@ - + \ No newline at end of file diff --git a/src/Nethermind/Nethermind.Logs.Test/BinaryEncodingTests.cs b/src/Nethermind/Nethermind.Logs.Test/BinaryEncodingTests.cs new file mode 100644 index 00000000000..df08e7820bb --- /dev/null +++ b/src/Nethermind/Nethermind.Logs.Test/BinaryEncodingTests.cs @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: 2025 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using FluentAssertions; +using NUnit.Framework; + +namespace Nethermind.Logs.Test; + +public class BinaryEncodingTests +{ + [TestCase(0u)] + [TestCase(1u)] + [TestCase(0xFFu)] + [TestCase(0xFFFFu)] + [TestCase(0xFFFFFu)] + [TestCase(0xFFFFFFu)] + [TestCase(0xFFFFFFFFu)] + public void Test(uint value) + { + Span buffer = stackalloc byte[8]; + + var written = BinaryEncoding.WriteVarInt(value, buffer); + BinaryEncoding.TryReadVarInt(buffer, 0, out var read).Should().Be(written); + read.Should().Be(value); + } +} diff --git a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs new file mode 100644 index 00000000000..b7f3b50b421 --- /dev/null +++ b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs @@ -0,0 +1,124 @@ +using System.Buffers; +using FluentAssertions; +using Nethermind.Core; +using Nethermind.Core.Crypto; +using NUnit.Framework; + +namespace Nethermind.Logs.Test; + +public class LogBuilderTests +{ + private const int HashCount = 256; + private readonly Hash256[] _hashes; + + public LogBuilderTests() + { + var random = new Random(13); + Span span = stackalloc byte[Hash256.Size]; + + _hashes = new Hash256[HashCount]; + + for (int i = 0; i < HashCount; i++) + { + random.NextBytes(span); + _hashes[i] = new Hash256(span); + } + } + + [Test] + public void Simple() + { + var builder = new LogsBuilder(); + var hash0 = _hashes[0]; + + var entry0 = new LogEntry(Address.SystemUser, [], [hash0]); + var entry1 = new LogEntry(Address.MaxValue, [], [hash0]); + + const ushort block = 1; + const ushort tx1 = 1; + const ushort tx2 = 2; + + builder.Append(entry0, block, tx1); + builder.Append(entry1, block, tx2); + + var writer = new ArrayBufferWriter(); + + builder.Build(writer); + + var reader = new LogsBuilder.MemoryReader(writer.WrittenMemory); + + LogsBuilder.Entry e1 = new(block, tx1); + LogsBuilder.Entry e2 = new(block, tx2); + + reader.Find(Address.SystemUser).ToArray().Should().BeEquivalentTo([e1]); + reader.Find(Address.MaxValue).ToArray().Should().BeEquivalentTo([e2]); + + reader.Find(hash0).ToArray().Should().BeEquivalentTo([e1, e2]); + + reader.Find(hash0, 1).Should().BeEmpty(); + } + + [Test] + public void Entries_are_always_ordered_per_block() + { + var builder = new LogsBuilder(); + var hash0 = _hashes[0]; + + var entry = new LogEntry(Address.SystemUser, [], [hash0]); + + const ushort block1 = 1; + const ushort block2 = 2; + const ushort tx1 = 1; + const ushort tx2 = 2; + + // Report it with different ordering + builder.Append(entry, block2, tx2); + builder.Append(entry, block1, tx2); + builder.Append(entry, block1, tx1); + + var writer = new ArrayBufferWriter(); + + builder.Build(writer); + + var reader = new LogsBuilder.MemoryReader(writer.WrittenMemory); + + LogsBuilder.Entry e1 = new(block1, tx1); + LogsBuilder.Entry e2 = new(block1, tx2); + LogsBuilder.Entry e3 = new(block2, tx2); + + // order by number of block then by tx + LogsBuilder.Entry[] expected = [e1, e2, e3]; + + reader.Find(Address.SystemUser).Should().BeEquivalentTo(expected); + reader.Find(hash0).ToArray().Should().BeEquivalentTo(expected); + + reader.Find(hash0, 1).Should().BeEmpty(); + } + + [Test] + public void Frequent_topics_are_compressed_well() + { + var builder = new LogsBuilder(); + var hash0 = _hashes[0]; + + var entry = new LogEntry(Address.SystemUser, [], [hash0]); + + const int blocks = 1000; + const int txs = 1000; + const int logEntries = blocks * txs; + + for (uint i = 0; i < blocks; i++) + { + for (ushort j = 0; j < txs; j++) + { + builder.Append(entry, i, j); + } + } + + var writer = new ArrayBufferWriter(); + + builder.Build(writer); + + Console.WriteLine($"{(double)writer.WrittenCount / logEntries:F1} bytes per {nameof(LogEntry)}"); + } +} diff --git a/src/Nethermind/Nethermind.Logs.Test/Nethermind.Logs.Test.csproj b/src/Nethermind/Nethermind.Logs.Test/Nethermind.Logs.Test.csproj new file mode 100644 index 00000000000..5b6da6ceedc --- /dev/null +++ b/src/Nethermind/Nethermind.Logs.Test/Nethermind.Logs.Test.csproj @@ -0,0 +1,29 @@ + + + + net9.0 + enable + enable + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs b/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs new file mode 100644 index 00000000000..3e424adec09 --- /dev/null +++ b/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: 2025 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +namespace Nethermind.Logs; + +public static class BinaryEncoding +{ + public static int TryReadVarInt(ReadOnlySpan span, int offset, out uint value) + { + if ((uint)offset >= (uint)span.Length) + { + value = 0; + return 0; + } + + const int bits = 7; + + value = span[offset++]; + if ((value & 0x80) == 0) return 1; + value &= 0x7F; + + if ((uint)offset >= (uint)span.Length) return -1; + uint chunk = span[offset++]; + value |= (chunk & 0x7F) << (1 * bits); + if ((chunk & 0x80) == 0) return 2; + + if ((uint)offset >= (uint)span.Length) return -1; + chunk = span[offset++]; + value |= (chunk & 0x7F) << (2 * bits); + if ((chunk & 0x80) == 0) return 3; + + if ((uint)offset >= (uint)span.Length) return -1; + chunk = span[offset++]; + value |= (chunk & 0x7F) << (3 * bits); + if ((chunk & 0x80) == 0) return 4; + + // Use 32 - 28 bits as the last one + value |= (chunk & 0b00001111) << (4 * bits); + return 5; + } + + public static int WriteVarInt(uint value, Span span, int offset = 0) + { + int count = 0; + do + { + span[offset++] = (byte)((value & 0x7F) | 0x80); + count++; + } while ((value >>= 7) != 0); + + span[offset - 1] &= 0x7F; + return count; + } +} diff --git a/src/Nethermind/Nethermind.Logs/CountingBufferWriter.cs b/src/Nethermind/Nethermind.Logs/CountingBufferWriter.cs new file mode 100644 index 00000000000..018175ed749 --- /dev/null +++ b/src/Nethermind/Nethermind.Logs/CountingBufferWriter.cs @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: 2025 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using System.Buffers; + +namespace Nethermind.Logs; + +public sealed class CountingBufferWriter(IBufferWriter writer) : IBufferWriter +{ + public int WrittenCount { get; private set; } + + public void Advance(int count) + { + writer.Advance(count); + WrittenCount += count; + } + + public Memory GetMemory(int sizeHint = 0) => writer.GetMemory(sizeHint); + + public Span GetSpan(int sizeHint = 0) => writer.GetSpan(sizeHint); + + public override string ToString() => $"WrittenCount: {WrittenCount}"; +} diff --git a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs new file mode 100644 index 00000000000..74d353557dc --- /dev/null +++ b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs @@ -0,0 +1,296 @@ +using System.Buffers; +using System.Buffers.Binary; +using System.Collections; +using System.Diagnostics; +using System.IO.Hashing; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Nethermind.Core; +using Nethermind.Core.Crypto; +using Nethermind.Core.Extensions; + +namespace Nethermind.Logs; + +public class LogsBuilder +{ + public readonly struct Entry : IEquatable + { + private readonly uint _raw; + + public Entry(uint raw) + { + _raw = raw; + } + + public Entry(uint blockNumber, uint txNumber) : this((blockNumber << BlockNumberShift) | txNumber) + { + } + + public uint TxNumber => _raw & TxMask; + + public uint BlockNumber => _raw >> BlockNumberShift; + + public bool Equals(Entry other) => _raw == other._raw; + } + + public sealed class MemoryReader + { + private readonly ReadOnlyMemory _mixed; + private readonly ReadOnlyMemory _hashes; + private readonly ReadOnlyMemory _memory; + + public MemoryReader(ReadOnlyMemory memory) + { + _memory = memory; + var count = memory.Span.Slice(memory.Length - LengthOfLength).ReadNativeEndian(); + + var startOfEntries = (int)(memory.Length - LengthOfLength - count * EntryWithHash); + var mixedWithHashes = memory.Slice(startOfEntries, (int)(count * EntryWithHash)); + + Debug.Assert(mixedWithHashes.Length == count * EntryWithHash); + + var split = count * SizeOfMixed; + + _mixed = mixedWithHashes.Slice(0, split); + _hashes = mixedWithHashes.Slice(split); + } + + public IEnumerable Find(Address address) => FindByHash(Hash(address.Bytes, AddressSeed)); + + public IEnumerable Find(Hash256 topic, int index = 0) => + FindByHash(Hash(topic.Bytes, GetTopicSeed(index))); + + private IEnumerable FindByHash(ulong hash) + { + var at = MemoryMarshal.Cast(_hashes.Span).BinarySearch(hash); + + if (at < 0) + return []; + + ReadOnlySpan mixed = MemoryMarshal.Cast(_mixed.Span); + + var m = mixed[at]; + + if ((m & LookupMarker) == 0) + { + return [new(m)]; + } + + var offset = (int)(m & ~LookupMarker); + var start = _memory[offset..].Span.ReadNativeEndian(); + + ReadOnlyMemory payload = _memory.Slice(start, offset - start); + return new EntryEnumerable(payload); + } + + private sealed class EntryEnumerable(ReadOnlyMemory payload) : IEnumerable + { + public IEnumerator GetEnumerator() + { + var offset = 0; + var accumulator = 0u; + + while (offset < payload.Length) + { + var read = BinaryEncoding.TryReadVarInt(payload.Span, offset, out var diff); + if (read == -1) + throw new InvalidOperationException(); + + accumulator += diff; + offset += read; + + yield return new Entry(accumulator); + } + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + } + } + + /// + /// The length of length written at the end. + /// + public const int LengthOfLength = 4; + + private const int SizeOfHash = sizeof(ulong); + private const int SizeOfMixed = sizeof(int); + + public const int EntryWithHash = SizeOfHash + SizeOfMixed; + + public const int CompressedEntryLength = sizeof(int); + public const int CompressedEntrySize = sizeof(int); + + private const long AddressSeed = 0; + private const long TopicSeed = 1; + + /// + /// The shift for the block ensures that there is one bit (sign) left in the mixed to use as a marker + /// + private const int BlockNumberShift = 14; + + /// + /// Max block number taking into consideration + /// that requires the highest bit. + /// + private const int MaxBlockNumber = (1 << (31 - BlockNumberShift)) - 1; + + private const int MaxTxPerBlock = 1 << BlockNumberShift; + private const int TxMask = MaxTxPerBlock - 1; + private const uint LookupMarker = 0x80_00_00_00; + + private const int Size = 1024 * 1024; + + private ulong[] _hashes = new ulong[Size]; + private uint[] _mixed = new uint[Size]; + private int _index; + + public int Count => _index; + + public void Append(LogEntry entry, uint blockNumber, ushort txNumber) + { + Debug.Assert(txNumber < MaxTxPerBlock); + Debug.Assert(blockNumber <= MaxBlockNumber); + + uint mixed = (blockNumber << BlockNumberShift) | txNumber; + + // Append address + Append(entry.Address.Bytes, AddressSeed, mixed); + + for (var i = 0; i < entry.Topics.Length; i++) + { + Hash256 topic = entry.Topics[i]; + Append(topic.Bytes, GetTopicSeed(i), mixed); + } + } + + private void Append(ReadOnlySpan payload, long seed, uint mixed) + { + if (_hashes.Length == _index) + { + Grow(); + } + + _hashes[_index] = Hash(payload, seed); + _mixed[_index] = mixed; + _index++; + } + + private static ulong Hash(ReadOnlySpan payload, long seed) => XxHash64.HashToUInt64(payload, seed); + + public void Reset() + { + _index = 0; + } + + private void Grow() + { + Array.Resize(ref _hashes, _hashes.Length + Size); + Array.Resize(ref _mixed, _hashes.Length + Size); + } + + private static long GetTopicSeed(int i) => TopicSeed + i; + + public void Build(IBufferWriter data) + { + var w = new CountingBufferWriter(data); + + // Sort by hash first + Array.Sort(_hashes, _mixed, 0, _index); + + var count = Deduplicate(w); + + // Mixed first + w.WriteNativeEndianSpan(_mixed.AsSpan(0, count)); + + // Hashes then + w.WriteNativeEndianSpan(_hashes.AsSpan(0, count)); + + // Store it as the last one, so that the reader can read last 4 bytes and create proper span. + w.WriteNativeEndian(count); + } + + private int Deduplicate(CountingBufferWriter writer) + { + // Values are sorted by the hashes, so that two entries that share a hash would be next to each other. + // We walk through the entries, gathering hashes that are the same and compressing them by writing the hash only once + + int writeAt = 0; // Where we place the next "deduplicated" entry + ulong currentHash = _hashes[0]; // The hash value we’re currently accumulating + uint currentMixed = _mixed[0]; // The mixed value for the first occurrence + int currentCount = 1; // How many times we've seen currentHash so far + + // Scan through all array elements, starting from index 1: + for (int i = 1; i < _index; i++) + { + if (_hashes[i] == currentHash) + { + // Found another occurrence of the same hash: + currentCount++; + } + else + { + // We reached a new hash => finalize the block for the previous one + _hashes[writeAt] = currentHash; + if (currentCount == 1) + { + // If there was only 1 occurrence, keep the original mixed + _mixed[writeAt] = currentMixed; + } + else + { + CompressMixed(writer, i, currentCount, writeAt); + } + + writeAt++; + + // Now start tracking the new hash block + currentHash = _hashes[i]; + currentMixed = _mixed[i]; + currentCount = 1; + } + } + + // After the loop, finalize the last block + _hashes[writeAt] = currentHash; + if (currentCount == 1) + { + _mixed[writeAt] = currentMixed; + } + else + { + CompressMixed(writer, _index, currentCount, writeAt); + } + + return writeAt + 1; + } + + private void CompressMixed(CountingBufferWriter writer, int to, int count, int writeAt) + { + Span values = _mixed.AsSpan(to - count, count); + + // Previous might not be stably sorted. Ensure it. + values.Sort(); + + // Remember the starting position + var start = writer.WrittenCount; + + var previous = 0U; + + // Simple diff encoding + foreach (var value in values) + { + // TODO: optimize span getting and advancing. + Span span = writer.GetSpan(5); + var written = BinaryEncoding.WriteVarInt(value - previous, span, 0); + writer.Advance(written); + previous = value; + } + + var end = writer.WrittenCount; + + writer.WriteNativeEndian(start); + + // Write the marker + _mixed[writeAt] = LookupMarker | (uint)end; + } +} diff --git a/src/Nethermind/Nethermind.Logs/Nethermind.Logs.csproj b/src/Nethermind/Nethermind.Logs/Nethermind.Logs.csproj new file mode 100644 index 00000000000..d242b9b48e4 --- /dev/null +++ b/src/Nethermind/Nethermind.Logs/Nethermind.Logs.csproj @@ -0,0 +1,18 @@ + + + + net9.0 + enable + enable + true + + + + + + + + + + + diff --git a/src/Nethermind/Nethermind.Logs/PrimitiveExtensions.cs b/src/Nethermind/Nethermind.Logs/PrimitiveExtensions.cs new file mode 100644 index 00000000000..50f10808f10 --- /dev/null +++ b/src/Nethermind/Nethermind.Logs/PrimitiveExtensions.cs @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2025 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Nethermind.Logs; + +public static class PrimitiveExtensions +{ + public static void WriteNativeEndian(this IBufferWriter w, int deduplicatedLength) + { + w.Write(MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref deduplicatedLength, 1))); + } + + public static void WriteNativeEndianSpan(this IBufferWriter w, ReadOnlySpan data) + where T : struct + { + w.Write(MemoryMarshal.Cast(data)); + } + + public static int ReadNativeEndian(this ReadOnlySpan data) => Unsafe.ReadUnaligned(ref MemoryMarshal.GetReference(data)); +} diff --git a/src/Nethermind/Nethermind.sln b/src/Nethermind/Nethermind.sln index da794fc3616..e1f3f06d7cb 100644 --- a/src/Nethermind/Nethermind.sln +++ b/src/Nethermind/Nethermind.sln @@ -230,6 +230,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nethermind.Shutter.Test", " EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nethermind.Optimism.Test", "Nethermind.Optimism.Test\Nethermind.Optimism.Test.csproj", "{DC983CEF-BA18-45DE-9AEB-AB9B459655BC}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nethermind.Logs", "Nethermind.Logs\Nethermind.Logs.csproj", "{BF7D657B-0C74-47DE-B1A4-BC9E68D60E0A}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nethermind.Logs.Test", "Nethermind.Logs.Test\Nethermind.Logs.Test.csproj", "{778151CF-019F-4874-972C-FAD9C8D60925}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -636,6 +640,14 @@ Global {DC983CEF-BA18-45DE-9AEB-AB9B459655BC}.Debug|Any CPU.Build.0 = Debug|Any CPU {DC983CEF-BA18-45DE-9AEB-AB9B459655BC}.Release|Any CPU.ActiveCfg = Release|Any CPU {DC983CEF-BA18-45DE-9AEB-AB9B459655BC}.Release|Any CPU.Build.0 = Release|Any CPU + {BF7D657B-0C74-47DE-B1A4-BC9E68D60E0A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BF7D657B-0C74-47DE-B1A4-BC9E68D60E0A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BF7D657B-0C74-47DE-B1A4-BC9E68D60E0A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BF7D657B-0C74-47DE-B1A4-BC9E68D60E0A}.Release|Any CPU.Build.0 = Release|Any CPU + {778151CF-019F-4874-972C-FAD9C8D60925}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {778151CF-019F-4874-972C-FAD9C8D60925}.Debug|Any CPU.Build.0 = Debug|Any CPU + {778151CF-019F-4874-972C-FAD9C8D60925}.Release|Any CPU.ActiveCfg = Release|Any CPU + {778151CF-019F-4874-972C-FAD9C8D60925}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -700,6 +712,7 @@ Global {B1CE6CBC-D85B-4631-A9F3-437ADB1FB049} = {4019B82F-1104-4D2C-9F96-05FD7D3575E8} {B068C72F-8B77-474E-A58C-5B929096FEF3} = {4019B82F-1104-4D2C-9F96-05FD7D3575E8} {DC983CEF-BA18-45DE-9AEB-AB9B459655BC} = {4019B82F-1104-4D2C-9F96-05FD7D3575E8} + {778151CF-019F-4874-972C-FAD9C8D60925} = {4019B82F-1104-4D2C-9F96-05FD7D3575E8} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {092CA5E3-6180-4ED7-A3CB-9B57FAC2AA85} diff --git a/src/Nethermind/Nethermind.sln.DotSettings b/src/Nethermind/Nethermind.sln.DotSettings new file mode 100644 index 00000000000..94191d36059 --- /dev/null +++ b/src/Nethermind/Nethermind.sln.DotSettings @@ -0,0 +1,2 @@ + + True \ No newline at end of file From 997370d0d5e190ced63fda36c054e55656f9a4b6 Mon Sep 17 00:00:00 2001 From: scooletz Date: Mon, 20 Jan 2025 21:32:57 +0100 Subject: [PATCH 2/6] format --- src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs | 2 +- src/Nethermind/Nethermind.Logs/LogsBuilder.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs index b7f3b50b421..3760e05677a 100644 --- a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs +++ b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs @@ -1,4 +1,4 @@ -using System.Buffers; +using System.Buffers; using FluentAssertions; using Nethermind.Core; using Nethermind.Core.Crypto; diff --git a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs index 74d353557dc..8243de24660 100644 --- a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs +++ b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs @@ -1,4 +1,4 @@ -using System.Buffers; +using System.Buffers; using System.Buffers.Binary; using System.Collections; using System.Diagnostics; From 1f9e7b04f048ca9296bd158a8db33745859b3b4a Mon Sep 17 00:00:00 2001 From: scooletz Date: Tue, 21 Jan 2025 11:54:23 +0100 Subject: [PATCH 3/6] deduplicated logs for the same block/tx --- .../Nethermind.Logs.Test/LogBuilderTests.cs | 34 +++++++++++++++++++ src/Nethermind/Nethermind.Logs/LogsBuilder.cs | 15 +++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs index 3760e05677a..c53c194b8ca 100644 --- a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs +++ b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs @@ -56,6 +56,8 @@ public void Simple() reader.Find(hash0).ToArray().Should().BeEquivalentTo([e1, e2]); reader.Find(hash0, 1).Should().BeEmpty(); + + writer.WrittenCount.Should().Be(48); } [Test] @@ -93,6 +95,8 @@ public void Entries_are_always_ordered_per_block() reader.Find(hash0).ToArray().Should().BeEquivalentTo(expected); reader.Find(hash0, 1).Should().BeEmpty(); + + writer.WrittenCount.Should().Be(50); } [Test] @@ -121,4 +125,34 @@ public void Frequent_topics_are_compressed_well() Console.WriteLine($"{(double)writer.WrittenCount / logEntries:F1} bytes per {nameof(LogEntry)}"); } + + [Test] + public void Repeated_entries_should_be_deduplicated() + { + var builder = new LogsBuilder(); + + var entry = new LogEntry(Address.SystemUser, [], [_hashes[0]]); + + const int logsReported = 1000; + const uint block = 1; + const ushort tx = 1; + + // Report a lot of times with the same position, to replicate a complex exchange where a lot of + // Transfer (index_topic_1 address src, index_topic_2 address dst, uint256 wad) is done. + for (ushort i = 0; i < logsReported; i++) + { + builder.Append(entry, block, tx); + } + + var writer = new ArrayBufferWriter(); + + builder.Build(writer); + + var reader = new LogsBuilder.MemoryReader(writer.WrittenMemory); + + LogsBuilder.Entry e = new(block, tx); + + reader.Find(Address.SystemUser).ToArray().Should().BeEquivalentTo([e]); + writer.WrittenCount.Should().Be(42); + } } diff --git a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs index 8243de24660..c44e292a96f 100644 --- a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs +++ b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs @@ -279,10 +279,17 @@ private void CompressMixed(CountingBufferWriter writer, int to, int count, // Simple diff encoding foreach (var value in values) { - // TODO: optimize span getting and advancing. - Span span = writer.GetSpan(5); - var written = BinaryEncoding.WriteVarInt(value - previous, span, 0); - writer.Advance(written); + var diff = value - previous; + + // diff == 0 when it's a repeated entry. Skip these + if (diff > 0) + { + // TODO: optimize span getting and advancing. + Span span = writer.GetSpan(5); + var written = BinaryEncoding.WriteVarInt(diff, span, 0); + writer.Advance(written); + } + previous = value; } From 4ae8e8ac0b328c2a856d25dce5d057994f1ebfec Mon Sep 17 00:00:00 2001 From: scooletz Date: Tue, 21 Jan 2025 13:06:15 +0100 Subject: [PATCH 4/6] asserting more --- .../Nethermind.Logs.Test/LogBuilderTests.cs | 29 ++++++++++++++----- src/Nethermind/Nethermind.Logs/LogsBuilder.cs | 4 ++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs index c53c194b8ca..e9c785e9993 100644 --- a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs +++ b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs @@ -107,23 +107,38 @@ public void Frequent_topics_are_compressed_well() var entry = new LogEntry(Address.SystemUser, [], [hash0]); - const int blocks = 1000; - const int txs = 1000; + const int blocks = 100; + const int txs = 100; const int logEntries = blocks * txs; - for (uint i = 0; i < blocks; i++) + foreach ((uint block, ushort tx) in Builder()) { - for (ushort j = 0; j < txs; j++) - { - builder.Append(entry, i, j); - } + builder.Append(entry, block, tx); } var writer = new ArrayBufferWriter(); builder.Build(writer); + var reader = new LogsBuilder.MemoryReader(writer.WrittenMemory); + + reader.Find(Address.SystemUser) + .Should() + .BeEquivalentTo(Builder().Select(t => new LogsBuilder.Entry(t.block, t.tx))); + Console.WriteLine($"{(double)writer.WrittenCount / logEntries:F1} bytes per {nameof(LogEntry)}"); + return; + + static IEnumerable<(uint block, ushort tx)> Builder() + { + for (uint i = 1; i < blocks; i++) + { + for (ushort j = 1; j < txs; j++) + { + yield return (i, j); + } + } + } } [Test] diff --git a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs index c44e292a96f..e13fc00f966 100644 --- a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs +++ b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs @@ -134,6 +134,8 @@ public IEnumerator GetEnumerator() /// private const int MaxBlockNumber = (1 << (31 - BlockNumberShift)) - 1; + private const int MinBlockNumber = 1; + private const int MaxTxPerBlock = 1 << BlockNumberShift; private const int TxMask = MaxTxPerBlock - 1; private const uint LookupMarker = 0x80_00_00_00; @@ -149,7 +151,7 @@ public IEnumerator GetEnumerator() public void Append(LogEntry entry, uint blockNumber, ushort txNumber) { Debug.Assert(txNumber < MaxTxPerBlock); - Debug.Assert(blockNumber <= MaxBlockNumber); + Debug.Assert(MinBlockNumber <= blockNumber && blockNumber <= MaxBlockNumber); uint mixed = (blockNumber << BlockNumberShift) | txNumber; From cb5fc0c1c4348c12ffc74743272ba69fdec03d8e Mon Sep 17 00:00:00 2001 From: scooletz Date: Tue, 21 Jan 2025 13:19:22 +0100 Subject: [PATCH 5/6] Proper spanification of the writing process --- .../Nethermind.Logs/BinaryEncoding.cs | 4 ++- src/Nethermind/Nethermind.Logs/LogsBuilder.cs | 28 +++++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs b/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs index 3e424adec09..4f3727c5b6c 100644 --- a/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs +++ b/src/Nethermind/Nethermind.Logs/BinaryEncoding.cs @@ -36,9 +36,11 @@ public static int TryReadVarInt(ReadOnlySpan span, int offset, out uint va // Use 32 - 28 bits as the last one value |= (chunk & 0b00001111) << (4 * bits); - return 5; + return MaxVarIntByteCount; } + public const int MaxVarIntByteCount = 5; + public static int WriteVarInt(uint value, Span span, int offset = 0) { int count = 0; diff --git a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs index e13fc00f966..889299e3f5d 100644 --- a/src/Nethermind/Nethermind.Logs/LogsBuilder.cs +++ b/src/Nethermind/Nethermind.Logs/LogsBuilder.cs @@ -277,24 +277,40 @@ private void CompressMixed(CountingBufferWriter writer, int to, int count, var start = writer.WrittenCount; var previous = 0U; + var written = 0; + + Span span = default; // Simple diff encoding foreach (var value in values) { var diff = value - previous; - // diff == 0 when it's a repeated entry. Skip these - if (diff > 0) + // Skip repeated entries + if (diff == 0) continue; + + if (span.Length - written < BinaryEncoding.MaxVarIntByteCount) { - // TODO: optimize span getting and advancing. - Span span = writer.GetSpan(5); - var written = BinaryEncoding.WriteVarInt(diff, span, 0); - writer.Advance(written); + if (written > 0) + { + writer.Advance(written); + written = 0; + } + + span = writer.GetSpan(BinaryEncoding.MaxVarIntByteCount); } + written += BinaryEncoding.WriteVarInt(diff, span, written); previous = value; } + // Advance the leftover + if (written > 0) + { + writer.Advance(written); + written = 0; + } + var end = writer.WrittenCount; writer.WriteNativeEndian(start); From f1df98ed4b32812110eeaab03339eb41ab3fc17c Mon Sep 17 00:00:00 2001 From: scooletz Date: Tue, 21 Jan 2025 13:40:16 +0100 Subject: [PATCH 6/6] format --- src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs index e9c785e9993..3fff3c7d8a2 100644 --- a/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs +++ b/src/Nethermind/Nethermind.Logs.Test/LogBuilderTests.cs @@ -111,7 +111,7 @@ public void Frequent_topics_are_compressed_well() const int txs = 100; const int logEntries = blocks * txs; - foreach ((uint block, ushort tx) in Builder()) + foreach ((uint block, ushort tx) in Builder()) { builder.Append(entry, block, tx); }