Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sdk-metrics] Tags perf improvements #5457

Merged
merged 11 commits into from
Mar 22, 2024

Conversation

CodeBlanch
Copy link
Member

@CodeBlanch CodeBlanch commented Mar 18, 2024

Changes

  • Elide bounds checks in Tags hash code computation and equality checks
  • Call string.Equals without passing StringComparison (seems to inline better)

Benchmarks

Method NumberOfTags Mean Error StdDev
TagsGetHashCodeOld 1 13.520 ns 0.1686 ns 0.1577 ns
TagsGetHashCodeNew 1 11.989 ns 0.1091 ns 0.0967 ns
TagsEqualsOld 1 5.802 ns 0.0405 ns 0.0359 ns
TagsEqualsNew 1 4.962 ns 0.0696 ns 0.0617 ns
TagsGetHashCodeOld 3 46.325 ns 0.4549 ns 0.4033 ns
TagsGetHashCodeNew 3 37.827 ns 0.4984 ns 0.3891 ns
TagsEqualsOld 3 14.708 ns 0.3192 ns 0.3278 ns
TagsEqualsNew 3 12.526 ns 0.2695 ns 0.2884 ns
TagsGetHashCodeOld 5 79.541 ns 0.8260 ns 0.7727 ns
TagsGetHashCodeNew 5 59.275 ns 0.4959 ns 0.4639 ns
TagsEqualsOld 5 23.636 ns 0.2285 ns 0.2026 ns
TagsEqualsNew 5 18.615 ns 0.1292 ns 0.1146 ns
TagsGetHashCodeOld 10 162.381 ns 1.4783 ns 1.3828 ns
TagsGetHashCodeNew 10 128.601 ns 1.7367 ns 1.6245 ns
TagsEqualsOld 10 45.330 ns 0.6802 ns 0.6362 ns
TagsEqualsNew 10 35.216 ns 0.5322 ns 0.4978 ns
Code
#nullable enable

using System.Diagnostics;
using System.Runtime.CompilerServices;
#if NET6_0_OR_GREATER
using System.Runtime.InteropServices;
#endif
using BenchmarkDotNet.Attributes;

namespace Benchmarks.Metrics;

public class TagsBenchmarks
{
    private KeyValuePair<string, object?>[]? tags;
    private TagsNew tagsNew;
    private TagsOld tagsOld;

    [Params(/*1, 3, 5, */10/*, 100*/)]
    public int NumberOfTags { get; set; }

    /// <summary>
    /// Builds the tag array shared by every benchmark (<see cref="NumberOfTags"/> entries,
    /// keys "tag_key_name_0", "tag_key_name_1", ... with the index as the boxed value)
    /// and pre-constructs one <see cref="TagsOld"/> and one <see cref="TagsNew"/> over it
    /// for the equality benchmarks.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        int count = this.NumberOfTags;
        var generated = new KeyValuePair<string, object?>[count];

        int i = 0;
        while (i < count)
        {
            generated[i] = new KeyValuePair<string, object?>($"tag_key_name_{i}", i);
            i++;
        }

        // Both wrappers share the same backing array; neither constructor copies it.
        this.tagsOld = new TagsOld(generated);
        this.tagsNew = new TagsNew(generated);
        this.tags = generated;
    }

    /// <summary>
    /// Baseline hash benchmark: wraps the shared tag array in a new <see cref="TagsOld"/>
    /// (whose constructor computes the hash) and returns the cached hash code.
    /// </summary>
    [Benchmark]
    public int TagsGetHashCodeOld()
        => new TagsOld(this.tags!).GetHashCode();

    /// <summary>
    /// Candidate hash benchmark: wraps the shared tag array in a new <see cref="TagsNew"/>
    /// (whose constructor computes the hash) and returns the cached hash code.
    /// </summary>
    [Benchmark]
    public int TagsGetHashCodeNew()
        => new TagsNew(this.tags!).GetHashCode();

    /// <summary>
    /// Baseline equality benchmark: compares the pre-built <see cref="TagsOld"/> with itself,
    /// which walks every key/value pair because all of them match.
    /// </summary>
    [Benchmark]
    public bool TagsEqualsOld()
        => this.tagsOld.Equals(this.tagsOld);

    /// <summary>
    /// Candidate equality benchmark: compares the pre-built <see cref="TagsNew"/> with itself,
    /// which walks every key/value pair because all of them match.
    /// </summary>
    [Benchmark]
    public bool TagsEqualsNew()
        => this.tagsNew.Equals(this.tagsNew);

    /// <summary>
    /// Candidate ("new") implementation under benchmark: wraps a key/value array,
    /// caches its hash code at construction, and compares two instances pair-by-pair
    /// in array order.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>default(TagsNew)</c> leaves <see cref="KeyValuePairs"/> null, so
    /// <see cref="Equals(TagsNew)"/> would throw on it; every use visible in this file goes
    /// through the constructor.
    /// </remarks>
    internal readonly struct TagsNew : IEquatable<TagsNew>
    {
        /// <summary>Instance wrapping an empty key/value array.</summary>
        public static readonly TagsNew EmptyTags = new(Array.Empty<KeyValuePair<string, object?>>());

        // Hash of the wrapped array, computed once in the constructor.
        private readonly int hashCode;

        /// <summary>
        /// Stores the supplied array reference (no copy is made) and precomputes its hash code.
        /// </summary>
        /// <param name="keyValuePairs">Key/value pairs to wrap; expected to be non-null.</param>
        public TagsNew(KeyValuePair<string, object?>[] keyValuePairs)
        {
            this.KeyValuePairs = keyValuePairs;
            this.hashCode = ComputeHashCode(keyValuePairs);
        }

        /// <summary>Gets the wrapped key/value array.</summary>
        public readonly KeyValuePair<string, object?>[] KeyValuePairs { get; }

        /// <summary>Equality operator; forwards to <see cref="Equals(TagsNew)"/>.</summary>
        public static bool operator ==(TagsNew tag1, TagsNew tag2) => tag1.Equals(tag2);

        /// <summary>Inequality operator; negation of <see cref="Equals(TagsNew)"/>.</summary>
        public static bool operator !=(TagsNew tag1, TagsNew tag2) => !tag1.Equals(tag2);

        /// <inheritdoc/>
        public override readonly bool Equals(object? obj)
        {
            return obj is TagsNew other && this.Equals(other);
        }

        /// <summary>
        /// Returns true when both instances hold arrays of the same length whose pairs match
        /// position-by-position: keys via <c>string.Equals(string)</c>, values via
        /// <c>object.Equals</c> (two null values are treated as equal).
        /// </summary>
        /// <param name="other">Instance to compare against.</param>
        /// <returns>True if every pair matches in order; otherwise false.</returns>
        public readonly bool Equals(TagsNew other)
        {
            var ourKvps = this.KeyValuePairs;
            var theirKvps = other.KeyValuePairs;

            // Everything below relies on the two arrays having the same length.
            if (ourKvps.Length != theirKvps.Length)
            {
                return false;
            }

#if NET6_0_OR_GREATER
            // Note: This loop walks both arrays through managed refs (ref locals advanced
            // with Unsafe.Add, not raw pointers) to elide bounds checks on two arrays we
            // know to be of equal length. Nothing here validates the element access, so
            // the length check above is what keeps it in range.
            var cursor = ourKvps.Length;
            if (cursor > 0)
            {
                ref var ours = ref MemoryMarshal.GetArrayDataReference(ourKvps);
                ref var theirs = ref MemoryMarshal.GetArrayDataReference(theirKvps);
                while (true)
                {
                    // Equality check for Keys
                    if (!ours.Key.Equals(theirs.Key))
                    {
                        return false;
                    }

                    // Equality check for Values
                    // When our value is null the left side evaluates to null, so the result
                    // falls back to "their value is non-null", i.e. a mismatch.
                    if (!ours.Value?.Equals(theirs.Value) ?? theirs.Value != null)
                    {
                        return false;
                    }

                    // Stop before advancing so the refs never move past the last element.
                    if (--cursor == 0)
                    {
                        break;
                    }

                    ours = ref Unsafe.Add(ref ours, 1);
                    theirs = ref Unsafe.Add(ref theirs, 1);
                }
            }
#else
            for (int i = 0; i < ourKvps.Length; i++)
            {
                ref var ours = ref ourKvps[i];

                // Note: Bounds check happens here for theirKvps element access
                ref var theirs = ref theirKvps[i];

                // Equality check for Keys
                if (!ours.Key.Equals(theirs.Key))
                {
                    return false;
                }

                // Equality check for Values
                // Same null handling as the NET6+ branch above.
                if (!ours.Value?.Equals(theirs.Value) ?? theirs.Value != null)
                {
                    return false;
                }
            }
#endif

            return true;
        }

        /// <summary>Returns the hash code cached by the constructor.</summary>
        public override readonly int GetHashCode() => this.hashCode;

        /// <summary>
        /// Combines the hash of every key and value, in array order, into a single hash code.
        /// On NET6+ this feeds <c>System.HashCode</c>; otherwise it uses a 17/31
        /// multiplicative hash with unchecked overflow.
        /// </summary>
        /// <param name="keyValuePairs">Pairs to hash; null is only caught by a debug assertion.</param>
        /// <returns>The combined hash code.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int ComputeHashCode(KeyValuePair<string, object?>[] keyValuePairs)
        {
            Debug.Assert(keyValuePairs != null, "keyValuePairs was null");

#if NET6_0_OR_GREATER
            HashCode hashCode = default;

            for (int i = 0; i < keyValuePairs.Length; i++)
            {
                ref var item = ref keyValuePairs[i];
                // The key's hash code (an int) is added explicitly; the value is passed as an object.
                hashCode.Add(item.Key.GetHashCode());
                hashCode.Add(item.Value);
            }

            return hashCode.ToHashCode();
#else
            var hash = 17;

            for (int i = 0; i < keyValuePairs!.Length; i++)
            {
                ref var item = ref keyValuePairs[i];
                unchecked
                {
                    // A null value contributes 0; keys are dereferenced without a null check.
                    hash = (hash * 31) + item.Key.GetHashCode();
                    hash = (hash * 31) + (item.Value?.GetHashCode() ?? 0);
                }
            }

            return hash;
#endif
        }
    }

    /// <summary>
    /// Baseline ("old") implementation under benchmark: wraps a key/value array, computes
    /// its hash code inline in the constructor, and compares two instances with plain
    /// indexed array access.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>default(TagsOld)</c> leaves <see cref="KeyValuePairs"/> null, so
    /// <see cref="Equals(TagsOld)"/> would throw on it; every use visible in this file goes
    /// through the constructor.
    /// </remarks>
    internal readonly struct TagsOld : IEquatable<TagsOld>
    {
        /// <summary>Instance wrapping an empty key/value array.</summary>
        public static readonly TagsOld EmptyTags = new(Array.Empty<KeyValuePair<string, object?>>());

        // Hash of the wrapped array, computed once in the constructor.
        private readonly int hashCode;

        /// <summary>
        /// Stores the supplied array reference (no copy is made) and computes the hash of
        /// every key and value in array order.
        /// </summary>
        /// <param name="keyValuePairs">Key/value pairs to wrap; dereferenced without a null check.</param>
        public TagsOld(KeyValuePair<string, object?>[] keyValuePairs)
        {
            this.KeyValuePairs = keyValuePairs;

#if NET6_0_OR_GREATER
            HashCode hashCode = default;

            for (int i = 0; i < this.KeyValuePairs.Length; i++)
            {
                ref var item = ref this.KeyValuePairs[i];
                // Unlike TagsNew, the key string itself is handed to HashCode.Add
                // rather than its GetHashCode() result.
                hashCode.Add(item.Key);
                hashCode.Add(item.Value);
            }

            var hash = hashCode.ToHashCode();
#else
            // 17/31 multiplicative hash; overflow wraps because of the unchecked block.
            var hash = 17;
            for (int i = 0; i < this.KeyValuePairs.Length; i++)
            {
                ref var item = ref this.KeyValuePairs[i];
                unchecked
                {
                    // Null keys and null values both contribute 0.
                    hash = (hash * 31) + (item.Key?.GetHashCode() ?? 0);
                    hash = (hash * 31) + (item.Value?.GetHashCode() ?? 0);
                }
            }
#endif

            this.hashCode = hash;
        }

        /// <summary>Gets the wrapped key/value array.</summary>
        public readonly KeyValuePair<string, object?>[] KeyValuePairs { get; }

        /// <summary>Equality operator; forwards to <see cref="Equals(TagsOld)"/>.</summary>
        public static bool operator ==(TagsOld tag1, TagsOld tag2) => tag1.Equals(tag2);

        /// <summary>Inequality operator; negation of <see cref="Equals(TagsOld)"/>.</summary>
        public static bool operator !=(TagsOld tag1, TagsOld tag2) => !tag1.Equals(tag2);

        /// <inheritdoc/>
        public override readonly bool Equals(object? obj)
        {
            return obj is TagsOld other && this.Equals(other);
        }

        /// <summary>
        /// Returns true when both instances hold arrays of the same length whose pairs match
        /// position-by-position: keys with <c>StringComparison.Ordinal</c>, values via
        /// <c>object.Equals</c> (two null values are treated as equal).
        /// </summary>
        /// <param name="other">Instance to compare against.</param>
        /// <returns>True if every pair matches in order; otherwise false.</returns>
        public readonly bool Equals(TagsOld other)
        {
            var length = this.KeyValuePairs.Length;

            if (length != other.KeyValuePairs.Length)
            {
                return false;
            }

            for (int i = 0; i < length; i++)
            {
                // The arrays are re-read from the properties and indexed on every iteration.
                ref var left = ref this.KeyValuePairs[i];
                ref var right = ref other.KeyValuePairs[i];

                // Equality check for Keys
                if (!left.Key.Equals(right.Key, StringComparison.Ordinal))
                {
                    return false;
                }

                // Equality check for Values
                // When the left value is null the left side evaluates to null, so the result
                // falls back to "right value is non-null", i.e. a mismatch.
                if (!left.Value?.Equals(right.Value) ?? right.Value != null)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Returns the hash code cached by the constructor.</summary>
        public override readonly int GetHashCode() => this.hashCode;
    }
}

Merge requirement checklist

  • CONTRIBUTING guidelines followed (license requirements, nullable enabled, static analysis, etc.)

@CodeBlanch CodeBlanch added pkg:OpenTelemetry Issues related to OpenTelemetry NuGet package metrics Metrics signal related labels Mar 18, 2024
@CodeBlanch CodeBlanch requested a review from a team March 18, 2024 20:59
Copy link

codecov bot commented Mar 18, 2024

Codecov Report

Attention: Patch coverage is 93.75000%, with 2 lines in your changes missing coverage. Please review.

Project coverage is 85.43%. Comparing base (6250307) to head (aa3d867).
Report is 140 commits behind head on main.

Additional details and impacted files

Impacted file tree graph

@@            Coverage Diff             @@
##             main    #5457      +/-   ##
==========================================
+ Coverage   83.38%   85.43%   +2.05%     
==========================================
  Files         297      289       -8     
  Lines       12531    12493      -38     
==========================================
+ Hits        10449    10674     +225     
+ Misses       2082     1819     -263     
Flag Coverage Δ
unittests ?
unittests-Solution-Experimental 85.43% <93.75%> (?)
unittests-Solution-Stable 85.41% <93.75%> (?)

Flags with carried forward coverage won't be shown. Click here to find out more.

Files Coverage Δ
src/OpenTelemetry/Metrics/Tags.cs 91.11% <93.75%> (-2.44%) ⬇️

... and 62 files with indirect coverage changes

@cijothomas
Copy link
Member

@CodeBlanch can you add the code used in the benchmarks for reference as well?

@CodeBlanch
Copy link
Member Author

@cijothomas Added

@cijothomas
Copy link
Member

@cijothomas Added

Given we are ultra optimized for <=3 tags, and reasonably optimized for 3-7 tags, it'd be nicer to use tag counts below 7 in the shared benchmarks, to see more practical gains.

Also, if feasible, can you show the existing benchmarks before/after, so as to get a picture of how much this affects overall perf? (Hash calculation and Equals() are a significant chunk of the overall metrics cost in the hot path, so I expect this to be significant. But I'm curious whether it is significant enough to show up in normal benchmark runs.)

var cursor = ourKvps.Length;
if (cursor > 0)
{
ref var ours = ref MemoryMarshal.GetArrayDataReference(ourKvps);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this and the Unsafe.Add calls vulnerable to garbage collector relocating this array when compacting memory?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK they are managed pointers meaning the GC is aware of them.

I ran this pattern by @stephentoub a while back. IIRC the answer was it is safe to do so long as you know for sure the array references won't change out from under you and they are indeed the same length.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If done correctly, it's valid. These are managed refs, so if the GC were to move things around, it would also update the values stored in the refs to keep them valid. The concern with code like this is it's easy to accidentally get it wrong, at which point you're subject to all the same kinds of memory safety problems as in languages like C++. As a simple for loop, the JIT would already be eliminating the bounds check on accessing the elements from whichever span was used in the for loop condition, so at best this is only eliminating the other half of the bounds checks. You'll want to be really, really sure it's worth it.

@CodeBlanch CodeBlanch changed the title [sdk-metrics] Metric lookup perf improvements [sdk-metrics] Tags perf improvements Mar 19, 2024
@CodeBlanch
Copy link
Member Author

@cijothomas I updated the description to show benchmarks for different number of tags. There aren't any optimizations in struct Tags based on length, the gains should be pretty linear based on the number of tags. It might be worth looking at if we should special case a length of 1?

Here is a run of MetricsBenchmarks:

Before:

Method AggregationTemporality Mean Error StdDev
CounterHotPath Cumulative 13.01 ns 0.146 ns 0.136 ns
CounterWith1LabelsHotPath Cumulative 53.83 ns 0.330 ns 0.293 ns
CounterWith2LabelsHotPath Cumulative 72.84 ns 0.783 ns 0.695 ns
CounterWith3LabelsHotPath Cumulative 99.68 ns 0.652 ns 0.578 ns
CounterWith4LabelsHotPath Cumulative 125.77 ns 1.449 ns 1.356 ns
CounterWith5LabelsHotPath Cumulative 144.82 ns 1.459 ns 1.364 ns
CounterWith6LabelsHotPath Cumulative 164.60 ns 1.171 ns 0.978 ns
CounterWith7LabelsHotPath Cumulative 184.49 ns 2.803 ns 2.485 ns
CounterWith1LabelsHotPathUsingTagList Cumulative 64.65 ns 1.312 ns 1.227 ns
CounterWith2LabelsHotPathUsingTagList Cumulative 87.66 ns 1.096 ns 0.972 ns
CounterWith3LabelsHotPathUsingTagList Cumulative 114.65 ns 1.031 ns 0.964 ns
CounterWith4LabelsHotPathUsingTagList Cumulative 135.71 ns 1.352 ns 1.265 ns
CounterWith5LabelsHotPathUsingTagList Cumulative 156.48 ns 1.008 ns 0.943 ns
CounterWith6LabelsHotPathUsingTagList Cumulative 180.64 ns 1.736 ns 1.624 ns
CounterWith7LabelsHotPathUsingTagList Cumulative 202.70 ns 2.262 ns 2.116 ns
CounterWith8LabelsHotPathUsingTagList Cumulative 228.39 ns 2.670 ns 2.498 ns
CounterWith9LabelsHotPathUsingTagList Cumulative 294.53 ns 2.080 ns 1.844 ns
CounterHotPath Delta 12.62 ns 0.039 ns 0.034 ns
CounterWith1LabelsHotPath Delta 52.23 ns 0.441 ns 0.413 ns
CounterWith2LabelsHotPath Delta 73.28 ns 0.727 ns 0.680 ns
CounterWith3LabelsHotPath Delta 97.32 ns 1.266 ns 1.184 ns
CounterWith4LabelsHotPath Delta 124.98 ns 2.041 ns 1.909 ns
CounterWith5LabelsHotPath Delta 150.81 ns 2.569 ns 3.685 ns
CounterWith6LabelsHotPath Delta 170.89 ns 1.589 ns 1.408 ns
CounterWith7LabelsHotPath Delta 185.56 ns 1.646 ns 1.540 ns
CounterWith1LabelsHotPathUsingTagList Delta 62.80 ns 0.481 ns 0.426 ns
CounterWith2LabelsHotPathUsingTagList Delta 89.48 ns 0.794 ns 0.743 ns
CounterWith3LabelsHotPathUsingTagList Delta 114.09 ns 1.596 ns 1.492 ns
CounterWith4LabelsHotPathUsingTagList Delta 135.33 ns 2.359 ns 2.206 ns
CounterWith5LabelsHotPathUsingTagList Delta 155.94 ns 0.978 ns 0.915 ns
CounterWith6LabelsHotPathUsingTagList Delta 180.58 ns 0.955 ns 0.894 ns
CounterWith7LabelsHotPathUsingTagList Delta 200.10 ns 1.079 ns 1.009 ns
CounterWith8LabelsHotPathUsingTagList Delta 228.79 ns 2.795 ns 2.615 ns
CounterWith9LabelsHotPathUsingTagList Delta 297.95 ns 4.772 ns 4.464 ns

After:

Method AggregationTemporality Mean Error StdDev
CounterHotPath Cumulative 12.61 ns 0.271 ns 0.371 ns
CounterWith1LabelsHotPath Cumulative 46.16 ns 0.480 ns 0.449 ns
CounterWith2LabelsHotPath Cumulative 62.84 ns 1.123 ns 1.051 ns
CounterWith3LabelsHotPath Cumulative 92.78 ns 1.874 ns 4.033 ns
CounterWith4LabelsHotPath Cumulative 117.40 ns 1.908 ns 1.691 ns
CounterWith5LabelsHotPath Cumulative 136.79 ns 2.619 ns 3.671 ns
CounterWith6LabelsHotPath Cumulative 155.58 ns 1.922 ns 1.798 ns
CounterWith7LabelsHotPath Cumulative 171.33 ns 2.661 ns 2.489 ns
CounterWith1LabelsHotPathUsingTagList Cumulative 60.23 ns 0.851 ns 0.796 ns
CounterWith2LabelsHotPathUsingTagList Cumulative 79.14 ns 1.011 ns 0.945 ns
CounterWith3LabelsHotPathUsingTagList Cumulative 104.30 ns 1.196 ns 1.119 ns
CounterWith4LabelsHotPathUsingTagList Cumulative 122.29 ns 1.709 ns 1.427 ns
CounterWith5LabelsHotPathUsingTagList Cumulative 141.39 ns 1.343 ns 1.256 ns
CounterWith6LabelsHotPathUsingTagList Cumulative 163.88 ns 1.119 ns 1.046 ns
CounterWith7LabelsHotPathUsingTagList Cumulative 185.83 ns 2.857 ns 2.672 ns
CounterWith8LabelsHotPathUsingTagList Cumulative 209.42 ns 1.477 ns 1.310 ns
CounterWith9LabelsHotPathUsingTagList Cumulative 272.17 ns 4.786 ns 4.477 ns
CounterHotPath Delta 12.49 ns 0.101 ns 0.089 ns
CounterWith1LabelsHotPath Delta 45.07 ns 0.256 ns 0.227 ns
CounterWith2LabelsHotPath Delta 63.01 ns 0.270 ns 0.252 ns
CounterWith3LabelsHotPath Delta 86.59 ns 1.717 ns 1.522 ns
CounterWith4LabelsHotPath Delta 116.75 ns 2.345 ns 4.575 ns
CounterWith5LabelsHotPath Delta 131.35 ns 2.530 ns 2.707 ns
CounterWith6LabelsHotPath Delta 150.90 ns 2.327 ns 1.943 ns
CounterWith7LabelsHotPath Delta 173.34 ns 3.479 ns 5.099 ns
CounterWith1LabelsHotPathUsingTagList Delta 58.93 ns 0.993 ns 0.880 ns
CounterWith2LabelsHotPathUsingTagList Delta 81.29 ns 1.625 ns 2.577 ns
CounterWith3LabelsHotPathUsingTagList Delta 104.42 ns 1.757 ns 1.644 ns
CounterWith4LabelsHotPathUsingTagList Delta 126.46 ns 2.380 ns 2.226 ns
CounterWith5LabelsHotPathUsingTagList Delta 146.67 ns 2.696 ns 2.521 ns
CounterWith6LabelsHotPathUsingTagList Delta 169.62 ns 1.926 ns 1.801 ns
CounterWith7LabelsHotPathUsingTagList Delta 195.53 ns 3.848 ns 3.951 ns
CounterWith8LabelsHotPathUsingTagList Delta 205.62 ns 2.932 ns 2.743 ns
CounterWith9LabelsHotPathUsingTagList Delta 281.14 ns 4.489 ns 4.199 ns

@@ -73,7 +74,7 @@ public Tags(KeyValuePair<string, object?>[] keyValuePairs)
}
}
#else
for (int i = 0; i < ourKvps.Length; i++)
for (int i = 0; i < length; i++)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we want this to use one of the array's length properties? Would it be skipping the bounds check if we use a regular int?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does seem to elide using the local ya. Here's a demo: SharpLab

Notice in "ElideTest1" there is call 0x7ff9435f8f40 that is the thing that throws the range check failure. The other two don't have that.

@CodeBlanch CodeBlanch merged commit 2f0af65 into open-telemetry:main Mar 22, 2024
37 checks passed
@CodeBlanch CodeBlanch deleted the sdk-metrics-lookup-perf branch March 22, 2024 19:49
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
metrics Metrics signal related pkg:OpenTelemetry Issues related to OpenTelemetry NuGet package
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants