Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory and CPU costs due to SegmentedList usage #75661

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.CodeAnalysis.Collections;
using Xunit;

namespace Microsoft.CodeAnalysis.UnitTests.Collections
{
/// <summary>
/// Contains tests that ensure the correctness of the <see cref="SegmentedList{T}"/> class.
/// </summary>
public abstract partial class SegmentedList_Generic_Tests<T> : IList_Generic_Tests<T>
where T : notnull
{
/// <summary>
/// Theory data: each row is an initial list length followed by the number of segments to add (1 through 3).
/// </summary>
public static IEnumerable<object[]> TestLengthsAndSegmentCounts
{
    get
    {
        var segmentSize = SegmentedArray<object>.TestAccessor.SegmentSize;

        // Fixed lengths plus lengths derived from the segment size, in the order they are yielded.
        int[] lengths = { 1, 10, 100, segmentSize / 2, segmentSize, segmentSize * 2, 100000 };

        foreach (var segmentsToAdd in new[] { 1, 2, 3 })
        {
            foreach (var length in lengths)
            {
                yield return new object[] { length, segmentsToAdd };
            }
        }
    }
}

/// <summary>
/// Verifies that setting <see cref="SegmentedList{T}.Capacity"/> to one less than the number of stored
/// items throws <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
/// <param name="count">Initial capacity of the list and the number of items added to it.</param>
[Theory]
[MemberData(nameof(ValidCollectionSizes))]
public void Capacity_ArgumentValidity(int count)
{
    var list = new SegmentedList<T>(count);

    // Fill the list so that it holds exactly `count` items.
    var index = 0;
    while (index < count)
    {
        list.Add(CreateT(index));
        index++;
    }

    Assert.Throws<ArgumentOutOfRangeException>(() => list.Capacity = count - 1);
}

/// <summary>
/// Verifies that after assigning <see cref="SegmentedList{T}.Capacity"/>, reading it back returns exactly
/// the assigned value.
/// </summary>
/// <param name="initialCapacity">Capacity passed to the list constructor.</param>
/// <param name="initialSize">Number of items added before the capacity is changed.</param>
/// <param name="requestedCapacity">Value assigned to <see cref="SegmentedList{T}.Capacity"/>.</param>
[Theory]
[InlineData(0, 0, 1)]
[InlineData(0, 0, 10)]
[InlineData(4, 4, 6)]
[InlineData(4, 4, 10)]
[InlineData(4, 4, 100_000)]
public void Capacity_MatchesSizeRequested(int initialCapacity, int initialSize, int requestedCapacity)
{
    var list = new SegmentedList<T>(initialCapacity);

    var added = 0;
    while (added < initialSize)
    {
        list.Add(CreateT(added));
        added++;
    }

    list.Capacity = requestedCapacity;

    var actualCapacity = list.Capacity;
    Assert.Equal(requestedCapacity, actualCapacity);
}

/// <summary>
/// Verifies that growing <see cref="SegmentedList{T}.Capacity"/> by a whole number of segments keeps every
/// segment before the old last one by reference, and that the segments from the old last position up to
/// (but excluding) the new last one are full-size.
/// </summary>
/// <param name="length">Initial capacity of the list and the number of items added to it.</param>
/// <param name="addSegmentCount">Number of whole segments' worth of capacity to add.</param>
[Theory]
[MemberData(nameof(TestLengthsAndSegmentCounts))]
public void Capacity_ReusesSegments(
    int length,
    int addSegmentCount)
{
    var segmentSize = SegmentedArray<object>.TestAccessor.SegmentSize;
    var elementCountToAdd = segmentSize * addSegmentCount;
    var o = new object();

    var segmented = new SegmentedList<object>(length);
    for (var i = 0; i < length; i++)
        segmented.Add(o);

    var oldSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);
    var oldSegmentCount = oldSegments.Length;

    segmented.Capacity = length + elementCountToAdd;

    var resizedSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);
    var resizedSegmentCount = resizedSegments.Length;

    Assert.Equal(oldSegmentCount + addSegmentCount, resizedSegmentCount);

    // xUnit assertions take (expected, actual); the pre-resize state is the expectation here.
    // Every segment before the old last one must be carried over by reference.
    for (var i = 0; i < oldSegmentCount - 1; i++)
        Assert.Same(oldSegments[i], resizedSegments[i]);

    // From the old last position up to (but excluding) the new last segment, each segment must be full-size.
    for (var i = oldSegmentCount - 1; i < resizedSegmentCount - 1; i++)
        Assert.Equal(segmentSize, resizedSegments[i].Length);

    // The new last segment is a different array with the same length as the old last segment.
    Assert.NotSame(oldSegments[oldSegmentCount - 1], resizedSegments[resizedSegmentCount - 1]);
    Assert.Equal(oldSegments[oldSegmentCount - 1].Length, resizedSegments[resizedSegmentCount - 1].Length);
}

/// <summary>
/// Verifies that a capacity increase for the tested lengths keeps the list in a single segment whose
/// length equals the new capacity.
/// </summary>
/// <param name="length">Initial capacity of the list and the number of items added to it.</param>
/// <param name="addItemCount">Amount by which the capacity is increased.</param>
[Theory]
[CombinatorialData]
public void Capacity_InOnlySingleSegment(
    [CombinatorialValues(1, 2, 10, 100)] int length,
    [CombinatorialValues(1, 2, 10, 100)] int addItemCount)
{
    var o = new object();

    var segmented = new SegmentedList<object>(length);
    for (var i = 0; i < length; i++)
        segmented.Add(o);

    var oldSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);

    segmented.Capacity = length + addItemCount;

    var resizedSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);

    Assert.Equal(1, oldSegments.Length);
    Assert.Equal(1, resizedSegments.Length);

    // xUnit's Assert.Same takes (expected, actual); the pre-resize snapshot is the expectation.
    // NOTE(review): if the Capacity setter leaves the outer T[][] in place when the segment count is
    // unchanged, oldSegments and resizedSegments are the same array and this check cannot fail.
    // Confirm whether that is the intended assertion.
    Assert.Same(oldSegments[0], resizedSegments[0]);
    Assert.Equal(segmented.Capacity, resizedSegments[0].Length);
}

/// <summary>
/// Verifies the capacity that results from calling <c>EnsureCapacity</c> on a list with a given
/// starting capacity and size.
/// </summary>
/// <param name="initialCapacity">Capacity passed to the list constructor.</param>
/// <param name="initialSize">Number of items added before <c>EnsureCapacity</c> is called.</param>
/// <param name="requestedCapacity">Value passed to <c>EnsureCapacity</c>.</param>
/// <param name="expectedCapacity">Capacity the list is expected to report afterwards.</param>
[Theory]
[InlineData(0, 0, 1, 4)]
[InlineData(0, 0, 10, 10)]
[InlineData(4, 4, 6, 8)]
[InlineData(4, 4, 10, 10)]
public void EnsureCapacity_ResizesAppropriately(int initialCapacity, int initialSize, int requestedCapacity, int expectedCapacity)
{
    var list = new SegmentedList<T>(initialCapacity);

    var added = 0;
    while (added < initialSize)
    {
        list.Add(CreateT(added));
        added++;
    }

    list.EnsureCapacity(requestedCapacity);

    var actualCapacity = list.Capacity;
    Assert.Equal(expectedCapacity, actualCapacity);
}

/// <summary>
/// Verifies that when a list is filled to a whole number of segments, asking for one more element of
/// capacity grows the capacity by exactly one segment.
/// </summary>
/// <param name="segmentCount">Number of full segments the list initially holds.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(4)]
public void EnsureCapacity_GrowsBySegment(int segmentCount)
{
    var segmentSize = SegmentedArray<T>.TestAccessor.SegmentSize;
    var elementCount = segmentSize * segmentCount;
    var list = new SegmentedList<T>(elementCount);

    var added = 0;
    while (added < elementCount)
    {
        list.Add(CreateT(added));
        added++;
    }

    // Precondition: the list is exactly full.
    Assert.Equal(elementCount, list.Capacity);

    list.EnsureCapacity(elementCount + 1);

    var expectedCapacity = elementCount + segmentSize;
    Assert.Equal(expectedCapacity, list.Capacity);
}

/// <summary>
/// Verifies that when a list is filled to a whole number of segments, requesting a capacity of
/// <c>2 * elementCount + 10</c> results in exactly that capacity.
/// </summary>
/// <param name="segmentCount">Number of full segments the list initially holds.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(4)]
public void EnsureCapacity_MatchesSizeWithLargeCapacityRequest(int segmentCount)
{
    var segmentSize = SegmentedArray<T>.TestAccessor.SegmentSize;
    var elementCount = segmentSize * segmentCount;
    var list = new SegmentedList<T>(elementCount);

    var added = 0;
    while (added < elementCount)
    {
        list.Add(CreateT(added));
        added++;
    }

    // Precondition: the list is exactly full.
    Assert.Equal(elementCount, list.Capacity);

    var requestedCapacity = (2 * elementCount) + 10;
    list.EnsureCapacity(requestedCapacity);

    Assert.Equal(requestedCapacity, list.Capacity);
}
}
}
14 changes: 14 additions & 0 deletions src/Dependencies/Collections/SegmentedArray`1+PrivateMarshal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;

namespace Microsoft.CodeAnalysis.Collections;

internal readonly partial struct SegmentedArray<T>
Expand All @@ -14,5 +16,17 @@ internal static class PrivateMarshal
/// <inheritdoc cref="SegmentedCollectionsMarshal.AsSegments{T}(SegmentedArray{T})"/>
public static T[][] AsSegments(SegmentedArray<T> array)
{
    // Hand back the backing jagged array itself rather than a copy.
    return array._items;
}

/// <summary>
/// Wraps <paramref name="segments"/> in a <see cref="SegmentedArray{T}"/> whose length is the sum of the
/// lengths of the individual segments.
/// </summary>
/// <param name="segments">The jagged array to wrap.</param>
/// <returns>A <see cref="SegmentedArray{T}"/> constructed from the computed length and <paramref name="segments"/>.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="segments"/> is <see langword="null"/>.</exception>
public static SegmentedArray<T> AsSegmentedArray(T[][] segments)
{
    if (segments is null)
        throw new ArgumentNullException(nameof(segments));

    // The total element count is not passed in, so derive it from the segments themselves.
    var length = 0;
    foreach (var segment in segments)
        length += segment.Length;

    return new SegmentedArray<T>(length, segments);
}
}
}
18 changes: 18 additions & 0 deletions src/Dependencies/Collections/SegmentedCollectionsMarshal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,24 @@ internal static class SegmentedCollectionsMarshal
public static T[][] AsSegments<T>(SegmentedArray<T> array)
{
    // Forward to the nested marshal type on SegmentedArray<T>.
    return SegmentedArray<T>.PrivateMarshal.AsSegments(array);
}

/// <summary>
/// Wraps the input <c>T[][]</c> in a <see cref="SegmentedArray{T}"/> value.
/// </summary>
/// <typeparam name="T">The element type of the input.</typeparam>
/// <param name="segments">The jagged array to wrap in the returned <see cref="SegmentedArray{T}"/> value.</param>
/// <returns>A <see cref="SegmentedArray{T}"/> value that wraps <paramref name="segments"/>.</returns>
/// <remarks>
/// <para>
/// Callers should take extra care to ensure that they are the sole owner of <paramref name="segments"/> and
/// that it is not modified once the returned <see cref="SegmentedArray{T}"/> value starts being used.
/// Modifying it afterwards might cause undefined behavior in code paths which don't expect the contents of a
/// given <see cref="SegmentedArray{T}"/> value to change outside their control.
/// </para>
/// </remarks>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="segments"/> is <see langword="null"/>.</exception>
public static SegmentedArray<T> AsSegmentedArray<T>(T[][] segments)
{
    // Forward to the nested marshal type on SegmentedArray<T>.
    return SegmentedArray<T>.PrivateMarshal.AsSegmentedArray(segments);
}

/// <summary>
/// Gets either a ref to a <typeparamref name="TValue"/> in the <see cref="SegmentedDictionary{TKey, TValue}"/> or a
/// ref null if it does not exist in the <paramref name="dictionary"/>.
Expand Down
45 changes: 41 additions & 4 deletions src/Dependencies/Collections/SegmentedList`1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,39 @@ public int Capacity
{
if (value > 0)
{
var newItems = new SegmentedArray<T>(value);
if (_size > 0)
// Rather than creating a copy of _items, instead reuse as much of its data as possible.
// This saves as much as 50% of allocations and 70% of CPU cost of Add in large collections.
// See SegmentedListBenchmarks_Add for repro details.
sharwell marked this conversation as resolved.
Show resolved Hide resolved
var segments = SegmentedCollectionsMarshal.AsSegments(_items);

var segmentSize = SegmentedArrayHelper.GetSegmentSize<T>();
var segmentShift = SegmentedArrayHelper.GetSegmentShift<T>();
sharwell marked this conversation as resolved.
Show resolved Hide resolved
var oldSegmentCount = segments.Length;
var newSegmentCount = (value + segmentSize - 1) >> segmentShift;

// Grow the array of segments, if necessary
Array.Resize(ref segments, newSegmentCount);

var lastPageSize = value - ((newSegmentCount - 1) << segmentShift);

// If the previous last page is still the last page, resize it to lastPageSize.
// Otherwise, resize it to SegmentSize.
if (oldSegmentCount > 0)
{
SegmentedArray.Copy(_items, newItems, _size);
Array.Resize(
ref segments[oldSegmentCount - 1],
oldSegmentCount == newSegmentCount ? lastPageSize : segmentSize);
}
_items = newItems;

// Create all new pages (except the last one which is done separately)
for (var i = oldSegmentCount; i < newSegmentCount - 1; i++)
segments[i] = new T[segmentSize];

// Create a new last page if necessary
if (oldSegmentCount < newSegmentCount)
segments[newSegmentCount - 1] = new T[lastPageSize];
sharwell marked this conversation as resolved.
Show resolved Hide resolved

_items = SegmentedCollectionsMarshal.AsSegmentedArray(segments);
}
else
{
Expand Down Expand Up @@ -502,7 +529,17 @@ internal void Grow(int capacity)
// If the computed capacity is still less than specified, set to the original argument.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes:

  1. Document the algorithm for initialization of newCapacity
  2. For initial length greater than or equal to half the segment size but less than one full segment size, set newCapacity to one segment size.
  3. For any initial length where the final segment is not a full segment, set newCapacity to the length it would be with a full-size final segment. This guarantees that the outer array will not be reallocated, and also guarantees that the single inner array allocation performed during the resize will not need to be performed a second time during the next resize. (Can be modified and treated as a generalization of the preceding point)
  4. If the calculated newCapacity ends up being less than capacity and capacity is greater than the segment size, apply a final ceiling operation so the final segment is full size.

These rules are all relevant regardless of whether we increase by doubling or by a page at a time. I am still reviewing the choice of expansion size.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

➡️ Changing the capacity selection algorithm for resize has been deferred to a later pull request.

// Capacities exceeding Array.MaxLength will be surfaced as OutOfMemoryException by Array.Resize.
if (newCapacity < capacity)
{
newCapacity = capacity;
}
else
ToddGrun marked this conversation as resolved.
Show resolved Hide resolved
{
var segmentSize = SegmentedArrayHelper.GetSegmentSize<T>();

// If caller didn't request a large capacity increase, limit the increase to a single page
if (newCapacity > segmentSize)
newCapacity = (((capacity - 1) / segmentSize) + 1) * segmentSize;
sharwell marked this conversation as resolved.
Show resolved Hide resolved
}

Capacity = newCapacity;
}
Expand Down
31 changes: 31 additions & 0 deletions src/Tools/IdeCoreBenchmarks/SegmentedListBenchmarks_Add.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;

namespace IdeCoreBenchmarks
{
/// <summary>
/// Benchmark that appends <see cref="Count"/> <see langword="null"/> entries to a newly created
/// <c>SegmentedList&lt;object?&gt;</c>.
/// </summary>
[MemoryDiagnoser]
public class SegmentedListBenchmarks_Add
{
    /// <summary>
    /// Number of items appended per benchmark invocation.
    /// </summary>
    [Params(1_000, 10_000, 100_000, 1_000_000)]
    public int Count { get; set; }

    /// <summary>
    /// Global setup hook; currently performs no work.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
    }

    /// <summary>
    /// Creates an empty list and adds <see cref="Count"/> items to it one at a time.
    /// </summary>
    [Benchmark(Description = "AddToSegmentedList<object>", Baseline = true)]
    public void AddList()
    {
        var list = new Microsoft.CodeAnalysis.Collections.SegmentedList<object?>();

        // Read Count once, then count down to zero.
        var remaining = Count;
        while (remaining > 0)
        {
            list.Add(null);
            remaining--;
        }
    }
}
}