Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LSP] Report unchanged results if diagnostic data is the same as last report #75587

Merged
merged 10 commits into from
Oct 23, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.CodeCleanup;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.PooledObjects;
Expand Down Expand Up @@ -45,13 +49,11 @@ internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams
protected readonly IDiagnosticAnalyzerService DiagnosticAnalyzerService = diagnosticAnalyzerService;

/// <summary>
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the
/// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>. The former is faster
/// and works well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that
/// update the version stamp but not the content (for example, forking LSP text).
/// Map of diagnostic category to the diagnostics cache for that category.
/// Each category has a separate cache as they have disjoint resultIds and diagnostics. For example, we may have
/// one cache for DocumentSyntax, another for DocumentSemantic, another for WorkspaceSemantic, etc etc.
/// </summary>
private readonly ConcurrentDictionary<string, VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum)>> _categoryToVersionedCache = [];
private readonly ConcurrentDictionary<string, DiagnosticsPullCache> _categoryToVersionedCache = [];

protected virtual bool PotentialDuplicate => false;

Expand Down Expand Up @@ -156,18 +158,18 @@ protected virtual Task WaitForChangesAsync(string? category, RequestContext cont
var globalStateVersion = _diagnosticRefresher.GlobalStateVersion;

var project = diagnosticSource.GetProject();
var cacheState = new DiagnosticsRequestState(project, globalStateVersion, context, diagnosticSource);

var newResultId = await versionedCache.GetNewResultIdAsync(
var newResult = await versionedCache.GetOrComputeNewDataAsync(
documentIdToPreviousDiagnosticParams,
diagnosticSource.GetId(),
project,
computeCheapVersionAsync: async () => (globalStateVersion, await project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false)),
computeExpensiveVersionAsync: async () => (globalStateVersion, await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false)),
cacheState,
cancellationToken).ConfigureAwait(false);
if (newResultId != null)
if (newResult != null)
{
await ComputeAndReportCurrentDiagnosticsAsync(
context, diagnosticSource, progress, newResultId, clientCapabilities, cancellationToken).ConfigureAwait(false);
ReportCurrentDiagnostics(
Copy link
Member Author

@dibarbet dibarbet Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Computation moved to run inside the cache based on what parameters it determines. Makes it much easier to reason about concurrency

diagnosticSource, newResult.Value.Data, progress, newResult.Value.ResultId, clientCapabilities);
}
else
{
Expand Down Expand Up @@ -269,16 +271,14 @@ static async Task ProcessPreviousResultsAsync(
}
}

private async Task ComputeAndReportCurrentDiagnosticsAsync(
RequestContext context,
private void ReportCurrentDiagnostics(
IDiagnosticSource diagnosticSource,
ImmutableArray<DiagnosticData> diagnostics,
BufferedProgress<TReport> progress,
string resultId,
ClientCapabilities clientCapabilities,
CancellationToken cancellationToken)
string newResultId,
ClientCapabilities clientCapabilities)
{
using var _ = ArrayBuilder<LSP.Diagnostic>.GetInstance(out var result);
var diagnostics = await diagnosticSource.GetDiagnosticsAsync(context, cancellationToken).ConfigureAwait(false);

// If we can't get a text document identifier we can't report diagnostics for this source.
// This can happen for 'fake' projects (e.g. used for TS script blocks).
Expand All @@ -290,12 +290,10 @@ private async Task ComputeAndReportCurrentDiagnosticsAsync(
return;
}

context.TraceInformation($"Found {diagnostics.Length} diagnostics for {diagnosticSource.ToDisplayString()}");
dibarbet marked this conversation as resolved.
Show resolved Hide resolved

foreach (var diagnostic in diagnostics)
result.AddRange(ConvertDiagnostic(diagnosticSource, diagnostic, clientCapabilities));

var report = CreateReport(documentIdentifier, result.ToArray(), resultId);
var report = CreateReport(documentIdentifier, result.ToArray(), newResultId);
progress.Report(report);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Immutable;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Text;
using Roslyn.LanguageServer.Protocol;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;

internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams, TReport, TReturn>
where TDiagnosticsParams : IPartialResultParams<TReport>
{
internal record struct DiagnosticsRequestState(Project Project, int GlobalStateVersion, RequestContext Context, IDiagnosticSource DiagnosticSource);

/// <summary>
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the
/// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>. The former is faster
/// and works well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that
/// update the version stamp but not the content (for example, forking LSP text).
/// </summary>
private sealed class DiagnosticsPullCache(string uniqueKey) : VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum), DiagnosticsRequestState, DiagnosticData>(uniqueKey)
{
public override async Task<(int globalStateVersion, VersionStamp? dependentVersion)> ComputeCheapVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved from passing functions to impls of abstract methods

{
return (state.GlobalStateVersion, await state.Project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false));
}

public override async Task<(int globalStateVersion, Checksum dependentChecksum)> ComputeExpensiveVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
{
return (state.GlobalStateVersion, await state.Project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false));
}

/// <inheritdoc cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}.ComputeDataAsync(TState, CancellationToken)"/>
public override async Task<ImmutableArray<DiagnosticData>> ComputeDataAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
{
var diagnostics = await state.DiagnosticSource.GetDiagnosticsAsync(state.Context, cancellationToken).ConfigureAwait(false);
state.Context.TraceInformation($"Found {diagnostics.Length} diagnostics for {state.DiagnosticSource.ToDisplayString()}");
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
return diagnostics;
}

public override Checksum ComputeChecksum(ImmutableArray<DiagnosticData> data)
{
// Create checksums of diagnostic data and sort to ensure stable ordering for comparison.
var diagnosticDataChecksums = data
.SelectAsArray(d => Checksum.Create(d, SerializeDiagnosticData))
.Sort();

return Checksum.Create(diagnosticDataChecksums);
}

private static void SerializeDiagnosticData(DiagnosticData diagnosticData, ObjectWriter writer)
{
writer.WriteString(diagnosticData.Id);
writer.WriteString(diagnosticData.Category);
writer.WriteString(diagnosticData.Message);
writer.WriteInt32((int)diagnosticData.Severity);
writer.WriteInt32((int)diagnosticData.DefaultSeverity);
writer.WriteBoolean(diagnosticData.IsEnabledByDefault);
writer.WriteInt32(diagnosticData.WarningLevel);

// Ensure the tag order is stable before we write it.
foreach (var tag in diagnosticData.CustomTags.Sort())
{
writer.WriteString(tag);
}

foreach (var key in diagnosticData.Properties.Keys.ToImmutableArray().Sort())
{
writer.WriteString(key);
writer.WriteString(diagnosticData.Properties[key]);
}

if (diagnosticData.ProjectId != null)
writer.WriteGuid(diagnosticData.ProjectId.Id);

WriteDiagnosticDataLocation(diagnosticData.DataLocation, writer);

foreach (var additionalLocation in diagnosticData.AdditionalLocations)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AdditionalLocations

Are there usually very many items in here? If so, do they usually have the same file path?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I didn't notice there were multiple file spans per location. Maybe these can't be easily grouped by file location.

{
WriteDiagnosticDataLocation(additionalLocation, writer);
}

writer.WriteString(diagnosticData.Language);
writer.WriteString(diagnosticData.Title);
writer.WriteString(diagnosticData.Description);
writer.WriteString(diagnosticData.HelpLink);
writer.WriteBoolean(diagnosticData.IsSuppressed);

static void WriteDiagnosticDataLocation(DiagnosticDataLocation location, ObjectWriter writer)
{
WriteFileLinePositionSpan(location.UnmappedFileSpan, writer);
if (location.DocumentId != null)
writer.WriteGuid(location.DocumentId.Id);

WriteFileLinePositionSpan(location.MappedFileSpan, writer);
}

static void WriteFileLinePositionSpan(FileLinePositionSpan fileSpan, ObjectWriter writer)
{
writer.WriteString(fileSpan.Path);
WriteLinePositionSpan(fileSpan.Span, writer);
writer.WriteBoolean(fileSpan.HasMappedPath);
}

static void WriteLinePositionSpan(LinePositionSpan span, ObjectWriter writer)
{
WriteLinePosition(span.Start, writer);
WriteLinePosition(span.End, writer);
}

static void WriteLinePosition(LinePosition position, ObjectWriter writer)
{
writer.WriteInt32(position.Line);
writer.WriteInt32(position.Character);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: trailing empty line


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Immutable;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.LanguageServer.Handler;

internal abstract partial class VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData>
{
/// <summary>
/// Internal cache item that updates state for a particular <see cref="Workspace"/> and <see cref="ProjectOrDocumentId"/> in <see cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}"/>
/// This type ensures that the state for a particular key is never updated concurrently for the same key (but different key states can be concurrent).
Copy link
Member Author

@dibarbet dibarbet Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prior to this change, there was a single semaphore around updating resultIds for the entire cache item. However now we need to also run the computation to determine if and how the cache should be updated. I did not want to block requests for other documents on computation of diagnostics for a different diagnostic. So now we have the CacheItem which gives a finer grained semaphore to an entry for a specific workspace+proj/doc.

/// </summary>
private sealed class CacheItem(string uniqueKey)
{
/// <summary>
/// Guards access to <see cref="_lastResult"/>.
/// This ensures that a cache entry is fully updated in a single transaction.
/// </summary>
private readonly SemaphoreSlim _gate = new(initialCount: 1);

/// <summary>
/// Stores the current state associated with this cache item.
/// Guarded by <see cref="_gate"/>
///
/// <list type="bullet">
/// <item>The resultId reported to the client.</item>
/// <item>The TCheapVersion of the data that was used to calculate results.
/// <para>
/// Note that this version can change even when nothing has actually changed (for example, forking the
/// LSP text, reloading the same project). So we additionally store:</para></item>
/// <item>A TExpensiveVersion (normally a checksum) checksum that will still allow us to reuse data even when
/// unimportant changes happen that trigger the cheap version change detection.</item>
/// <item>The checksum of the data that was computed when the resultId was generated.
/// <para>
/// When the versions above change, we must recalculate the data. However sometimes that data ends up being exactly the same as the prior request.
/// When that happens, this allows us to send back an unchanged result instead of reserializing data the client already has.
/// </para>
/// </item>
/// </list>
///
/// </summary>
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
private (string resultId, TCheapVersion cheapVersion, TExpensiveVersion expensiveVersion, Checksum dataChecksum)? _lastResult;

/// <summary>
/// Updates the values for this cache entry. Guarded by <see cref="_gate"/>
///
/// Returns <see langword="null"/> if the previousPullResult can be re-used, otherwise returns a new resultId and the new data associated with it.
/// </summary>
public async Task<(string, ImmutableArray<TComputedData>)?> UpdateCacheItemAsync(
VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData> cache,
PreviousPullResult? previousPullResult,
bool isFullyLoaded,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hrmm... what is isFullyLoaded...

Copy link
Member Author

@dibarbet dibarbet Oct 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the operation progress stuff. We always recompute until we've fully loaded the sln. I'm not 100% convinced its necessary anymore (since checksums should change as we load), but I don't want to remove it in this PR.

TState state,
CancellationToken cancellationToken)
{
// Ensure that we only update the cache item one at a time.
// This means that the computation of new data for this item only occurs sequentially.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aside: have we considered computing results in parallel higher up in workspace diagnostics? we walk the diagnostic sources serially. but we could use our parallel helpers to potentially speed that up.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely potentially possible. I'll leave that to investigate in a different PR.

using (await _gate.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
{
TCheapVersion cheapVersion;
TExpensiveVersion expensiveVersion;

// Check if the version we have in the cache matches the request version. If so we can re-use the resultId.
if (isFullyLoaded &&
_lastResult is not null &&
_lastResult.Value.resultId == previousPullResult?.PreviousResultId)
{
cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
if (cheapVersion != null && cheapVersion.Equals(_lastResult.Value.cheapVersion))
{
// The client's resultId matches our cached resultId and the cheap version is an
// exact match for our current cheap version. We return early here to avoid calculating
// expensive versions as we know nothing is changed.
return null;
}

// The current cheap version does not match the last reported. This may be because we've forked
// or reloaded a project, so fall back to calculating the full expensive version to determine if
// anything is actually changed.
expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
if (expensiveVersion != null && expensiveVersion.Equals(_lastResult.Value.expensiveVersion))
{
return null;
}
}
else
{
// The versions we have in our cache (if any) do not match the ones provided by the client (if any).
// We need to calculate new results.
cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
}

// Compute the new result for the request.
var data = await cache.ComputeDataAsync(state, cancellationToken).ConfigureAwait(false);
var dataChecksum = cache.ComputeChecksum(data);

string newResultId;
if (_lastResult is not null && _lastResult?.resultId == previousPullResult?.PreviousResultId && _lastResult?.dataChecksum == dataChecksum)
{
// The new data we've computed is exactly the same as the data we computed last time even though the versions have changed.
// Instead of reserializing everything, we can return the same result id back to the client.

// Ensure we store the updated versions we calculated against old resultId. If we do not do this,
// subsequent requests will always fail the version comparison check (the resultId is still associated with the older version even
// though we reused it here for a newer version) and will trigger re-computation.
// By storing the updated version with the resultId we can short circuit earlier in the version checks.
_lastResult = (_lastResult.Value.resultId, cheapVersion, expensiveVersion, dataChecksum);
return null;
}
else
{
// Keep track of the results we reported here so that we can short-circuit producing results for
// the same state of the world in the future. Use a custom result-id per type (doc requests or workspace
// requests) so that clients of one don't errantly call into the other.
//
// For example, a client getting document diagnostics should not ask for workspace diagnostics with the result-ids it got for
// doc-diagnostics. The two systems are different and cannot share results, or do things like report
// what changed between each other.
//
// Note that we can safely update the map before computation as any cancellation or exception
// during computation means that the client will never recieve this resultId and so cannot ask us for it.
newResultId = $"{uniqueKey}:{cache.GetNextResultId()}";
_lastResult = (newResultId, cheapVersion, expensiveVersion, dataChecksum);
return (newResultId, data);
}
}
}
}
}

Loading
Loading