Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LSP] Report unchanged results if diagnostic data is the same as last report #75587

Merged
merged 10 commits into from
Oct 23, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.CodeCleanup;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.PooledObjects;
Expand Down Expand Up @@ -45,13 +49,11 @@ internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams
protected readonly IDiagnosticAnalyzerService DiagnosticAnalyzerService = diagnosticAnalyzerService;

/// <summary>
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the
/// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>. The former is faster
/// and works well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that
/// update the version stamp but not the content (for example, forking LSP text).
/// Map of diagnostic category to the diagnostics cache for that category.
/// Each category has a separate cache as they have disjoint resultIds and diagnostics. For example, we may have
/// one cache for DocumentSyntax, another for DocumentSemantic, another for WorkspaceSemantic, etc etc.
/// </summary>
private readonly ConcurrentDictionary<string, VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum)>> _categoryToVersionedCache = [];
private readonly ConcurrentDictionary<string, DiagnosticsPullCache> _categoryToVersionedCache = [];

protected virtual bool PotentialDuplicate => false;

Expand Down Expand Up @@ -156,18 +158,18 @@ protected virtual Task WaitForChangesAsync(string? category, RequestContext cont
var globalStateVersion = _diagnosticRefresher.GlobalStateVersion;

var project = diagnosticSource.GetProject();
var cacheState = new DiagnosticsRequestState(project, globalStateVersion, context, diagnosticSource);

var newResultId = await versionedCache.GetNewResultIdAsync(
var newResult = await versionedCache.GetOrComputeNewDataAsync(
documentIdToPreviousDiagnosticParams,
diagnosticSource.GetId(),
project,
computeCheapVersionAsync: async () => (globalStateVersion, await project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false)),
computeExpensiveVersionAsync: async () => (globalStateVersion, await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false)),
cacheState,
cancellationToken).ConfigureAwait(false);
if (newResultId != null)
if (newResult != null)
{
await ComputeAndReportCurrentDiagnosticsAsync(
context, diagnosticSource, progress, newResultId, clientCapabilities, cancellationToken).ConfigureAwait(false);
ReportCurrentDiagnostics(
Copy link
Member Author

@dibarbet dibarbet Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Computation moved to run inside the cache based on what parameters it determines. Makes it much easier to reason about concurrency

diagnosticSource, newResult.Value.Data, progress, newResult.Value.ResultId, clientCapabilities);
}
else
{
Expand Down Expand Up @@ -269,16 +271,14 @@ static async Task ProcessPreviousResultsAsync(
}
}

private async Task ComputeAndReportCurrentDiagnosticsAsync(
RequestContext context,
private void ReportCurrentDiagnostics(
IDiagnosticSource diagnosticSource,
ImmutableArray<DiagnosticData> diagnostics,
BufferedProgress<TReport> progress,
string resultId,
ClientCapabilities clientCapabilities,
CancellationToken cancellationToken)
string newResultId,
ClientCapabilities clientCapabilities)
{
using var _ = ArrayBuilder<LSP.Diagnostic>.GetInstance(out var result);
var diagnostics = await diagnosticSource.GetDiagnosticsAsync(context, cancellationToken).ConfigureAwait(false);

// If we can't get a text document identifier we can't report diagnostics for this source.
// This can happen for 'fake' projects (e.g. used for TS script blocks).
Expand All @@ -290,12 +290,10 @@ private async Task ComputeAndReportCurrentDiagnosticsAsync(
return;
}

context.TraceInformation($"Found {diagnostics.Length} diagnostics for {diagnosticSource.ToDisplayString()}");
dibarbet marked this conversation as resolved.
Show resolved Hide resolved

foreach (var diagnostic in diagnostics)
result.AddRange(ConvertDiagnostic(diagnosticSource, diagnostic, clientCapabilities));

var report = CreateReport(documentIdentifier, result.ToArray(), resultId);
var report = CreateReport(documentIdentifier, result.ToArray(), newResultId);
progress.Report(report);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Immutable;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Diagnostics;
using Roslyn.LanguageServer.Protocol;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;

internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams, TReport, TReturn> where TDiagnosticsParams : IPartialResultParams<TReport>
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
{
internal record struct DiagnosticsRequestState(Project Project, int GlobalStateVersion, RequestContext Context, IDiagnosticSource DiagnosticSource);

/// <summary>
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the
/// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>. The former is faster
/// and works well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that
/// update the version stamp but not the content (for example, forking LSP text).
/// </summary>
internal class DiagnosticsPullCache(string uniqueKey) : VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum), DiagnosticsRequestState, DiagnosticData>(uniqueKey)
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
{
public override async Task<(int globalStateVersion, VersionStamp? dependentVersion)> ComputeCheapVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved from passing functions to impls of abstract methods

{
return (state.GlobalStateVersion, await state.Project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false));
}

public override async Task<(int globalStateVersion, Checksum dependentChecksum)> ComputeExpensiveVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
{
return (state.GlobalStateVersion, await state.Project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false));
}

public override async Task<ImmutableArray<DiagnosticData>> ComputeDataAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
{
var diagnostics = await state.DiagnosticSource.GetDiagnosticsAsync(state.Context, cancellationToken).ConfigureAwait(false);
state.Context.TraceInformation($"Found {diagnostics.Length} diagnostics for {state.DiagnosticSource.ToDisplayString()}");
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
return diagnostics;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Immutable;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.LanguageServer.Handler;

internal abstract partial class VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData>
{
/// <summary>
/// Internal cache item that updates state for a particular <see cref="Workspace"/> and <see cref="ProjectOrDocumentId"/> in <see cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}"/>
/// This type ensures that the state for a particular key is never updated concurrently for the same key (but different key states can be concurrent).
Copy link
Member Author

@dibarbet dibarbet Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prior to this change, there was a single semaphore around updating resultIds for the entire cache item. However now we need to also run the computation to determine if and how the cache should be updated. I did not want to block requests for other documents on computation of diagnostics for a different diagnostic. So now we have the CacheItem which gives a finer grained semaphore to an entry for a specific workspace+proj/doc.

/// </summary>
private class CacheItem(string uniqueKey)
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
{
/// <summary>
/// Guards access to <see cref="_lastResult"/>.
/// This ensures that a cache entry is fully updated in a single transaction.
/// </summary>
private readonly SemaphoreSlim _semaphore = new(1);
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
dibarbet marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Stores the current state associated with this cache item.
///
/// <list type="bullet">
/// <item>The resultId reported to the client.</item>
/// <item>The TCheapVersion of the data that was used to calculate results.
/// <para>
/// Note that this version can change even when nothing has actually changed (for example, forking the
/// LSP text, reloading the same project). So we additionally store:</para></item>
/// <item>A TExpensiveVersion (normally a checksum) checksum that will still allow us to reuse data even when
/// unimportant changes happen that trigger the cheap version change detection.</item>
/// <item>The hash of the data that was computed when the resultId was generated.
/// <para>
/// When the versions above change, we must recalculate the data. However sometimes that data ends up being exactly the same as the prior request.
/// When that happens, this allows us to send back an unchanged result instead of reserializing data the client already has.
/// </para>
/// </item>
/// </list>
///
/// </summary>
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
private (string resultId, TCheapVersion cheapVersion, TExpensiveVersion expensiveVersion, int hashedData)? _lastResult;

/// <summary>
/// Updates the values for this cache entry. Guarded by <see cref="_semaphore"/>
///
/// Returns null if the previousPullResult can be re-used, otherwise returns a new resultId and the new data associated with it.
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
public async Task<(string, ImmutableArray<TComputedData>)?> UpdateCacheItemAsync(
VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData> cache,
PreviousPullResult? previousPullResult,
bool isFullyLoaded,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hrmm... what is isFullyLoaded...

Copy link
Member Author

@dibarbet dibarbet Oct 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the operation progress stuff. We always recompute until we've fully loaded the sln. I'm not 100% convinced its necessary anymore (since checksums should change as we load), but I don't want to remove it in this PR.

TState state,
CancellationToken cancellationToken)
{
// Ensure that we only update the cache item one at a time.
// This means that the computation of new data for this item only occurs sequentially.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aside: have we considered computing results in parallel higher up in workspace diagnostics? we walk the diagnostic sources serially. but we could use our parallel helpers to potentially speed that up.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely potentially possible. I'll leave that to investigate in a different PR.

using (await _semaphore.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
{
TCheapVersion cheapVersion;
TExpensiveVersion expensiveVersion;

// Check if the version we have in the cache matches the request version. If so we can re-use the resultId.
if (isFullyLoaded &&
_lastResult is not null &&
_lastResult.Value.resultId == previousPullResult?.PreviousResultId)
{
cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
if (cheapVersion != null && cheapVersion.Equals(_lastResult.Value.cheapVersion))
{
// The client's resultId matches our cached resultId and the cheap version is an
// exact match for our current cheap version. We return early here to avoid calculating
// expensive versions as we know nothing is changed.
return null;
}

// The current cheap version does not match the last reported. This may be because we've forked
// or reloaded a project, so fall back to calculating the full expensive version to determine if
// anything is actually changed.
expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
if (expensiveVersion != null && expensiveVersion.Equals(_lastResult.Value.expensiveVersion))
{
return null;
}
}
else
{
// The versions we have in our cache (if any) do not match the ones provided by the client (if any).
// We need to calculate new results.
cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
}

// Compute the new result for the request.
var data = await cache.ComputeDataAsync(state, cancellationToken).ConfigureAwait(false);
var dataHash = GetComputedDataHash(data);

string newResultId;
if (_lastResult is not null && _lastResult?.resultId == previousPullResult?.PreviousResultId && _lastResult?.hashedData == dataHash)
{
// The new data we've computed is exactly the same as the data we computed last time even though the versions have changed.
// Instead of reserializing everything, we can return the same result id back to the client.

// Ensure we store the updated versions we calculated with the old resultId.
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
_lastResult = (_lastResult.Value.resultId, cheapVersion, expensiveVersion, dataHash);
return null;
}
else
{
// Keep track of the results we reported here so that we can short-circuit producing results for
// the same state of the world in the future. Use a custom result-id per type (doc requests or workspace
// requests) so that clients of one don't errantly call into the other.
//
// For example, a client getting document diagnostics should not ask for workspace diagnostics with the result-ids it got for
// doc-diagnostics. The two systems are different and cannot share results, or do things like report
// what changed between each other.
//
// Note that we can safely update the map before computation as any cancellation or exception
// during computation means that the client will never recieve this resultId and so cannot ask us for it.
newResultId = $"{uniqueKey}:{cache.IncrementResultId()}";
_lastResult = (newResultId, cheapVersion, expensiveVersion, dataHash);
return (newResultId, data);
}
}
}

private static int GetComputedDataHash(ImmutableArray<TComputedData> data)
{
var hashes = data.SelectAsArray(d => d.GetHashCode()).Sort();
dibarbet marked this conversation as resolved.
Show resolved Hide resolved
return Hash.CombineValues(hashes);
}
}
}

Loading
Loading