dotnet · dibarbet · Oct 23, 2024 · Oct 18, 2024 · Oct 22, 2024 · Oct 22, 2024
@@ -2,12 +2,16 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Collections.Immutable;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.CodeCleanup;
 using Microsoft.CodeAnalysis.Diagnostics;
 using Microsoft.CodeAnalysis.Options;
 using Microsoft.CodeAnalysis.PooledObjects;
@@ -45,13 +49,11 @@ internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams
     protected readonly IDiagnosticAnalyzerService DiagnosticAnalyzerService = diagnosticAnalyzerService;
 
     /// <summary>
-    /// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance 
-    /// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the 
-    /// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>.  The former is faster
-    /// and works well for us in the normal case.  The latter still allows us to reuse diagnostics when changes happen that
-    /// update the version stamp but not the content (for example, forking LSP text).
+    /// Map of diagnostic category to the diagnostics cache for that category.
+    /// Each category has a separate cache as they have disjoint resultIds and diagnostics.  For example, we may have
+    /// one cache for DocumentSyntax, another for DocumentSemantic, another for WorkspaceSemantic, etc etc.
     /// </summary>
-    private readonly ConcurrentDictionary<string, VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum)>> _categoryToVersionedCache = [];
+    private readonly ConcurrentDictionary<string, DiagnosticsPullCache> _categoryToVersionedCache = [];
 
     protected virtual bool PotentialDuplicate => false;
 
@@ -156,18 +158,18 @@ protected virtual Task WaitForChangesAsync(string? category, RequestContext cont
                 var globalStateVersion = _diagnosticRefresher.GlobalStateVersion;
 
                 var project = diagnosticSource.GetProject();
+                var cacheState = new DiagnosticsRequestState(project, globalStateVersion, context, diagnosticSource);
 
-                var newResultId = await versionedCache.GetNewResultIdAsync(
+                var newResult = await versionedCache.GetOrComputeNewDataAsync(
                     documentIdToPreviousDiagnosticParams,
                     diagnosticSource.GetId(),
                     project,
-                    computeCheapVersionAsync: async () => (globalStateVersion, await project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false)),
-                    computeExpensiveVersionAsync: async () => (globalStateVersion, await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false)),
+                    cacheState,
                     cancellationToken).ConfigureAwait(false);
-                if (newResultId != null)
+                if (newResult != null)
                 {
-                    await ComputeAndReportCurrentDiagnosticsAsync(
-                        context, diagnosticSource, progress, newResultId, clientCapabilities, cancellationToken).ConfigureAwait(false);
+                    ReportCurrentDiagnostics(
+                        diagnosticSource, newResult.Value.Data, progress, newResult.Value.ResultId, clientCapabilities);
                 }
                 else
                 {
@@ -269,16 +271,14 @@ static async Task ProcessPreviousResultsAsync(
         }
     }
 
-    private async Task ComputeAndReportCurrentDiagnosticsAsync(
-        RequestContext context,
+    private void ReportCurrentDiagnostics(
         IDiagnosticSource diagnosticSource,
+        ImmutableArray<DiagnosticData> diagnostics,
         BufferedProgress<TReport> progress,
-        string resultId,
-        ClientCapabilities clientCapabilities,
-        CancellationToken cancellationToken)
+        string newResultId,
+        ClientCapabilities clientCapabilities)
     {
         using var _ = ArrayBuilder<LSP.Diagnostic>.GetInstance(out var result);
-        var diagnostics = await diagnosticSource.GetDiagnosticsAsync(context, cancellationToken).ConfigureAwait(false);
 
         // If we can't get a text document identifier we can't report diagnostics for this source.
         // This can happen for 'fake' projects (e.g. used for TS script blocks).
@@ -290,12 +290,10 @@ private async Task ComputeAndReportCurrentDiagnosticsAsync(
             return;
         }
 
-        context.TraceInformation($"Found {diagnostics.Length} diagnostics for {diagnosticSource.ToDisplayString()}");
-
         foreach (var diagnostic in diagnostics)
             result.AddRange(ConvertDiagnostic(diagnosticSource, diagnostic, clientCapabilities));
 
-        var report = CreateReport(documentIdentifier, result.ToArray(), resultId);
+        var report = CreateReport(documentIdentifier, result.ToArray(), newResultId);
         progress.Report(report);
     }
 

@@ -0,0 +1,125 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Immutable;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.Diagnostics;
+using Microsoft.CodeAnalysis.Text;
+using Roslyn.LanguageServer.Protocol;
+using Roslyn.Utilities;
+
+namespace Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;
+
+internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams, TReport, TReturn>
+    where TDiagnosticsParams : IPartialResultParams<TReport>
+{
+    internal record struct DiagnosticsRequestState(Project Project, int GlobalStateVersion, RequestContext Context, IDiagnosticSource DiagnosticSource);
+
+    /// <summary>
+    /// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance 
+    /// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the 
+    /// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>.  The former is faster
+    /// and works well for us in the normal case.  The latter still allows us to reuse diagnostics when changes happen that
+    /// update the version stamp but not the content (for example, forking LSP text).
+    /// </summary>
+    private sealed class DiagnosticsPullCache(string uniqueKey) : VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum), DiagnosticsRequestState, DiagnosticData>(uniqueKey)
+    {
+        public override async Task<(int globalStateVersion, VersionStamp? dependentVersion)> ComputeCheapVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
+        {
+            return (state.GlobalStateVersion, await state.Project.GetDependentVersionAsync(cancellationToken).ConfigureAwait(false));
+        }
+
+        public override async Task<(int globalStateVersion, Checksum dependentChecksum)> ComputeExpensiveVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
+        {
+            return (state.GlobalStateVersion, await state.Project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false));
+        }
+
+        /// <inheritdoc cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}.ComputeDataAsync(TState, CancellationToken)"/>
+        public override async Task<ImmutableArray<DiagnosticData>> ComputeDataAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
+        {
+            var diagnostics = await state.DiagnosticSource.GetDiagnosticsAsync(state.Context, cancellationToken).ConfigureAwait(false);
+            state.Context.TraceInformation($"Found {diagnostics.Length} diagnostics for {state.DiagnosticSource.ToDisplayString()}");
+            return diagnostics;
+        }
+
+        public override Checksum ComputeChecksum(ImmutableArray<DiagnosticData> data)
+        {
+            // Create checksums of diagnostic data and sort to ensure stable ordering for comparison.
+            var diagnosticDataChecksums = data
+                .SelectAsArray(d => Checksum.Create(d, SerializeDiagnosticData))
+                .Sort();
+
+            return Checksum.Create(diagnosticDataChecksums);
+        }
+
+        private static void SerializeDiagnosticData(DiagnosticData diagnosticData, ObjectWriter writer)
+        {
+            writer.WriteString(diagnosticData.Id);
+            writer.WriteString(diagnosticData.Category);
+            writer.WriteString(diagnosticData.Message);
+            writer.WriteInt32((int)diagnosticData.Severity);
+            writer.WriteInt32((int)diagnosticData.DefaultSeverity);
+            writer.WriteBoolean(diagnosticData.IsEnabledByDefault);
+            writer.WriteInt32(diagnosticData.WarningLevel);
+
+            // Ensure the tag order is stable before we write it.
+            foreach (var tag in diagnosticData.CustomTags.Sort())
+            {
+                writer.WriteString(tag);
+            }
+
+            foreach (var key in diagnosticData.Properties.Keys.ToImmutableArray().Sort())
+            {
+                writer.WriteString(key);
+                writer.WriteString(diagnosticData.Properties[key]);
+            }
+
+            if (diagnosticData.ProjectId != null)
+                writer.WriteGuid(diagnosticData.ProjectId.Id);
+
+            WriteDiagnosticDataLocation(diagnosticData.DataLocation, writer);
+
+            foreach (var additionalLocation in diagnosticData.AdditionalLocations)
+            {
+                WriteDiagnosticDataLocation(additionalLocation, writer);
+            }
+
+            writer.WriteString(diagnosticData.Language);
+            writer.WriteString(diagnosticData.Title);
+            writer.WriteString(diagnosticData.Description);
+            writer.WriteString(diagnosticData.HelpLink);
+            writer.WriteBoolean(diagnosticData.IsSuppressed);
+
+            static void WriteDiagnosticDataLocation(DiagnosticDataLocation location, ObjectWriter writer)
+            {
+                WriteFileLinePositionSpan(location.UnmappedFileSpan, writer);
+                if (location.DocumentId != null)
+                    writer.WriteGuid(location.DocumentId.Id);
+
+                WriteFileLinePositionSpan(location.MappedFileSpan, writer);
+            }
+
+            static void WriteFileLinePositionSpan(FileLinePositionSpan fileSpan, ObjectWriter writer)
+            {
+                writer.WriteString(fileSpan.Path);
+                WriteLinePositionSpan(fileSpan.Span, writer);
+                writer.WriteBoolean(fileSpan.HasMappedPath);
+            }
+
+            static void WriteLinePositionSpan(LinePositionSpan span, ObjectWriter writer)
+            {
+                WriteLinePosition(span.Start, writer);
+                WriteLinePosition(span.End, writer);
+            }
+
+            static void WriteLinePosition(LinePosition position, ObjectWriter writer)
+            {
+                writer.WriteInt32(position.Line);
+                writer.WriteInt32(position.Character);
+            }
+
-
-
+        }
+    }
+}
@@ -0,0 +1,137 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Immutable;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.LanguageServer.Handler.Diagnostics;
+using Roslyn.Utilities;
+
+namespace Microsoft.CodeAnalysis.LanguageServer.Handler;
+
+internal abstract partial class VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData>
+{
+    /// <summary>
+    /// Internal cache item that updates state for a particular <see cref="Workspace"/> and <see cref="ProjectOrDocumentId"/> in <see cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}"/>
+    /// This type ensures that the state for a particular key is never updated concurrently for the same key (but different key states can be concurrent).
+    /// </summary>
+    private sealed class CacheItem(string uniqueKey)
+    {
+        /// <summary>
+        /// Guards access to <see cref="_lastResult"/>.
+        /// This ensures that a cache entry is fully updated in a single transaction.
+        /// </summary>
+        private readonly SemaphoreSlim _gate = new(initialCount: 1);
+
+        /// <summary>
+        /// Stores the current state associated with this cache item.
+        /// Guarded by <see cref="_gate"/>
+        /// 
+        /// <list type="bullet">
+        ///   <item>The resultId reported to the client.</item>
+        ///   <item>The TCheapVersion of the data that was used to calculate results.
+        ///       <para>
+        ///       Note that this version can change even when nothing has actually changed (for example, forking the 
+        ///       LSP text, reloading the same project). So we additionally store:</para></item>
+        ///   <item>A TExpensiveVersion (normally a checksum) checksum that will still allow us to reuse data even when
+        ///   unimportant changes happen that trigger the cheap version change detection.</item>
+        ///   <item>The checksum of the data that was computed when the resultId was generated.
+        ///       <para>
+        ///       When the versions above change, we must recalculate the data.  However sometimes that data ends up being exactly the same as the prior request.
+        ///       When that happens, this allows us to send back an unchanged result instead of reserializing data the client already has.
+        ///       </para>
+        ///   </item>
+        /// </list>
+        /// 
+        /// </summary>
+        private (string resultId, TCheapVersion cheapVersion, TExpensiveVersion expensiveVersion, Checksum dataChecksum)? _lastResult;
+
+        /// <summary>
+        /// Updates the values for this cache entry.  Guarded by <see cref="_gate"/>
+        /// 
+        /// Returns <see langword="null"/> if the previousPullResult can be re-used, otherwise returns a new resultId and the new data associated with it.
+        /// </summary>
+        public async Task<(string, ImmutableArray<TComputedData>)?> UpdateCacheItemAsync(
+            VersionedPullCache<TCheapVersion, TExpensiveVersion, TState, TComputedData> cache,
+            PreviousPullResult? previousPullResult,
+            bool isFullyLoaded,
+            TState state,
+            CancellationToken cancellationToken)
+        {
+            // Ensure that we only update the cache item one at a time.
+            // This means that the computation of new data for this item only occurs sequentially.
+            using (await _gate.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
+            {
+                TCheapVersion cheapVersion;
+                TExpensiveVersion expensiveVersion;
+
+                // Check if the version we have in the cache matches the request version.  If so we can re-use the resultId.
+                if (isFullyLoaded &&
+                    _lastResult is not null &&
+                    _lastResult.Value.resultId == previousPullResult?.PreviousResultId)
+                {
+                    cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
+                    if (cheapVersion != null && cheapVersion.Equals(_lastResult.Value.cheapVersion))
+                    {
+                        // The client's resultId matches our cached resultId and the cheap version is an
+                        // exact match for our current cheap version. We return early here to avoid calculating
+                        // expensive versions as we know nothing is changed.
+                        return null;
+                    }
+
+                    // The current cheap version does not match the last reported.  This may be because we've forked
+                    // or reloaded a project, so fall back to calculating the full expensive version to determine if
+                    // anything is actually changed.
+                    expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
+                    if (expensiveVersion != null && expensiveVersion.Equals(_lastResult.Value.expensiveVersion))
+                    {
+                        return null;
+                    }
+                }
+                else
+                {
+                    // The versions we have in our cache (if any) do not match the ones provided by the client (if any).
+                    // We need to calculate new results.
+                    cheapVersion = await cache.ComputeCheapVersionAsync(state, cancellationToken).ConfigureAwait(false);
+                    expensiveVersion = await cache.ComputeExpensiveVersionAsync(state, cancellationToken).ConfigureAwait(false);
+                }
+
+                // Compute the new result for the request.
+                var data = await cache.ComputeDataAsync(state, cancellationToken).ConfigureAwait(false);
+                var dataChecksum = cache.ComputeChecksum(data);
+
+                string newResultId;
+                if (_lastResult is not null && _lastResult?.resultId == previousPullResult?.PreviousResultId && _lastResult?.dataChecksum == dataChecksum)
+                {
+                    // The new data we've computed is exactly the same as the data we computed last time even though the versions have changed.
+                    // Instead of reserializing everything, we can return the same result id back to the client.
+
+                    // Ensure we store the updated versions we calculated against old resultId.  If we do not do this,
+                    // subsequent requests will always fail the version comparison check (the resultId is still associated with the older version even
+                    // though we reused it here for a newer version) and will trigger re-computation.
+                    // By storing the updated version with the resultId we can short circuit earlier in the version checks.
+                    _lastResult = (_lastResult.Value.resultId, cheapVersion, expensiveVersion, dataChecksum);
+                    return null;
+                }
+                else
+                {
+                    // Keep track of the results we reported here so that we can short-circuit producing results for
+                    // the same state of the world in the future.  Use a custom result-id per type (doc requests or workspace
+                    // requests) so that clients of one don't errantly call into the other.
+                    //
+                    // For example, a client getting document diagnostics should not ask for workspace diagnostics with the result-ids it got for
+                    // doc-diagnostics.  The two systems are different and cannot share results, or do things like report
+                    // what changed between each other.
+                    //
+                    // Note that we can safely update the map before computation as any cancellation or exception
+                    // during computation means that the client will never recieve this resultId and so cannot ask us for it.
+                    newResultId = $"{uniqueKey}:{cache.GetNextResultId()}";
+                    _lastResult = (newResultId, cheapVersion, expensiveVersion, dataChecksum);
+                    return (newResultId, data);
+                }
+            }
+        }
+    }
+}
+