Skip to content

Commit

Permalink
Merge pull request #13876 from michaelnebel/csharp/aspnetdlls
Browse files Browse the repository at this point in the history
C#: Include ASP.NET assemblies in the standalone extraction.
  • Loading branch information
michaelnebel authored Aug 14, 2023
2 parents f5d7765 + 6ecbb40 commit 0e9f8c4
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 93 deletions.
105 changes: 28 additions & 77 deletions csharp/extractor/Semmle.Extraction.CSharp.Standalone/BuildAnalysis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
using System.Collections.Concurrent;
using System.Text;
using System.Security.Cryptography;
using System.Text.RegularExpressions;

namespace Semmle.BuildAnalyser
{
/// <summary>
/// Main implementation of the build analysis.
/// </summary>
internal sealed partial class BuildAnalysis : IDisposable
internal sealed class BuildAnalysis : IDisposable
{
private readonly AssemblyCache assemblyCache;
private readonly ProgressMonitor progressMonitor;
Expand All @@ -29,6 +28,9 @@ internal sealed partial class BuildAnalysis : IDisposable
private readonly Options options;
private readonly DirectoryInfo sourceDir;
private readonly DotNet dotnet;
private readonly FileContent fileContent;
private readonly TemporaryDirectory packageDirectory;


/// <summary>
/// Performs a C# build analysis.
Expand All @@ -55,6 +57,9 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)

this.progressMonitor.FindingFiles(options.SrcDir);

packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));

this.fileContent = new FileContent(packageDirectory, progressMonitor, () => GetFiles("*.*"));
this.allSources = GetFiles("*.cs").ToArray();
var allProjects = GetFiles("*.csproj");
var solutions = options.SolutionFile is not null
Expand All @@ -63,21 +68,26 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)

var dllDirNames = options.DllDirs.Select(Path.GetFullPath).ToList();

// Find DLLs in the .Net Framework
// Find DLLs in the .Net / Asp.Net Framework
if (options.ScanNetFrameworkDlls)
{
var runtimeLocation = new Runtime(dotnet).GetRuntime(options.UseSelfContainedDotnet);
progressMonitor.Log(Util.Logging.Severity.Info, $"Runtime location selected: {runtimeLocation}");
var runtime = new Runtime(dotnet);
var runtimeLocation = runtime.GetRuntime(options.UseSelfContainedDotnet);
progressMonitor.LogInfo($".NET runtime location selected: {runtimeLocation}");
dllDirNames.Add(runtimeLocation);

if (fileContent.UseAspNetDlls && runtime.GetAspRuntime() is string aspRuntime)
{
progressMonitor.LogInfo($"ASP.NET runtime location selected: {aspRuntime}");
dllDirNames.Add(aspRuntime);
}
}

if (options.UseMscorlib)
{
UseReference(typeof(object).Assembly.Location);
}

packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));

if (options.UseNuGet)
{
dllDirNames.Add(packageDirectory.DirInfo.FullName);
Expand Down Expand Up @@ -187,6 +197,7 @@ private void ResolveConflicts()
{
finalAssemblyList[r.Name] = r;
}

// Update the used references list
usedReferences.Clear();
foreach (var r in finalAssemblyList.Select(r => r.Value.Filename))
Expand All @@ -210,24 +221,18 @@ private void ResolveConflicts()
/// Store that a particular reference file is used.
/// </summary>
/// <param name="reference">The filename of the reference.</param>
private void UseReference(string reference)
{
usedReferences[reference] = true;
}
private void UseReference(string reference) => usedReferences[reference] = true;

/// <summary>
/// Store that a particular source file is used (by a project file).
/// </summary>
/// <param name="sourceFile">The source file.</param>
private void UseSource(FileInfo sourceFile)
{
sources[sourceFile.FullName] = sourceFile.Exists;
}
private void UseSource(FileInfo sourceFile) => sources[sourceFile.FullName] = sourceFile.Exists;

/// <summary>
/// The list of resolved reference files.
/// </summary>
public IEnumerable<string> ReferenceFiles => this.usedReferences.Keys;
public IEnumerable<string> ReferenceFiles => usedReferences.Keys;

/// <summary>
/// The list of source files used in projects.
Expand All @@ -242,7 +247,7 @@ private void UseSource(FileInfo sourceFile)
/// <summary>
/// List of assembly IDs which couldn't be resolved.
/// </summary>
public IEnumerable<string> UnresolvedReferences => this.unresolvedReferences.Select(r => r.Key);
public IEnumerable<string> UnresolvedReferences => unresolvedReferences.Select(r => r.Key);

/// <summary>
/// List of source files which were mentioned in project files but
Expand All @@ -256,12 +261,7 @@ private void UseSource(FileInfo sourceFile)
/// </summary>
/// <param name="id">The assembly ID.</param>
/// <param name="projectFile">The project file making the reference.</param>
private void UnresolvedReference(string id, string projectFile)
{
unresolvedReferences[id] = projectFile;
}

private readonly TemporaryDirectory packageDirectory;
private void UnresolvedReference(string id, string projectFile) => unresolvedReferences[id] = projectFile;

/// <summary>
/// Reads all the source files and references from the given list of projects.
Expand Down Expand Up @@ -318,10 +318,8 @@ private void AnalyseProject(FileInfo project)

}

private bool Restore(string target, string? pathToNugetConfig = null)
{
return dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName, pathToNugetConfig);
}
private bool Restore(string target, string? pathToNugetConfig = null) =>
dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName, pathToNugetConfig);

private void Restore(IEnumerable<string> targets, string? pathToNugetConfig = null)
{
Expand All @@ -331,11 +329,9 @@ private void Restore(IEnumerable<string> targets, string? pathToNugetConfig = nu
}
}


private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
{
var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName).Select(d => Path.GetFileName(d).ToLowerInvariant()).ToHashSet();
var notYetDownloadedPackages = new HashSet<string>();

var nugetConfigs = GetFiles("nuget.config", recurseSubdirectories: true).ToArray();
string? nugetConfig = null;
if (nugetConfigs.Length > 1)
Expand All @@ -352,46 +348,7 @@ private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
nugetConfig = nugetConfigs.FirstOrDefault();
}

var allFiles = GetFiles("*.*");
foreach (var file in allFiles)
{
try
{
using var sr = new StreamReader(file);
ReadOnlySpan<char> line;
while ((line = sr.ReadLine()) != null)
{
foreach (var valueMatch in PackageReference().EnumerateMatches(line))
{
// We can't get the group from the ValueMatch, so doing it manually:
var match = line.Slice(valueMatch.Index, valueMatch.Length);
var includeIndex = match.IndexOf("Include", StringComparison.InvariantCultureIgnoreCase);
if (includeIndex == -1)
{
continue;
}

match = match.Slice(includeIndex + "Include".Length + 1);

var quoteIndex1 = match.IndexOf("\"");
var quoteIndex2 = match.Slice(quoteIndex1 + 1).IndexOf("\"");

var packageName = match.Slice(quoteIndex1 + 1, quoteIndex2).ToString().ToLowerInvariant();
if (!alreadyDownloadedPackages.Contains(packageName))
{
notYetDownloadedPackages.Add(packageName);
}
}
}
}
catch (Exception ex)
{
progressMonitor.FailedToReadFile(file, ex);
continue;
}
}

foreach (var package in notYetDownloadedPackages)
foreach (var package in fileContent.NotYetDownloadedPackages)
{
progressMonitor.NugetInstall(package);
using var tempDir = new TemporaryDirectory(ComputeTempDirectory(package));
Expand Down Expand Up @@ -434,12 +391,6 @@ private void AnalyseSolutions(IEnumerable<string> solutions)
});
}

public void Dispose()
{
packageDirectory?.Dispose();
}

[GeneratedRegex("<PackageReference .*Include=\"(.*?)\".*/>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex PackageReference();
public void Dispose() => packageDirectory?.Dispose();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ internal interface IDotNet
bool RestoreToDirectory(string project, string directory, string? pathToNugetConfig = null);
bool New(string folder);
bool AddPackage(string folder, string package);
public IList<string> GetListedRuntimes();
IList<string> GetListedRuntimes();
}

/// <summary>
Expand Down
166 changes: 166 additions & 0 deletions csharp/extractor/Semmle.Extraction.CSharp.Standalone/FileContent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
using Semmle.Util;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace Semmle.BuildAnalyser
{

/// <summary>
/// This class is used to read a set of files and determine various properties of their
/// content (by reading the content of the files only once).
/// The implementation is lazy, so the properties are only calculated when
/// the first property is accessed.
/// </summary>
internal partial class FileContent
{
    private readonly ProgressMonitor progressMonitor;
    private readonly IUnsafeFileReader unsafeFileReader;
    private readonly Func<IEnumerable<string>> getFiles;
    private readonly Func<HashSet<string>> getAlreadyDownloadedPackages;
    private readonly HashSet<string> notYetDownloadedPackages = new HashSet<string>();
    private readonly Initializer initialize;

    /// <summary>
    /// The (lower-cased) names of the packages that are referenced from a
    /// <c>PackageReference</c> element in any of the scanned files, but which are
    /// not contained in the set returned by the "already downloaded" callback.
    /// Accessing the property triggers the scan of the files.
    /// </summary>
    public HashSet<string> NotYetDownloadedPackages
    {
        get
        {
            initialize.Run();
            return notYetDownloadedPackages;
        }
    }

    private bool useAspNetDlls = false;

    /// <summary>
    /// True if any file in the source directory indicates that ASP.NET is used.
    /// The following heuristic is used to decide if ASP.NET is used:
    /// any file in the source directory (this will most likely be a .csproj file)
    /// contains something like
    /// <code>
    /// &lt;Project Sdk="Microsoft.NET.Sdk.Web"&gt;
    /// &lt;FrameworkReference Include="Microsoft.AspNetCore.App"/&gt;
    /// </code>
    /// Accessing the property triggers the scan of the files.
    /// </summary>
    public bool UseAspNetDlls
    {
        get
        {
            initialize.Run();
            return useAspNetDlls;
        }
    }

    /// <summary>
    /// Creates a file content analysis where all external dependencies are injected.
    /// </summary>
    /// <param name="getAlreadyDownloadedPackages">Callback producing the (lower-cased) names of the packages that are already available.</param>
    /// <param name="progressMonitor">Used to report files that could not be read.</param>
    /// <param name="getFiles">Callback producing the paths of the files to scan.</param>
    /// <param name="unsafeFileReader">Reader used to enumerate the lines of each file.</param>
    internal FileContent(Func<HashSet<string>> getAlreadyDownloadedPackages,
        ProgressMonitor progressMonitor,
        Func<IEnumerable<string>> getFiles,
        IUnsafeFileReader unsafeFileReader)
    {
        this.getAlreadyDownloadedPackages = getAlreadyDownloadedPackages;
        this.progressMonitor = progressMonitor;
        this.getFiles = getFiles;
        this.unsafeFileReader = unsafeFileReader;
        // NOTE(review): Initializer is declared elsewhere; presumably Run() invokes
        // DoInitialize at most once - confirm against its implementation.
        this.initialize = new Initializer(DoInitialize);
    }

    /// <summary>
    /// Creates a file content analysis that treats the names of the sub directories of
    /// <paramref name="packageDirectory"/> (lower-cased) as the already downloaded packages
    /// and that reads the files from disk.
    /// </summary>
    /// <param name="packageDirectory">The directory containing the downloaded packages.</param>
    /// <param name="progressMonitor">Used to report files that could not be read.</param>
    /// <param name="getFiles">Callback producing the paths of the files to scan.</param>
    public FileContent(TemporaryDirectory packageDirectory, ProgressMonitor progressMonitor, Func<IEnumerable<string>> getFiles)
        : this(
            () => Directory.GetDirectories(packageDirectory.DirInfo.FullName)
                .Select(d => Path.GetFileName(d).ToLowerInvariant())
                .ToHashSet(),
            progressMonitor,
            getFiles,
            new UnsafeFileReader())
    { }

    /// <summary>
    /// Extracts the (lower-cased) value of the attribute named <paramref name="groupPrefix"/>
    /// from the text covered by <paramref name="valueMatch"/>. This is needed because a
    /// <see cref="ValueMatch"/> does not expose the capture groups of the regular expression.
    /// </summary>
    /// <param name="input">The line the match was found in.</param>
    /// <param name="valueMatch">The location of the match within <paramref name="input"/>.</param>
    /// <param name="groupPrefix">The attribute name, e.g. <c>Include</c> or <c>Sdk</c>.</param>
    /// <returns>The lower-cased attribute value, or the empty string if the attribute is not found.</returns>
    private static string GetGroup(ReadOnlySpan<char> input, ValueMatch valueMatch, string groupPrefix)
    {
        var match = input.Slice(valueMatch.Index, valueMatch.Length);

        // Look for the complete attribute start (name, '=' and opening quote) rather than
        // just the name: otherwise an attribute that merely starts with the same text
        // (e.g. IncludeAssets="all" in front of Include="Foo") would be picked up instead.
        var attributeStart = groupPrefix + "=\"";

        var offset = 0;
        while (offset < match.Length)
        {
            var index = match.Slice(offset).IndexOf(attributeStart, StringComparison.OrdinalIgnoreCase);
            if (index == -1)
            {
                return string.Empty;
            }

            var nameStart = offset + index;
            var valueStart = nameStart + attributeStart.Length;

            // The regular expressions require whitespace in front of the attribute name;
            // this also rules out attributes that only end with the requested name.
            if (nameStart > 0 && char.IsWhiteSpace(match[nameStart - 1]))
            {
                var value = match.Slice(valueStart);
                var valueLength = value.IndexOf('"');
                return valueLength == -1
                    ? string.Empty
                    : value.Slice(0, valueLength).ToString().ToLowerInvariant();
            }

            offset = valueStart;
        }

        return string.Empty;
    }

    /// <summary>
    /// Checks whether any match of <paramref name="regex"/> in <paramref name="line"/> has the
    /// attribute <paramref name="groupPrefix"/> set to <paramref name="value"/> (ignoring case).
    /// </summary>
    private static bool IsGroupMatch(ReadOnlySpan<char> line, Regex regex, string groupPrefix, string value)
    {
        // GetGroup returns lower-cased values, so normalize the expected value once.
        var expected = value.ToLowerInvariant();
        foreach (var valueMatch in regex.EnumerateMatches(line))
        {
            // We can't get the group from the ValueMatch, so doing it manually:
            if (GetGroup(line, valueMatch, groupPrefix) == expected)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Reads all files line by line (each file only once) and computes the values backing
    /// <see cref="NotYetDownloadedPackages"/> and <see cref="UseAspNetDlls"/>.
    /// A failure while reading a file is reported to the progress monitor and the
    /// remaining files are still processed.
    /// </summary>
    private void DoInitialize()
    {
        var alreadyDownloadedPackages = getAlreadyDownloadedPackages();
        foreach (var file in getFiles())
        {
            try
            {
                foreach (ReadOnlySpan<char> line in unsafeFileReader.ReadLines(file))
                {
                    // Find the not yet downloaded packages.
                    foreach (var valueMatch in PackageReference().EnumerateMatches(line))
                    {
                        // We can't get the group from the ValueMatch, so doing it manually:
                        var packageName = GetGroup(line, valueMatch, "Include");
                        if (!string.IsNullOrEmpty(packageName) && !alreadyDownloadedPackages.Contains(packageName))
                        {
                            notYetDownloadedPackages.Add(packageName);
                        }
                    }

                    // Determine if ASP.NET is used. Once detected there is no need to check again.
                    if (!useAspNetDlls)
                    {
                        useAspNetDlls =
                            IsGroupMatch(line, ProjectSdk(), "Sdk", "Microsoft.NET.Sdk.Web") ||
                            IsGroupMatch(line, FrameworkReference(), "Include", "Microsoft.AspNetCore.App");
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: results gathered before the failure are kept.
                progressMonitor.FailedToReadFile(file, ex);
            }
        }
    }

    [GeneratedRegex("<PackageReference.*\\sInclude=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex PackageReference();

    [GeneratedRegex("<FrameworkReference.*\\sInclude=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex FrameworkReference();

    [GeneratedRegex("<(.*\\s)?Project.*\\sSdk=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex ProjectSdk();
}
}

/// <summary>
/// Abstraction for enumerating the lines of a text file.
/// </summary>
internal interface IUnsafeFileReader
{
    /// <summary>
    /// Enumerates the lines of the file at the given path.
    /// </summary>
    /// <param name="file">The path of the file to read.</param>
    /// <returns>The lines of the file in the order they occur.</returns>
    IEnumerable<string> ReadLines(string file);
}

/// <summary>
/// Reads the lines of a file from disk. No exception handling is performed here;
/// any failure while opening or reading the file surfaces to the code enumerating the lines.
/// </summary>
internal class UnsafeFileReader : IUnsafeFileReader
{
    /// <summary>
    /// Lazily yields the lines of <paramref name="file"/>; the file is opened when the
    /// enumeration starts and closed when it ends.
    /// </summary>
    /// <param name="file">The path of the file to read.</param>
    /// <returns>The lines of the file in the order they occur.</returns>
    public IEnumerable<string> ReadLines(string file)
    {
        using var reader = new StreamReader(file);
        for (var current = reader.ReadLine(); current is not null; current = reader.ReadLine())
        {
            yield return current;
        }
    }
}
Loading

0 comments on commit 0e9f8c4

Please sign in to comment.