Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C# POC: Add binary log based extraction #16581

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class CSharpAutobuildOptions : AutobuildOptionsShared
private const string extractorOptionPrefix = "CODEQL_EXTRACTOR_CSHARP_OPTION_";

public bool Buildless { get; }
public string? Binlog { get; }

public override Language Language => Language.CSharp;

Expand All @@ -29,7 +30,7 @@ public CSharpAutobuildOptions(IBuildActions actions) : base(actions)
actions.GetEnvironmentVariable(extractorOptionPrefix + "BUILDLESS").AsBool("buildless", false) ||
actions.GetEnvironmentVariable(buildModeEnvironmentVariable)?.ToLower() == "none";


Binlog = actions.GetEnvironmentVariable(extractorOptionPrefix + "BINLOG");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
<ProjectReference Include="..\..\extractor\Semmle.Util\Semmle.Util.csproj" />
<ProjectReference Include="..\..\extractor\Semmle.Extraction.CSharp\Semmle.Extraction.CSharp.csproj" />
<ProjectReference Include="..\..\extractor\Semmle.Extraction.CSharp.Standalone\Semmle.Extraction.CSharp.Standalone.csproj" />
<ProjectReference Include="..\..\extractor\Semmle.Extraction.CSharp.Driver\Semmle.Extraction.CSharp.Driver.csproj" />
<ProjectReference Include="..\..\extractor\Semmle.Extraction.CSharp.DependencyFetching\Semmle.Extraction.CSharp.DependencyFetching.csproj" />
<ProjectReference Include="..\Semmle.Autobuild.Shared\Semmle.Autobuild.Shared.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@
{
/// <summary>
/// Creates the build script that performs extraction without running a build.
/// When <c>builder.Options.Binlog</c> is set, the script invokes the extractor
/// driver with <c>--binlog &lt;path&gt;</c>; otherwise it invokes the standalone
/// extractor with no arguments.
/// </summary>
/// <remarks>
/// NOTE(review): this listing looks like a rendered diff. The first unconditional
/// <c>return</c> below appears to be the removed (old) line, and the "Check notice"
/// text is a code-scanning annotation rather than code - confirm against the real file.
/// </remarks>
public BuildScript Analyse(IAutobuilder<CSharpAutobuildOptions> builder, bool auto)
{
return BuildScript.Create(_ => Semmle.Extraction.CSharp.Standalone.Program.Main([]));
// Binlog is a nullable string; the pattern match both null-checks and names it.
if (builder.Options.Binlog is string binlog)
{
return BuildScript.Create(_ => Semmle.Extraction.CSharp.Driver.Main(["--binlog", binlog]));
}
else
{
return BuildScript.Create(_ => Semmle.Extraction.CSharp.Standalone.Program.Main([]));
}
Comment on lines +13 to +20

Check notice

Code scanning / CodeQL

Missed ternary opportunity Note

Both branches of this 'if' statement return - consider using '?' to express intent better.
}
}
}
3 changes: 3 additions & 0 deletions csharp/codeql-extractor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ options:
- progress+++
type: string
pattern: "^(off|errors|warnings|(info|progress)|(debug|progress\\+)|(trace|progress\\+\\+)|progress\\+\\+\\+)$"
binlog:
title: Binlog
type: string
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,6 @@ public virtual void Dispose()
else
Logger.Log(Severity.Info, "EXTRACTION SUCCEEDED in {0}", stopWatch.Elapsed);

Logger.Dispose();

compilationTrapFile?.Dispose();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using Microsoft.CodeAnalysis.CSharp;
using Semmle.Util.Logging;

namespace Semmle.Extraction.CSharp
{
/// <summary>
/// An <see cref="Analyser"/> whose compilation is supplied ready-made by the
/// caller (see <see cref="Initialize"/>) and whose extractor is a
/// <c>BinaryLogExtractor</c>.
/// </summary>
public class BinaryLogAnalyser : Analyser
{
    /// <summary>
    /// Creates the analyser; every argument is forwarded unchanged to the base class.
    /// </summary>
    public BinaryLogAnalyser(
        IProgressMonitor pm,
        ILogger logger,
        bool addAssemblyTrapPrefix,
        PathTransformer pathTransformer)
        : base(pm, logger, addAssemblyTrapPrefix, pathTransformer) { }

    /// <summary>
    /// Prepares the analyser for one compilation: stores the compilation and the
    /// options, creates the <c>BinaryLogExtractor</c>, logs the extractor version
    /// and sets the reference paths.
    /// </summary>
    /// <param name="cwd">Directory passed to the extractor as its working directory.</param>
    /// <param name="args">Arguments passed to the extractor.</param>
    /// <param name="outputPath">Output path passed to the extractor.</param>
    /// <param name="compilationIn">The compilation to analyse.</param>
    /// <param name="options">Options stored on the analyser and passed to the extractor.</param>
    public void Initialize(string cwd, string[] args, string outputPath, CSharpCompilation compilationIn, CommonOptions options)
    {
        // State first: the two logging/setup calls below run only once all three members are set.
        this.compilation = compilationIn;
        this.options = options;
        this.extractor = new BinaryLogExtractor(cwd, args, outputPath, Logger, PathTransformer, options);

        LogExtractorInfo(Extraction.Extractor.Version);
        SetReferencePaths();
    }
}
}
131 changes: 96 additions & 35 deletions csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Basic.CompilerLog.Util;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.Text;
Expand Down Expand Up @@ -97,60 +98,120 @@
stopwatch.Start();

var options = Options.CreateWithEnvironment(args);
var workingDirectory = Directory.GetCurrentDirectory();
var compilerArgs = options.CompilerArguments.ToArray();

using var logger = MakeLogger(options.Verbosity, options.Console);

var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
var pathTransformer = new PathTransformer(canonicalPathCache);

using var analyser = new TracingAnalyser(new LogProgressMonitor(logger), logger, options.AssemblySensitiveTrap, pathTransformer);

try
{
if (options.ProjectsToLoad.Any())
var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
var pathTransformer = new PathTransformer(canonicalPathCache);

if (options.BinaryLogPath is string binlogPath)
{
AddSourceFilesFromProjects(options.ProjectsToLoad, options.CompilerArguments, logger);
logger.LogInfo(" Running binary log analysis.");
return RunBinaryLogAnalysis(stopwatch, binlogPath, options, logger, pathTransformer);
}

var compilerVersion = new CompilerVersion(options);

if (compilerVersion.SkipExtraction)
else
{
logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason);
return ExitCode.Ok;
logger.LogInfo(" Running tracing analysis.");
return RunTracingAnalysis(stopwatch, options, logger, canonicalPathCache, pathTransformer);
}
}
catch (Exception ex) // lgtm[cs/catch-of-all-exceptions]
{
logger.Log(Severity.Error, " Unhandled exception: {0}", ex);
return ExitCode.Errors;
}
Comment on lines +119 to +123

Check notice

Code scanning / CodeQL

Generic catch clause Note

Generic catch clause.
}

var compilerArguments = CSharpCommandLineParser.Default.Parse(
compilerVersion.ArgsWithResponse,
workingDirectory,
compilerVersion.FrameworkPath,
compilerVersion.AdditionalReferenceDirectories
);
/// <summary>
/// Opens the binary log at <paramref name="binlogPath"/>, reads the compilation
/// data for every regular C# compiler call in it, and analyses each compilation
/// with its own <c>BinaryLogAnalyser</c>.
/// </summary>
/// <returns>
/// <c>ExitCode.Ok</c> if every analysed compilation returned <c>ExitCode.Ok</c>;
/// otherwise <c>ExitCode.Errors</c>.
/// </returns>
/// <remarks>
/// NOTE(review): this listing looks like a rendered diff with removed lines
/// interleaved (e.g. the <c>compilerArguments is null</c> check and the
/// "Skipping extraction" log) - confirm against the real file.
/// </remarks>
private static ExitCode RunBinaryLogAnalysis(Stopwatch stopwatch, string binlogPath, Options options, ILogger logger, PathTransformer pathTransformer)
{
using var fileStream = new FileStream(binlogPath, FileMode.Open, FileAccess.Read, FileShare.Read);
// Filter out compiler calls that aren't interesting for examination
static bool filter(CompilerCall compilerCall)
{
return compilerCall.IsCSharp &&
compilerCall.Kind == CompilerCallKind.Regular;
}

if (compilerArguments is null)
using var reader = BinaryLogReader.Create(fileStream);
var allCompilationData = reader.ReadAllCompilationData(filter);

// Aggregate result across all compilations; only ever changes from Ok to Errors.
var exitCode = ExitCode.Ok;

logger.LogInfo($" Found {allCompilationData.Count} compilations in binary log");

foreach (var compilationData in allCompilationData)
{
// Entries that do not yield a CSharpCompilation are logged and skipped;
// they do not change the aggregate exit code.
if (compilationData.GetCompilationAfterGenerators() is not CSharpCompilation compilation)
{
var sb = new StringBuilder();
sb.Append(" Failed to parse command line: ").AppendList(" ", compilerArgs);
logger.Log(Severity.Error, sb.ToString());
++analyser.CompilationErrors;
return ExitCode.Failed;
logger.LogError(" Compilation data is not C#");
continue;
}

if (!analyser.BeginInitialize(compilerVersion.ArgsWithResponse))
var compilerCall = compilationData.CompilerCall;
var compilerArgs = compilerCall.GetArguments();
var args = reader.ReadCommandLineArguments(compilerCall);

// A fresh analyser per compilation, disposed at the end of each iteration.
using var analyser = new BinaryLogAnalyser(new LogProgressMonitor(logger), logger, options.AssemblySensitiveTrap, pathTransformer);

// The callbacks hand Analyse the references, syntax trees and compilation
// taken from the binary log's compilation object.
var exit = Analyse(stopwatch, analyser, options,
references => [() => compilation.References.ForEach(r => references.Add(r))],
(analyser, syntaxTrees) => [() => syntaxTrees.AddRange(compilation.SyntaxTrees)],
(syntaxTrees, references) => compilation,
(compilation, options) => analyser.Initialize(compilerCall.ProjectDirectory, compilerArgs?.ToArray() ?? [], TracingAnalyser.GetOutputName(compilation, args), compilation, options),
() => { });

// Any non-Ok result is reported as Errors; later compilations are still analysed.
if (exitCode == ExitCode.Ok && exit != ExitCode.Ok)
{
logger.Log(Severity.Info, "Skipping extraction since files have already been extracted");
return ExitCode.Ok;
exitCode = ExitCode.Errors;
}
}
return exitCode;
}

return AnalyseTracing(workingDirectory, compilerArgs, analyser, compilerArguments, options, canonicalPathCache, stopwatch);
/// <summary>
/// Runs extraction for the single compiler invocation described by
/// <paramref name="options"/>, using a <c>TracingAnalyser</c>.
/// </summary>
/// <returns>
/// <c>ExitCode.Ok</c> when the compiler is not recognised or when
/// <c>BeginInitialize</c> returns false; <c>ExitCode.Failed</c> when the
/// command line cannot be parsed; otherwise the result of <c>AnalyseTracing</c>.
/// </returns>
/// <remarks>
/// NOTE(review): this listing looks like a rendered diff with removed lines
/// interleaved (the stray <c>catch</c> and the "Unhandled exception" log/return) -
/// confirm against the real file.
/// </remarks>
private static ExitCode RunTracingAnalysis(Stopwatch stopwatch, Options options, ILogger logger, CanonicalPathCache canonicalPathCache, PathTransformer pathTransformer)
{
var workingDirectory = Directory.GetCurrentDirectory();
var compilerArgs = options.CompilerArguments.ToArray();

using var analyser = new TracingAnalyser(new LogProgressMonitor(logger), logger, options.AssemblySensitiveTrap, pathTransformer);

if (options.ProjectsToLoad.Any())
{
AddSourceFilesFromProjects(options.ProjectsToLoad, options.CompilerArguments, logger);
}
catch (Exception ex) // lgtm[cs/catch-of-all-exceptions]

var compilerVersion = new CompilerVersion(options);

// An unrecognised compiler is logged as a warning and treated as success.
if (compilerVersion.SkipExtraction)
{
logger.Log(Severity.Error, " Unhandled exception: {0}", ex);
return ExitCode.Errors;
logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason);
return ExitCode.Ok;
}

var compilerArguments = CSharpCommandLineParser.Default.Parse(
compilerVersion.ArgsWithResponse,
workingDirectory,
compilerVersion.FrameworkPath,
compilerVersion.AdditionalReferenceDirectories
);

// A null parse result is logged, counted as a compilation error and reported as Failed.
if (compilerArguments is null)
{
var sb = new StringBuilder();
sb.Append(" Failed to parse command line: ").AppendList(" ", compilerArgs);
logger.Log(Severity.Error, sb.ToString());
++analyser.CompilationErrors;
return ExitCode.Failed;
}

if (!analyser.BeginInitialize(compilerVersion.ArgsWithResponse))
{
logger.Log(Severity.Info, "Skipping extraction since files have already been extracted");
return ExitCode.Ok;
}

return AnalyseTracing(workingDirectory, compilerArgs, analyser, compilerArguments, options, canonicalPathCache, stopwatch);
}

private static void AddSourceFilesFromProjects(IEnumerable<string> projectsToLoad, IList<string> compilerArguments, ILogger logger)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ public sealed class Options : CommonOptions
/// </summary>
public bool AssemblySensitiveTrap { get; private set; } = false;

/// <summary>
/// The path to the binary log file, or null if unspecified.
/// </summary>
public string? BinaryLogPath { get; set; }

public static Options CreateWithEnvironment(string[] arguments)
{
var options = new Options();
Expand Down Expand Up @@ -65,6 +70,9 @@ public override bool HandleOption(string key, string value)
case "load-sources-from-project":
ProjectsToLoad.Add(value);
return true;
case "binlog":
BinaryLogPath = value;
return true;
default:
return base.HandleOption(key, value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ private bool LogRoslynArgs(IEnumerable<string> roslynArgs, string extractorVersi
/// <param name="compilation">Information about the compilation.</param>
/// <param name="cancel">Cancellation token required.</param>
/// <returns>The filename.</returns>
private static string GetOutputName(CSharpCompilation compilation,
CSharpCommandLineArguments commandLineArguments)
internal static string GetOutputName(CSharpCompilation compilation,
CommandLineArguments commandLineArguments)
{
// There's no apparent way to access the output filename from the compilation,
// so we need to re-parse the command line arguments.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Microsoft.Build
Microsoft.CodeAnalysis.CSharp

Basic.CompilerLog.Util
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using Semmle.Util.Logging;

namespace Semmle.Extraction
{
/// <summary>
/// An <see cref="Extractor"/> whose <see cref="Mode"/> always includes
/// <c>ExtractorMode.BinaryLog</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the "Check warning" text inside the constructor is a
/// code-scanning annotation from the rendered page, not code - confirm against the real file.
/// </remarks>
public class BinaryLogExtractor : Extractor
{
/// <summary>
/// The extractor mode: <c>BinaryLog</c>, combined with <c>QlTest</c> when the
/// options passed to the constructor had <c>QlTest</c> set.
/// </summary>
public override ExtractorMode Mode { get; }

/// <summary>
/// Creates the extractor. All arguments except <paramref name="options"/> are
/// forwarded to the base constructor, together with an empty collection as its
/// fourth argument; <paramref name="options"/> is only used to decide the mode.
/// </summary>
public BinaryLogExtractor(string cwd, string[] args, string outputPath, ILogger logger, PathTransformer pathTransformer, CommonOptions options)
: base(cwd, args, outputPath, [], logger, pathTransformer)
{
Mode = ExtractorMode.BinaryLog;

Check warning

Code scanning / CodeQL

Virtual call in constructor or destructor Warning

Avoid virtual calls in a constructor or destructor.
// The mode values are combined as bit flags, so QlTest is OR-ed onto BinaryLog.
if (options.QlTest)
{
Mode |= ExtractorMode.QlTest;

Check warning

Code scanning / CodeQL

Virtual call in constructor or destructor Warning

Avoid virtual calls in a constructor or destructor.
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ public enum ExtractorMode
Standalone = 1,
Pdb = 2,
QlTest = 4,
BinaryLog = 8,
}
}
3 changes: 2 additions & 1 deletion csharp/paket.dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ source https://api.nuget.org/v3/index.json
# behave like nuget in choosing transitive dependency versions
strategy: min

nuget Basic.CompilerLog.Util
nuget Mono.Posix.NETStandard
nuget Newtonsoft.Json
nuget xunit
Expand All @@ -17,4 +18,4 @@ nuget System.Net.Primitives
nuget System.Security.Principal
nuget System.Threading.ThreadPool
nuget System.IO.FileSystem
nuget GitInfo 3.3.3
nuget GitInfo 3.3.3
21 changes: 21 additions & 0 deletions csharp/paket.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions csharp/tools/run-dotnet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh

# Prints the command line it was given, then runs it and exits with the
# command's exit status.
#
# Usage: run-dotnet.sh <command> [args...]

# -e: stop on the first failing command; -u: treat unset variables as errors.
set -eu

echo Args: "$@"
# "$@" must be quoted so each argument reaches the command exactly as received.
# Unquoted, the shell would re-split arguments containing whitespace and expand
# any glob characters in them (e.g. a path with spaces, or a literal '*').
"$@" || exit $?
Loading
Loading