Skip to content

Commit

Permalink
Add tool src/tools/coverage-dump for use by some new coverage tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Zalathar committed Sep 5, 2023
1 parent 04374cd commit 1367104
Show file tree
Hide file tree
Showing 11 changed files with 562 additions and 1 deletion.
18 changes: 18 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,18 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"

[[package]]
name = "coverage-dump"
version = "0.1.0"
dependencies = [
"anyhow",
"leb128",
"md-5",
"miniz_oxide",
"regex",
"rustc-demangle",
]

[[package]]
name = "coverage_test_macros"
version = "0.0.0"
Expand Down Expand Up @@ -2041,6 +2053,12 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"

[[package]]
name = "leb128"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"

[[package]]
name = "levenshtein"
version = "1.0.5"
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ members = [
"src/tools/generate-windows-sys",
"src/tools/rustdoc-gui-test",
"src/tools/opt-dist",
"src/tools/coverage-dump",
]

exclude = [
Expand Down
3 changes: 2 additions & 1 deletion src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,8 @@ impl<'a> Builder<'a> {
llvm::Lld,
llvm::CrtBeginEnd,
tool::RustdocGUITest,
tool::OptimizedDist
tool::OptimizedDist,
tool::CoverageDump,
),
Kind::Check | Kind::Clippy | Kind::Fix => describe!(
check::Std,
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ bootstrap_tool!(
GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys";
RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test";
OptimizedDist, "src/tools/opt-dist", "opt-dist";
CoverageDump, "src/tools/coverage-dump", "coverage-dump";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
14 changes: 14 additions & 0 deletions src/tools/coverage-dump/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "coverage-dump"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.71"
leb128 = "0.2.5"
md5 = { package = "md-5" , version = "0.10.5" }
miniz_oxide = "0.7.1"
regex = "1.8.4"
rustc-demangle = "0.1.23"
8 changes: 8 additions & 0 deletions src/tools/coverage-dump/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
This tool extracts coverage mapping information from an LLVM IR assembly file
(`.ll`), and prints it in a more human-readable form that can be used for
snapshot tests.

The output format is mostly arbitrary, so it's OK to change the output as long
as any affected tests are also re-blessed. However, the output should be
consistent across different executions on different platforms, so avoid
printing any information that is platform-specific or non-deterministic.
296 changes: 296 additions & 0 deletions src/tools/coverage-dump/src/covfun.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
use crate::parser::{unescape_llvm_string_contents, Parser};
use anyhow::{anyhow, Context};
use regex::Regex;
use std::collections::HashMap;
use std::fmt::{self, Debug, Write as _};
use std::sync::OnceLock;

pub(crate) fn dump_covfun_mappings(
llvm_ir: &str,
function_names: &HashMap<u64, String>,
) -> anyhow::Result<()> {
// Extract function coverage entries from the LLVM IR assembly, and associate
// each entry with its (demangled) name.
let mut covfun_entries = llvm_ir
.lines()
.filter_map(covfun_line_data)
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
.collect::<Vec<_>>();
covfun_entries.sort_by(|a, b| {
// Sort entries primarily by name, to help make the order consistent
// across platforms and relatively insensitive to changes.
// (Sadly we can't use `sort_by_key` because we would need to return references.)
Ord::cmp(&a.0, &b.0)
.then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used))
.then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice()))
});

for (name, line_data) in &covfun_entries {
let name = name.unwrap_or("(unknown)");
let unused = if line_data.is_used { "" } else { " (unused)" };
println!("Function name: {name}{unused}");

let payload: &[u8] = &line_data.payload;
println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len());

let mut parser = Parser::new(payload);

let num_files = parser.read_uleb128_u32()?;
println!("Number of files: {num_files}");

for i in 0..num_files {
let global_file_id = parser.read_uleb128_u32()?;
println!("- file {i} => global file {global_file_id}");
}

let num_expressions = parser.read_uleb128_u32()?;
println!("Number of expressions: {num_expressions}");

let mut expression_resolver = ExpressionResolver::new();
for i in 0..num_expressions {
let lhs = parser.read_simple_term()?;
let rhs = parser.read_simple_term()?;
println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}");
expression_resolver.push_operands(lhs, rhs);
}

for i in 0..num_files {
let num_mappings = parser.read_uleb128_u32()?;
println!("Number of file {i} mappings: {num_mappings}");

for _ in 0..num_mappings {
let (kind, region) = parser.read_mapping_kind_and_region()?;
println!("- {kind:?} at {region:?}");

match kind {
// Also print expression mappings in resolved form.
MappingKind::Code(term @ CovTerm::Expression { .. })
| MappingKind::Gap(term @ CovTerm::Expression { .. }) => {
println!(" = {}", expression_resolver.format_term(term));
}
// If the mapping is a branch region, print both of its arms
// in resolved form (even if they aren't expressions).
MappingKind::Branch { r#true, r#false } => {
println!(" true = {}", expression_resolver.format_term(r#true));
println!(" false = {}", expression_resolver.format_term(r#false));
}
_ => (),
}
}
}

parser.ensure_empty()?;
println!();
}
Ok(())
}

/// Data extracted from a single `@__covrec_*` line of LLVM IR assembly.
struct CovfunLineData {
/// Hash parsed (as hexadecimal) from the `@__covrec_[HASH]` variable name;
/// used as the key when looking up the function's name.
name_hash: u64,
/// True if the variable name carried the trailing `u` suffix. Entries
/// without it are reported as "(unused)" in the dump.
is_used: bool,
/// Unescaped bytes of the record's `c"..."` string constant.
payload: Vec<u8>,
}

/// Inspects one line of LLVM IR assembly and, if it defines an
/// `__llvm_covfun` entry, returns the relevant data as a `CovfunLineData`.
///
/// Returns `None` for any line that does not match the expected shape.
///
/// # Panics
///
/// Panics if the matched name hash cannot be parsed as a hexadecimal `u64`
/// (the pattern admits any run of digits and uppercase letters, of any
/// length).
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
    // This cheats a little by matching the variable name `@__covrec_[HASH]u`
    // instead of the section name: the section name is harder to extract and
    // differs across Linux/Windows/macOS. The symbol name hash is likewise
    // taken from the variable name rather than from the data, since that is
    // easier and both should match.
    static RE: OnceLock<Regex> = OnceLock::new();
    let pattern = RE.get_or_init(|| {
        Regex::new(
            r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
        )
        .unwrap()
    });

    let captures = pattern.captures(line)?;

    Some(CovfunLineData {
        name_hash: u64::from_str_radix(&captures["name_hash"], 16).unwrap(),
        is_used: captures.name("is_used").is_some(),
        payload: unescape_llvm_string_contents(&captures["payload"]),
    })
}

// Additional parser methods that are only required for `covfun` payloads.
impl<'a> Parser<'a> {
    /// Reads one ULEB128 value and decodes it as a `CovTerm`, failing with
    /// the context "decoding term" if the value is not a valid term.
    fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> {
        let decoded = CovTerm::decode(self.read_uleb128_u32()?);
        decoded.context("decoding term")
    }

    /// Reads a mapping kind followed by its region, converting a code
    /// mapping into a gap mapping when the region's end column carries the
    /// gap flag in its highest bit (the flag is cleared from the returned
    /// region).
    fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> {
        const HIGH_BIT: u32 = 1u32 << 31;

        let kind = self.read_raw_mapping_kind()?;
        let mut region = self.read_raw_mapping_region()?;

        // Ordinary (non-gap) region: nothing to adjust.
        if region.end_column & HIGH_BIT == 0 {
            return Ok((kind, region));
        }

        region.end_column &= !HIGH_BIT;
        match kind {
            MappingKind::Code(term) => Ok((MappingKind::Gap(term), region)),
            // LLVM's coverage mapping reader will actually handle this case
            // without complaint, but the result is almost certainly a
            // meaningless implementation artifact.
            _ => Err(anyhow!("unexpected base kind for gap region: {kind:?}")),
        }
    }

    /// Reads the raw mapping-kind value, plus the two branch arms that
    /// follow it when the kind is a branch.
    fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> {
        let raw_mapping_kind = self.read_uleb128_u32()?;

        // Anything that decodes as a term is a plain code mapping.
        if let Some(term) = CovTerm::decode(raw_mapping_kind) {
            return Ok(MappingKind::Code(term));
        }

        // `decode` only rejects values with a `0b00` tag and non-zero high bits.
        assert_eq!(raw_mapping_kind & 0b11, 0);
        assert_ne!(raw_mapping_kind, 0);

        // Bit 2 selects expansion mappings; the bits above it are the payload.
        match (raw_mapping_kind & 0b100 != 0, raw_mapping_kind >> 3) {
            (true, high) => Ok(MappingKind::Expansion(high)),
            (false, 0) => {
                unreachable!("zero kind should have already been handled as a code mapping")
            }
            (false, 2) => Ok(MappingKind::Skip),
            (false, 4) => {
                let r#true = self.read_simple_term()?;
                let r#false = self.read_simple_term()?;
                Ok(MappingKind::Branch { r#true, r#false })
            }
            (false, _) => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")),
        }
    }

    /// Reads the four ULEB128 values of a region, in the order: start line
    /// offset, start column, end line offset, end column.
    fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> {
        // Struct fields are evaluated in the order written, which matches
        // the order of the values in the payload.
        Ok(MappingRegion {
            start_line_offset: self.read_uleb128_u32()?,
            start_column: self.read_uleb128_u32()?,
            end_line_offset: self.read_uleb128_u32()?,
            end_column: self.read_uleb128_u32()?,
        })
    }
}

/// A value that is either the constant zero, the ID of a physical coverage
/// counter, or the ID (and operation) of a coverage-counter expression.
///
/// Terms appear as the operands of coverage-counter expressions, as the arms
/// of branch mappings, and as the value of code/gap mappings.
#[derive(Clone, Copy, Debug)]
pub(crate) enum CovTerm {
    Zero,
    Counter(u32),
    Expression(u32, Op),
}

/// The operator (subtraction or addition) applied by an expression.
#[derive(Clone, Copy, Debug)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl CovTerm {
    /// Decodes a raw term: the low two bits of `input` are a tag and the
    /// remaining bits are an ID.
    ///
    /// Returns `None` when the tag is `0b00` but the ID bits are non-zero.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let id = input >> 2;
        let term = match input & 0b11 {
            0b01 => Self::Counter(id),
            0b10 => Self::Expression(id, Op::Sub),
            0b11 => Self::Expression(id, Op::Add),
            // Only the `0b00` tag is left from here on.
            _ if id == 0 => Self::Zero,
            // When reading expression operands or branch arms, the LLVM
            // coverage mapping reader always interprets a `0b00` tag as a
            // zero term, even when the high bits are non-zero. That case is
            // reported as a failure here instead, so that this code can also
            // be shared by the full mapping-kind reader.
            _ => return None,
        };
        Some(term)
    }
}

/// The kind of a single coverage mapping, as decoded from a `covfun` payload.
#[derive(Debug)]
enum MappingKind {
/// A mapping whose raw kind value decodes directly as a term.
Code(CovTerm),
/// A code mapping whose region had the highest bit of its end column set.
Gap(CovTerm),
/// A mapping whose raw kind has bit `0b100` set; the payload is the raw
/// value shifted right by 3.
/// NOTE(review): presumably the ID of the expanded file -- confirm against
/// LLVM's coverage mapping format.
Expansion(u32),
/// A non-expansion mapping whose raw kind, shifted right by 3, is 2.
Skip,
// Using raw identifiers here makes the dump output a little bit nicer
// (via the derived Debug), at the expense of making this tool's source
// code a little bit uglier.
/// A non-expansion mapping whose raw kind, shifted right by 3, is 4. Its
/// two arms are read as terms immediately after the kind.
Branch { r#true: CovTerm, r#false: CovTerm },
}

/// The source region covered by a single mapping, in the relative encoding
/// used by the payload.
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

impl Debug for MappingRegion {
    /// Renders the region as `(prev + L, C) to (start + L, C)`, which keeps
    /// the relative nature of the line offsets visible in the dump.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        f.write_fmt(format_args!(
            "(prev + {}, {}) to (start + {}, {})",
            start_line_offset, start_column, end_line_offset, end_column
        ))
    }
}

/// Helper that renders expressions in a "resolved" form, so that developers
/// reading the dump don't have to resolve expressions by hand.
struct ExpressionResolver {
    /// Operand pairs, indexed by expression ID (in the order pushed).
    operands: Vec<(CovTerm, CovTerm)>,
}

impl ExpressionResolver {
    /// Creates a resolver with no known expressions.
    fn new() -> Self {
        Self { operands: vec![] }
    }

    /// Records the operands of the next expression; its ID is the number of
    /// expressions pushed before it.
    fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) {
        self.operands.push((lhs, rhs));
    }

    /// Returns `term` in resolved form as a freshly allocated string.
    ///
    /// # Panics
    ///
    /// Panics if `term` (or any expression it refers to) names an expression
    /// ID that has not been pushed.
    fn format_term(&self, term: CovTerm) -> String {
        let mut resolved = String::new();
        self.write_term(&mut resolved, term);
        resolved
    }

    /// Appends the resolved form of `term` to `output`, recursing into the
    /// operands of expressions.
    fn write_term(&self, output: &mut String, term: CovTerm) {
        let (id, op) = match term {
            CovTerm::Zero => return output.push_str("Zero"),
            CovTerm::Counter(id) => return write!(output, "c{id}").unwrap(),
            CovTerm::Expression(id, op) => (id, op),
        };

        // Direct indexing: an undeclared expression ID panics here.
        let (lhs, rhs) = self.operands[id as usize];

        output.push('(');
        self.write_term(output, lhs);
        output.push_str(match op {
            Op::Sub => " - ",
            Op::Add => " + ",
        });
        self.write_term(output, rhs);
        output.push(')');
    }
}
17 changes: 17 additions & 0 deletions src/tools/coverage-dump/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
mod covfun;
mod parser;
mod prf_names;

/// Entry point: reads the LLVM IR file named by the first command-line
/// argument, builds the function-name table from it, and prints the coverage
/// mapping dump to stdout.
///
/// Fails if no path was given, if the file can't be read as a string, or if
/// either processing step reports an error.
fn main() -> anyhow::Result<()> {
    use anyhow::Context as _;

    let args: Vec<String> = std::env::args().collect();
    let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?;

    let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?;

    // The name table is built first so that the dump can label each record.
    let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?;
    crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?;

    Ok(())
}
Loading

0 comments on commit 1367104

Please sign in to comment.