forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add tool `src/tools/coverage-dump` for use by some new coverage tests
- Loading branch information
Showing
11 changed files
with
562 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[package] | ||
name = "coverage-dump" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
anyhow = "1.0.71" | ||
leb128 = "0.2.5" | ||
md5 = { package = "md-5" , version = "0.10.5" } | ||
miniz_oxide = "0.7.1" | ||
regex = "1.8.4" | ||
rustc-demangle = "0.1.23" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
This tool extracts coverage mapping information from an LLVM IR assembly file
(`.ll`), and prints it in a more human-readable form that can be used for
snapshot tests.

The output format is mostly arbitrary, so it's OK to change the output as long
as any affected tests are also re-blessed. However, the output should be
consistent across different executions on different platforms, so avoid
printing any information that is platform-specific or non-deterministic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,296 @@ | ||
use crate::parser::{unescape_llvm_string_contents, Parser}; | ||
use anyhow::{anyhow, Context}; | ||
use regex::Regex; | ||
use std::collections::HashMap; | ||
use std::fmt::{self, Debug, Write as _}; | ||
use std::sync::OnceLock; | ||
|
||
pub(crate) fn dump_covfun_mappings( | ||
llvm_ir: &str, | ||
function_names: &HashMap<u64, String>, | ||
) -> anyhow::Result<()> { | ||
// Extract function coverage entries from the LLVM IR assembly, and associate | ||
// each entry with its (demangled) name. | ||
let mut covfun_entries = llvm_ir | ||
.lines() | ||
.filter_map(covfun_line_data) | ||
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data)) | ||
.collect::<Vec<_>>(); | ||
covfun_entries.sort_by(|a, b| { | ||
// Sort entries primarily by name, to help make the order consistent | ||
// across platforms and relatively insensitive to changes. | ||
// (Sadly we can't use `sort_by_key` because we would need to return references.) | ||
Ord::cmp(&a.0, &b.0) | ||
.then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used)) | ||
.then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice())) | ||
}); | ||
|
||
for (name, line_data) in &covfun_entries { | ||
let name = name.unwrap_or("(unknown)"); | ||
let unused = if line_data.is_used { "" } else { " (unused)" }; | ||
println!("Function name: {name}{unused}"); | ||
|
||
let payload: &[u8] = &line_data.payload; | ||
println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len()); | ||
|
||
let mut parser = Parser::new(payload); | ||
|
||
let num_files = parser.read_uleb128_u32()?; | ||
println!("Number of files: {num_files}"); | ||
|
||
for i in 0..num_files { | ||
let global_file_id = parser.read_uleb128_u32()?; | ||
println!("- file {i} => global file {global_file_id}"); | ||
} | ||
|
||
let num_expressions = parser.read_uleb128_u32()?; | ||
println!("Number of expressions: {num_expressions}"); | ||
|
||
let mut expression_resolver = ExpressionResolver::new(); | ||
for i in 0..num_expressions { | ||
let lhs = parser.read_simple_term()?; | ||
let rhs = parser.read_simple_term()?; | ||
println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}"); | ||
expression_resolver.push_operands(lhs, rhs); | ||
} | ||
|
||
for i in 0..num_files { | ||
let num_mappings = parser.read_uleb128_u32()?; | ||
println!("Number of file {i} mappings: {num_mappings}"); | ||
|
||
for _ in 0..num_mappings { | ||
let (kind, region) = parser.read_mapping_kind_and_region()?; | ||
println!("- {kind:?} at {region:?}"); | ||
|
||
match kind { | ||
// Also print expression mappings in resolved form. | ||
MappingKind::Code(term @ CovTerm::Expression { .. }) | ||
| MappingKind::Gap(term @ CovTerm::Expression { .. }) => { | ||
println!(" = {}", expression_resolver.format_term(term)); | ||
} | ||
// If the mapping is a branch region, print both of its arms | ||
// in resolved form (even if they aren't expressions). | ||
MappingKind::Branch { r#true, r#false } => { | ||
println!(" true = {}", expression_resolver.format_term(r#true)); | ||
println!(" false = {}", expression_resolver.format_term(r#false)); | ||
} | ||
_ => (), | ||
} | ||
} | ||
} | ||
|
||
parser.ensure_empty()?; | ||
println!(); | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Data extracted from one line of LLVM IR that defines a function coverage
/// record variable.
struct CovfunLineData {
    /// Function-name hash, parsed from the hexadecimal part of the variable name.
    name_hash: u64,
    /// Whether the variable name carried the trailing `u` marker.
    is_used: bool,
    /// Bytes of the record's string constant, with LLVM escapes removed.
    payload: Vec<u8>,
}
|
||
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun` | ||
/// entry, and if so extracts relevant data in a `CovfunLineData`. | ||
fn covfun_line_data(line: &str) -> Option<CovfunLineData> { | ||
let re = { | ||
// We cheat a little bit and match variable names `@__covrec_[HASH]u` | ||
// rather than the section name, because the section name is harder to | ||
// extract and differs across Linux/Windows/macOS. We also extract the | ||
// symbol name hash from the variable name rather than the data, since | ||
// it's easier and both should match. | ||
static RE: OnceLock<Regex> = OnceLock::new(); | ||
RE.get_or_init(|| { | ||
Regex::new( | ||
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#, | ||
) | ||
.unwrap() | ||
}) | ||
}; | ||
|
||
let captures = re.captures(line)?; | ||
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap(); | ||
let is_used = captures.name("is_used").is_some(); | ||
let payload = unescape_llvm_string_contents(&captures["payload"]); | ||
|
||
Some(CovfunLineData { name_hash, is_used, payload }) | ||
} | ||
|
||
// Extra parser methods only needed when parsing `covfun` payloads. | ||
impl<'a> Parser<'a> { | ||
fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> { | ||
let raw_term = self.read_uleb128_u32()?; | ||
CovTerm::decode(raw_term).context("decoding term") | ||
} | ||
|
||
fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> { | ||
let mut kind = self.read_raw_mapping_kind()?; | ||
let mut region = self.read_raw_mapping_region()?; | ||
|
||
const HIGH_BIT: u32 = 1u32 << 31; | ||
if region.end_column & HIGH_BIT != 0 { | ||
region.end_column &= !HIGH_BIT; | ||
kind = match kind { | ||
MappingKind::Code(term) => MappingKind::Gap(term), | ||
// LLVM's coverage mapping reader will actually handle this | ||
// case without complaint, but the result is almost certainly | ||
// a meaningless implementation artifact. | ||
_ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")), | ||
} | ||
} | ||
|
||
Ok((kind, region)) | ||
} | ||
|
||
fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> { | ||
let raw_mapping_kind = self.read_uleb128_u32()?; | ||
if let Some(term) = CovTerm::decode(raw_mapping_kind) { | ||
return Ok(MappingKind::Code(term)); | ||
} | ||
|
||
assert_eq!(raw_mapping_kind & 0b11, 0); | ||
assert_ne!(raw_mapping_kind, 0); | ||
|
||
let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0); | ||
if is_expansion { | ||
Ok(MappingKind::Expansion(high)) | ||
} else { | ||
match high { | ||
0 => unreachable!("zero kind should have already been handled as a code mapping"), | ||
2 => Ok(MappingKind::Skip), | ||
4 => { | ||
let r#true = self.read_simple_term()?; | ||
let r#false = self.read_simple_term()?; | ||
Ok(MappingKind::Branch { r#true, r#false }) | ||
} | ||
_ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")), | ||
} | ||
} | ||
} | ||
|
||
fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> { | ||
let start_line_offset = self.read_uleb128_u32()?; | ||
let start_column = self.read_uleb128_u32()?; | ||
let end_line_offset = self.read_uleb128_u32()?; | ||
let end_column = self.read_uleb128_u32()?; | ||
Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column }) | ||
} | ||
} | ||
|
||
/// Enum that can hold a constant zero value, the ID of a physical coverage
/// counter, or the ID (and operation) of a coverage-counter expression.
///
/// Terms are used as the operands of coverage-counter expressions, as the arms
/// of branch mappings, and as the value of code/gap mappings.
#[derive(Clone, Copy, Debug)]
pub(crate) enum CovTerm {
    /// The constant zero.
    Zero,
    /// A counter, identified by its ID.
    Counter(u32),
    /// An expression, identified by its ID, together with its operator.
    Expression(u32, Op),
}

/// Operator (addition or subtraction) used by an expression.
#[derive(Clone, Copy, Debug)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl CovTerm {
    /// Decodes a raw term value: the low two bits are a tag, and the
    /// remaining high bits are the counter/expression ID.
    ///
    /// Returns `None` when the tag is `0b00` but the high bits are non-zero.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let id = input >> 2;
        match input & 0b11 {
            // When reading expression operands or branch arms, the LLVM
            // coverage mapping reader will always interpret a `0b00` tag as a
            // zero term, even when the high bits are non-zero.
            // We treat that case as failure instead, so that this code can be
            // shared by the full mapping-kind reader as well.
            0b00 => (id == 0).then_some(Self::Zero),
            0b01 => Some(Self::Counter(id)),
            0b10 => Some(Self::Expression(id, Op::Sub)),
            0b11 => Some(Self::Expression(id, Op::Add)),
            _ => unreachable!("a two-bit tag has only four possible values"),
        }
    }
}
|
||
#[derive(Debug)] | ||
enum MappingKind { | ||
Code(CovTerm), | ||
Gap(CovTerm), | ||
Expansion(u32), | ||
Skip, | ||
// Using raw identifiers here makes the dump output a little bit nicer | ||
// (via the derived Debug), at the expense of making this tool's source | ||
// code a little bit uglier. | ||
Branch { r#true: CovTerm, r#false: CovTerm }, | ||
} | ||
|
||
/// Source region covered by a mapping, stored in the same relative form that
/// is used by the encoded payload.
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

impl Debug for MappingRegion {
    /// Formats the region as `(prev + L, C) to (start + L, C)`, making it
    /// visible that both line values are offsets rather than absolute lines.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        write!(
            f,
            "(prev + {start_line_offset}, {start_column}) to (start + {end_line_offset}, {end_column})"
        )
    }
}
|
||
/// Helper type that prints expressions in a "resolved" form, so that | ||
/// developers reading the dump don't need to resolve expressions by hand. | ||
struct ExpressionResolver { | ||
operands: Vec<(CovTerm, CovTerm)>, | ||
} | ||
|
||
impl ExpressionResolver { | ||
fn new() -> Self { | ||
Self { operands: Vec::new() } | ||
} | ||
|
||
fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) { | ||
self.operands.push((lhs, rhs)); | ||
} | ||
|
||
fn format_term(&self, term: CovTerm) -> String { | ||
let mut output = String::new(); | ||
self.write_term(&mut output, term); | ||
output | ||
} | ||
|
||
fn write_term(&self, output: &mut String, term: CovTerm) { | ||
match term { | ||
CovTerm::Zero => output.push_str("Zero"), | ||
CovTerm::Counter(id) => write!(output, "c{id}").unwrap(), | ||
CovTerm::Expression(id, op) => { | ||
let (lhs, rhs) = self.operands[id as usize]; | ||
let op = match op { | ||
Op::Sub => "-", | ||
Op::Add => "+", | ||
}; | ||
|
||
output.push('('); | ||
self.write_term(output, lhs); | ||
write!(output, " {op} ").unwrap(); | ||
self.write_term(output, rhs); | ||
output.push(')'); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
mod covfun; | ||
mod parser; | ||
mod prf_names; | ||
|
||
fn main() -> anyhow::Result<()> { | ||
use anyhow::Context as _; | ||
|
||
let args = std::env::args().collect::<Vec<_>>(); | ||
|
||
let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?; | ||
let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?; | ||
|
||
let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?; | ||
crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?; | ||
|
||
Ok(()) | ||
} |
Oops, something went wrong.