Skip to content

Commit

Permalink
Auto merge of #96551 - ferrocene:pa-ignore-paths-when-abbreviating, r…
Browse files Browse the repository at this point in the history
…=Mark-Simulacrum

[compiletest] Ignore known paths when abbreviating output

To prevent out of memory conditions, compiletest limits the amount of output a test can generate, abbreviating it if the test emits more than a threshold. While the behavior is desirable, it also causes some issues (like #96229, #94322 and #92211).

The latest one happened recently, when the `src/test/ui/numeric/numeric-cast.rs` test started to fail on systems where the path of the rust-lang/rust checkout is too long. This includes my own development machine and [LLVM's CI](#96362 (comment)). Rust's CI uses a pretty short directory name for the checkout, which hides these sort of problems until someone runs the test suite on their own computer.

When developing the fix I tried to find the most targeted fix that would prevent this class of failures from happening in the future, deferring the decision on if/how to redesign abbreviation to a later date. The solution I came up with was to ignore known base paths when calculating whether the output exceeds the abbreviation threshold, which removes this kind of nondeterminism.

This PR is best reviewed commit-by-commit.
  • Loading branch information
bors committed Jun 6, 2022
2 parents 760237f + 8ea9598 commit 6609c67
Show file tree
Hide file tree
Showing 3 changed files with 249 additions and 60 deletions.
157 changes: 101 additions & 56 deletions src/tools/compiletest/src/read2.rs
Original file line number Diff line number Diff line change
@@ -1,71 +1,24 @@
// FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
// Consider unify the read2() in libstd, cargo and this to prevent further code duplication.

#[cfg(test)]
mod tests;

pub use self::imp::read2;
use std::io;
use std::io::{self, Write};
use std::mem::replace;
use std::process::{Child, Output};

pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
use io::Write;
use std::mem::replace;

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;

enum ProcOutput {
Full(Vec<u8>),
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn extend(&mut self, data: &[u8]) {
let new_self = match *self {
ProcOutput::Full(ref mut bytes) => {
bytes.extend_from_slice(data);
let new_len = bytes.len();
if new_len <= HEAD_LEN + TAIL_LEN {
return;
}
let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
let head = replace(bytes, Vec::new());
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full(bytes) => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

let mut stdout = ProcOutput::Full(Vec::new());
let mut stderr = ProcOutput::Full(Vec::new());
pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result<Output> {
let mut stdout = ProcOutput::new();
let mut stderr = ProcOutput::new();

drop(child.stdin.take());
read2(
child.stdout.take().unwrap(),
child.stderr.take().unwrap(),
&mut |is_stdout, data, _| {
if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len);
data.clear();
},
)?;
Expand All @@ -74,6 +27,98 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
}

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;

// Whenever a path is filtered when counting the length of the output, we need to add some
// placeholder length to ensure a compiler emitting only filtered paths doesn't cause a OOM.
//
// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test
// suite to pass at the moment of implementing path filtering.
const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32;

enum ProcOutput {
Full { bytes: Vec<u8>, filtered_len: usize },
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn new() -> Self {
ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 }
}

fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) {
let new_self = match *self {
ProcOutput::Full { ref mut bytes, ref mut filtered_len } => {
let old_len = bytes.len();
bytes.extend_from_slice(data);
*filtered_len += data.len();

// We had problems in the past with tests failing only in some environments,
// due to the length of the base path pushing the output size over the limit.
//
// To make those failures deterministic across all environments we ignore known
// paths when calculating the string length, while still including the full
// path in the output. This could result in some output being larger than the
// threshold, but it's better than having nondeterministic failures.
//
// The compiler emitting only excluded strings is addressed by adding a
// placeholder size for each excluded segment, which will eventually reach
// the configured threshold.
for path in filter_paths_from_len {
let path_bytes = path.as_bytes();
// We start matching `path_bytes - 1` into the previously loaded data,
// to account for the fact a path_bytes might be included across multiple
// `extend` calls. Starting from `- 1` avoids double-counting paths.
let matches = (&bytes[(old_len.saturating_sub(path_bytes.len() - 1))..])
.windows(path_bytes.len())
.filter(|window| window == &path_bytes)
.count();
*filtered_len -= matches * path_bytes.len();

// We can't just remove the length of the filtered path from the output lenght,
// otherwise a compiler emitting only filtered paths would OOM compiletest. Add
// a fixed placeholder length for each path to prevent that.
*filtered_len += matches * FILTERED_PATHS_PLACEHOLDER_LEN;
}

let new_len = bytes.len();
if *filtered_len <= HEAD_LEN + TAIL_LEN {
return;
}

let mut head = replace(bytes, Vec::new());
let mut middle = head.split_off(HEAD_LEN);
let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full { bytes, .. } => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

#[cfg(not(any(unix, windows)))]
mod imp {
use std::io::{self, Read};
Expand Down
123 changes: 123 additions & 0 deletions src/tools/compiletest/src/read2/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use crate::read2::{ProcOutput, FILTERED_PATHS_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};

#[test]
fn test_abbreviate_short_string() {
let mut out = ProcOutput::new();
out.extend(b"Hello world!", &[]);
assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_short_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(b"Hello ", &[]);
out.extend(b"world!", &[]);

assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_long_string() {
let mut out = ProcOutput::new();

let data = vec![b'.'; HEAD_LEN + TAIL_LEN + 16];
out.extend(&data, &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_long_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(&vec![b'.'; HEAD_LEN], &[]);
out.extend(&vec![b'.'; TAIL_LEN], &[]);
// Also test whether the rotation works
out.extend(&vec![b'!'; 16], &[]);
out.extend(&vec![b'?'; 16], &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 32]);
expected.extend_from_slice(&vec![b'!'; 16]);
expected.extend_from_slice(&vec![b'?'; 16]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_filterss_are_detected() {
let mut out = ProcOutput::new();
let filters = &["foo".to_string(), "quux".to_string()];

out.extend(b"Hello foo", filters);
// Check items from a previous extension are not double-counted.
out.extend(b"! This is a qu", filters);
// Check items are detected across extensions.
out.extend(b"ux.", filters);

match &out {
ProcOutput::Full { bytes, filtered_len } => assert_eq!(
*filtered_len,
bytes.len() + FILTERED_PATHS_PLACEHOLDER_LEN * filters.len()
- filters.iter().map(|i| i.len()).sum::<usize>()
),
ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"),
}

assert_eq!(b"Hello foo! This is a quux.", &*out.into_bytes());
}

#[test]
fn test_abbreviate_filters_avoid_abbreviations() {
let mut out = ProcOutput::new();
let filters = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut expected = vec![b'.'; HEAD_LEN - FILTERED_PATHS_PLACEHOLDER_LEN as usize];
expected.extend_from_slice(filters[0].as_bytes());
expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

out.extend(&expected, filters);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_filters_can_still_cause_abbreviations() {
let mut out = ProcOutput::new();
let filters = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut input = vec![b'.'; HEAD_LEN];
input.extend_from_slice(&vec![b'.'; TAIL_LEN]);
input.extend_from_slice(filters[0].as_bytes());

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]);
expected.extend_from_slice(&vec![b'a'; 64]);

out.extend(&input, filters);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}
29 changes: 25 additions & 4 deletions src/tools/compiletest/src/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher};
use std::io::prelude::*;
use std::io::{self, BufReader};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Output, Stdio};
use std::process::{Child, Command, ExitStatus, Output, Stdio};
use std::str;

use glob::glob;
Expand Down Expand Up @@ -1745,6 +1745,28 @@ impl<'test> TestCx<'test> {
dylib
}

fn read2_abbreviated(&self, child: Child) -> Output {
let mut filter_paths_from_len = Vec::new();
let mut add_path = |path: &Path| {
let path = path.display().to_string();
let windows = path.replace("\\", "\\\\");
if windows != path {
filter_paths_from_len.push(windows);
}
filter_paths_from_len.push(path);
};

// List of paths that will not be measured when determining whether the output is larger
// than the output truncation threshold.
//
// Note: avoid adding a subdirectory of an already filtered directory here, otherwise the
// same slice of text will be double counted and the truncation might not happen.
add_path(&self.config.src_base);
add_path(&self.config.build_base);

read2_abbreviated(child, &filter_paths_from_len).expect("failed to read output")
}

fn compose_and_run(
&self,
mut command: Command,
Expand Down Expand Up @@ -1779,8 +1801,7 @@ impl<'test> TestCx<'test> {
child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap();
}

let Output { status, stdout, stderr } =
read2_abbreviated(child).expect("failed to read output");
let Output { status, stdout, stderr } = self.read2_abbreviated(child);

let result = ProcRes {
status,
Expand Down Expand Up @@ -2969,7 +2990,7 @@ impl<'test> TestCx<'test> {
}
}

let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`");
let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`"));
if !output.status.success() {
let res = ProcRes {
status: output.status,
Expand Down

0 comments on commit 6609c67

Please sign in to comment.