Skip to content

Commit

Permalink
Refactor data generation to be cleaner and more principled. (#1001)
Browse files Browse the repository at this point in the history
cargo-semver-checks outgrew its previous data generation system. Over
the previous 2+ years, this code was slowly turning into spaghetti as we
discovered we needed to add ever more functionality and work around ever
more issues. It was borderline unmaintainable, and impossible to extend
in a meaningful fashion.

As part of rust-lang/rust-project-goals#104,
we aim to start supporting manifest linting. That would require
generating more data -- not just rustdoc JSON but also project
metadatasuch as package features and targets. Plumbing that data though
the existing data generation spaghetti code was proving infeasible.

This PR tackles the lion's share of the spaghetti problem. It introduces
a new submodule, `data_generation`, that handles data generation tasks
and has its own internal invariants and data structures which are not
tied into CLI considerations like "should we use color when printing to
the terminal". That will make it easier to extend with project metadata
in subsequent PRs.

There is still some clean-up work remaining -- this PR is just the start
of that work. But here we untangle the biggest mess in the tool, which
will let us move more quickly elsewhere.
  • Loading branch information
obi1kenobi authored Nov 21, 2024
1 parent 68611c4 commit 2615b5d
Show file tree
Hide file tree
Showing 28 changed files with 1,705 additions and 810 deletions.
53 changes: 39 additions & 14 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -406,28 +406,32 @@ jobs:
# Show what's in the cache.
ls subject-new/target/semver-checks/cache/
EXPECTED_CACHED_RUSTDOC='subject-new/target/semver-checks/cache/libp2p_core-0_37_0-ccce455725cbab73.json'
# Ensure the previous cached rustdoc file still exists.
[ -f "subject-new/target/semver-checks/cache/registry-libp2p_core-0_37_0.json" ] || { \
[ -f "$EXPECTED_CACHED_RUSTDOC" ] || { \
echo "could not find libp2p-core cache file"; \
exit 1;
}
export OLD_RUSTDOC_VERSION="$(jq .format_version subject-new/target/semver-checks/cache/registry-libp2p_core-0_37_0.json)"
export OLD_RUSTDOC_VERSION="$(jq .format_version "$EXPECTED_CACHED_RUSTDOC")"
# Run cargo-semver-checks with the older cached file.
# In a prior step, we installed a newer Rust, so we'll set that as the default
# for cargo-semver-checks to invoke here.
rustup override set "$NEWER_RUST"
bins/cargo-semver-checks semver-checks check-release --manifest-path="subject-new/core/Cargo.toml" --verbose
export NEW_RUSTDOC_VERSION="$(jq .format_version subject-new/target/semver-checks/cache/registry-libp2p_core-0_37_0.json)"
# Show what's in the cache now.
ls subject-new/target/semver-checks/cache/
export NEW_RUSTDOC_VERSION="$(jq .format_version "$EXPECTED_CACHED_RUSTDOC")"
# Ensure the cached rustdoc versions were indeed different before and after.
[ "$OLD_RUSTDOC_VERSION" != "$NEW_RUSTDOC_VERSION" ] || exit 1
cross-feature-caching:
# Ensure that cached rustdoc JSON files created with mismatched feature config is not used.
# Ensure that cached rustdoc JSON files created with mismatched feature config are not used.
name: 'Caching with feature config changed'
runs-on: ubuntu-latest
needs:
Expand Down Expand Up @@ -479,8 +483,19 @@ jobs:
# Run even if there was a cache hit, to make sure the check passes.
- name: Run semver-checks
run: |
cd semver
../bins/cargo-semver-checks semver-checks check-release --manifest-path="../subject/serde/Cargo.toml" --verbose
bins/cargo-semver-checks semver-checks check-release --manifest-path="subject/serde/Cargo.toml" --verbose
# Show what's in the cache.
ls subject/target/semver-checks/cache/
# Save the older rustdoc on cache miss. This must run *before* the cross-feature check,
# since we don't want to save that rustdoc -- we're making sure it gets generated and used!
- name: Save rustdoc
uses: actions/cache/save@v4
if: steps.cache.outputs.cache-hit != 'true'
with:
key: cross-feature-caching-${{ runner.os }}-${{ steps.toolchain.outputs.cachekey }}-${{ hashFiles('semver/**/Cargo.lock') }}-${{ github.job }}-rustdoc
path: subject/target/semver-checks/cache

# Run cargo-semver-checks, enabling feature `unstable` this time.
# We expect this to fail.
Expand Down Expand Up @@ -508,21 +523,31 @@ jobs:
- name: Verify cached rustdoc's hash
run: |
if [ -f subject/target/semver-checks/cache/registry-serde-1_0_162-f0dcb72a80405fd6.json ]; then exit 0; else exit 1; fi
# Show what's in the cache.
ls subject/target/semver-checks/cache/
# The previous cached rustdoc should continue to exist.
if [ -f subject/target/semver-checks/cache/serde-1_0_162-5900ebf8bb9b9f8b.json ]; then
exit 0
else
echo "Older rustdoc JSON not found in cache!"
exit 1
fi
# There should also be a new cached rustdoc file for the new feature settings.
if [ -f subject/target/semver-checks/cache/serde-1_0_162-6999ae87ca463ab3.json ]; then
exit 0
else
echo "New rustdoc JSON for new feature combo not found in cache!"
exit 1
fi
- name: Cleanup
run: |
cd semver
rm output
rm -f unexpectedly_did_not_fail
- name: Save rustdoc
uses: actions/cache/save@v4
if: steps.cache.outputs.cache-hit != 'true'
with:
key: cross-feature-caching-${{ runner.os }}-${{ steps.toolchain.outputs.cachekey }}-${{ hashFiles('semver/**/Cargo.lock') }}-${{ github.job }}-rustdoc
path: subject/target/semver-checks/cache

run-on-rust-libp2p:
# Run cargo-semver-checks on a crate with no semver violations,
# to make sure there are no false-positives.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ clap-cargo = { version = "0.14.1", features = ["cargo_metadata"] }
ignore = "0.4.23"
clap-verbosity-flag = "2.2.1"
log = "0.4.22"
fs-err = "2.11.0"
# Note that `tame-index` and `gix` must be upgraded in lock-step to retain the same `gix`
# minor version. Otherwise, one will compile `gix` two times in different minor versions.
gix = { version = "0.66", default-features = false, features = ["max-performance-safe", "revision"] }
Expand All @@ -54,7 +55,6 @@ assert_cmd = "2.0"
similar-asserts = { version = "1.6.0", features = ["serde"] }
predicates = "3.1.2"
insta = { version = "1.40.0", features = ["ron", "filters", "toml"] }
fs-err = "2.11.0"
regex = "1.10.6"
insta-cmd = "0.6.0"

Expand Down
118 changes: 118 additions & 0 deletions src/callbacks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::collections::BTreeMap;

use crate::GlobalConfig;

pub(crate) struct Callbacks<'a> {
config: &'a mut GlobalConfig,

// (crate_name, version, is_baseline) keys
generate_starts: BTreeMap<(&'a str, &'a str, bool), std::time::Instant>,
parse_starts: BTreeMap<(&'a str, &'a str, bool), std::time::Instant>,
}

impl<'a> Callbacks<'a> {
pub(crate) fn new(config: &'a mut GlobalConfig) -> Self {
Self {
config,
generate_starts: Default::default(),
parse_starts: Default::default(),
}
}
}

impl<'a> crate::data_generation::ProgressCallbacks<'a> for Callbacks<'a> {
fn generate_rustdoc_start(&mut self, crate_name: &'a str, version: &'a str, is_baseline: bool) {
let kind = if is_baseline { "baseline" } else { "current" };

// Ignore terminal printing failures.
let _ = self.config.shell_status(
"Building",
format_args!("{crate_name} v{version} ({kind})",),
);

self.generate_starts.insert(
(crate_name, version, is_baseline),
std::time::Instant::now(),
);
}

fn generate_rustdoc_success(
&mut self,
crate_name: &'a str,
version: &'a str,
is_baseline: bool,
) {
let kind = if is_baseline { "baseline" } else { "current" };
let start = self
.generate_starts
.remove(&(crate_name, version, is_baseline))
.expect("success on generation task that never started");

// Ignore terminal printing failures.
let _ = self.config.shell_status(
"Built",
format_args!("[{:>8.3}s] ({kind})", start.elapsed().as_secs_f32()),
);
}

fn parse_rustdoc_start(
&mut self,
cached: bool,
crate_name: &'a str,
version: &'a str,
is_baseline: bool,
) {
let kind = if is_baseline { "baseline" } else { "current" };
let cached = if cached { ", cached" } else { "" };

// Ignore terminal printing failures.
let _ = self.config.shell_status(
"Parsing",
format_args!("{crate_name} v{version} ({kind}{cached})",),
);

self.parse_starts.insert(
(crate_name, version, is_baseline),
std::time::Instant::now(),
);
}

fn parse_rustdoc_success(
&mut self,
_cached: bool,
crate_name: &'a str,
version: &'a str,
is_baseline: bool,
) {
let kind = if is_baseline { "baseline" } else { "current" };
let start = self
.parse_starts
.remove(&(crate_name, version, is_baseline))
.expect("success on parse task that never started");

// Ignore terminal printing failures.
let _ = self.config.shell_status(
"Parsed",
format_args!("[{:>8.3}s] ({kind})", start.elapsed().as_secs_f32()),
);
}

fn non_fatal_error(
&mut self,
crate_name: &'a str,
version: &'a str,
is_baseline: bool,
error: anyhow::Error,
) {
let kind = if is_baseline { "baseline" } else { "current" };

// Ignore terminal printing failures.
let _ = self.config.log_info(|config| {
config.shell_warn(format!(
"encountered non-fatal error while working on crate \
{crate_name} v{version} ({kind}): {error}",
))?;
Ok(())
});
}
}
14 changes: 6 additions & 8 deletions src/check_release.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use clap::crate_version;
use itertools::Itertools;
use rayon::prelude::*;
use trustfall::{FieldValue, TransparentValue};
use trustfall_rustdoc::{VersionedCrate, VersionedIndexedCrate, VersionedRustdocAdapter};

use crate::data_generation::DataStorage;
use crate::{
query::{ActualSemverUpdate, LintLevel, OverrideStack, RequiredSemverUpdate, SemverQuery},
CrateReport, GlobalConfig, ReleaseType, WitnessGeneration,
Expand Down Expand Up @@ -179,15 +179,14 @@ fn print_triggered_lint(

pub(super) fn run_check_release(
config: &mut GlobalConfig,
data_storage: &DataStorage,
crate_name: &str,
current_crate: VersionedCrate,
baseline_crate: VersionedCrate,
release_type: Option<ReleaseType>,
overrides: &OverrideStack,
witness_generation: &WitnessGeneration,
) -> anyhow::Result<CrateReport> {
let current_version = current_crate.crate_version();
let baseline_version = baseline_crate.crate_version();
let current_version = data_storage.current_crate().crate_version();
let baseline_version = data_storage.baseline_crate().crate_version();

let version_change = release_type
.map(Into::into)
Expand All @@ -211,9 +210,8 @@ pub(super) fn run_check_release(
None => "",
};

let current = VersionedIndexedCrate::new(&current_crate);
let previous = VersionedIndexedCrate::new(&baseline_crate);
let adapter = VersionedRustdocAdapter::new(&current, Some(&previous))?;
let index_storage = data_storage.create_indexes();
let adapter = index_storage.create_adapter();

let (queries_to_run, queries_to_skip): (Vec<_>, _) =
SemverQuery::all_queries().into_values().partition(|query| {
Expand Down
22 changes: 22 additions & 0 deletions src/data_generation/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/// An error that blocks further progress. "Terminal" in the sense of "cannot continue."
#[derive(Debug)]
pub(crate) enum TerminalError {
WithAdvice(anyhow::Error, String),
Other(anyhow::Error),
}

impl<E: std::error::Error + Send + Sync + 'static> From<E> for TerminalError {
fn from(value: E) -> Self {
Self::Other(value.into())
}
}

pub(crate) trait IntoTerminalResult<T> {
fn into_terminal_result(self) -> Result<T, TerminalError>;
}

impl<T> IntoTerminalResult<T> for Result<T, anyhow::Error> {
fn into_terminal_result(self) -> Result<T, TerminalError> {
self.map_err(TerminalError::Other)
}
}
Loading

0 comments on commit 2615b5d

Please sign in to comment.