From 8ff33bd10b6bab8ca4c6fdfc8a13e52960be9966 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Mon, 30 Sep 2024 01:49:07 +0200 Subject: [PATCH] Coverage html (#46) * Revert "Allow nativelink flake module to upload results (#1369)" (#1372) This partially reverts commit 9600839bd2ba0a6915908c55fca24f373c3a2106. The original idea was to implement a `readonly` setting to dynamically configure `--remote_upload_local_results`. While this is fairly straightforward to implement it turned out that the UX implications around the environment variables/scripts/files to configure this are nontrivial and we need to evaluate the different approaches in more depth first. For now, revert to readonly by default and also add a small modifier to the relevant workflow so that the write-access workflow retains the ability to write artifacts on pushes to main. Fixes #1371 * Introduce reproducible branch-based coverage Reports may now be build via: ```nix nix build .#nativelinkCoverageForHost ``` The `result` symlink then contains the contents of a webpage to view the existing reports. Pushes to main publish the site at tracemachina.github.io/nativelink. Reports are built in release mode to closely resemble production coverage of the testsuite. This also means that most worker tests are ignored via a new `nix` feature to make the testsuite run in nix sandboxes. It's not ideal, but accurately reflects our production coverage guarantees. A future resolution for this might be to implement more elaborate mocking functionality for `nativelink-worker`. Coverage leverages nix caching, but not Bazel caching. While Bazel has builtin support for coverage, the reports were not satisfactory as they rely on outdated gcov toolchains with vague hermeticity guarantees and unsatisfactory implementations for llvm-cov-based workflows (e.g. no branch-based coverage for rust and no story around coverage for heterogeneous code). Mid-term we should implement "fast development" coverage via Bazel alongside the "slow production" coverage via Nix. As next steps we should continuously publish the generated HTML pages via the web infrastructure and add hooks to report regressions. --- .github/workflows/coverage.yaml | 69 +++++++++++++++++++ .github/workflows/main.yml | 2 +- CONTRIBUTING.md | 10 +++ Cargo.lock | 2 +- Cargo.toml | 3 + flake.nix | 41 ++++++++++- modules/nativelink.nix | 1 + .../nativelink-metric-macro-derive/Cargo.toml | 2 +- nativelink-worker/Cargo.toml | 3 + nativelink-worker/tests/local_worker_test.rs | 1 + .../tests/running_actions_manager_test.rs | 9 +++ .../docs/docs/nativelink-cloud/nix.mdx | 2 + web/platform/starlight.conf.ts | 1 + 13 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/coverage.yaml diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml new file mode 100644 index 0000000000..1cdde3ea30 --- /dev/null +++ b/.github/workflows/coverage.yaml @@ -0,0 +1,69 @@ +--- +name: Coverage + +on: + push: + branches: [main] + pull_request: + branches: [main] + paths-ignore: + - 'docs/**' + +permissions: read-all + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + coverage: + name: Coverage + runs-on: ubuntu-24.04 + timeout-minutes: 45 + steps: + - name: Checkout + uses: >- # v4.1.1 + actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + + - name: Install Nix + uses: >- # v10 + DeterminateSystems/nix-installer-action@de22e16c4711fca50c816cc9081563429d1cf563 + + - name: Free disk space + uses: >- # v2.0.0 + endersonmenezes/free-disk-space@3f9ec39ebae520864ac93467ee395f5237585c21 + with: + remove_android: true + remove_dotnet: true + remove_haskell: true + remove_tool_cache: false + + - name: Cache Nix derivations + uses: >- # v4 + DeterminateSystems/magic-nix-cache-action@fc6aaceb40b9845a02b91e059ec147e78d1b4e41 + + - name: Generate coverage + run: | + nix build -L .#nativelinkCoverageForHost + + - name: Upload coverage artifact + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa + with: + path: result/html + + deploy: + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + name: Deploy Coverage + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + needs: coverage + runs-on: ubuntu-24.04 + permissions: + pages: write # to deploy to GitHub Pages + id-token: write # to authenticate to GitHub Pages + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dea81672de..baf26063bf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -101,7 +101,7 @@ jobs: --bes_backend=grpcs://bes-tracemachina-shared.build-faster.nativelink.net \ --bes_header=x-nativelink-api-key=$NL_COM_API_KEY \ --bes_results_url=https://app.nativelink.com/a/e3b1e0e0-4b73-45d6-85bc-5cb7b02edea5/build \ - ${{ github.ref == 'refs/heads/main' && ' ' || '--nogenerate_json_trace_profile --remote_upload_local_results=false' }} \ + ${{ github.ref == 'refs/heads/main' && '--remote_upload_local_results=true' || '--nogenerate_json_trace_profile --remote_upload_local_results=false' }} \ //..." docker-compose-compiles-nativelink: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 155ad44bf6..c595a81fb1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -468,3 +468,13 @@ most automatically generated changelogs provide. NativeLink Code of Conduct is available in the [CODE_OF_CONDUCT](https://github.com/tracemachina/nativelink/tree/main/CODE_OF_CONDUCT.md) file. + +## Generating code coverage + +You can generate branch-based coverage reports via: + +``` +nix run .#nativelinkCoverageForHost +``` + +The `result` symlink contains a webpage with the visualized report. diff --git a/Cargo.lock b/Cargo.lock index 210b3a8d2b..537b755f60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1826,7 +1826,7 @@ dependencies = [ [[package]] name = "nativelink-metric-macro-derive" -version = "0.4.0" +version = "0.5.3" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1cb83d960e..54a2e147e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,9 @@ name = "nativelink" enable_tokio_console = [ "nativelink-util/enable_tokio_console" ] +nix = [ + "nativelink-worker/nix" +] [dependencies] nativelink-error = { path = "nativelink-error" } diff --git a/flake.nix b/flake.nix index 479379f02d..bebc5d906f 100644 --- a/flake.nix +++ b/flake.nix @@ -117,12 +117,21 @@ ]; }; + nightlyRustFor = p: + p.rust-bin.nightly.${nightly-rust-version}.default.override { + extensions = ["llvm-tools"]; + targets = [ + "${nixSystemToRustTriple p.stdenv.targetPlatform.system}" + ]; + }; + craneLibFor = p: (crane.mkLib p).overrideToolchain stableRustFor; + nightlyCraneLibFor = p: (crane.mkLib p).overrideToolchain nightlyRustFor; src = pkgs.lib.cleanSourceWith { src = (craneLibFor pkgs).path ./.; filter = path: type: - (builtins.match "^.*(data/SekienSkashita\.jpg|nativelink-config/README\.md)" path != null) + (builtins.match "^.*(data/SekienAkashita\.jpg|nativelink-config/README\.md)" path != null) || ((craneLibFor pkgs).filterCargoSources path type); }; @@ -184,6 +193,7 @@ # Additional target for external dependencies to simplify caching. cargoArtifactsFor = p: (craneLibFor p).buildDepsOnly (commonArgsFor p); + nightlyCargoArtifactsFor = p: (craneLibFor p).buildDepsOnly (commonArgsFor p); nativelinkFor = p: (craneLibFor p).buildPackage ((commonArgsFor p) @@ -334,6 +344,34 @@ os = "linux"; }; }; + + nativelinkCoverageFor = p: let + coverageArgs = + (commonArgsFor p) + // { + # TODO(aaronmondal): For some reason we're triggering an edgecase where + # mimalloc builds against glibc headers in coverage + # builds. This leads to nonexistend __memcpy_chk and + # __memset_chk symbols if fortification is enabled. + # Our regular builds also have this issue, but we + # should investigate further. + hardeningDisable = ["fortify"]; + }; + in + (nightlyCraneLibFor p).cargoLlvmCov (coverageArgs + // { + cargoArtifacts = nightlyCargoArtifactsFor p; + cargoExtraArgs = builtins.concatStringsSep " " [ + "--all" + "--locked" + "--features nix" + "--branch" + "--ignore-filename-regex '.*(genproto|vendor-cargo-deps|crates).*'" + ]; + cargoLlvmCovExtraArgs = "--html --output-dir $out"; + }); + + nativelinkCoverageForHost = nativelinkCoverageFor pkgs; in rec { _module.args.pkgs = let nixpkgs-patched = (import self.inputs.nixpkgs {inherit system;}).applyPatches { @@ -366,6 +404,7 @@ lre-cc native-cli nativelink + nativelinkCoverageForHost nativelink-aarch64-linux nativelink-debug nativelink-image diff --git a/modules/nativelink.nix b/modules/nativelink.nix index 2173d7360e..eb7a498b24 100644 --- a/modules/nativelink.nix +++ b/modules/nativelink.nix @@ -20,6 +20,7 @@ "--remote_instance_name=main" "--remote_header=x-nativelink-project=nativelink-ci" "--nogenerate_json_trace_profile" + "--remote_upload_local_results=false" "--experimental_remote_cache_async" ]; diff --git a/nativelink-metric/nativelink-metric-macro-derive/Cargo.toml b/nativelink-metric/nativelink-metric-macro-derive/Cargo.toml index cafa6c4416..201fbd313f 100644 --- a/nativelink-metric/nativelink-metric-macro-derive/Cargo.toml +++ b/nativelink-metric/nativelink-metric-macro-derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nativelink-metric-macro-derive" -version = "0.4.0" +version = "0.5.3" edition = "2021" [lib] diff --git a/nativelink-worker/Cargo.toml b/nativelink-worker/Cargo.toml index 75b54b4587..76dc5df5c7 100644 --- a/nativelink-worker/Cargo.toml +++ b/nativelink-worker/Cargo.toml @@ -3,6 +3,9 @@ name = "nativelink-worker" version = "0.5.3" edition = "2021" +[features] +nix = [] + [dependencies] nativelink-error = { path = "../nativelink-error" } nativelink-proto = { path = "../nativelink-proto" } diff --git a/nativelink-worker/tests/local_worker_test.rs b/nativelink-worker/tests/local_worker_test.rs index cd7c77f344..d9b26477f0 100644 --- a/nativelink-worker/tests/local_worker_test.rs +++ b/nativelink-worker/tests/local_worker_test.rs @@ -73,6 +73,7 @@ fn make_temp_path(data: &str) -> String { ) } +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn platform_properties_smoke_test() -> Result<(), Error> { let mut platform_properties = HashMap::new(); diff --git a/nativelink-worker/tests/running_actions_manager_test.rs b/nativelink-worker/tests/running_actions_manager_test.rs index 7014eb9900..14759c5249 100644 --- a/nativelink-worker/tests/running_actions_manager_test.rs +++ b/nativelink-worker/tests/running_actions_manager_test.rs @@ -989,6 +989,7 @@ async fn upload_files_from_above_cwd_test() -> Result<(), Box Result<(), Box> { const WORKER_ID: &str = "foo_worker_id"; @@ -1321,6 +1322,7 @@ async fn cleanup_happens_on_job_failure() -> Result<(), Box Result<(), Box> { const WORKER_ID: &str = "foo_worker_id"; @@ -1429,6 +1431,7 @@ async fn kill_ends_action() -> Result<(), Box> { // The wrapper script will print a constant string to stderr, and the test itself will // print to stdout. We then check the results of both to make sure the shell script was // invoked and the actual command was invoked under the shell script. +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn entrypoint_does_invoke_if_set() -> Result<(), Box> { #[cfg(target_family = "unix")] @@ -1572,6 +1575,7 @@ exit 0 Ok(()) } +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn entrypoint_injects_properties() -> Result<(), Box> { #[cfg(target_family = "unix")] @@ -1747,6 +1751,7 @@ exit 0 Ok(()) } +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn entrypoint_sends_timeout_via_side_channel() -> Result<(), Box> { #[cfg(target_family = "unix")] @@ -2268,6 +2273,7 @@ async fn action_result_has_used_in_message() -> Result<(), Box Result<(), Box> { const WORKER_ID: &str = "foo_worker_id"; @@ -2681,6 +2687,7 @@ async fn worker_times_out() -> Result<(), Box> { Ok(()) } +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn kill_all_waits_for_all_tasks_to_finish() -> Result<(), Box> { const WORKER_ID: &str = "foo_worker_id"; @@ -2841,6 +2848,7 @@ async fn kill_all_waits_for_all_tasks_to_finish() -> Result<(), Box Result<(), Box> { const WORKER_ID: &str = "foo_worker_id"; @@ -3219,6 +3227,7 @@ async fn upload_with_single_permit() -> Result<(), Box> { Ok(()) } +#[cfg_attr(feature = "nix", ignore)] #[nativelink_test] async fn running_actions_manager_respects_action_timeout() -> Result<(), Box> { diff --git a/web/platform/src/content/docs/docs/nativelink-cloud/nix.mdx b/web/platform/src/content/docs/docs/nativelink-cloud/nix.mdx index e20f634d66..8b788d655c 100644 --- a/web/platform/src/content/docs/docs/nativelink-cloud/nix.mdx +++ b/web/platform/src/content/docs/docs/nativelink-cloud/nix.mdx @@ -95,6 +95,7 @@ build --remote_header=x-nativelink-api-key=065f02f53f26a12331d5cfd00a778fb243bfb build --remote_instance_name=main build --remote_header=x-nativelink-project=nativelink-ci build --nogenerate_json_trace_profile +build --remote_upload_local_results=false build --experimental_remote_cache_async ``` @@ -116,5 +117,6 @@ build:nativelink --remote_cache=grpcs://my-custom-endpoints.com build:nativelink --remote_header=x-nativelink-api-key=my-custom-readonly-api-key build:nativelink --remote_header=x-nativelink-project=nativelink-ci build:nativelink --nogenerate_json_trace_profile +build:nativelink --remote_upload_local_results=false build:nativelink --experimental_remote_cache_async ``` diff --git a/web/platform/starlight.conf.ts b/web/platform/starlight.conf.ts index 18b8ea4897..97fc259201 100644 --- a/web/platform/starlight.conf.ts +++ b/web/platform/starlight.conf.ts @@ -244,6 +244,7 @@ export const starlightConfig = { items: [ { label: "Docs", link: `${docsRoot}/introduction/setup` }, { label: "NativeLink Cloud", link: "https://app.nativelink.com/" }, + { label: "Coverage", link: "https://tracemachina.github.io/nativelink" }, ], }, ],