From d80805396ba2417e4d6bf1bbb0fbbcc76535192e Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:25:48 +1000
Subject: [PATCH 01/16] Start on sharding mutants

---
 src/main.rs  |  9 ++++++
 src/shard.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 src/shard.rs
diff --git a/src/main.rs b/src/main.rs
index 8bb8ebef..a8966b89 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,6 +24,7 @@ mod path;
 mod pretty;
 mod process;
 mod scenario;
+mod shard;
 mod source;
 mod span;
 mod tail_file;
@@ -53,6 +54,7 @@ use crate::mutate::{Genre, Mutant};
 use crate::options::Options;
 use crate::outcome::{Phase, ScenarioOutcome};
 use crate::scenario::Scenario;
+use crate::shard::Shard;
 use crate::workspace::{PackageFilter, Workspace};
 
 const VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -200,6 +202,10 @@ struct Args {
     #[arg(long)]
     no_shuffle: bool,
 
+    /// run only one shard of all generated mutants: specify as e.g. 1/4.
+    #[arg(long)]
+    shard: Option<Shard>,
+
     /// maximum run time for all cargo commands, in seconds.
     #[arg(long, short = 't')]
     timeout: Option<f64>,
@@ -291,6 +297,9 @@ fn main() -> Result<()> {
             &read_to_string(in_diff).context("Failed to read filter diff")?,
         )?;
     }
+    if let Some(shard) = &args.shard {
+        mutants = shard.select(mutants);
+    }
     if args.list {
         list_mutants(FmtToIoWrite::new(io::stdout()), &mutants, &options)?;
     } else {
diff --git a/src/shard.rs b/src/shard.rs
new file mode 100644
index 00000000..e6b81bd7
--- /dev/null
+++ b/src/shard.rs
@@ -0,0 +1,78 @@
+// Copyright 2023 Martin Pool
+
+//! Sharding parameters.
+
+use std::str::FromStr;
+
+use anyhow::{anyhow, ensure, Context, Error};
+
+use crate::Mutant;
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub struct Shard {
+    /// Index modulo n.
+    pub k: usize,
+    /// Modulus of sharding.
+    pub n: usize,
+}
+
+impl Shard {
+    /// Select the mutants that should be run for this shard.
+    pub fn select<M, I: IntoIterator<Item = M>>(&self, mutants: I) -> Vec<M> {
+        mutants
+            .into_iter()
+            .enumerate()
+            .filter_map(|(i, m)| if i % self.n == self.k { Some(m) } else { None })
+            .collect()
+    }
+}
+
+impl FromStr for Shard {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let parts = s.split_once('/').ok_or(anyhow!("shard must be k/n"))?;
+        let k = parts.0.parse().context("shard k")?;
+        let n = parts.1.parse().context("shard n")?;
+        ensure!(k < n, "shard k must be less than n"); // implies n>0
+        Ok(Shard { k, n })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::str::FromStr;
+
+    use super::*;
+
+    #[test]
+    fn test_shard_from_str_valid_input() {
+        let shard_str = "2/5";
+        let shard = Shard::from_str(shard_str).unwrap();
+        assert_eq!(shard.k, 2);
+        assert_eq!(shard.n, 5);
+    }
+
+    #[test]
+    fn test_shard_from_str_invalid_input() {
+        assert_eq!(
+            Shard::from_str("").unwrap_err().to_string(),
+            "shard must be k/n"
+        );
+
+        assert_eq!(
+            Shard::from_str("2").unwrap_err().to_string(),
+            "shard must be k/n"
+        );
+
+        assert_eq!(
+            Shard::from_str("2/0").unwrap_err().to_string(),
+            "shard k must be less than n"
+        );
+
+        assert_eq!(
+            Shard::from_str("5/2").unwrap_err().to_string(),
+            "shard k must be less than n"
+        );
+    }
+}

From 7b42ccf7e4e09630b7568cea2fa88e1e94a704b6 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:29:28 +1000
Subject: [PATCH 02/16] Try sharding in own mutants workflow

---
 .github/workflows/tests.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d4e64991..0b988d1f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -76,6 +76,9 @@ jobs:
   cargo-mutants:
     runs-on: ubuntu-latest
     # needs: [build, incremental-mutants]
+    strategy:
+      matrix:
+        shard: [0, 1, 2, 3]
     steps:
       - uses: actions/checkout@v3
       - uses: dtolnay/rust-toolchain@master
@@ -85,7 +88,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -j 2 -vV
+          cargo mutants --no-shuffle --exclude console.rs -vV --shard ${{ matrix.shard }}/4
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()

From ceb5d54aca4444b8844c59df7d8a4bb11db84026 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:30:37 +1000
Subject: [PATCH 03/16] Fix tests

---
 ...s__visit__test__expected_mutants_for_own_source_tree.snap | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap b/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap
index 8e7577f2..fdf51028 100644
--- a/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap
+++ b/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap
@@ -456,6 +456,11 @@ src/scenario.rs: replace Scenario::is_mutant -> bool with true
 src/scenario.rs: replace Scenario::is_mutant -> bool with false
 src/scenario.rs: replace Scenario::log_file_name_base -> String with String::new()
 src/scenario.rs: replace Scenario::log_file_name_base -> String with "xyzzy".into()
+src/shard.rs: replace Shard::select -> Vec<M> with vec![]
+src/shard.rs: replace Shard::select -> Vec<M> with vec![Default::default()]
+src/shard.rs: replace == with != in Shard::select
+src/shard.rs: replace <impl FromStr for Shard>::from_str -> Result<Self, Self::Err> with Ok(Default::default())
+src/shard.rs: replace <impl FromStr for Shard>::from_str -> Result<Self, Self::Err> with Err(::anyhow::anyhow!("mutated!"))
 src/source.rs: replace SourceFile::tree_relative_slashes -> String with String::new()
 src/source.rs: replace SourceFile::tree_relative_slashes -> String with "xyzzy".into()
 src/source.rs: replace SourceFile::path -> &Utf8Path with &Default::default()

From f055b7c771c1598c79395dfc28d40f2a038acfe2 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:32:07 +1000
Subject: [PATCH 04/16] Unit test for Shard::select

---
 src/shard.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/shard.rs b/src/shard.rs
index e6b81bd7..03807b9e 100644
--- a/src/shard.rs
+++ b/src/shard.rs
@@ -75,4 +75,12 @@ mod tests {
             "shard k must be less than n"
         );
     }
+
+    #[test]
+    fn shard_select() {
+        assert_eq!(
+            Shard::from_str("1/4").unwrap().select(0..10).as_slice(),
+            &[1, 5, 9]
+        );
+    }
 }

From 4bb2e8876a9b65d1c2b66c34b39c985c90d2dc07 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:50:43 +1000
Subject: [PATCH 05/16] Test sharding

---
 src/shard.rs       | 11 +++----
 tests/cli/main.rs  |  1 +
 tests/cli/shard.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 tests/cli/shard.rs

diff --git a/src/shard.rs b/src/shard.rs
index 03807b9e..f40eb333 100644
--- a/src/shard.rs
+++ b/src/shard.rs
@@ -6,8 +6,7 @@ use std::str::FromStr;
 
 use anyhow::{anyhow, ensure, Context, Error};
 
-use crate::Mutant;
-
+/// Select mutants for a particular shard of the total list.
 #[derive(Debug, Clone, Copy, Eq, PartialEq)]
 pub struct Shard {
     /// Index modulo n.
@@ -46,15 +45,15 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_shard_from_str_valid_input() {
-        let shard_str = "2/5";
-        let shard = Shard::from_str(shard_str).unwrap();
+    fn shard_from_str_valid_input() {
+        let shard = Shard::from_str("2/5").unwrap();
         assert_eq!(shard.k, 2);
         assert_eq!(shard.n, 5);
+        assert_eq!(shard, Shard { k: 2, n: 5 });
     }
 
     #[test]
-    fn test_shard_from_str_invalid_input() {
+    fn shard_from_str_invalid_input() {
         assert_eq!(
             Shard::from_str("").unwrap_err().to_string(),
             "shard must be k/n"
diff --git a/tests/cli/main.rs b/tests/cli/main.rs
index 76d7891c..f94f4971 100644
--- a/tests/cli/main.rs
+++ b/tests/cli/main.rs
@@ -26,6 +26,7 @@ mod config;
 mod error_value;
 mod in_diff;
 mod jobs;
+mod shard;
 mod trace;
 #[cfg(windows)]
 mod windows;
diff --git a/tests/cli/shard.rs b/tests/cli/shard.rs
new file mode 100644
index 00000000..fd01e6fe
--- /dev/null
+++ b/tests/cli/shard.rs
@@ -0,0 +1,81 @@
+// Copyright 2023 Martin Pool
+
+//! Test `--shard`
+
+use itertools::Itertools;
+
+use super::run;
+
+#[test]
+fn shard_divides_all_mutants() {
+    // For speed, this only lists the mutants, trusting that the mutants
+    // that are listed are the ones that are run.
+    let common_args = [
+        "mutants",
+        "--list",
+        "-d",
+        "testdata/well_tested",
+        "--no-shuffle",
+    ];
+    let full_list = String::from_utf8(
+        run()
+            .args(common_args)
+            .assert()
+            .success()
+            .get_output()
+            .stdout
+            .clone(),
+    )
+    .unwrap()
+    .lines()
+    .map(ToOwned::to_owned)
+    .collect_vec();
+
+    let n_shards = 5;
+    let shard_lists = (0..n_shards)
+        .map(|k| {
+            String::from_utf8(
+                run()
+                    .args(common_args)
+                    .args(["--shard", &format!("{}/{}", k, n_shards)])
+                    .assert()
+                    .success()
+                    .get_output()
+                    .stdout
+                    .clone(),
+            )
+            .unwrap()
+            .lines()
+            .map(ToOwned::to_owned)
+            .collect_vec()
+        })
+        .collect_vec();
+
+    // If you combine all the mutants selected for each shard, you get the
+    // full list, with nothing lost or duplicated, disregarding order.
+    assert_eq!(
+        shard_lists.iter().flatten().sorted().collect_vec(),
+        full_list.iter().sorted().collect_vec()
+    );
+
+    // The shards should also be approximately the same size.
+    let shard_lens = shard_lists.iter().map(|l| l.len()).collect_vec();
+    assert!(shard_lens.iter().max().unwrap() - shard_lens.iter().min().unwrap() <= 1);
+
+    // And the shards are disjoint
+    for i in 0..n_shards {
+        for j in 0..n_shards {
+            if i != j {
+                assert!(
+                    shard_lists[i].iter().all(|m| !shard_lists[j].contains(m)),
+                    "shard {} contains {}",
+                    j,
+                    shard_lists[j]
+                        .iter()
+                        .filter(|m| shard_lists[i].contains(m))
+                        .join(", ")
+                );
+            }
+        }
+    }
+}

From 885307b6175a72ef172bf8476404b6dec5efbaa6 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 09:51:52 +1000
Subject: [PATCH 06/16] 8 shards in CI

---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0b988d1f..c554f6e7 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -78,7 +78,7 @@ jobs:
     # needs: [build, incremental-mutants]
     strategy:
       matrix:
-        shard: [0, 1, 2, 3]
+        shard: [0, 1, 2, 3, 4, 5, 6, 7]
     steps:
       - uses: actions/checkout@v3
       - uses: dtolnay/rust-toolchain@master
@@ -88,7 +88,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -vV --shard ${{ matrix.shard }}/4
+          cargo mutants --no-shuffle --exclude console.rs -vV --shard ${{ matrix.shard }}/8
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()

From 11f9f3a6b3b66d8afa763861550017adad4b38ba Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 10:30:58 +1000
Subject: [PATCH 07/16] Book content about sharding

---
 NEWS.md             |  4 +++
 book/src/SUMMARY.md |  3 +-
 book/src/in-diff.md |  4 ++-
 book/src/shards.md  | 79 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 book/src/shards.md

diff --git a/NEWS.md b/NEWS.md
index 1b29c32c..058ad517 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # cargo-mutants changelog
 
+## Unreleased
+
+- New: A `--shard k/n` allows you to split the work across n independent parallel `cargo mutants` invocations running on separate machines to get a faster overall solution on large suites. You, or your CI system, are responsible for launching all the shards and checking whether any of them failed.
+
 ## 23.12.1
 
 - Improved progress bars and console output, including putting the outcome of each mutant on the left, and the overall progress bar at the bottom. Improved display of estimated remaining time, and other times.
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
index 27bcbece..dffedcc8 100644
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@@ -20,7 +20,8 @@
   - [Error values](error-values.md)
 - [Improving performance](performance.md)
   - [Parallelism](parallelism.md)
-  - [Incremental tests of modified code](in-diff.md)
+  - [Sharding](shards.md)
+  - [Testing code changed in a diff](in-diff.md)
 - [Integrations](integrations.md)
 - [Continuous integration](ci.md)
   - [Incremental tests of pull requests](pr-diff.md)
diff --git a/book/src/in-diff.md b/book/src/in-diff.md
index 64624fa9..15b8d444 100644
--- a/book/src/in-diff.md
+++ b/book/src/in-diff.md
@@ -1,4 +1,4 @@
-# Incremental tests of modified code
+# Testing code changed in a diff
 
 If you're working on a large project or one with a long test suite, you may not want to test the entire codebase every time you make a change. You can use `cargo-mutants --in-diff` to test only mutants generated from recently changed code.
 
@@ -19,6 +19,8 @@ Changes to non-Rust files, or files from which no mutants are produced, are igno
 
 `--in-diff` makes tests faster by covering the mutants that are most likely to be missed in the changed code. However, it's certainly possible that edits in one region cause code in a different region or a different file to no longer be well tested. Incremental tests are helpful for giving faster feedback, but they're not a substitute for a full test run.
 
+The diff is only matched against the code under test, not the test code. So, a diff that only deletes or changes test code won't cause any mutants to run, even though it may have a very material effect on test coverage.
+
 ## Example
 
 In this diff, we've added a new function `two` to `src/lib.rs`, and the existing code is unaltered. With `--in-diff`, `cargo-mutants` will only test mutants that affect the function `two`.
diff --git a/book/src/shards.md b/book/src/shards.md
new file mode 100644
index 00000000..7c8d200b
--- /dev/null
+++ b/book/src/shards.md
@@ -0,0 +1,79 @@
+# Sharding
+
+In addition to [running multiple jobs locally](parallelism.md), cargo-mutants can also run jobs on multiple machines, to get an overall job faster.
+
+Each job tests a subset of mutants, selected by a shard. Shards are described as `k/n`, where `n` is the number of shards and `k` is the index of the shard, from 0 to `n-1`.
+
+There is no runtime coordination between shards: they each independently discover the available mutants and then select a subset based on the `--shard` option.
+
+If any shard fails then that would indicate that some mutants were missed, or there was some other problem.
+
+## Consistency across shards
+
+**CAUTION:**
+All shards must be run with the same arguments, and the same sharding `k`, or the results will be meaningless, as they won't agree on how to divide the work.
+
+Sharding can be combined with filters or shuffling, as long as the filters are set consistently in all shards. Sharding can also combine with `--in-diff`, again as long as all shards see the same diff.
+
+## Setting up sharding
+
+Your CI system or other tooling is responsible for launching multiple shards, and for collecting the results. You're responsible for choosing the number of shards (see below).
+
+For example, in GitHub Actions, you could use a matrix job to run multiple shards:
+
+```yaml
+  cargo-mutants:
+    runs-on: ubuntu-latest
+    # needs: [build, incremental-mutants]
+    strategy:
+      matrix:
+        shard: [0, 1, 2, 3, 4, 5, 6, 7]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: beta
+      - uses: Swatinem/rust-cache@v2
+      - run: cargo install cargo-mutants
+      - name: Mutants
+        run: |
+          cargo mutants --no-shuffle -vV --shard ${{ matrix.shard }}/8
+      - name: Archive mutants.out
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: mutants.out
+          path: mutants.out
+```
+
+Note that the number of shards is set to match the `/8` in the `--shard` argument.
+
+## Performance of sharding
+
+Each mutant does some constant upfront work:
+
+* Any CI setup including starting the machine, getting a checkout, installing a Rust toolchain, and installing cargo-mutants
+* An initial clean build of the code under test
+* A baseline run of the unmutated code
+
+Then, for each mutant in its shard, it does an incremental build and runs all the tests.
+
+Each shard runs the same number of mutants, +/-1. Typically this will mean they each take roughly the same amount of time, although it's possible that some shards are unlucky in drawing mutants that happen to take longer to test.
+
+A rough model for the overall execution time for all of the shards, allowing for this work occuring in parallel, is
+
+```raw
+SHARD_STARTUP + (CLEAN_BUILD + TEST) + (N_MUTANTS/K) * (INCREMENTAL_BUILD + TEST)
+```
+
+## Choosing a number of shards
+
+Because there's some constant overhead for every shard there will be diminishing returns and increasing ineffiency if you use too many shards. (In the extreme cases where there are more shards than mutants, some of them will do the setup work, then find they have nothing to do and immediately exit.)
+
+As a rule of thumb, you should probably choose `k` such that each worker runs at least 10 mutants, and possibly much more. 8 to 32 shards might be a good place to start.
+
+The optimal setting probably depends on how long your tree takes to build from zero and incrementally, how long the tests take to run, and the performance of your CI system.
+
+If your CI system offers a choice of VM sizes you might experiment with using smaller or larger VMs and more or less shards: the optimal setting probably also depends on your tree's ability to exploit larger machines.
+
+You should also think about cost and capacity constraints in your CI system.

From c20f4a2f2df83a05f6b98cc9f1e4a1dcd6ed049a Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 10:50:12 +1000
Subject: [PATCH 08/16] Better docs about -j

---
 NEWS.md                 |  2 ++
 book/src/parallelism.md | 51 ++++++++++++++++++++++++++---------------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 058ad517..444bc627 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@
 
 - New: A `--shard k/n` allows you to split the work across n independent parallel `cargo mutants` invocations running on separate machines to get a faster overall solution on large suites. You, or your CI system, are responsible for launching all the shards and checking whether any of them failed.
 
+- Improved: Better documentation about `-j`, with stronger recommendations not to set it too high.
+
 ## 23.12.1
 
 - Improved progress bars and console output, including putting the outcome of each mutant on the left, and the overall progress bar at the bottom. Improved display of estimated remaining time, and other times.
diff --git a/book/src/parallelism.md b/book/src/parallelism.md
index bd7da2e9..4caf3e80 100644
--- a/book/src/parallelism.md
+++ b/book/src/parallelism.md
@@ -1,30 +1,45 @@
 # Parallelism
 
-After the initial test of the unmutated tree, cargo-mutants can test multiple
-mutants in parallel. This can give significant performance improvements,
-depending on the tree under test and the hardware resources available.
+After the initial test of the unmutated tree, cargo-mutants can run multiple
+builds and tests of the tree in parallel on a single machine. Separately, you can
+[shard](shards.md) work across multiple machines.
+
+**Caution:** `cargo build` and `cargo test` internally spawn many threads and processes and can be very resource hungry. Don't set `--jobs` too high, or your machine may thrash, run out of memory, or overheat.
+
+## Background
 
 Even though cargo builds, rustc, and Rust's test framework launch multiple
-processes or threads, they typically can't use all available CPU cores all the
-time, and many `cargo test` runs will end up using only one core waiting for the
-last task to complete. Running multiple jobs in parallel makes use of resources
-that would otherwise be idle.
+processes or threads, they typically spend some time waiting for straggler tasks, during which time some CPU cores are idle. For example, a cargo build commonly ends up waiting for a single-threaded linker for several seconds.
+
+Running one or more build or test tasks in parallel can use up this otherwise wasted capacity.
+This can give significant performance improvements, depending on the tree under test and the hardware resources available.
+
+## Timeouts
+
+Because tests may be slower with high parallelism, or may exhibit more variability in execution time, you may see some spurious timeouts, and you may need to set `--timeout` manually to allow enough safety margin. (User feedback on this is welcome.)
+
+## Non-hermetic tests
 
-By default, only one job is run at a time.
+If your test suite is non-hermetic -- for example, if it talks to an external database -- then running multiple jobs in parallel may cause test flakes. `cargo-mutants` is just running multiple copies of `cargo test` simultaneously: if that doesn't work in your tree, then you can't use this option.
 
-To run more, use the `--jobs` or `-j` option, or set the `CARGO_MUTANTS_JOBS`
-environment variable.
+## Choosing a job count
 
-Setting this higher than the number of CPU cores is unlikely to be helpful.
+You should set the number of jobs very conservatively, starting at `-j2` or `-j3`.
+
+Higher settings are only likely to be helpful on very large machines, perhaps with >100 cores and >256GB RAM.
+
+Unlike with `make`, setting `-j` proportionally to the number of cores is unlikely to work out well, because so the Rust build and test tools already parallelize very aggressively.
 
 The best setting will depend on many factors including the behavior of your
 program's test suite, the amount of memory on your system, and your system's
-behavior under high thermal load.
+behavior under high load. Ultimately you'll need to experiment to find the best setting.
+
+To tune the number of jobs, you can watch `htop` or some similar program while the tests are running, to see whether cores are fully utilized or whether the system is running out of memory. On laptop or desktop machines you might also want to watch the temperature of the CPU.
+
+## Interaction with `--test-threads`
+
+The Rust test framework exposes a `--test-threads` option controlling how many threads run inside a test binary. cargo-mutants doesn't set this, but you can set it from the command line, along with other parameters to the test binary. You might need to set this if your test suite is non-hermetic with regard to global process state.
 
-`-j 4` may be a good starting point. Start there and watch memory and CPU usage,
-and tune towards a setting where all cores are fully utilized without apparent
-thrashing, memory exhaustion, or thermal issues.
+Limiting the number of threads inside a single test binary would tend to make that binary less resource-hungry, and so _might_ allow you to set a higher `-j` option.
 
-Because tests may be slower with high parallelism, you may see some spurious
-timeouts, and you may need to set `--timeout` manually to allow enough safety
-margin.
+Reducing the number of test threads to increase `-j`  seems unlikely to help performance in most trees.

From 904b74ae85a26202a3aa57977d198d95e5418eae Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 10:53:57 +1000
Subject: [PATCH 09/16] Comment about shuffling and sharding

---
 tests/cli/shard.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/cli/shard.rs b/tests/cli/shard.rs
index fd01e6fe..4fc4b4cb 100644
--- a/tests/cli/shard.rs
+++ b/tests/cli/shard.rs
@@ -15,7 +15,6 @@ fn shard_divides_all_mutants() {
         "--list",
         "-d",
         "testdata/well_tested",
-        "--no-shuffle",
     ];
     let full_list = String::from_utf8(
         run()
@@ -53,6 +52,9 @@ fn shard_divides_all_mutants() {
 
     // If you combine all the mutants selected for each shard, you get the
     // full list, with nothing lost or duplicated, disregarding order.
+    //
+    // If we had a bug where we shuffled before sharding, then the shards would
+    // see inconsistent lists and this test would fail in at least some attempts.
     assert_eq!(
         shard_lists.iter().flatten().sorted().collect_vec(),
         full_list.iter().sorted().collect_vec()

From 036cb2eac56df20e34eca62916bbb88517872ff2 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 10:54:55 +1000
Subject: [PATCH 10/16] Also shard PR mutants

---
 .github/workflows/tests.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c554f6e7..4b84240b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -49,6 +49,9 @@ jobs:
 
   pr-mutants:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        shard: [0, 1, 2]
     if: github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v3
@@ -65,7 +68,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -j 2 -vV --in-diff git.diff
+          cargo mutants --no-shuffle --exclude console.rs -vV --in-diff git.diff --shard ${{ matrix.shard }}/3
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()

From a2cbe791aadc0f1dea225a596a3fe2c3819476cc Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 10:56:07 +1000
Subject: [PATCH 11/16] rustfmt

---
 tests/cli/shard.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/cli/shard.rs b/tests/cli/shard.rs
index 4fc4b4cb..734209b2 100644
--- a/tests/cli/shard.rs
+++ b/tests/cli/shard.rs
@@ -10,12 +10,7 @@ use super::run;
 fn shard_divides_all_mutants() {
     // For speed, this only lists the mutants, trusting that the mutants
     // that are listed are the ones that are run.
-    let common_args = [
-        "mutants",
-        "--list",
-        "-d",
-        "testdata/well_tested",
-    ];
+    let common_args = ["mutants", "--list", "-d", "testdata/well_tested"];
     let full_list = String::from_utf8(
         run()
             .args(common_args)

From 748e28a360754a82b5aef87887073e4a954348ec Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 11:00:59 +1000
Subject: [PATCH 12/16] Also mention tempdir space

---
 book/src/parallelism.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/book/src/parallelism.md b/book/src/parallelism.md
index 4caf3e80..f0a4f4c8 100644
--- a/book/src/parallelism.md
+++ b/book/src/parallelism.md
@@ -36,6 +36,8 @@ behavior under high load. Ultimately you'll need to experiment to find the best
 
 To tune the number of jobs, you can watch `htop` or some similar program while the tests are running, to see whether cores are fully utilized or whether the system is running out of memory. On laptop or desktop machines you might also want to watch the temperature of the CPU.
 
+As well as using more CPU and RAM, higher `-j` settings will also use more disk space in your temporary directory: Rust `target` directories can commonly be 2GB or more, and there will be one per parallel job, plus whatever temp files your test suite might create.
+
 ## Interaction with `--test-threads`
 
 The Rust test framework exposes a `--test-threads` option controlling how many threads run inside a test binary. cargo-mutants doesn't set this, but you can set it from the command line, along with other parameters to the test binary. You might need to set this if your test suite is non-hermetic with regard to global process state.

From c0edc7983ed1b6819313b16680860525712a3f02 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 11:57:43 +1000
Subject: [PATCH 13/16] Run overall mutants after build and PR mutants

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4b84240b..278d527f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -78,7 +78,7 @@ jobs:
 
   cargo-mutants:
     runs-on: ubuntu-latest
-    # needs: [build, incremental-mutants]
+    needs: [build, pr-mutants]
     strategy:
       matrix:
         shard: [0, 1, 2, 3, 4, 5, 6, 7]

From 52df0f4a95d1e7ccbcff0bd5f2a5eb1c23185c61 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 12:01:26 +1000
Subject: [PATCH 14/16] More on shard scaling

---
 book/src/shards.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/book/src/shards.md b/book/src/shards.md
index 7c8d200b..f7a3e6e4 100644
--- a/book/src/shards.md
+++ b/book/src/shards.md
@@ -66,6 +66,14 @@ A rough model for the overall execution time for all of the shards, allowing for
 SHARD_STARTUP + (CLEAN_BUILD + TEST) + (N_MUTANTS/K) * (INCREMENTAL_BUILD + TEST)
 ```
 
+The total cost in CPU seconds can be modelled as:
+
+```raw
+K * (SHARD_STARTUP + CLEAN_BUILD + TEST) + N_MUTANTS * (INCREMENTAL_BUILD + TEST)
+```
+
+As a result, at very large `k` the cost of the initial setup work will dominate, but overall time to solution will be minimized.
+
 ## Choosing a number of shards
 
 Because there's some constant overhead for every shard there will be diminishing returns and increasing ineffiency if you use too many shards. (In the extreme cases where there are more shards than mutants, some of them will do the setup work, then find they have nothing to do and immediately exit.)
@@ -76,4 +84,6 @@ The optimal setting probably depends on how long your tree takes to build from z
 
 If your CI system offers a choice of VM sizes you might experiment with using smaller or larger VMs and more or less shards: the optimal setting probably also depends on your tree's ability to exploit larger machines.
 
-You should also think about cost and capacity constraints in your CI system.
+You should also think about cost and capacity constraints in your CI system, and the risk of starving out other users.
+
+cargo-mutants has no internal scaling constraints to prevent you from setting `k` very large, if cost, efficiency and CI capacity are not a concern.

From 6b9bcc641c983c759d7333d7b7ad1197a64df4fe Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 18:57:40 +1000
Subject: [PATCH 15/16] Don't shard pr-matrix

It plays badly with branch protection rules, which could be
solved, but also it seems like there will often not be enough new
mutants to be worthwhile.
---
 .github/workflows/tests.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 278d527f..4cb2570c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -49,9 +49,6 @@ jobs:
 
   pr-mutants:
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        shard: [0, 1, 2]
     if: github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v3
@@ -68,7 +65,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -vV --in-diff git.diff --shard ${{ matrix.shard }}/3
+          cargo mutants --no-shuffle --exclude console.rs -vV --in-diff git.diff
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()

From a196b437f4781ccab87c3ac34a7d9caa12e949b1 Mon Sep 17 00:00:00 2001
From: Martin Pool <mbp@sourcefrog.net>
Date: Sun, 17 Dec 2023 18:58:30 +1000
Subject: [PATCH 16/16] Don't specify exclusions in workflow

They're in mutants.toml so let's rely on that.
---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4cb2570c..61d59c46 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -65,7 +65,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -vV --in-diff git.diff
+          cargo mutants --no-shuffle -vV --in-diff git.diff
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()
@@ -88,7 +88,7 @@ jobs:
       - run: cargo install --path .
       - name: Mutants
         run: |
-          cargo mutants --no-shuffle --exclude console.rs -vV --shard ${{ matrix.shard }}/8
+          cargo mutants --no-shuffle -vV --shard ${{ matrix.shard }}/8
       - name: Archive mutants.out
         uses: actions/upload-artifact@v3
         if: always()