Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add topo.nr_cpu_ids() to Topology crate #379

Merged
merged 3 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 44 additions & 34 deletions rust/scx_utils/src/topology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ pub struct Topology {
cpus: BTreeMap<usize, Cpu>,
span: Cpumask,
nr_cpus_possible: usize,
nr_cpus_online: usize,
nr_cpu_ids: usize,
}

impl Topology {
Expand All @@ -195,7 +197,7 @@ impl Topology {
// If the kernel is compiled with CONFIG_NUMA, then build a topology
// from the NUMA hierarchy in sysfs. Otherwise, just make a single
// default node of ID 0 which contains all cores.
let nodes = if Path::new("/sys/devices/system/node").exists() {
let (nodes, nr_cpu_ids) = if Path::new("/sys/devices/system/node").exists() {
create_numa_nodes(&span)?
} else {
create_default_node(&span)?
Expand All @@ -222,7 +224,8 @@ impl Topology {
}

let nr_cpus_possible = libbpf_rs::num_possible_cpus().unwrap();
Ok(Topology { nodes, cores, cpus, span, nr_cpus_possible, })
let nr_cpus_online = span.weight();
Ok(Topology { nodes, cores, cpus, span, nr_cpus_possible, nr_cpus_online, nr_cpu_ids, })
}

/// Get a slice of the NUMA nodes on the host.
Expand All @@ -245,22 +248,32 @@ impl Topology {
&self.span
}

/// Get the maximum possible number of CPUs. Note that this number is likely
/// only applicable in the context of storing and extracting per-CPU data
/// between user space and BPF, as it doesn't necessarily reflect the actual
/// number of online or even possible CPUs in the system.
///
/// For example, as described in
/// https://bugzilla.kernel.org/show_bug.cgi?id=218109, some buggy AMD BIOS
/// implementations may incorrectly report disabled CPUs as offlined / part
/// of the CPUs possible mask.
///
/// Even if the CPUs are possible and may be enabled with hotplug, they're
/// not active now, so you wouldn't want to use this number to determine
/// system load, util, etc.
/// Get the number of possible CPUs that may be active on the system. Note
/// that this value is separate from the number of possible _CPU IDs_ in the
/// system, as there may be gaps in what CPUs are allowed to be onlined. For
/// example, some BIOS implementations may report spans of disabled CPUs
/// that may not be onlined, whose IDs are lower than the IDs of other CPUs
/// that may be onlined.
pub fn nr_cpus_possible(&self) -> usize {
// Cached value: captured once from libbpf_rs::num_possible_cpus() while
// the Topology was being built, so it is not re-queried on each call.
self.nr_cpus_possible
}

/// Get the number of CPUs that were online when the Topology was created.
///
/// The value is computed once, as the weight of the Topology's span
/// Cpumask, while the Topology is being built. Because Topology objects are
/// read-only, this value will not change if a CPU is onlined after a
/// Topology object has been created.
pub fn nr_cpus_online(&self) -> usize {
self.nr_cpus_online
}

/// Get the maximum possible number of CPU IDs in the system, i.e. one more
/// than the highest CPU ID encountered while the Topology was built.
///
/// As noted in the documentation of nr_cpus_possible(), this may differ
/// from the number of possible CPUs on the system (though it very seldom
/// does). The two numbers differ when e.g. there are fully disabled CPUs in
/// the middle of the range of possible CPUs (i.e. CPUs that may be
/// onlined).
pub fn nr_cpu_ids(&self) -> usize {
self.nr_cpu_ids
}
}

/// Generate a topology map from a Topology object, represented as an array of arrays.
Expand All @@ -283,34 +296,21 @@ impl Topology {
#[derive(Debug)]
pub struct TopologyMap {
map: Vec<Vec<usize>>,
nr_cpus_possible: usize,
nr_cpus_online: usize,
}

impl TopologyMap {
pub fn new(topo: Topology) -> Result<TopologyMap> {
pub fn new(topo: &Topology) -> Result<TopologyMap> {
let mut map: Vec<Vec<usize>> = Vec::new();
let mut nr_cpus_online = 0;

for core in topo.cores().into_iter() {
let mut cpu_ids: Vec<usize> = Vec::new();
for cpu_id in core.span().clone().into_iter() {
cpu_ids.push(cpu_id);
nr_cpus_online += 1;
}
map.push(cpu_ids);
}
let nr_cpus_possible = topo.nr_cpus_possible;

Ok(TopologyMap { map, nr_cpus_possible, nr_cpus_online })
}

pub fn nr_cpus_possible(&self) -> usize {
self.nr_cpus_possible
}

pub fn nr_cpus_online(&self) -> usize {
self.nr_cpus_online
Ok(TopologyMap { map, })
}

pub fn iter(&self) -> Iter<Vec<usize>> {
Expand Down Expand Up @@ -429,7 +429,7 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
Ok(())
}

fn create_default_node(online_mask: &Cpumask) -> Result<Vec<Node>> {
fn create_default_node(online_mask: &Cpumask) -> Result<(Vec<Node>, usize)> {
let mut nodes: Vec<Node> = Vec::with_capacity(1);
let mut node = Node {
id: 0,
Expand All @@ -441,6 +441,7 @@ fn create_default_node(online_mask: &Cpumask) -> Result<Vec<Node>> {
bail!("/sys/devices/system/cpu sysfs node not found");
}

let mut nr_cpu_ids = 0;
let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*")?;
for cpu_path in cpu_paths.filter_map(Result::ok) {
let cpu_str = cpu_path.to_str().unwrap().trim();
Expand All @@ -451,17 +452,22 @@ fn create_default_node(online_mask: &Cpumask) -> Result<Vec<Node>> {
}
};

if cpu_id + 1 > nr_cpu_ids {
nr_cpu_ids = cpu_id + 1;
}

create_insert_cpu(cpu_id, &mut node, &online_mask)?;
}

nodes.push(node);

Ok(nodes)
Ok((nodes, nr_cpu_ids))
}

fn create_numa_nodes(online_mask: &Cpumask) -> Result<Vec<Node>> {
fn create_numa_nodes(online_mask: &Cpumask) -> Result<(Vec<Node>, usize)> {
let mut nodes: Vec<Node> = Vec::new();

let mut nr_cpu_ids = 0;
let numa_paths = glob("/sys/devices/system/node/node*")?;
for numa_path in numa_paths.filter_map(Result::ok) {
let numa_str = numa_path.to_str().unwrap().trim();
Expand Down Expand Up @@ -489,10 +495,14 @@ fn create_numa_nodes(online_mask: &Cpumask) -> Result<Vec<Node>> {
}
};

if cpu_id + 1 > nr_cpu_ids {
nr_cpu_ids = cpu_id + 1;
}

create_insert_cpu(cpu_id, &mut node, &online_mask)?;
}

nodes.push(node);
}
Ok(nodes)
Ok((nodes, nr_cpu_ids))
}
16 changes: 8 additions & 8 deletions scheds/rust/scx_rlfifo/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ impl<'a> Scheduler<'a> {
fn init() -> Result<Self> {
let topo = Topology::new().expect("Failed to build host topology");
let bpf = BpfScheduler::init(
5000, // slice_ns (default task time slice)
topo.nr_cpus_possible() as i32, // nr_cpus (max CPUs available in the system)
false, // partial (include all tasks if disabled)
0, // exit_dump_len (buffer size of exit info)
true, // full_user (schedule all tasks in user-space)
false, // low_power (low power mode)
false, // fifo_sched (enable BPF FIFO scheduling)
false, // debug (debug mode)
5000, // slice_ns (default task time slice)
topo.nr_cpu_ids() as i32, // nr_cpus (max CPUs available in the system)
false, // partial (include all tasks if disabled)
0, // exit_dump_len (buffer size of exit info)
true, // full_user (schedule all tasks in user-space)
false, // low_power (low power mode)
false, // fifo_sched (enable BPF FIFO scheduling)
false, // debug (debug mode)
)?;
Ok(Self { bpf })
}
Expand Down
4 changes: 2 additions & 2 deletions scheds/rust/scx_rustland/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ impl<'a> Scheduler<'a> {
fn init(opts: &Opts) -> Result<Self> {
// Initialize core mapping topology.
let topo = Topology::new().expect("Failed to build host topology");
let topo_map = TopologyMap::new(topo).expect("Failed to generate topology map");
let topo_map = TopologyMap::new(&topo).expect("Failed to generate topology map");

// Save the default time slice (in ns) in the scheduler class.
let slice_ns = opts.slice_us * NSEC_PER_USEC;
Expand Down Expand Up @@ -301,7 +301,7 @@ impl<'a> Scheduler<'a> {
let init_page_faults: u64 = 0;

// Low-level BPF connector.
let nr_cpus = topo_map.nr_cpus_possible();
let nr_cpus = topo.nr_cpu_ids();
let bpf = BpfScheduler::init(
opts.slice_us,
nr_cpus as i32,
Expand Down
6 changes: 3 additions & 3 deletions scheds/rust/scx_rusty/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ UEI_DEFINE(uei);
*/
const volatile u32 nr_doms = 32; /* !0 for veristat, set during init */
const volatile u32 nr_nodes = 32; /* !0 for veristat, set during init */
const volatile u32 nr_cpus_possible = 64; /* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 64; /* !0 for veristat, set during init */
const volatile u32 cpu_dom_id_map[MAX_CPUS];
const volatile u32 dom_numa_id_map[MAX_DOMS];
const volatile u64 dom_cpumasks[MAX_DOMS][MAX_CPUS / 64];
Expand Down Expand Up @@ -493,7 +493,7 @@ static void refresh_tune_params(void)
tune_params_gen = tune_input.gen;
slice_ns = tune_input.slice_ns;

bpf_for(cpu, 0, nr_cpus_possible) {
bpf_for(cpu, 0, nr_cpu_ids) {
u32 dom_id = cpu_to_dom_id(cpu);
struct dom_ctx *domc;

Expand Down Expand Up @@ -1784,7 +1784,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init)
return ret;
}

bpf_for(i, 0, nr_cpus_possible) {
bpf_for(i, 0, nr_cpu_ids) {
if (is_offline_cpu(i))
continue;

Expand Down
12 changes: 6 additions & 6 deletions scheds/rust/scx_rusty/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,10 @@ impl<'a> Scheduler<'a> {

let domains = Arc::new(DomainGroup::new(top.clone(), &opts.cpumasks)?);

if top.nr_cpus_possible() > MAX_CPUS {
if top.nr_cpu_ids() > MAX_CPUS {
bail!(
"Num possible CPUs ({}) exceeds maximum of ({})",
top.nr_cpus_possible(),
"Num possible CPU IDs ({}) exceeds maximum of ({})",
top.nr_cpu_ids(),
MAX_CPUS
);
}
Expand All @@ -261,13 +261,13 @@ impl<'a> Scheduler<'a> {

skel.rodata_mut().nr_nodes = domains.nr_nodes() as u32;
skel.rodata_mut().nr_doms = domains.nr_doms() as u32;
skel.rodata_mut().nr_cpus_possible = top.nr_cpus_possible() as u32;
skel.rodata_mut().nr_cpu_ids = top.nr_cpu_ids() as u32;

// Any CPU with dom > MAX_DOMS is considered offline by default. There
// are a few places in the BPF code where we skip over offlined CPUs
// (e.g. when initializing or refreshing tune params), and elsewhere the
// scheduler will error if we try to schedule from them.
for cpu in 0..top.nr_cpus_possible() {
for cpu in 0..top.nr_cpu_ids() {
skel.rodata_mut().cpu_dom_id_map[cpu] = u32::MAX;
}

Expand Down Expand Up @@ -413,7 +413,7 @@ impl<'a> Scheduler<'a> {
let stats_map = maps.stats();
let mut stats: Vec<u64> = Vec::new();
let zero_vec =
vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpus_possible()];
vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpu_ids()];

for stat in 0..bpf_intf::stat_idx_RUSTY_NR_STATS {
let cpu_stat_vec = stats_map
Expand Down