Skip to content

Commit

Permalink
rusty: Integrate stats with the metrics framework
Browse files Browse the repository at this point in the history
We need a layer of indirection between the stats collection and their
output destinations. Currently, stats are only printed to stdout. Our
goal is to integrate with various telemetry systems such as Prometheus,
StatsD, and custom metric backends like those used by Meta and Netflix.
Importantly, adding a new backend should not require changes to the
existing stats code.

This patch introduces the `metrics` [1] crate, which provides a
framework for defining metrics and publishing them to different
backends.

The initial implementation includes the `dispatched_tasks_count`
metric, tagged with `type`. This metric increments every time a task is
dispatched, emitting the raw count instead of a percentage. A monotonic
counter is the most suitable metric type for this use case, as
percentages can be calculated at query time if needed. Existing logged
metrics continue to print percentages and remain unchanged.

A new flag, `--enable-prometheus`, has been added; it is off by default.
When enabled, it starts a Prometheus endpoint on port 9000. This
endpoint allows metrics to be charted in Prometheus or Grafana
dashboards.

Future changes will migrate additional stats to this framework and add
support for other backends.

[1] https://metrics.rs/

Signed-off-by: Jose Fernandez <[email protected]>
  • Loading branch information
jfernandez committed Jun 20, 2024
1 parent 819ffd5 commit e053204
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 27 deletions.
2 changes: 2 additions & 0 deletions scheds/rust/scx_rusty/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ scx_utils = { path = "../../../rust/scx_utils", version = "0.8.1" }
simplelog = "0.12.0"
sorted-vec = "0.8.3"
static_assertions = "1.1.0"
metrics = "0.23.0"
metrics-exporter-prometheus = "0.15.0"

[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.8.1" }
Expand Down
96 changes: 69 additions & 27 deletions scheds/rust/scx_rusty/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
use log::info;
use metrics::counter;
use metrics_exporter_prometheus::PrometheusBuilder;
use scx_utils::compat;
use scx_utils::init_libbpf_logging;
use scx_utils::scx_ops_attach;
Expand Down Expand Up @@ -189,6 +191,10 @@ struct Opts {
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
verbose: u8,

/// Enable the Prometheus endpoint for metrics on port 9000.
#[clap(long, action = clap::ArgAction::SetTrue)]
enable_prometheus: bool,
}

fn read_total_cpu(reader: &procfs::ProcReader) -> Result<procfs::CpuStat> {
Expand Down Expand Up @@ -446,17 +452,41 @@ impl<'a> Scheduler<'a> {
lb_stats: &[NumaStat],
) {
let stat = |idx| bpf_stats[idx as usize];
let total = stat(bpf_intf::stat_idx_RUSTY_STAT_WAKE_SYNC)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_SYNC_PREV_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_PREV_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_PINNED)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_DISPATCH)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY_FAR)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DSQ_DISPATCH)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_LOCAL)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_XNUMA);

let wsync = stat(bpf_intf::stat_idx_RUSTY_STAT_WAKE_SYNC);
let wsync_prev_idle = stat(bpf_intf::stat_idx_RUSTY_STAT_SYNC_PREV_IDLE);
let prev_idle = stat(bpf_intf::stat_idx_RUSTY_STAT_PREV_IDLE);
let greedy_idle = stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_IDLE);
let pinned = stat(bpf_intf::stat_idx_RUSTY_STAT_PINNED);
let direct_dispatch = stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_DISPATCH);
let direct_greedy = stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY);
let direct_greedy_far = stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY_FAR);
let dsq = stat(bpf_intf::stat_idx_RUSTY_STAT_DSQ_DISPATCH);
let greedy_local = stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_LOCAL);
let greedy_xnuma = stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_XNUMA);
let total = wsync
+ wsync_prev_idle
+ prev_idle
+ greedy_idle
+ pinned
+ direct_dispatch
+ direct_greedy
+ direct_greedy_far
+ dsq
+ greedy_local
+ greedy_xnuma;

counter!("dispatched_tasks_count", "type" => "wsync_prev_idle").increment(wsync_prev_idle);
counter!("dispatched_tasks_count", "type" => "wsync").increment(wsync);
counter!("dispatched_tasks_count", "type" => "prev_idle").increment(prev_idle);
counter!("dispatched_tasks_count", "type" => "greedy_idle").increment(greedy_idle);
counter!("dispatched_tasks_count", "type" => "pinned").increment(pinned);
counter!("dispatched_tasks_count", "type" => "direct_dispatch").increment(direct_dispatch);
counter!("dispatched_tasks_count", "type" => "direct_greedy").increment(direct_greedy);
counter!("dispatched_tasks_count", "type" => "direct_greedy_far").increment(direct_greedy_far);
counter!("dispatched_tasks_count", "type" => "dsq").increment(dsq);
counter!("dispatched_tasks_count", "type" => "greedy_local").increment(greedy_local);
counter!("dispatched_tasks_count", "type" => "greedy_xnuma").increment(greedy_xnuma);

let numa_load_avg = lb_stats[0].load.load_avg();
let dom_load_avg = lb_stats[0].domains[0].load.load_avg();
Expand All @@ -470,45 +500,50 @@ impl<'a> Scheduler<'a> {
processing_dur.as_millis(),
);

let stat_pct = |idx| stat(idx) as f64 / total as f64 * 100.0;
let stat_pct = |value| value as f64 / total as f64 * 100.0;

info!(
"tot={:7} wsync_prev_idle={:5.2} wsync={:5.2}",
total,
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_SYNC_PREV_IDLE),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_WAKE_SYNC),
stat_pct(wsync_prev_idle),
stat_pct(wsync),
);

info!(
"prev_idle={:5.2} greedy_idle={:5.2} pin={:5.2}",
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_PREV_IDLE),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_IDLE),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_PINNED),
stat_pct(prev_idle),
stat_pct(greedy_idle),
stat_pct(pinned),
);

info!(
"dir={:5.2} dir_greedy={:5.2} dir_greedy_far={:5.2}",
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_DISPATCH),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY_FAR),
stat_pct(direct_dispatch),
stat_pct(direct_greedy),
stat_pct(direct_greedy_far),
);

info!(
"dsq={:5.2} greedy_local={:5.2} greedy_xnuma={:5.2}",
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DSQ_DISPATCH),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_LOCAL),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_XNUMA),
stat_pct(dsq),
stat_pct(greedy_local),
stat_pct(greedy_xnuma),
);

let kick_greedy = stat(bpf_intf::stat_idx_RUSTY_STAT_KICK_GREEDY);
let repatriate = stat(bpf_intf::stat_idx_RUSTY_STAT_REPATRIATE);
info!(
"kick_greedy={:5.2} rep={:5.2}",
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_KICK_GREEDY),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_REPATRIATE),
stat_pct(kick_greedy),
stat_pct(repatriate),
);

let dl_clamped = stat(bpf_intf::stat_idx_RUSTY_STAT_DL_CLAMP);
let dl_preset = stat(bpf_intf::stat_idx_RUSTY_STAT_DL_PRESET);
info!(
"dl_clamped={:5.2} dl_preset={:5.2}",
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DL_CLAMP),
stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DL_PRESET),
stat_pct(dl_clamped),
stat_pct(dl_preset),
);

info!("slice_length={}us", self.tuner.slice_ns / 1000);
Expand Down Expand Up @@ -621,6 +656,13 @@ fn main() -> Result<()> {
})
.context("Error setting Ctrl-C handler")?;

if opts.enable_prometheus {
info!("Enabling Prometheus endpoint: http://localhost:9000");
PrometheusBuilder::new()
.install()
.expect("failed to install Prometheus recorder");
}

loop {
let mut sched = Scheduler::init(&opts)?;
if !sched.run(shutdown.clone())?.should_restart() {
Expand Down

0 comments on commit e053204

Please sign in to comment.