
Commit

Merge pull request #199 from sched-ext/lavd-task-states
scx_lavd: Clean up task state transition tracking
multics69 authored Mar 28, 2024
2 parents 8316948 + 129d99f commit 360d4ec
Showing 3 changed files with 40 additions and 105 deletions.
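
Before the diff, a brief orientation: the commit removes the per-task task_stat enum and the transit_task_stat() gate that validated state transitions, and instead calls each statistics helper directly from the sched_ext callback it belongs to (lavd_runnable, lavd_running, lavd_stopping, and the new lavd_quiescent path via update_stat_for_quiescent). For readers who want the removed gate in isolation, here is a standalone C sketch: the enum values and the transition table are re-typed from the deleted code, while the plain bool return handling and the small main() driver are additions for illustration only (the real function runs in BPF and reports invalid states via scx_bpf_error()).

/*
 * Standalone sketch of the state machine this commit removes. The enum and
 * the valid_tgt_stat[] table are re-typed from the deleted code; the bool
 * return handling and the main() driver are illustration-only additions.
 */
#include <stdbool.h>
#include <stdio.h>

enum task_stat {
	LAVD_TASK_STAT_STOPPING = 0,	/* initial state */
	LAVD_TASK_STAT_ENQ,
	LAVD_TASK_STAT_RUNNING,
	_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};

/* Only STOPPING -> ENQ -> RUNNING -> STOPPING -> ... is accepted. */
static const int valid_tgt_stat[] = {
	[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
	[LAVD_TASK_STAT_ENQ]      = LAVD_TASK_STAT_RUNNING,
	[LAVD_TASK_STAT_RUNNING]  = LAVD_TASK_STAT_STOPPING,
};

static bool transit_task_stat(int *stat, int tgt_stat)
{
	if (*stat < 0 || *stat > _LAVD_TASK_STAT_MAX)
		return false;	/* corrupted source state */
	if (valid_tgt_stat[*stat] != tgt_stat)
		return false;	/* invalid transition: skip the stat update */
	*stat = tgt_stat;
	return true;
}

int main(void)
{
	int stat = LAVD_TASK_STAT_STOPPING;

	/* A duplicate ENQ (e.g., a dequeue/re-enqueue on attribute change)
	 * is silently ignored, exactly like the removed BPF code did. */
	printf("ENQ:     %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_ENQ));     /* 1 */
	printf("ENQ:     %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_ENQ));     /* 0 */
	printf("RUNNING: %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_RUNNING)); /* 1 */
	return 0;
}

After the change the gate is simply gone: by keying the updates off the runnable/running/stopping/quiescent callbacks rather than the enqueue path, the ENQ -> ENQ and skipped-enqueue cases described in the removed comment below no longer come up.
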
14 changes: 0 additions & 14 deletions scheds/rust/scx_lavd/src/bpf/intf.h
@@ -119,19 +119,6 @@ struct cpu_ctx {
volatile u64 sched_nr; /* number of schedules */
};

/*
* Per-task scheduling context
*/
enum task_stat {
_LAVD_TASK_STAT_MIN = 0,

LAVD_TASK_STAT_STOPPING = _LAVD_TASK_STAT_MIN,
LAVD_TASK_STAT_ENQ,
LAVD_TASK_STAT_RUNNING,

_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};

struct task_ctx {
/*
* Essential task running statistics for latency criticality calculation
@@ -151,7 +138,6 @@ struct task_ctx {
u64 slice_ns;
u64 greedy_ratio;
u64 lat_cri;
u16 stat; /* NIL -> ENQ -> RUN -> STOP -> NIL ... */
u16 slice_boost_prio;/* how many times a task fully consumed the slice */
u16 lat_prio; /* latency priority */
s16 lat_boost_prio; /* DEBUG */
127 changes: 40 additions & 87 deletions scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -1220,58 +1220,6 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
return slice;
}

static bool transit_task_stat(struct task_ctx *taskc, int tgt_stat)
{
/*
* Update task loads only when the state transition is valid. So far,
* two types of invalid state transitions have been observed, and there
* are reasons for that. The two are as follows:
*
* - ENQ -> ENQ: This transition can happen because scx_lavd does not
* provide ops.dequeue. When task attributes are updated (e.g., nice
* level, allowed cpus and so on), the scx core will dequeue the task
* and re-enqueue it (ENQ->DEQ->ENQ). However, when ops.dequeue() is
* not provided, the dequeue operation is handled by the scx core.
* Hence, ignoring the dequeue operation is completely fine.
*
* - STOPPING -> RUNNING: This can happen because there are several
* special cases where the scx core skips enqueue, including: 1) bypass
* mode is turned on (bypass mode is used during both init and exit,
* and across suspend/resume operations); 2)
* SCX_OPS_ENQ_EXITING is not set and an exiting task is woken up;
* 3) the associated CPU is not fully online. However, we avoid
* collecting time & frequency statistics for such special cases for
* accuracy.
*
* initial state
* -------------
* |
* \/
* [STOPPING] --> [ENQ] --> [RUNNING]
* /\ |
* | |
* +-------------------------+
*/
const static int valid_tgt_stat[] = {
[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
[LAVD_TASK_STAT_ENQ] = LAVD_TASK_STAT_RUNNING,
[LAVD_TASK_STAT_RUNNING] = LAVD_TASK_STAT_STOPPING,
};
int src_stat = taskc->stat;

if (src_stat < _LAVD_TASK_STAT_MIN || src_stat > _LAVD_TASK_STAT_MAX) {
scx_bpf_error("Invalid task state: %d", src_stat);
return false;
}

if (valid_tgt_stat[src_stat] == tgt_stat) {
taskc->stat = tgt_stat;
return true;
}

return false;
}

static void update_stat_for_enq(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
@@ -1323,13 +1271,6 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,

now = bpf_ktime_get_ns();

/*
* When stopped, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);

/*
* Update task's run_time. If a task got slice-boosted -- in other
* words, its time slices have been fully consumed multiple times,
@@ -1344,6 +1285,17 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
taskc->last_stop_clk = now;
}

static void update_stat_for_quiescent(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
/*
* When quiescent, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);
}

static void calc_when_to_run(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc, u64 enq_flags)
{
@@ -1379,14 +1331,6 @@ static bool put_local_rq(struct task_struct *p, struct task_ctx *taskc,
if (!is_eligible(taskc))
return false;

/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics
* become available (ops.running).
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);

/*
* This task should be scheduled as soon as possible (e.g., when woken up),
* so the deadline is of no use; it is enqueued into a local DSQ, which
@@ -1416,12 +1360,6 @@ static bool put_global_rq(struct task_struct *p, struct task_ctx *taskc,
*/
calc_when_to_run(p, taskc, cpuc, enq_flags);

/*
* Reflect task's load immediately.
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);

/*
* Enqueue the task to the global runqueue based on its virtual
* deadline.
@@ -1511,10 +1449,23 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)

void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
{
struct cpu_ctx *cpuc;
struct task_struct *waker;
struct task_ctx *taskc;
struct task_ctx *p_taskc, *waker_taskc;
u64 now, interval;

cpuc = get_cpu_ctx();
p_taskc = get_task_ctx(p);
if (!cpuc || !p_taskc)
return;

/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics become
* available (ops.running).
*/
update_stat_for_enq(p, p_taskc, cpuc);

/*
* When a task @p is woken up, the wake frequency of its waker task
* is updated. The @current task is a waker and @p is a waiter, which
@@ -1524,8 +1475,8 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
return;

waker = bpf_get_current_task_btf();
taskc = try_get_task_ctx(waker);
if (!taskc) {
waker_taskc = try_get_task_ctx(waker);
if (!waker_taskc) {
/*
* In this case, the waker could be an idle task
* (swapper/_[_]), so we just ignore.
@@ -1534,9 +1485,9 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
}

now = bpf_ktime_get_ns();
interval = now - taskc->last_wake_clk;
taskc->wake_freq = calc_avg_freq(taskc->wake_freq, interval);
taskc->last_wake_clk = now;
interval = now - waker_taskc->last_wake_clk;
waker_taskc->wake_freq = calc_avg_freq(waker_taskc->wake_freq, interval);
waker_taskc->last_wake_clk = now;
}

void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
@@ -1555,8 +1506,7 @@ void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
if (!cpuc)
return;

if (transit_task_stat(taskc, LAVD_TASK_STAT_RUNNING))
update_stat_for_run(p, taskc, cpuc);
update_stat_for_run(p, taskc, cpuc);

/*
* Calculate the task's time slice based on the updated load.
@@ -1619,8 +1569,7 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
if (!taskc)
return;

if (transit_task_stat(taskc, LAVD_TASK_STAT_STOPPING))
update_stat_for_stop(p, taskc, cpuc);
update_stat_for_stop(p, taskc, cpuc);

/*
* Adjust slice boost for the task's next schedule.
@@ -1630,9 +1579,17 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)

void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
{
struct cpu_ctx *cpuc;
struct task_ctx *taskc;
u64 now, interval;

cpuc = get_cpu_ctx();
taskc = get_task_ctx(p);
if (!cpuc || !taskc)
return;

update_stat_for_quiescent(p, taskc, cpuc);

/*
* If a task @p is dequeued from a run queue for some reason
* other than going to sleep, it is an implementation-level side
@@ -1644,10 +1601,6 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
/*
* When a task @p goes to sleep, its associated wait_freq is updated.
*/
taskc = get_task_ctx(p);
if (!taskc)
return;

now = bpf_ktime_get_ns();
interval = now - taskc->last_wait_clk;
taskc->wait_freq = calc_avg_freq(taskc->wait_freq, interval);
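
A note on the load accounting that moved in this file: per-CPU load is now added in ops.runnable (update_stat_for_enq) and subtracted in ops.quiescent (the new update_stat_for_quiescent), instead of being added on enqueue and subtracted in ops.stopping. The toy, userspace-only model below illustrates that pairing; the struct and helper names merely mirror cpu_ctx/task_ctx and the helpers from the diff, and nothing in it is the actual BPF code (in particular, the real quiescent path recomputes the ideal load via get_task_load_ideal()).

/*
 * Toy model of the runnable/quiescent load pairing; not the BPF scheduler.
 * Names mirror cpu_ctx/task_ctx and the stat helpers from the diff only for
 * readability.
 */
#include <assert.h>
#include <stdio.h>

struct cpu_ctx  { unsigned long load_actual, load_ideal; };
struct task_ctx { unsigned long load_actual, load_ideal; };

/* ops.runnable: the task becomes runnable, so its load joins the CPU's. */
static void update_stat_for_enq(struct task_ctx *t, struct cpu_ctx *c)
{
	c->load_actual += t->load_actual;
	c->load_ideal  += t->load_ideal;
}

/* ops.quiescent: the task goes to sleep, so its load leaves the CPU's. */
static void update_stat_for_quiescent(struct task_ctx *t, struct cpu_ctx *c)
{
	c->load_actual -= t->load_actual;
	c->load_ideal  -= t->load_ideal;
}

int main(void)
{
	struct cpu_ctx cpu = { 0, 0 };
	struct task_ctx task = { .load_actual = 700, .load_ideal = 1024 };

	update_stat_for_enq(&task, &cpu);
	/* running/stopping may repeat many times here (preemption); after
	 * this commit they no longer touch the per-CPU load at all. */
	update_stat_for_quiescent(&task, &cpu);

	/* The pairing keeps the per-CPU counters balanced over a lifecycle. */
	assert(cpu.load_actual == 0 && cpu.load_ideal == 0);
	printf("per-CPU load balanced\n");
	return 0;
}

That symmetry is presumably the point of moving the subtraction out of ops.stopping: a task can stop and run again many times without sleeping, but it becomes runnable and quiescent exactly once per wake/sleep cycle.
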
4 changes: 0 additions & 4 deletions scheds/rust/scx_lavd/src/main.rs
@@ -10,10 +10,6 @@ pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

extern crate libc;
extern crate plain;
extern crate static_assertions;

use std::mem;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
