
Commit

Merge pull request #199 from sched-ext/lavd-task-states
scx_lavd: Clean up task state transition tracking
multics69 authored Mar 28, 2024
2 parents 8316948 + 129d99f commit 360d4ec
Showing 3 changed files with 40 additions and 105 deletions.
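
Before the diff, a brief orientation: the commit removes the per-task task_stat enum and the transit_task_stat() gate that validated state transitions, and instead calls each statistics helper directly from the sched_ext callback it belongs to (lavd_runnable, lavd_running, lavd_stopping, and the new lavd_quiescent path via update_stat_for_quiescent). For readers who want the removed gate in isolation, here is a standalone C sketch: the enum values and the transition table are re-typed from the deleted code, while the plain bool return handling and the small main() driver are additions for illustration only (the real function runs in BPF and reports invalid states via scx_bpf_error()).

/*
 * Standalone sketch of the state machine this commit removes. The enum and
 * the valid_tgt_stat[] table are re-typed from the deleted code; the bool
 * return handling and the main() driver are illustration-only additions.
 */
#include <stdbool.h>
#include <stdio.h>

enum task_stat {
	LAVD_TASK_STAT_STOPPING = 0,	/* initial state */
	LAVD_TASK_STAT_ENQ,
	LAVD_TASK_STAT_RUNNING,
	_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};

/* Only STOPPING -> ENQ -> RUNNING -> STOPPING -> ... is accepted. */
static const int valid_tgt_stat[] = {
	[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
	[LAVD_TASK_STAT_ENQ]      = LAVD_TASK_STAT_RUNNING,
	[LAVD_TASK_STAT_RUNNING]  = LAVD_TASK_STAT_STOPPING,
};

static bool transit_task_stat(int *stat, int tgt_stat)
{
	if (*stat < 0 || *stat > _LAVD_TASK_STAT_MAX)
		return false;	/* corrupted source state */
	if (valid_tgt_stat[*stat] != tgt_stat)
		return false;	/* invalid transition: skip the stat update */
	*stat = tgt_stat;
	return true;
}

int main(void)
{
	int stat = LAVD_TASK_STAT_STOPPING;

	/* A duplicate ENQ (e.g., a dequeue/re-enqueue on attribute change)
	 * is silently ignored, exactly like the removed BPF code did. */
	printf("ENQ:     %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_ENQ));     /* 1 */
	printf("ENQ:     %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_ENQ));     /* 0 */
	printf("RUNNING: %d\n", transit_task_stat(&stat, LAVD_TASK_STAT_RUNNING)); /* 1 */
	return 0;
}

After the change the gate is simply gone: by keying the updates off the runnable/running/stopping/quiescent callbacks rather than the enqueue path, the ENQ -> ENQ and skipped-enqueue cases described in the removed comment below no longer come up.
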
14 changes: 0 additions & 14 deletions scheds/rust/scx_lavd/src/bpf/intf.h
@@ -119,19 +119,6 @@ struct cpu_ctx {
volatile u64 sched_nr; /* number of schedules */
};

/*
* Per-task scheduling context
*/
enum task_stat {
_LAVD_TASK_STAT_MIN = 0,

LAVD_TASK_STAT_STOPPING = _LAVD_TASK_STAT_MIN,
LAVD_TASK_STAT_ENQ,
LAVD_TASK_STAT_RUNNING,

_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};

struct task_ctx {
/*
* Essential task running statistics for latency criticality calculation
@@ -151,7 +138,6 @@ struct task_ctx {
u64 slice_ns;
u64 greedy_ratio;
u64 lat_cri;
u16 stat; /* NIL -> ENQ -> RUN -> STOP -> NIL ... */
u16 slice_boost_prio;/* how many times a task fully consumed the slice */
u16 lat_prio; /* latency priority */
s16 lat_boost_prio; /* DEBUG */
127 changes: 40 additions & 87 deletions scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -1220,58 +1220,6 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
return slice;
}

static bool transit_task_stat(struct task_ctx *taskc, int tgt_stat)
{
/*
* Update task loads only when the state transition is valid. So far,
* two types of invalid state transitions have been observed, and there
* are reasons for that. The two are as follows:
*
* - ENQ -> ENQ: This transition can happen because scx_lavd does not
* provide ops.dequeue. When task attributes are updated (e.g., nice
* level, allowed cpus and so on), the scx core will dequeue the task
* and re-enqueue it (ENQ->DEQ->ENQ). However, when ops.dequeue() is
* not provided, the dequeue operation is handled by the scx core.
* Hence, ignoring the dequeue operation is completely fine.
*
* - STOPPING -> RUNNING: This can happen because there are several
* special cases where the scx core skips enqueue, including: 1) bypass
* mode is turned on (bypass mode is used during both init and exit,
* and across suspend/resume operations); 2)
* SCX_OPS_ENQ_EXITING is not set and an exiting task is woken up;
* 3) the associated CPU is not fully online. However, we avoid
* collecting time & frequency statistics for such special cases for
* accuracy.
*
* initial state
* -------------
* |
* \/
* [STOPPING] --> [ENQ] --> [RUNNING]
* /\ |
* | |
* +-------------------------+
*/
const static int valid_tgt_stat[] = {
[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
[LAVD_TASK_STAT_ENQ] = LAVD_TASK_STAT_RUNNING,
[LAVD_TASK_STAT_RUNNING] = LAVD_TASK_STAT_STOPPING,
};
int src_stat = taskc->stat;

if (src_stat < _LAVD_TASK_STAT_MIN || src_stat > _LAVD_TASK_STAT_MAX) {
scx_bpf_error("Invalid task state: %d", src_stat);
return false;
}

if (valid_tgt_stat[src_stat] == tgt_stat) {
taskc->stat = tgt_stat;
return true;
}

return false;
}

static void update_stat_for_enq(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
@@ -1323,13 +1271,6 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,

now = bpf_ktime_get_ns();

/*
* When stopped, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);

/*
* Update task's run_time. If a task got slice-boosted -- in other
* words, its time slices have been fully consumed multiple times,
@@ -1344,6 +1285,17 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
taskc->last_stop_clk = now;
}

static void update_stat_for_quiescent(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
/*
* When quiescent, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);
}

static void calc_when_to_run(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc, u64 enq_flags)
{
@@ -1379,14 +1331,6 @@ static bool put_local_rq(struct task_struct *p, struct task_ctx *taskc,
if (!is_eligible(taskc))
return false;

/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics
* become available (ops.running).
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);

/*
* This task should be scheduled as soon as possible (e.g., when woken up),
* so the deadline is of no use; it is enqueued into a local DSQ, which
@@ -1416,12 +1360,6 @@ static bool put_global_rq(struct task_struct *p, struct task_ctx *taskc,
*/
calc_when_to_run(p, taskc, cpuc, enq_flags);

/*
* Reflect task's load immediately.
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);

/*
* Enqueue the task to the global runqueue based on its virtual
* deadline.
@@ -1511,10 +1449,23 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)

void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
{
struct cpu_ctx *cpuc;
struct task_struct *waker;
struct task_ctx *taskc;
struct task_ctx *p_taskc, *waker_taskc;
u64 now, interval;

cpuc = get_cpu_ctx();
p_taskc = get_task_ctx(p);
if (!cpuc || !p_taskc)
return;

/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics become
* available (ops.running).
*/
update_stat_for_enq(p, p_taskc, cpuc);

/*
* When a task @p is woken up, the wake frequency of its waker task
* is updated. The @current task is a waker and @p is a waiter, which
@@ -1524,8 +1475,8 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
return;

waker = bpf_get_current_task_btf();
taskc = try_get_task_ctx(waker);
if (!taskc) {
waker_taskc = try_get_task_ctx(waker);
if (!waker_taskc) {
/*
* In this case, the waker could be an idle task
* (swapper/_[_]), so we just ignore.
@@ -1534,9 +1485,9 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
}

now = bpf_ktime_get_ns();
interval = now - taskc->last_wake_clk;
taskc->wake_freq = calc_avg_freq(taskc->wake_freq, interval);
taskc->last_wake_clk = now;
interval = now - waker_taskc->last_wake_clk;
waker_taskc->wake_freq = calc_avg_freq(waker_taskc->wake_freq, interval);
waker_taskc->last_wake_clk = now;
}

void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
@@ -1555,8 +1506,7 @@ void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
if (!cpuc)
return;

if (transit_task_stat(taskc, LAVD_TASK_STAT_RUNNING))
update_stat_for_run(p, taskc, cpuc);
update_stat_for_run(p, taskc, cpuc);

/*
* Calculate the task's time slice based on the updated load.
@@ -1619,8 +1569,7 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
if (!taskc)
return;

if (transit_task_stat(taskc, LAVD_TASK_STAT_STOPPING))
update_stat_for_stop(p, taskc, cpuc);
update_stat_for_stop(p, taskc, cpuc);

/*
* Adjust slice boost for the task's next schedule.
@@ -1630,9 +1579,17 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)

void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
{
struct cpu_ctx *cpuc;
struct task_ctx *taskc;
u64 now, interval;

cpuc = get_cpu_ctx();
taskc = get_task_ctx(p);
if (!cpuc || !taskc)
return;

update_stat_for_quiescent(p, taskc, cpuc);

/*
* If a task @p is dequeued from a run queue for some reason
* other than going to sleep, it is an implementation-level side
@@ -1644,10 +1601,6 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
/*
* When a task @p goes to sleep, its associated wait_freq is updated.
*/
taskc = get_task_ctx(p);
if (!taskc)
return;

now = bpf_ktime_get_ns();
interval = now - taskc->last_wait_clk;
taskc->wait_freq = calc_avg_freq(taskc->wait_freq, interval);
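
A note on the load accounting that moved in this file: per-CPU load is now added in ops.runnable (update_stat_for_enq) and subtracted in ops.quiescent (the new update_stat_for_quiescent), instead of being added on enqueue and subtracted in ops.stopping. The toy, userspace-only model below illustrates that pairing; the struct and helper names merely mirror cpu_ctx/task_ctx and the helpers from the diff, and nothing in it is the actual BPF code (in particular, the real quiescent path recomputes the ideal load via get_task_load_ideal()).

/*
 * Toy model of the runnable/quiescent load pairing; not the BPF scheduler.
 * Names mirror cpu_ctx/task_ctx and the stat helpers from the diff only for
 * readability.
 */
#include <assert.h>
#include <stdio.h>

struct cpu_ctx  { unsigned long load_actual, load_ideal; };
struct task_ctx { unsigned long load_actual, load_ideal; };

/* ops.runnable: the task becomes runnable, so its load joins the CPU's. */
static void update_stat_for_enq(struct task_ctx *t, struct cpu_ctx *c)
{
	c->load_actual += t->load_actual;
	c->load_ideal  += t->load_ideal;
}

/* ops.quiescent: the task goes to sleep, so its load leaves the CPU's. */
static void update_stat_for_quiescent(struct task_ctx *t, struct cpu_ctx *c)
{
	c->load_actual -= t->load_actual;
	c->load_ideal  -= t->load_ideal;
}

int main(void)
{
	struct cpu_ctx cpu = { 0, 0 };
	struct task_ctx task = { .load_actual = 700, .load_ideal = 1024 };

	update_stat_for_enq(&task, &cpu);
	/* running/stopping may repeat many times here (preemption); after
	 * this commit they no longer touch the per-CPU load at all. */
	update_stat_for_quiescent(&task, &cpu);

	/* The pairing keeps the per-CPU counters balanced over a lifecycle. */
	assert(cpu.load_actual == 0 && cpu.load_ideal == 0);
	printf("per-CPU load balanced\n");
	return 0;
}

That symmetry is presumably the point of moving the subtraction out of ops.stopping: a task can stop and run again many times without sleeping, but it becomes runnable and quiescent exactly once per wake/sleep cycle.
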
4 changes: 0 additions & 4 deletions scheds/rust/scx_lavd/src/main.rs
@@ -10,10 +10,6 @@ pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

extern crate libc;
extern crate plain;
extern crate static_assertions;

use std::mem;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
