From 416d6a940f83fceadffa4dde977daa2654767274 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 24 Feb 2024 15:56:34 +0100 Subject: [PATCH 1/8] rust: introduce scx_rustland_core crate Introduce a separate crate (scx_rustland_core) that can be used to implement sched-ext schedulers in Rust that run in user-space. This commit only provides the basic layout for the new crate and the abstraction to the custom allocator. In general, any scheduler that has a user-space component needs to use the custom allocator to prevent potential deadlock conditions, caused by page faults (a kthread needs to run to resolve the page fault, but the scheduler is blocked waiting for the user-space page fault to be resolved => deadlock). However, we don't want to necessarily enforce this constraint to all the existing Rust schedulers, some of them may do all user-space allocations in safe paths, hence the separate scx_rustland_core crate. Merging this code in scx_utils would force all the Rust schedulers to use the custom allocator. In a future commit the scx_rustland backend will be moved to scx_rustland_core, making it a totally generic BPF scheduler framework that can be used to implement user-space schedulers in Rust. 
Signed-off-by: Andrea Righi --- rust/meson.build | 1 + rust/scx_rustland_core/.gitignore | 1 + rust/scx_rustland_core/Cargo.toml | 32 +++++++++++ rust/scx_rustland_core/LICENSE | 1 + rust/scx_rustland_core/README.md | 8 +++ rust/scx_rustland_core/bindings.h | 1 + rust/scx_rustland_core/bpf_h | 1 + rust/scx_rustland_core/build.rs | 53 +++++++++++++++++++ rust/scx_rustland_core/meson.build | 7 +++ .../scx_rustland_core/src}/alloc.rs | 16 +++--- rust/scx_rustland_core/src/bindings.rs | 6 +++ rust/scx_rustland_core/src/lib.rs | 4 ++ rust/scx_utils/Cargo.toml | 1 + scheds/rust/scx_rustland/Cargo.toml | 2 +- scheds/rust/scx_rustland/src/bpf.rs | 5 +- 15 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 rust/scx_rustland_core/.gitignore create mode 100644 rust/scx_rustland_core/Cargo.toml create mode 120000 rust/scx_rustland_core/LICENSE create mode 100644 rust/scx_rustland_core/README.md create mode 100644 rust/scx_rustland_core/bindings.h create mode 120000 rust/scx_rustland_core/bpf_h create mode 100644 rust/scx_rustland_core/build.rs create mode 100644 rust/scx_rustland_core/meson.build rename {scheds/rust/scx_rustland/src/bpf => rust/scx_rustland_core/src}/alloc.rs (94%) create mode 100644 rust/scx_rustland_core/src/bindings.rs create mode 100644 rust/scx_rustland_core/src/lib.rs diff --git a/rust/meson.build b/rust/meson.build index 62e7364a2..7ac2aab45 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -1 +1,2 @@ subdir('scx_utils') +subdir('scx_rustland_core') diff --git a/rust/scx_rustland_core/.gitignore b/rust/scx_rustland_core/.gitignore new file mode 100644 index 000000000..03314f77b --- /dev/null +++ b/rust/scx_rustland_core/.gitignore @@ -0,0 +1 @@ +Cargo.lock diff --git a/rust/scx_rustland_core/Cargo.toml b/rust/scx_rustland_core/Cargo.toml new file mode 100644 index 000000000..d8949d4fb --- /dev/null +++ b/rust/scx_rustland_core/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "scx_rustland_core" +version = "0.1.0" +edition = "2021" 
+authors = ["Andrea Righi "] +license = "GPL-2.0-only" +repository = "https://github.com/sched-ext/scx" +description = "Framework to implement sched_ext schedulers running in user space" + +[dependencies] +anyhow = "1.0" +bitvec = { version = "1.0", features = ["serde"] } +# FIXME - We need to allow both 0.68 and 0.69 to accommodate fedora. See the +# comment in BpfBuilder::bindgen_bpf_intf() for details. +bindgen = ">=0.68, <0.70" +glob = "0.3" +hex = "0.4.3" +lazy_static = "1.4" +libbpf-cargo = "0.22" +libbpf-rs = "0.22.0" +libc = "0.2.137" +buddy-alloc = "0.5.1" +log = "0.4.17" +regex = "1.10" +sscanf = "0.4" +tar = "0.4" +version-compare = "0.1" + +[build-dependencies] +bindgen = ">=0.68, <0.70" +tar = "0.4" +walkdir = "2.4" diff --git a/rust/scx_rustland_core/LICENSE b/rust/scx_rustland_core/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/rust/scx_rustland_core/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/rust/scx_rustland_core/README.md b/rust/scx_rustland_core/README.md new file mode 100644 index 000000000..bc08790ad --- /dev/null +++ b/rust/scx_rustland_core/README.md @@ -0,0 +1,8 @@ +# Framework to implement sched_ext schedulers running in user-space + +[sched_ext](https://github.com/sched-ext/scx) is a Linux kernel feature +which enables implementing kernel thread schedulers in BPF and dynamically +loading them. + +Thie crate provides a generic framework that allows to implement sched_ext +schedulers that are running in user-space. 
diff --git a/rust/scx_rustland_core/bindings.h b/rust/scx_rustland_core/bindings.h new file mode 100644 index 000000000..024239315 --- /dev/null +++ b/rust/scx_rustland_core/bindings.h @@ -0,0 +1 @@ +#include "bpf_h/vmlinux/vmlinux.h" diff --git a/rust/scx_rustland_core/bpf_h b/rust/scx_rustland_core/bpf_h new file mode 120000 index 000000000..1cf48acda --- /dev/null +++ b/rust/scx_rustland_core/bpf_h @@ -0,0 +1 @@ +../../scheds/include \ No newline at end of file diff --git a/rust/scx_rustland_core/build.rs b/rust/scx_rustland_core/build.rs new file mode 100644 index 000000000..ad00c19de --- /dev/null +++ b/rust/scx_rustland_core/build.rs @@ -0,0 +1,53 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +use std::env; +use std::fs::File; +use std::path::PathBuf; + +const BPF_H: &str = "bpf_h"; + +fn gen_bpf_h() { + let file = + File::create(PathBuf::from(env::var("OUT_DIR").unwrap()).join(format!("{}.tar", BPF_H))) + .unwrap(); + let mut ar = tar::Builder::new(file); + + ar.follow_symlinks(false); + ar.append_dir_all(".", BPF_H).unwrap(); + ar.finish().unwrap(); + + for ent in walkdir::WalkDir::new(BPF_H) { + let ent = ent.unwrap(); + if !ent.file_type().is_dir() { + println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy()); + } + } +} + +fn gen_bindings() { + // FIXME - bindgen's API changed between 0.68 and 0.69 so that + // `bindgen::CargoCallbacks::new()` should be used instead of + // `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is + // shipping 0.68. To accommodate fedora, allow both 0.68 and 0.69 of + // bindgen and suppress deprecation warning. Remove the following once + // fedora can be updated to bindgen >= 0.69. 
+ #[allow(deprecated)] + let bindings = bindgen::Builder::default() + .header("bindings.h") + .allowlist_type("scx_exit_kind") + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .expect("Unable to generate bindings"); + + bindings + .write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs")) + .expect("Couldn't write bindings"); +} + +fn main() { + gen_bpf_h(); + gen_bindings(); +} diff --git a/rust/scx_rustland_core/meson.build b/rust/scx_rustland_core/meson.build new file mode 100644 index 000000000..f24a7ad71 --- /dev/null +++ b/rust/scx_rustland_core/meson.build @@ -0,0 +1,7 @@ +custom_target('scx_rustland_core', + output: '@PLAINNAME@.__PHONY__', + input: 'Cargo.toml', + command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@', + cargo_build_args], + env: cargo_env, + build_by_default: true) diff --git a/scheds/rust/scx_rustland/src/bpf/alloc.rs b/rust/scx_rustland_core/src/alloc.rs similarity index 94% rename from scheds/rust/scx_rustland/src/bpf/alloc.rs rename to rust/scx_rustland_core/src/alloc.rs index 1487a7c6c..25fffe6f1 100644 --- a/scheds/rust/scx_rustland/src/bpf/alloc.rs +++ b/rust/scx_rustland_core/src/alloc.rs @@ -15,7 +15,7 @@ const HEAP_SIZE: usize = 64 * 1024 * 1024; // 64M const LEAF_SIZE: usize = 64; #[repr(align(4096))] -pub struct AlignedHeap<const N: usize>([u8; N]); +struct AlignedHeap<const N: usize>([u8; N]); // Statically pre-allocated memory arena. static mut FAST_HEAP: AlignedHeap<FAST_HEAP_SIZE> = AlignedHeap([0u8; FAST_HEAP_SIZE]); @@ -26,25 +26,25 @@ static mut HEAP: AlignedHeap<HEAP_SIZE> = AlignedHeap([0u8; HEAP_SIZE]); // To prevent potential deadlock conditions under heavy loads, any scheduler that delegates // scheduling decisions to user-space should avoid triggering page faults. // -// To address this issue, replace the global allocator with a custom one (RustLandAllocator), +// To address this issue, replace the global allocator with a custom one (UserAllocator), // designed to operate on a pre-allocated buffer. 
This, coupled with the memory locking achieved // through mlockall(), prevents page faults from occurring during the execution of the user-space // scheduler. #[cfg_attr(not(test), global_allocator)] -pub static ALLOCATOR: RustLandAllocator = unsafe { +pub static ALLOCATOR: UserAllocator = unsafe { let fast_param = FastAllocParam::new(FAST_HEAP.0.as_ptr(), FAST_HEAP_SIZE); let buddy_param = BuddyAllocParam::new(HEAP.0.as_ptr(), HEAP_SIZE, LEAF_SIZE); - RustLandAllocator { + UserAllocator { arena: NonThreadsafeAlloc::new(fast_param, buddy_param), } }; // Main allocator class. -pub struct RustLandAllocator { - pub arena: NonThreadsafeAlloc, +pub struct UserAllocator { + arena: NonThreadsafeAlloc, } -impl RustLandAllocator { +impl UserAllocator { pub fn lock_memory(&self) { unsafe { VM.save(); @@ -75,7 +75,7 @@ impl RustLandAllocator { } // Override global allocator methods. -unsafe impl GlobalAlloc for RustLandAllocator { +unsafe impl GlobalAlloc for UserAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { self.arena.alloc(layout) } diff --git a/rust/scx_rustland_core/src/bindings.rs b/rust/scx_rustland_core/src/bindings.rs new file mode 100644 index 000000000..cd503e4b5 --- /dev/null +++ b/rust/scx_rustland_core/src/bindings.rs @@ -0,0 +1,6 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(dead_code)] + +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); diff --git a/rust/scx_rustland_core/src/lib.rs b/rust/scx_rustland_core/src/lib.rs new file mode 100644 index 000000000..fdb3bca71 --- /dev/null +++ b/rust/scx_rustland_core/src/lib.rs @@ -0,0 +1,4 @@ +mod bindings; + +mod alloc; +pub use alloc::ALLOCATOR; diff --git a/rust/scx_utils/Cargo.toml b/rust/scx_utils/Cargo.toml index a0c063b6c..3ae5b212b 100644 --- a/rust/scx_utils/Cargo.toml +++ b/rust/scx_utils/Cargo.toml @@ -18,6 +18,7 @@ hex = "0.4.3" lazy_static = "1.4" libbpf-cargo = "0.22" libbpf-rs = "0.22.0" +buddy-alloc = "0.5.1" log = 
"0.4.17" regex = "1.10" sscanf = "0.4" diff --git a/scheds/rust/scx_rustland/Cargo.toml b/scheds/rust/scx_rustland/Cargo.toml index a0d6890d0..c51df7bcf 100644 --- a/scheds/rust/scx_rustland/Cargo.toml +++ b/scheds/rust/scx_rustland/Cargo.toml @@ -15,10 +15,10 @@ fb_procfs = "0.7.0" hex = "0.4.3" libbpf-rs = "0.22.0" libc = "0.2.137" -buddy-alloc = "0.5.1" log = "0.4.17" ordered-float = "3.4.0" scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } +scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" } simplelog = "0.12.0" [build-dependencies] diff --git a/scheds/rust/scx_rustland/src/bpf.rs b/scheds/rust/scx_rustland/src/bpf.rs index 6cdec059a..e2f36733f 100644 --- a/scheds/rust/scx_rustland/src/bpf.rs +++ b/scheds/rust/scx_rustland/src/bpf.rs @@ -15,13 +15,12 @@ use libbpf_rs::skel::SkelBuilder as _; use libc::{sched_param, sched_setscheduler}; -mod alloc; -use alloc::*; - use scx_utils::init_libbpf_logging; use scx_utils::uei_exited; use scx_utils::uei_report; +use scx_rustland_core::ALLOCATOR; + // Defined in UAPI const SCHED_EXT: i32 = 7; From 871a6c10f9986a52aade860b02093900d91bc779 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 24 Feb 2024 22:02:47 +0100 Subject: [PATCH 2/8] scx_rustland_core: include scx_rustland backend Move the BPF component of scx_rustland to scx_rustland_core and make it available to other user-space schedulers. NOTE: main.bpf.c and bpf.rs are not pre-compiled in the scx_rustland_core crate, they need to be included in the user-space scheduler's source code in order to be compiled/linked properly. 
Signed-off-by: Andrea Righi --- rust/scx_rustland_core/README.md | 75 ++- rust/scx_rustland_core/src/bpf.rs | 447 ++++++++++++++++++ .../scx_rustland_core}/src/bpf/intf.h | 0 .../scx_rustland_core}/src/bpf/main.bpf.c | 18 +- rust/scx_rustland_core/src/bpf_intf.rs | 9 + rust/scx_rustland_core/src/bpf_skel.rs | 4 + scheds/rust/scx_rustland/build.rs | 4 +- scheds/rust/scx_rustland/src/bpf.rs | 434 +---------------- 8 files changed, 544 insertions(+), 447 deletions(-) create mode 100644 rust/scx_rustland_core/src/bpf.rs rename {scheds/rust/scx_rustland => rust/scx_rustland_core}/src/bpf/intf.h (100%) rename {scheds/rust/scx_rustland => rust/scx_rustland_core}/src/bpf/main.bpf.c (96%) create mode 100644 rust/scx_rustland_core/src/bpf_intf.rs create mode 100644 rust/scx_rustland_core/src/bpf_skel.rs diff --git a/rust/scx_rustland_core/README.md b/rust/scx_rustland_core/README.md index bc08790ad..e2d191b69 100644 --- a/rust/scx_rustland_core/README.md +++ b/rust/scx_rustland_core/README.md @@ -4,5 +4,76 @@ which enables implementing kernel thread schedulers in BPF and dynamically loading them. -Thie crate provides a generic framework that allows to implement sched_ext -schedulers that are running in user-space. +This crate provides a generic layer that can be used to implement sched-ext +schedulers that run in user-space. + +It provides a generic BPF abstraction that is completely agnostic of the +particular scheduling policy implemented in user-space. + +Developers can use such abstraction to implement schedulers using pure Rust +code, without having to deal with any internal kernel / BPF internal details. + +## API + +The main BPF interface is provided by the `BpfScheduler` struct. When this +object is initialized it will take care of registering and initializing the BPF +component. 
+ +The scheduler can then use the `BpfScheduler` instance to receive tasks (in the +form of `QueuedTask` objects) and dispatch tasks (in the form of `DispatchedTask` +objects), using respectively the methods `dequeue_task()` and `dispatch_task()`. + +Example usage (FIFO scheduler): +``` +struct Scheduler<'a> { + bpf: BpfScheduler<'a>, +} + +impl<'a> Scheduler<'a> { + fn init() -> Result<Self> { + let topo = Topology::new().expect("Failed to build host topology"); + let bpf = BpfScheduler::init(5000, topo.nr_cpus() as i32, false, false, false)?; + Ok(Self { bpf }) + } + + fn schedule(&mut self) { + match self.bpf.dequeue_task() { + Ok(Some(task)) => { + // task.cpu < 0 is used to notify an exiting task, in this + // case we can simply ignore it. + if task.cpu >= 0 { + let _ = self.bpf.dispatch_task(&DispatchedTask { + pid: task.pid, + cpu: task.cpu, + cpumask_cnt: task.cpumask_cnt, + payload: 0, + }); + } + } + Ok(None) => { + // Notify the BPF component that all tasks have been dispatched. + *self.bpf.nr_queued_mut() = 0; + *self.bpf.nr_scheduled_mut() = 0; + + break; + } + Err(_) => { + break; + } + } + } +``` + +Moreover, a CPU ownership map (that keeps track of which PID runs on which CPU) +can be accessed using the method `get_cpu_pid()`. This also makes it possible to keep +track of the idle and busy CPUs, with the corresponding PIDs associated to +them. + +BPF counters and statistics can be accessed using the methods `nr_*_mut()`, in +particular `nr_queued_mut()` and `nr_scheduled_mut()` can be updated to notify +the BPF component if the user-space scheduler still has some pending work to do +or not. + +Lastly, the methods `exited()` and `shutdown_and_report()` can be used +respectively to test whether the BPF component exited, and to shutdown and +report the exit message. 
diff --git a/rust/scx_rustland_core/src/bpf.rs b/rust/scx_rustland_core/src/bpf.rs new file mode 100644 index 000000000..ae6be4656 --- /dev/null +++ b/rust/scx_rustland_core/src/bpf.rs @@ -0,0 +1,447 @@ +// Copyright (c) Andrea Righi + +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +use crate::bpf_intf; +use crate::bpf_skel::*; + +use anyhow::Context; +use anyhow::Result; + +use libbpf_rs::skel::OpenSkel as _; +use libbpf_rs::skel::Skel as _; +use libbpf_rs::skel::SkelBuilder as _; + +use libc::{sched_param, sched_setscheduler}; + +use scx_utils::init_libbpf_logging; +use scx_utils::uei_exited; +use scx_utils::uei_report; + +use scx_rustland_core::ALLOCATOR; + +// Defined in UAPI +const SCHED_EXT: i32 = 7; + +// Do not assign any specific CPU to the task. +// +// The task will be dispatched to the global shared DSQ and it will run on the first CPU available. +#[allow(dead_code)] +pub const NO_CPU: i32 = -1; + +/// High-level Rust abstraction to interact with a generic sched-ext BPF component. +/// +/// Overview +/// ======== +/// +/// The main BPF interface is provided by the BpfScheduler() struct. When this object is +/// initialized it will take care of registering and initializing the BPF component. +/// +/// The scheduler then can use BpfScheduler() instance to receive tasks (in the form of QueuedTask +/// objects) and dispatch tasks (in the form of DispatchedTask objects), using respectively the +/// methods dequeue_task() and dispatch_task(). +/// +/// The CPU ownership map can be accessed using the method get_cpu_pid(), this also allows to keep +/// track of the idle and busy CPUs, with the corresponding PIDs associated to them. +/// +/// BPF counters and statistics can be accessed using the methods nr_*_mut(), in particular +/// nr_queued_mut() and nr_scheduled_mut() can be updated to notify the BPF component if the +/// user-space scheduler has some pending work to do or not. 
+/// +/// Finally the methods exited() and shutdown_and_report() can be used respectively to test +/// whether the BPF component exited, and to shutdown and report the exit message. +/// +/// Example +/// ======= +/// +/// Below you can find a bare minimum template that can be used to implement a simple FIFO +/// scheduler using the BPF abstraction: +/// +/// mod bpf_skel; +/// pub use bpf_skel::*; +/// mod bpf; +/// pub mod bpf_intf; +/// use bpf::*; +/// +/// use std::thread; +/// +/// use std::sync::atomic::AtomicBool; +/// use std::sync::atomic::Ordering; +/// use std::sync::Arc; +/// +/// use anyhow::Result; +/// +/// struct Scheduler<'a> { +/// bpf: BpfScheduler<'a>, +/// } +/// +/// impl<'a> Scheduler<'a> { +/// fn init() -> Result<Self> { +/// let bpf = BpfScheduler::init(20000, false, false)?; +/// Ok(Self { bpf }) +/// } +/// +/// fn dispatch_tasks(&mut self) { +/// loop { +/// match self.bpf.dequeue_task() { +/// Ok(Some(task)) => { +/// if task.cpu >= 0 { +/// let _ = self.bpf.dispatch_task( +/// &DispatchedTask { +/// pid: task.pid, +/// cpu: task.cpu, +/// payload: 0, +/// } +/// ); +/// } +/// } +/// Ok(None) => { +/// *self.bpf.nr_queued_mut() = 0; +/// *self.bpf.nr_scheduled_mut() = 0; +/// break; +/// } +/// Err(_) => { +/// break; +/// } +/// } +/// } +/// } +/// +/// fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<()> { +/// while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() { +/// self.dispatch_tasks(); +/// thread::yield_now(); +/// } +/// +/// Ok(()) +/// } +/// } +/// +/// fn main() -> Result<()> { +/// let mut sched = Scheduler::init()?; +/// let shutdown = Arc::new(AtomicBool::new(false)); +/// let shutdown_clone = shutdown.clone(); +/// ctrlc::set_handler(move || { +/// shutdown_clone.store(true, Ordering::Relaxed); +/// })?; +/// +/// sched.run(shutdown) +/// } +/// + +// Task queued for scheduling from the BPF component (see bpf_intf::queued_task_ctx). 
+#[derive(Debug)] +pub struct QueuedTask { + pub pid: i32, // pid that uniquely identifies a task + pub cpu: i32, // CPU where the task is running (-1 = exiting) + pub cpumask_cnt: u64, // cpumask generation counter + pub sum_exec_runtime: u64, // Total cpu time + pub nvcsw: u64, // Voluntary context switches + pub weight: u64, // Task static priority +} + +// Task queued for dispatching to the BPF component (see bpf_intf::dispatched_task_ctx). +#[derive(Debug)] +pub struct DispatchedTask { + pub pid: i32, // pid that uniquely identifies a task + pub cpu: i32, // target CPU selected by the scheduler + pub cpumask_cnt: u64, // cpumask generation counter + pub payload: u64, // task payload (used for debugging) +} + +// Message received from the dispatcher (see bpf_intf::queued_task_ctx for details). +// +// NOTE: eventually libbpf-rs will provide a better abstraction for this. +struct EnqueuedMessage { + inner: bpf_intf::queued_task_ctx, +} + +impl EnqueuedMessage { + fn from_bytes(bytes: &[u8]) -> Self { + let queued_task_struct = unsafe { *(bytes.as_ptr() as *const bpf_intf::queued_task_ctx) }; + EnqueuedMessage { + inner: queued_task_struct, + } + } + + fn to_queued_task(&self) -> QueuedTask { + QueuedTask { + pid: self.inner.pid, + cpu: self.inner.cpu, + cpumask_cnt: self.inner.cpumask_cnt, + sum_exec_runtime: self.inner.sum_exec_runtime, + nvcsw: self.inner.nvcsw, + weight: self.inner.weight, + } + } +} + +// Message sent to the dispatcher (see bpf_intf::dispatched_task_ctx for details). +// +// NOTE: eventually libbpf-rs will provide a better abstraction for this. 
+struct DispatchedMessage { + inner: bpf_intf::dispatched_task_ctx, +} + +impl DispatchedMessage { + fn from_dispatched_task(task: &DispatchedTask) -> Self { + let dispatched_task_struct = bpf_intf::dispatched_task_ctx { + pid: task.pid, + cpu: task.cpu, + cpumask_cnt: task.cpumask_cnt, + payload: task.payload, + }; + DispatchedMessage { + inner: dispatched_task_struct, + } + } + + fn as_bytes(&self) -> &[u8] { + let size = std::mem::size_of::<bpf_intf::dispatched_task_ctx>(); + let ptr = &self.inner as *const _ as *const u8; + + unsafe { std::slice::from_raw_parts(ptr, size) } + } +} + +pub struct BpfScheduler<'cb> { + pub skel: BpfSkel<'cb>, // Low-level BPF connector + queued: libbpf_rs::RingBuffer<'cb>, // Ring buffer of queued tasks + struct_ops: Option<libbpf_rs::Link>, // Low-level BPF methods +} + +// Buffer to store a task read from the ring buffer. +// +// NOTE: make the buffer aligned to 64-bits to prevent misaligned dereferences when accessing the +// buffer using a pointer. +const BUFSIZE: usize = std::mem::size_of::<QueuedTask>(); + +#[repr(align(8))] +struct AlignedBuffer([u8; BUFSIZE]); + +static mut BUF: AlignedBuffer = AlignedBuffer([0; BUFSIZE]); + +impl<'cb> BpfScheduler<'cb> { + pub fn init( + slice_us: u64, + nr_cpus_online: i32, + partial: bool, + full_user: bool, + debug: bool, + ) -> Result<Self> { + // Open the BPF prog first for verification. + let skel_builder = BpfSkelBuilder::default(); + init_libbpf_logging(None); + let mut skel = skel_builder.open().context("Failed to open BPF program")?; + + // Lock all the memory to prevent page faults that could trigger potential deadlocks during + // scheduling. + ALLOCATOR.lock_memory(); + + // Copy one item from the ring buffer. + // + // # Safety + // + // Each invocation of the callback will trigger the copy of exactly one QueuedTask item to + // BUF. 
The caller must be synchronized to ensure that multiple invocations of the callback + // are not happening at the same time, but this is implicitly guaranteed by the fact that + // the caller is a single-thread process (for now). + // + // Use of a `str` whose contents are not valid UTF-8 is undefined behavior. + fn callback(data: &[u8]) -> i32 { + unsafe { + // SAFETY: copying from the BPF ring buffer to BUF is safe, since the size of BUF + // is exactly the size of QueuedTask and the callback operates in chunks of + // QueuedTask items. It also copies exactly one QueuedTask at a time, this is + // guaranteed by the error code returned by this callback (see below). From a + // thread-safety perspective this is also correct, assuming the caller is a + // single-thread process (as it is for now). + BUF.0.copy_from_slice(data); + } + + // Return an unsupported error to stop early and consume only one item. + // + // NOTE: this is quite a hack. I wish libbpf would honor stopping after the first item + // is consumed, upon returning a non-zero positive value here, but it doesn't seem to + // be the case: + // + // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/ringbuf.c?h=v6.8-rc5#n260 + // + // Maybe we should fix this to stop processing items from the ring buffer also when a + // value > 0 is returned. + // + -255 + } + + // Initialize online CPUs counter. + // + // NOTE: we should probably refresh this counter during the normal execution to support cpu + // hotplugging, but for now let's keep it simple and set this only at initialization. + skel.rodata_mut().num_possible_cpus = nr_cpus_online; + + // Set scheduler options (defined in the BPF part). + skel.bss_mut().usersched_pid = std::process::id(); + skel.rodata_mut().slice_ns = slice_us * 1000; + skel.rodata_mut().switch_partial = partial; + skel.rodata_mut().debug = debug; + skel.rodata_mut().full_user = full_user; + + // Attach BPF scheduler. 
+ let mut skel = skel.load().context("Failed to load BPF program")?; + skel.attach().context("Failed to attach BPF program")?; + let struct_ops = Some( + skel.maps_mut() + .rustland() + .attach_struct_ops() + .context("Failed to attach struct ops")?, + ); + + // Build the ring buffer of queued tasks. + let binding = skel.maps(); + let queued_ring_buffer = binding.queued(); + let mut rbb = libbpf_rs::RingBufferBuilder::new(); + rbb.add(queued_ring_buffer, callback) + .expect("failed to add ringbuf callback"); + let queued = rbb.build().expect("failed to build ringbuf"); + + // Make sure to use the SCHED_EXT class at least for the scheduler itself. + match Self::use_sched_ext() { + 0 => Ok(Self { + skel, + queued, + struct_ops, + }), + err => Err(anyhow::Error::msg(format!( + "sched_setscheduler error: {}", + err + ))), + } + } + + // Override the default scheduler time slice (in us). + #[allow(dead_code)] + pub fn set_effective_slice_us(&mut self, slice_us: u64) { + self.skel.bss_mut().effective_slice_ns = slice_us * 1000; + } + + // Get the current value of the time slice (in us). + #[allow(dead_code)] + pub fn get_effective_slice_us(&mut self) -> u64 { + let slice_ns = self.skel.bss().effective_slice_ns; + + if slice_ns > 0 { + slice_ns / 1000 + } else { + self.skel.rodata().slice_ns / 1000 + } + } + + // Counter of queued tasks. + pub fn nr_queued_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_queued + } + + // Counter of scheduled tasks. + pub fn nr_scheduled_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_scheduled + } + + // Counter of user dispatch events. + #[allow(dead_code)] + pub fn nr_user_dispatches_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_user_dispatches + } + + // Counter of kernel dispatch events. + #[allow(dead_code)] + pub fn nr_kernel_dispatches_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_kernel_dispatches + } + + // Counter of cancel dispatch events. 
+ #[allow(dead_code)] + pub fn nr_cancel_dispatches_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_cancel_dispatches + } + + // Counter of dispatches bounced to the shared DSQ. + #[allow(dead_code)] + pub fn nr_bounce_dispatches_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_bounce_dispatches + } + + // Counter of failed dispatch events. + #[allow(dead_code)] + pub fn nr_failed_dispatches_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_failed_dispatches + } + + // Counter of scheduler congestion events. + #[allow(dead_code)] + pub fn nr_sched_congested_mut(&mut self) -> &mut u64 { + &mut self.skel.bss_mut().nr_sched_congested + } + + // Set scheduling class for the scheduler itself to SCHED_EXT + fn use_sched_ext() -> i32 { + let pid = std::process::id(); + let param: sched_param = sched_param { sched_priority: 0 }; + let res = + unsafe { sched_setscheduler(pid as i32, SCHED_EXT, &param as *const sched_param) }; + res + } + + // Get the pid running on a certain CPU, if no tasks are running return 0. + #[allow(dead_code)] + pub fn get_cpu_pid(&self, cpu: i32) -> u32 { + let cpu_map_ptr = self.skel.bss().cpu_map.as_ptr(); + + unsafe { *cpu_map_ptr.offset(cpu as isize) } + } + + // Receive a task to be scheduled from the BPF dispatcher. + // + // NOTE: if task.cpu is negative the task is exiting and it does not require to be scheduled. + pub fn dequeue_task(&mut self) -> Result<Option<QueuedTask>, libbpf_rs::Error> { + match self.queued.consume() { + Ok(()) => Ok(None), + Err(error) if error.kind() == libbpf_rs::ErrorKind::Other => { + // A valid task is received, convert data to a proper task struct. + let task = unsafe { EnqueuedMessage::from_bytes(&BUF.0).to_queued_task() }; + Ok(Some(task)) + } + Err(error) => Err(error), + } + } + + // Send a task to the dispatcher. 
+ pub fn dispatch_task(&mut self, task: &DispatchedTask) -> Result<(), libbpf_rs::Error> { + let maps = self.skel.maps(); + let dispatched = maps.dispatched(); + let msg = DispatchedMessage::from_dispatched_task(&task); + + dispatched.update(&[], msg.as_bytes(), libbpf_rs::MapFlags::ANY) + } + + // Read exit code from the BPF part. + pub fn exited(&mut self) -> bool { + uei_exited!(&self.skel.bss().uei) + } + + // Called on exit to shutdown and report exit message from the BPF part. + pub fn shutdown_and_report(&mut self) -> Result<()> { + self.struct_ops.take(); + uei_report!(self.skel.bss().uei) + } +} + +// Disconnect the low-level BPF scheduler. +impl<'a> Drop for BpfScheduler<'a> { + fn drop(&mut self) { + if let Some(struct_ops) = self.struct_ops.take() { + drop(struct_ops); + } + ALLOCATOR.unlock_memory(); + } +} diff --git a/scheds/rust/scx_rustland/src/bpf/intf.h b/rust/scx_rustland_core/src/bpf/intf.h similarity index 100% rename from scheds/rust/scx_rustland/src/bpf/intf.h rename to rust/scx_rustland_core/src/bpf/intf.h diff --git a/scheds/rust/scx_rustland/src/bpf/main.bpf.c b/rust/scx_rustland_core/src/bpf/main.bpf.c similarity index 96% rename from scheds/rust/scx_rustland/src/bpf/main.bpf.c rename to rust/scx_rustland_core/src/bpf/main.bpf.c index 8c073da51..fe8330cff 100644 --- a/scheds/rust/scx_rustland/src/bpf/main.bpf.c +++ b/rust/scx_rustland_core/src/bpf/main.bpf.c @@ -1,19 +1,13 @@ /* Copyright (c) Andrea Righi */ /* - * scx_rustland: simple user-space scheduler written in Rust + * scx_rustland_core: BPF backend for schedulers running in user-space. * - * The main goal of this scheduler is be an "easy to read" template that can be - * used to quickly test more complex scheduling policies. For this reason this - * scheduler is mostly focused on simplicity and code readability. + * This BPF backend implements the low level sched-ext functionalities for a + * user-space counterpart, that implements the actual scheduling policy. 
* - * The scheduler is made of a BPF component (dispatcher) that implements the - * low level sched-ext functionalities and a user-space counterpart - * (scheduler), written in Rust, that implements the actual scheduling policy. - * - * The BPF dispatcher collects total cputime and weight from the tasks that - * need to run, then it sends all details to the user-space scheduler that - * decides the best order of execution of the tasks (based on the collected - * metrics). + * The BPF part collects total cputime and weight from the tasks that need to + * run, then it sends all details to the user-space scheduler that decides the + * best order of execution of the tasks (based on the collected metrics). * * The user-space scheduler then returns to the BPF component the list of tasks * to be dispatched in the proper order. diff --git a/rust/scx_rustland_core/src/bpf_intf.rs b/rust/scx_rustland_core/src/bpf_intf.rs new file mode 100644 index 000000000..9db020efd --- /dev/null +++ b/rust/scx_rustland_core/src/bpf_intf.rs @@ -0,0 +1,9 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(dead_code)] + +include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs")); diff --git a/rust/scx_rustland_core/src/bpf_skel.rs b/rust/scx_rustland_core/src/bpf_skel.rs new file mode 100644 index 000000000..c42af33d6 --- /dev/null +++ b/rust/scx_rustland_core/src/bpf_skel.rs @@ -0,0 +1,4 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. 
+ +include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs")); diff --git a/scheds/rust/scx_rustland/build.rs b/scheds/rust/scx_rustland/build.rs index 42e96b711..926487817 100644 --- a/scheds/rust/scx_rustland/build.rs +++ b/scheds/rust/scx_rustland/build.rs @@ -4,8 +4,8 @@ fn main() { scx_utils::BpfBuilder::new() .unwrap() - .enable_intf("src/bpf/intf.h", "bpf_intf.rs") - .enable_skel("src/bpf/main.bpf.c", "bpf") + .enable_intf("../../../rust/scx_rustland_core/src/bpf/intf.h", "bpf_intf.rs") + .enable_skel("../../../rust/scx_rustland_core/src/bpf/main.bpf.c", "bpf") .build() .unwrap(); } diff --git a/scheds/rust/scx_rustland/src/bpf.rs b/scheds/rust/scx_rustland/src/bpf.rs index e2f36733f..c55c704e5 100644 --- a/scheds/rust/scx_rustland/src/bpf.rs +++ b/scheds/rust/scx_rustland/src/bpf.rs @@ -1,432 +1,4 @@ -// Copyright (c) Andrea Righi + // This software may be used and distributed according to the terms of the + // GNU General Public License version 2. -// This software may be used and distributed according to the terms of the -// GNU General Public License version 2. - -use crate::bpf_intf; -use crate::bpf_skel::*; - -use anyhow::Context; -use anyhow::Result; - -use libbpf_rs::skel::OpenSkel as _; -use libbpf_rs::skel::Skel as _; -use libbpf_rs::skel::SkelBuilder as _; - -use libc::{sched_param, sched_setscheduler}; - -use scx_utils::init_libbpf_logging; -use scx_utils::uei_exited; -use scx_utils::uei_report; - -use scx_rustland_core::ALLOCATOR; - -// Defined in UAPI -const SCHED_EXT: i32 = 7; - -// Do not assign any specific CPU to the task. -// -// The task will be dispatched to the global shared DSQ and it will run on the first CPU available. -#[allow(dead_code)] -pub const NO_CPU: i32 = -1; - -/// scx_rustland: provide high-level abstractions to interact with the BPF component. -/// -/// Overview -/// ======== -/// -/// The main BPF interface is provided by the BpfScheduler() struct. 
When this object is -/// initialized it will take care of registering and initializing the BPF component. -/// -/// The scheduler then can use BpfScheduler() instance to receive tasks (in the form of QueuedTask -/// object) and dispatch tasks (in the form of DispatchedTask objects), using respectively the -/// methods dequeue_task() and dispatch_task(). -/// -/// The CPU ownership map can be accessed using the method get_cpu_pid(), this also allows to keep -/// track of the idle and busy CPUs, with the corresponding PIDs associated to them. -/// -/// BPF counters and statistics can be accessed using the methods nr_*_mut(), in particular -/// nr_queued_mut() and nr_scheduled_mut() can be updated to notify the BPF component if the -/// user-space scheduler has some pending work to do or not. -/// -/// Finally the methods exited() and shutdown_and_report() can be used respectively to test -/// whether the BPF component exited, and to shutdown and report exit message. -/// -/// Example -/// ======= -/// -/// Following you can find bare minimum template that can be used to implement a simple FIFO -/// scheduler using the BPF abstraction: -/// -/// mod bpf_skel; -/// pub use bpf_skel::*; -/// mod bpf; -/// pub mod bpf_intf; -/// use bpf::*; -/// -/// use std::thread; -/// -/// use std::sync::atomic::AtomicBool; -/// use std::sync::atomic::Ordering; -/// use std::sync::Arc; -/// -/// use anyhow::Result; -/// -/// struct Scheduler<'a> { -/// bpf: BpfScheduler<'a>, -/// } -/// -/// impl<'a> Scheduler<'a> { -/// fn init() -> Result { -/// let bpf = BpfScheduler::init(20000, false, false)?; -/// Ok(Self { bpf }) -/// } -/// -/// fn dispatch_tasks(&mut self) { -/// loop { -/// match self.bpf.dequeue_task() { -/// Ok(Some(task)) => { -/// if task.cpu >= 0 { -/// let _ = self.bpf.dispatch_task( -/// &DispatchedTask { -/// pid: task.pid, -/// cpu: task.cpu, -/// payload: 0, -/// } -/// ); -/// } -/// } -/// Ok(None) => { -/// *self.bpf.nr_queued_mut() = 0; -/// 
*self.bpf.nr_scheduled_mut() = 0; -/// break; -/// } -/// Err(_) => { -/// break; -/// } -/// } -/// } -/// } -/// -/// fn run(&mut self, shutdown: Arc) -> Result<()> { -/// while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() { -/// self.dispatch_tasks(); -/// thread::yield_now(); -/// } -/// -/// Ok(()) -/// } -/// } -/// -/// fn main() -> Result<()> { -/// let mut sched = Scheduler::init()?; -/// let shutdown = Arc::new(AtomicBool::new(false)); -/// let shutdown_clone = shutdown.clone(); -/// ctrlc::set_handler(move || { -/// shutdown_clone.store(true, Ordering::Relaxed); -/// })?; -/// -/// sched.run(shutdown) -/// } -/// - -// Task queued for scheduling from the BPF component (see bpf_intf::queued_task_ctx). -#[derive(Debug)] -pub struct QueuedTask { - pub pid: i32, // pid that uniquely identifies a task - pub cpu: i32, // CPU where the task is running (-1 = exiting) - pub cpumask_cnt: u64, // cpumask generation counter - pub sum_exec_runtime: u64, // Total cpu time - pub nvcsw: u64, // Voluntary context switches - pub weight: u64, // Task static priority -} - -// Task queued for dispatching to the BPF component (see bpf_intf::dispatched_task_ctx). -#[derive(Debug)] -pub struct DispatchedTask { - pub pid: i32, // pid that uniquely identifies a task - pub cpu: i32, // target CPU selected by the scheduler - pub cpumask_cnt: u64, // cpumask generation counter - pub payload: u64, // task payload (used for debugging) -} - -// Message received from the dispatcher (see bpf_intf::queued_task_ctx for details). -// -// NOTE: eventually libbpf-rs will provide a better abstraction for this. 
-struct EnqueuedMessage { - inner: bpf_intf::queued_task_ctx, -} - -impl EnqueuedMessage { - fn from_bytes(bytes: &[u8]) -> Self { - let queued_task_struct = unsafe { *(bytes.as_ptr() as *const bpf_intf::queued_task_ctx) }; - EnqueuedMessage { - inner: queued_task_struct, - } - } - - fn to_queued_task(&self) -> QueuedTask { - QueuedTask { - pid: self.inner.pid, - cpu: self.inner.cpu, - cpumask_cnt: self.inner.cpumask_cnt, - sum_exec_runtime: self.inner.sum_exec_runtime, - nvcsw: self.inner.nvcsw, - weight: self.inner.weight, - } - } -} - -// Message sent to the dispatcher (see bpf_intf::dispatched_task_ctx for details). -// -// NOTE: eventually libbpf-rs will provide a better abstraction for this. -struct DispatchedMessage { - inner: bpf_intf::dispatched_task_ctx, -} - -impl DispatchedMessage { - fn from_dispatched_task(task: &DispatchedTask) -> Self { - let dispatched_task_struct = bpf_intf::dispatched_task_ctx { - pid: task.pid, - cpu: task.cpu, - cpumask_cnt: task.cpumask_cnt, - payload: task.payload, - }; - DispatchedMessage { - inner: dispatched_task_struct, - } - } - - fn as_bytes(&self) -> &[u8] { - let size = std::mem::size_of::(); - let ptr = &self.inner as *const _ as *const u8; - - unsafe { std::slice::from_raw_parts(ptr, size) } - } -} - -pub struct BpfScheduler<'cb> { - pub skel: BpfSkel<'cb>, // Low-level BPF connector - queued: libbpf_rs::RingBuffer<'cb>, // Ring buffer of queued tasks - struct_ops: Option, // Low-level BPF methods -} - -// Buffer to store a task read from the ring buffer. -// -// NOTE: make the buffer aligned to 64-bits to prevent misaligned dereferences when accessing the -// buffer using a pointer. 
-const BUFSIZE: usize = std::mem::size_of::(); - -#[repr(align(8))] -struct AlignedBuffer([u8; BUFSIZE]); - -static mut BUF: AlignedBuffer = AlignedBuffer([0; BUFSIZE]); - -impl<'cb> BpfScheduler<'cb> { - pub fn init( - slice_us: u64, - nr_cpus_online: i32, - partial: bool, - full_user: bool, - debug: bool, - ) -> Result { - // Open the BPF prog first for verification. - let skel_builder = BpfSkelBuilder::default(); - init_libbpf_logging(None); - let mut skel = skel_builder.open().context("Failed to open BPF program")?; - - // Lock all the memory to prevent page faults that could trigger potential deadlocks during - // scheduling. - ALLOCATOR.lock_memory(); - - // Copy one item from the ring buffer. - fn callback(data: &[u8]) -> i32 { - unsafe { - BUF.0.copy_from_slice(data); - } - - // Return an unsupported error to stop early and consume only one item. - // - // NOTE: this is quite a hack. I wish libbpf would honor stopping after the first item - // is consumed, upon returnin a non-zero positive value here, but it doesn't seem to be - // the case: - // - // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/ringbuf.c?h=v6.8-rc5#n260 - // - // Maybe we should fix this to stop processing items from the ring buffer also when a - // value > 0 is returned. - // - -255 - } - - // Initialize online CPUs counter. - // - // NOTE: we should probably refresh this counter during the normal execution to support cpu - // hotplugging, but for now let's keep it simple and set this only at initialization). - skel.rodata_mut().num_possible_cpus = nr_cpus_online; - - // Set scheduler options (defined in the BPF part). - skel.bss_mut().usersched_pid = std::process::id(); - skel.rodata_mut().slice_ns = slice_us * 1000; - skel.rodata_mut().switch_partial = partial; - skel.rodata_mut().debug = debug; - skel.rodata_mut().full_user = full_user; - - // Attach BPF scheduler. 
- let mut skel = skel.load().context("Failed to load BPF program")?; - skel.attach().context("Failed to attach BPF program")?; - let struct_ops = Some( - skel.maps_mut() - .rustland() - .attach_struct_ops() - .context("Failed to attach struct ops")?, - ); - - // Build the ring buffer of queued tasks. - let binding = skel.maps(); - let queued_ring_buffer = binding.queued(); - let mut rbb = libbpf_rs::RingBufferBuilder::new(); - rbb.add(queued_ring_buffer, callback) - .expect("failed to add ringbuf callback"); - let queued = rbb.build().expect("failed to build ringbuf"); - - // Make sure to use the SCHED_EXT class at least for the scheduler itself. - match Self::use_sched_ext() { - 0 => Ok(Self { - skel, - queued, - struct_ops, - }), - err => Err(anyhow::Error::msg(format!( - "sched_setscheduler error: {}", - err - ))), - } - } - - // Override the default scheduler time slice (in us). - #[allow(dead_code)] - pub fn set_effective_slice_us(&mut self, slice_us: u64) { - self.skel.bss_mut().effective_slice_ns = slice_us * 1000; - } - - // Get current value of time slice (slice_ns). - #[allow(dead_code)] - pub fn get_effective_slice_us(&mut self) -> u64 { - let slice_ns = self.skel.bss().effective_slice_ns; - - if slice_ns > 0 { - slice_ns / 1000 - } else { - self.skel.rodata().slice_ns / 1000 - } - } - - // Counter of queued tasks. - pub fn nr_queued_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_queued - } - - // Counter of scheduled tasks. - pub fn nr_scheduled_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_scheduled - } - - // Counter of user dispatch events. - #[allow(dead_code)] - pub fn nr_user_dispatches_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_user_dispatches - } - - // Counter of user kernel events. - #[allow(dead_code)] - pub fn nr_kernel_dispatches_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_kernel_dispatches - } - - // Counter of cancel dispatch events. 
- #[allow(dead_code)] - pub fn nr_cancel_dispatches_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_cancel_dispatches - } - - // Counter of dispatches bounced to the shared DSQ. - #[allow(dead_code)] - pub fn nr_bounce_dispatches_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_bounce_dispatches - } - - // Counter of failed dispatch events. - #[allow(dead_code)] - pub fn nr_failed_dispatches_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_failed_dispatches - } - - // Counter of scheduler congestion events. - #[allow(dead_code)] - pub fn nr_sched_congested_mut(&mut self) -> &mut u64 { - &mut self.skel.bss_mut().nr_sched_congested - } - - // Set scheduling class for the scheduler itself to SCHED_EXT - fn use_sched_ext() -> i32 { - let pid = std::process::id(); - let param: sched_param = sched_param { sched_priority: 0 }; - let res = - unsafe { sched_setscheduler(pid as i32, SCHED_EXT, ¶m as *const sched_param) }; - res - } - - // Get the pid running on a certain CPU, if no tasks are running return 0. - #[allow(dead_code)] - pub fn get_cpu_pid(&self, cpu: i32) -> u32 { - let cpu_map_ptr = self.skel.bss().cpu_map.as_ptr(); - - unsafe { *cpu_map_ptr.offset(cpu as isize) } - } - - // Receive a task to be scheduled from the BPF dispatcher. - // - // NOTE: if task.cpu is negative the task is exiting and it does not require to be scheduled. - pub fn dequeue_task(&mut self) -> Result, libbpf_rs::Error> { - match self.queued.consume() { - Ok(()) => Ok(None), - Err(error) if error.kind() == libbpf_rs::ErrorKind::Other => { - // A valid task is received, convert data to a proper task struct. - let task = unsafe { EnqueuedMessage::from_bytes(&BUF.0).to_queued_task() }; - Ok(Some(task)) - } - Err(error) => Err(error), - } - } - - // Send a task to the dispatcher. 
- pub fn dispatch_task(&mut self, task: &DispatchedTask) -> Result<(), libbpf_rs::Error> { - let maps = self.skel.maps(); - let dispatched = maps.dispatched(); - let msg = DispatchedMessage::from_dispatched_task(&task); - - dispatched.update(&[], msg.as_bytes(), libbpf_rs::MapFlags::ANY) - } - - // Read exit code from the BPF part. - pub fn exited(&mut self) -> bool { - uei_exited!(&self.skel.bss().uei) - } - - // Called on exit to shutdown and report exit message from the BPF part. - pub fn shutdown_and_report(&mut self) -> Result<()> { - self.struct_ops.take(); - uei_report!(self.skel.bss().uei) - } -} - -// Disconnect the low-level BPF scheduler. -impl<'a> Drop for BpfScheduler<'a> { - fn drop(&mut self) { - if let Some(struct_ops) = self.struct_ops.take() { - drop(struct_ops); - } - ALLOCATOR.unlock_memory(); - } -} +include!("../../../../rust/scx_rustland_core/src/bpf.rs"); From cf43129d89f898b3f3614a8d91c3d7b5b8c2b433 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 25 Feb 2024 09:52:48 +0100 Subject: [PATCH 3/8] scx_rustland: update documentation scx_rustland has significantly evolved since its original design. With the introduction of scx_rustland_core and the inclusion of the scx_rlfifo example, scx_rustland's focus can be shifted from solely being an "easy-to-read Rust scheduler template" to a fully functional scheduler. For this reason, update the README and documentation to reflect its revised design, objectives, and intended use cases. 
Signed-off-by: Andrea Righi --- scheds/rust/scx_rustland/README.md | 42 +++++++++++++--------------- scheds/rust/scx_rustland/src/bpf.rs | 4 +-- scheds/rust/scx_rustland/src/main.rs | 21 +++++++------- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/scheds/rust/scx_rustland/README.md b/scheds/rust/scx_rustland/README.md index 47d86526b..a3b252799 100644 --- a/scheds/rust/scx_rustland/README.md +++ b/scheds/rust/scx_rustland/README.md @@ -4,25 +4,24 @@ This is a single user-defined scheduler used within [sched_ext](https://github.c ## Overview -scx_rustland is made of a BPF component (dispatcher) that implements the low -level sched-ext functionalities and a user-space counterpart (scheduler), +scx_rustland is made of a BPF component (scx_rustland_core) that implements the +low level sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that implements the actual scheduling policy. -The BPF dispatcher is completely agnostic of the particular scheduling policy -implemented in user-space. For this reason developers that are willing to use -this scheduler to experiment scheduling policies should be able to simply -modify the Rust component, without having to deal with any internal kernel / -BPF details. - ## How To Install Available as a [Rust crate](https://crates.io/crates/scx_rustland): `cargo add scx_rustland` ## Typical Use Case -scx_rustland is designed to be "easy to read" template that can be used by any -developer to quickly experiment more complex scheduling policies, that can be -fully implemented in Rust. +scx_rustland is designed to prioritize interactive workloads over background +CPU-intensive workloads. For this reason the typical use case of this scheduler +involves low-latency interactive applications, such as gaming, video +conferencing and live streaming. 
+ +scx_rustland is also designed to be an "easy to read" template that can be used +by any developer to quickly experiment with more complex scheduling policies fully +implemented in Rust. ## Production Ready? @@ -32,20 +31,17 @@ with a certain cost. However, a scheduler entirely implemented in user-space holds the potential for seamless integration with sophisticated libraries, tracing tools, external -services (e.g., AI), etc. Hence, there might be situations where the benefits -outweigh the overhead, justifying the use of this scheduler in a production -environment. +services (e.g., AI), etc. + +Hence, there might be situations where the benefits outweigh the overhead, +justifying the use of this scheduler in a production environment. ## Demo [scx_rustland-terraria](https://github.com/sched-ext/scx/assets/1051723/42ec3bf2-9f1f-4403-80ab-bf5d66b7c2d5) -For this demo the scheduler includes an extra patch to impose a "time slice -penalty" on new short-lived tasks. While this approach might not be suitable -for general usage, it can yield significant advantages in this specific -scenario. - -The key takeaway is to demonstrate the ease and safety of conducting -experiments like this, as we operate in user-space, and we can accomplish -everything simply by modifying the Rust code, that is completely abstracted -from the underlying BPF/kernel internal details. +The key takeaway of this demo is to demonstrate that, despite the overhead of +running a scheduler in user-space, we can still obtain interesting results and, +in this particular case, even outperform the default Linux scheduler (EEVDF) in +terms of application responsiveness (fps), while a CPU intensive workload +(parallel kernel build) is running in the background. 
diff --git a/scheds/rust/scx_rustland/src/bpf.rs b/scheds/rust/scx_rustland/src/bpf.rs index c55c704e5..eb727b303 100644 --- a/scheds/rust/scx_rustland/src/bpf.rs +++ b/scheds/rust/scx_rustland/src/bpf.rs @@ -1,4 +1,4 @@ - // This software may be used and distributed according to the terms of the - // GNU General Public License version 2. +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. include!("../../../../rust/scx_rustland_core/src/bpf.rs"); diff --git a/scheds/rust/scx_rustland/src/main.rs b/scheds/rust/scx_rustland/src/main.rs index f0ffcf240..5cb5eba44 100644 --- a/scheds/rust/scx_rustland/src/main.rs +++ b/scheds/rust/scx_rustland/src/main.rs @@ -33,15 +33,18 @@ use log::warn; const SCHEDULER_NAME: &'static str = "RustLand"; -/// scx_rustland: simple user-space scheduler written in Rust +/// scx_rustland: user-space scheduler written in Rust /// -/// The main goal of this scheduler is be an "easy to read" template that can be used to quickly -/// test more complex scheduling policies. For this reason this scheduler is mostly focused on -/// simplicity and code readability. +/// scx_rustland is designed to prioritize interactive workloads over background CPU-intensive +/// workloads. For this reason the typical use case of this scheduler involves low-latency +/// interactive applications, such as gaming, video conferencing and live streaming. /// -/// The scheduler is made of a BPF component (dispatcher) that implements the low level sched-ext -/// functionalities and a user-space counterpart (scheduler), written in Rust, that implements the -/// actual scheduling policy. +/// scx_rustland is also designed to be an "easy to read" template that can be used by any +/// developer to quickly experiment more complex scheduling policies fully implemented in Rust. 
+/// +/// The scheduler is made of a BPF component (scx_rustland_core) that implements the low level +/// sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that +/// implements the actual scheduling policy. /// /// The default scheduling policy implemented in the user-space scheduler is a based on virtual /// runtime (vruntime): @@ -65,10 +68,6 @@ const SCHEDULER_NAME: &'static str = "RustLand"; /// /// === Troubleshooting === /// -/// - Disable HyperThreading / SMT if you notice poor performance (add "nosmt" to the kernel boot -/// parameters): this scheduler is not NUMA-aware and it implements a simple policy of handling -/// SMT cores. -/// /// - Adjust the time slice boost parameter (option `-b`) to enhance the responsiveness of /// low-latency applications (i.e., online gaming, live streaming, video conferencing etc.). /// From 00e25530bc513bedbc54c4e5852dbc7d917d373c Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 24 Feb 2024 23:04:04 +0100 Subject: [PATCH 4/8] scx_rlfifo: simple user-space FIFO scheduler written in Rust Implement a FIFO scheduler as an example usage of scx_rustland_core. 
Signed-off-by: Andrea Righi --- rust/scx_rustland_core/src/bpf.rs | 79 +---------------- scheds/rust/README.md | 1 + scheds/rust/meson.build | 1 + scheds/rust/scx_rlfifo/.gitignore | 3 + scheds/rust/scx_rlfifo/Cargo.toml | 22 +++++ scheds/rust/scx_rlfifo/LICENSE | 1 + scheds/rust/scx_rlfifo/README.md | 20 +++++ scheds/rust/scx_rlfifo/build.rs | 11 +++ scheds/rust/scx_rlfifo/meson.build | 7 ++ scheds/rust/scx_rlfifo/rustfmt.toml | 8 ++ scheds/rust/scx_rlfifo/src/bpf.rs | 4 + scheds/rust/scx_rlfifo/src/bpf_intf.rs | 9 ++ scheds/rust/scx_rlfifo/src/bpf_skel.rs | 4 + scheds/rust/scx_rlfifo/src/main.rs | 118 +++++++++++++++++++++++++ 14 files changed, 210 insertions(+), 78 deletions(-) create mode 100644 scheds/rust/scx_rlfifo/.gitignore create mode 100644 scheds/rust/scx_rlfifo/Cargo.toml create mode 120000 scheds/rust/scx_rlfifo/LICENSE create mode 100644 scheds/rust/scx_rlfifo/README.md create mode 100644 scheds/rust/scx_rlfifo/build.rs create mode 100644 scheds/rust/scx_rlfifo/meson.build create mode 100644 scheds/rust/scx_rlfifo/rustfmt.toml create mode 100644 scheds/rust/scx_rlfifo/src/bpf.rs create mode 100644 scheds/rust/scx_rlfifo/src/bpf_intf.rs create mode 100644 scheds/rust/scx_rlfifo/src/bpf_skel.rs create mode 100644 scheds/rust/scx_rlfifo/src/main.rs diff --git a/rust/scx_rustland_core/src/bpf.rs b/rust/scx_rustland_core/src/bpf.rs index ae6be4656..83692aeef 100644 --- a/rust/scx_rustland_core/src/bpf.rs +++ b/rust/scx_rustland_core/src/bpf.rs @@ -51,84 +51,7 @@ pub const NO_CPU: i32 = -1; /// /// Finally the methods exited() and shutdown_and_report() can be used respectively to test /// whether the BPF component exited, and to shutdown and report the exit message. 
-/// -/// Example -/// ======= -/// -/// Following you can find bare minimum template that can be used to implement a simple FIFO -/// scheduler using the BPF abstraction: -/// -/// mod bpf_skel; -/// pub use bpf_skel::*; -/// mod bpf; -/// pub mod bpf_intf; -/// use bpf::*; -/// -/// use std::thread; -/// -/// use std::sync::atomic::AtomicBool; -/// use std::sync::atomic::Ordering; -/// use std::sync::Arc; -/// -/// use anyhow::Result; -/// -/// struct Scheduler<'a> { -/// bpf: BpfScheduler<'a>, -/// } -/// -/// impl<'a> Scheduler<'a> { -/// fn init() -> Result { -/// let bpf = BpfScheduler::init(20000, false, false)?; -/// Ok(Self { bpf }) -/// } -/// -/// fn dispatch_tasks(&mut self) { -/// loop { -/// match self.bpf.dequeue_task() { -/// Ok(Some(task)) => { -/// if task.cpu >= 0 { -/// let _ = self.bpf.dispatch_task( -/// &DispatchedTask { -/// pid: task.pid, -/// cpu: task.cpu, -/// payload: 0, -/// } -/// ); -/// } -/// } -/// Ok(None) => { -/// *self.bpf.nr_queued_mut() = 0; -/// *self.bpf.nr_scheduled_mut() = 0; -/// break; -/// } -/// Err(_) => { -/// break; -/// } -/// } -/// } -/// } -/// -/// fn run(&mut self, shutdown: Arc) -> Result<()> { -/// while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() { -/// self.dispatch_tasks(); -/// thread::yield_now(); -/// } -/// -/// Ok(()) -/// } -/// } -/// -/// fn main() -> Result<()> { -/// let mut sched = Scheduler::init()?; -/// let shutdown = Arc::new(AtomicBool::new(false)); -/// let shutdown_clone = shutdown.clone(); -/// ctrlc::set_handler(move || { -/// shutdown_clone.store(true, Ordering::Relaxed); -/// })?; -/// -/// sched.run(shutdown) -/// } -/// +/// whether the BPF component exited, and to shutdown and report exit message. // Task queued for scheduling from the BPF component (see bpf_intf::queued_task_ctx). 
#[derive(Debug)] diff --git a/scheds/rust/README.md b/scheds/rust/README.md index 04d3882ff..d35f1e194 100644 --- a/scheds/rust/README.md +++ b/scheds/rust/README.md @@ -15,3 +15,4 @@ main.rs or \*.bpf.c files. - [scx_layered](scx_layered/README.md) - [scx_rusty](scx_rusty/README.md) - [scx_rustland](scx_rustland/README.md) +- [scx_rlfifo](scx_rlfifo/README.md) diff --git a/scheds/rust/meson.build b/scheds/rust/meson.build index ec6d22b86..8750b18e6 100644 --- a/scheds/rust/meson.build +++ b/scheds/rust/meson.build @@ -1,3 +1,4 @@ subdir('scx_layered') subdir('scx_rusty') subdir('scx_rustland') +subdir('scx_rlfifo') diff --git a/scheds/rust/scx_rlfifo/.gitignore b/scheds/rust/scx_rlfifo/.gitignore new file mode 100644 index 000000000..186dba259 --- /dev/null +++ b/scheds/rust/scx_rlfifo/.gitignore @@ -0,0 +1,3 @@ +src/bpf/.output +Cargo.lock +target diff --git a/scheds/rust/scx_rlfifo/Cargo.toml b/scheds/rust/scx_rlfifo/Cargo.toml new file mode 100644 index 000000000..2d1591372 --- /dev/null +++ b/scheds/rust/scx_rlfifo/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "scx_rlfifo" +version = "0.0.1" +authors = ["Andrea Righi ", "Canonical"] +edition = "2021" +description = "A simple FIFO scheduler in Rust that runs in user-space" +license = "GPL-2.0-only" + +[dependencies] +anyhow = "1.0.65" +ctrlc = { version = "3.1", features = ["termination"] } +hex = "0.4.3" +libbpf-rs = "0.22.0" +libc = "0.2.137" +scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } +scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" } + +[build-dependencies] +scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } + +[features] +enable_backtrace = [] diff --git a/scheds/rust/scx_rlfifo/LICENSE b/scheds/rust/scx_rlfifo/LICENSE new file mode 120000 index 000000000..5853aaea5 --- /dev/null +++ b/scheds/rust/scx_rlfifo/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/scheds/rust/scx_rlfifo/README.md 
b/scheds/rust/scx_rlfifo/README.md new file mode 100644 index 000000000..f57115fc3 --- /dev/null +++ b/scheds/rust/scx_rlfifo/README.md @@ -0,0 +1,20 @@ +# scx_rlfifo + +This is a single user-defined scheduler used within [sched_ext](https://github.com/sched-ext/scx/tree/main), which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. [Read more about sched_ext](https://github.com/sched-ext/scx/tree/main). + +## Overview + +scx_rlfifo is a simple FIFO scheduler that runs in user-space, based on the +scx_rustland_core framework. + +## Typical Use Case + +This scheduler is provided as a simple template that can be used as a baseline +to test more complex scheduling policies. + +## Production Ready? + +Definitely not. Using this scheduler in a production environment is not +recommended, unless there are specific requirements that necessitate a basic +FIFO scheduling approach. Even then, it's still recommended to use the kernel's +SCHED_FIFO real-time class. diff --git a/scheds/rust/scx_rlfifo/build.rs b/scheds/rust/scx_rlfifo/build.rs new file mode 100644 index 000000000..926487817 --- /dev/null +++ b/scheds/rust/scx_rlfifo/build.rs @@ -0,0 +1,11 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. 
+ +fn main() { + scx_utils::BpfBuilder::new() + .unwrap() + .enable_intf("../../../rust/scx_rustland_core/src/bpf/intf.h", "bpf_intf.rs") + .enable_skel("../../../rust/scx_rustland_core/src/bpf/main.bpf.c", "bpf") + .build() + .unwrap(); +} diff --git a/scheds/rust/scx_rlfifo/meson.build b/scheds/rust/scx_rlfifo/meson.build new file mode 100644 index 000000000..72ad547d8 --- /dev/null +++ b/scheds/rust/scx_rlfifo/meson.build @@ -0,0 +1,7 @@ +custom_target('scx_rlfifo', + output: '@PLAINNAME@.__PHONY__', + input: 'Cargo.toml', + command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@', + cargo_build_args], + env: cargo_env, + build_by_default: true) diff --git a/scheds/rust/scx_rlfifo/rustfmt.toml b/scheds/rust/scx_rlfifo/rustfmt.toml new file mode 100644 index 000000000..b7258ed0a --- /dev/null +++ b/scheds/rust/scx_rlfifo/rustfmt.toml @@ -0,0 +1,8 @@ +# Get help on options with `rustfmt --help=config` +# Please keep these in alphabetical order. +edition = "2021" +group_imports = "StdExternalCrate" +imports_granularity = "Item" +merge_derives = false +use_field_init_shorthand = true +version = "Two" diff --git a/scheds/rust/scx_rlfifo/src/bpf.rs b/scheds/rust/scx_rlfifo/src/bpf.rs new file mode 100644 index 000000000..eb727b303 --- /dev/null +++ b/scheds/rust/scx_rlfifo/src/bpf.rs @@ -0,0 +1,4 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +include!("../../../../rust/scx_rustland_core/src/bpf.rs"); diff --git a/scheds/rust/scx_rlfifo/src/bpf_intf.rs b/scheds/rust/scx_rlfifo/src/bpf_intf.rs new file mode 100644 index 000000000..9db020efd --- /dev/null +++ b/scheds/rust/scx_rlfifo/src/bpf_intf.rs @@ -0,0 +1,9 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. 
+ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(dead_code)] + +include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs")); diff --git a/scheds/rust/scx_rlfifo/src/bpf_skel.rs b/scheds/rust/scx_rlfifo/src/bpf_skel.rs new file mode 100644 index 000000000..c42af33d6 --- /dev/null +++ b/scheds/rust/scx_rlfifo/src/bpf_skel.rs @@ -0,0 +1,4 @@ +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs")); diff --git a/scheds/rust/scx_rlfifo/src/main.rs b/scheds/rust/scx_rlfifo/src/main.rs new file mode 100644 index 000000000..2f9c3905a --- /dev/null +++ b/scheds/rust/scx_rlfifo/src/main.rs @@ -0,0 +1,118 @@ +// Copyright (c) Andrea Righi + +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. +mod bpf_skel; +pub use bpf_skel::*; +pub mod bpf_intf; + +mod bpf; +use bpf::*; + +use scx_utils::Topology; + +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use std::time::{Duration, SystemTime}; + +use anyhow::Result; + +struct Scheduler<'a> { + bpf: BpfScheduler<'a>, +} + +impl<'a> Scheduler<'a> { + fn init() -> Result { + let topo = Topology::new().expect("Failed to build host topology"); + let bpf = BpfScheduler::init(5000, topo.nr_cpus() as i32, false, false, false)?; + Ok(Self { bpf }) + } + + fn now() -> u64 { + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + fn dispatch_tasks(&mut self) { + loop { + // Get queued taks and dispatch them in order (FIFO). + match self.bpf.dequeue_task() { + Ok(Some(task)) => { + // task.cpu < 0 is used to to notify an exiting task, in this + // case we can simply ignore the task. 
+ if task.cpu >= 0 { + let _ = self.bpf.dispatch_task(&DispatchedTask { + pid: task.pid, + cpu: task.cpu, + cpumask_cnt: task.cpumask_cnt, + payload: 0, + }); + } + // Give the task a chance to run and prevent overflowing the dispatch queue. + std::thread::yield_now(); + } + Ok(None) => { + // Notify the BPF component that all tasks have been dispatched. + *self.bpf.nr_queued_mut() = 0; + *self.bpf.nr_scheduled_mut() = 0; + + // All queued tasks have been dipatched, add a short sleep to reduce + // scheduler's CPU consuption. + std::thread::sleep(Duration::from_millis(1)); + break; + } + Err(_) => { + break; + } + } + } + } + + fn print_stats(&mut self) { + let nr_user_dispatches = *self.bpf.nr_user_dispatches_mut(); + let nr_kernel_dispatches = *self.bpf.nr_kernel_dispatches_mut(); + let nr_cancel_dispatches = *self.bpf.nr_cancel_dispatches_mut(); + let nr_bounce_dispatches = *self.bpf.nr_bounce_dispatches_mut(); + let nr_failed_dispatches = *self.bpf.nr_failed_dispatches_mut(); + let nr_sched_congested = *self.bpf.nr_sched_congested_mut(); + + println!( + "user={} kernel={} cancel={} bounce={} fail={} cong={}", + nr_user_dispatches, nr_kernel_dispatches, + nr_cancel_dispatches, nr_bounce_dispatches, + nr_failed_dispatches, nr_sched_congested, + ); + } + + fn run(&mut self, shutdown: Arc) -> Result<()> { + let mut prev_ts = Self::now(); + + while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() { + self.dispatch_tasks(); + + let curr_ts = Self::now(); + if curr_ts > prev_ts { + self.print_stats(); + prev_ts = curr_ts; + } + } + + self.bpf.shutdown_and_report() + } +} + +fn main() -> Result<()> { + let mut sched = Scheduler::init()?; + let shutdown = Arc::new(AtomicBool::new(false)); + let shutdown_clone = shutdown.clone(); + + ctrlc::set_handler(move || { + shutdown_clone.store(true, Ordering::Relaxed); + })?; + + sched.run(shutdown) +} From e23426e299b2c46a07c63c2201b30d3bee87cfe7 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 27 Feb 2024 08:59:27 
+0100 Subject: [PATCH 5/8] scx_rustland_core: introduce method bpf.update_tasks() Introduce a helper function to update the counter of queued and scheduled tasks (used to notify the BPF component if the user-space scheduler still has some pending work to do). Signed-off-by: Andrea Righi --- rust/scx_rustland_core/README.md | 3 +-- rust/scx_rustland_core/src/bpf.rs | 19 +++++++++++++++++++ scheds/rust/scx_rlfifo/src/main.rs | 5 ++--- scheds/rust/scx_rustland/src/main.rs | 11 +++++------ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/rust/scx_rustland_core/README.md b/rust/scx_rustland_core/README.md index e2d191b69..f4675d351 100644 --- a/rust/scx_rustland_core/README.md +++ b/rust/scx_rustland_core/README.md @@ -52,8 +52,7 @@ impl<'a> Scheduler<'a> { } Ok(None) => { // Notify the BPF component that all tasks have been dispatched. - *self.bpf.nr_queued_mut() = 0; - *self.bpf.nr_scheduled_mut() = 0; + self.bpf.update_tasks(Some(0), Some(0))? break; } diff --git a/rust/scx_rustland_core/src/bpf.rs b/rust/scx_rustland_core/src/bpf.rs index 83692aeef..090ea1072 100644 --- a/rust/scx_rustland_core/src/bpf.rs +++ b/rust/scx_rustland_core/src/bpf.rs @@ -242,6 +242,23 @@ impl<'cb> BpfScheduler<'cb> { } } + // Update the amount of tasks that have been queued to the user-space scheduler and dispatched. + // + // This method is used to notify the BPF component if the user-space scheduler has still some + // pending actions to complete (based on the counter of queued and scheduled tasks). + // + // NOTE: do not set allow(dead_code) for this method, any scheduler must use this method at + // some point, otherwise the BPF component will keep waking-up the user-space scheduler in a + // busy loop, causing unnecessary high CPU consumption. 
+ pub fn update_tasks(&mut self, nr_queued: Option, nr_scheduled: Option) { + if let Some(queued) = nr_queued { + self.skel.bss_mut().nr_queued = queued; + } + if let Some(scheduled) = nr_scheduled { + self.skel.bss_mut().nr_scheduled = scheduled; + } + } + // Override the default scheduler time slice (in us). #[allow(dead_code)] pub fn set_effective_slice_us(&mut self, slice_us: u64) { @@ -261,11 +278,13 @@ impl<'cb> BpfScheduler<'cb> { } // Counter of queued tasks. + #[allow(dead_code)] pub fn nr_queued_mut(&mut self) -> &mut u64 { &mut self.skel.bss_mut().nr_queued } // Counter of scheduled tasks. + #[allow(dead_code)] pub fn nr_scheduled_mut(&mut self) -> &mut u64 { &mut self.skel.bss_mut().nr_scheduled } diff --git a/scheds/rust/scx_rlfifo/src/main.rs b/scheds/rust/scx_rlfifo/src/main.rs index 2f9c3905a..9b61e5b69 100644 --- a/scheds/rust/scx_rlfifo/src/main.rs +++ b/scheds/rust/scx_rlfifo/src/main.rs @@ -56,9 +56,8 @@ impl<'a> Scheduler<'a> { std::thread::yield_now(); } Ok(None) => { - // Notify the BPF component that all tasks have been dispatched. - *self.bpf.nr_queued_mut() = 0; - *self.bpf.nr_scheduled_mut() = 0; + // Notify the BPF component that all tasks have been scheduled and dispatched. + self.bpf.update_tasks(Some(0), Some(0)); // All queued tasks have been dipatched, add a short sleep to reduce // scheduler's CPU consuption. diff --git a/scheds/rust/scx_rustland/src/main.rs b/scheds/rust/scx_rustland/src/main.rs index 5cb5eba44..a43b132f0 100644 --- a/scheds/rust/scx_rustland/src/main.rs +++ b/scheds/rust/scx_rustland/src/main.rs @@ -472,8 +472,7 @@ impl<'a> Scheduler<'a> { // Reset nr_queued and update nr_scheduled, to notify the dispatcher that // queued tasks are drained, but there is still some work left to do in the // scheduler. 
- *self.bpf.nr_queued_mut() = 0; - *self.bpf.nr_scheduled_mut() = self.task_pool.tasks.len() as u64; + self.bpf.update_tasks(Some(0), Some(self.task_pool.tasks.len() as u64)); break; } Err(err) => { @@ -532,10 +531,10 @@ impl<'a> Scheduler<'a> { None => break, } } - // Reset nr_scheduled to notify the dispatcher that all the tasks received by the scheduler - // has been dispatched, so there is no reason to re-activate the scheduler, unless more - // tasks are queued. - self.bpf.skel.bss_mut().nr_scheduled = self.task_pool.tasks.len() as u64; + // Update nr_scheduled to notify the dispatcher that all the tasks received by the + // scheduler has been dispatched, so there is no reason to re-activate the scheduler, + // unless more tasks are queued. + self.bpf.update_tasks(None, Some(self.task_pool.tasks.len() as u64)); } // Main scheduling function (called in a loop to periodically drain tasks from the queued list From 2ac1a5924faca02c721b909f34cb6dde7258afcf Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 27 Feb 2024 17:54:54 +0100 Subject: [PATCH 6/8] scx_rustland_core: introduce RustLandBuilder() Introduce a wrapper to scx_utils::BpfBuilder that can be used to build the BPF component provided by scx_rustland_core. The source of the BPF components (main.bpf.c) is included in the crate as an array of bytes, the content is then unpacked in a temporary file to perform the build. The RustLandBuilder() helper is also used to generate bpf.rs (that implements the low-level user-space Rust connector to the BPF component). Schedulers based on scx_rustland_core can simply use RustLandBuilder(), to build the backend provided by scx_rustland_core. 
Signed-off-by: Andrea Righi --- rust/scx_rustland_core/Cargo.toml | 22 +++--- rust/scx_rustland_core/src/lib.rs | 3 + .../scx_rustland_core/src/rustland_builder.rs | 67 +++++++++++++++++++ scheds/rust/scx_rlfifo/.gitignore | 1 + scheds/rust/scx_rlfifo/Cargo.toml | 2 +- scheds/rust/scx_rlfifo/build.rs | 4 +- scheds/rust/scx_rlfifo/src/bpf.rs | 4 -- scheds/rust/scx_rustland/.gitignore | 1 + scheds/rust/scx_rustland/Cargo.toml | 3 +- scheds/rust/scx_rustland/build.rs | 4 +- scheds/rust/scx_rustland/src/bpf.rs | 4 -- 11 files changed, 84 insertions(+), 31 deletions(-) create mode 100644 rust/scx_rustland_core/src/rustland_builder.rs delete mode 100644 scheds/rust/scx_rlfifo/src/bpf.rs delete mode 100644 scheds/rust/scx_rustland/src/bpf.rs diff --git a/rust/scx_rustland_core/Cargo.toml b/rust/scx_rustland_core/Cargo.toml index d8949d4fb..40879a609 100644 --- a/rust/scx_rustland_core/Cargo.toml +++ b/rust/scx_rustland_core/Cargo.toml @@ -7,26 +7,20 @@ license = "GPL-2.0-only" repository = "https://github.com/sched-ext/scx" description = "Framework to implement sched_ext schedulers running in user space" +include = [ + "src/bpf/intf.h", + "src/bpf/main.bpf.c", +] + [dependencies] anyhow = "1.0" -bitvec = { version = "1.0", features = ["serde"] } -# FIXME - We need to allow both 0.68 and 0.69 to accommodate fedora. See the -# comment in BpfBuilder::bindgen_bpf_intf() for details. 
-bindgen = ">=0.68, <0.70" -glob = "0.3" -hex = "0.4.3" -lazy_static = "1.4" -libbpf-cargo = "0.22" libbpf-rs = "0.22.0" libc = "0.2.137" buddy-alloc = "0.5.1" -log = "0.4.17" -regex = "1.10" -sscanf = "0.4" -tar = "0.4" -version-compare = "0.1" +tempfile = "3.10.1" +scx_utils = { path = "../scx_utils", version = "0.6" } [build-dependencies] -bindgen = ">=0.68, <0.70" tar = "0.4" walkdir = "2.4" +scx_utils = { path = "../scx_utils", version = "0.6" } diff --git a/rust/scx_rustland_core/src/lib.rs b/rust/scx_rustland_core/src/lib.rs index fdb3bca71..64e7c55ab 100644 --- a/rust/scx_rustland_core/src/lib.rs +++ b/rust/scx_rustland_core/src/lib.rs @@ -2,3 +2,6 @@ mod bindings; mod alloc; pub use alloc::ALLOCATOR; + +mod rustland_builder; +pub use rustland_builder::RustLandBuilder; diff --git a/rust/scx_rustland_core/src/rustland_builder.rs b/rust/scx_rustland_core/src/rustland_builder.rs new file mode 100644 index 000000000..11514bbeb --- /dev/null +++ b/rust/scx_rustland_core/src/rustland_builder.rs @@ -0,0 +1,67 @@ +// Copyright (c) Andrea Righi + +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. 
+ +use anyhow::Result; + +use std::error::Error; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use tempfile::tempdir; + +use scx_utils::BpfBuilder; + +pub struct RustLandBuilder { + inner_builder: BpfBuilder, + temp_dir: tempfile::TempDir, +} + +impl RustLandBuilder { + pub fn new() -> Result { + Ok(Self { + inner_builder: BpfBuilder::new()?, + temp_dir: tempdir()?, + }) + } + + fn create_temp_file( + &mut self, + file_name: &str, + content: &[u8], + ) -> Result> { + let mut temp_file = self.temp_dir.as_ref().to_path_buf(); + temp_file.push(file_name); + let mut file = File::create(&temp_file)?; + file.write_all(content)?; + + Ok(temp_file) + } + + fn get_bpf_rs_content(&self) -> &'static str { + include_str!("bpf.rs") + } + + pub fn build(&mut self) -> Result<()> { + // Embed the BPF source files in the crate. + let intf = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/intf.h")); + let skel = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/main.bpf.c")); + + let intf_path = self.create_temp_file("intf.h", intf).unwrap(); + let skel_path = self.create_temp_file("main.bpf.c", skel).unwrap(); + + self.inner_builder + .enable_intf(intf_path.to_str().unwrap(), "bpf_intf.rs"); + self.inner_builder + .enable_skel(skel_path.to_str().unwrap(), "bpf"); + + let content = self.get_bpf_rs_content(); + let path = Path::new("src/bpf.rs"); + let mut file = File::create(&path).expect("Unable to create file"); + file.write_all(content.as_bytes()).expect("Unable to write to file"); + + self.inner_builder.build() + } +} diff --git a/scheds/rust/scx_rlfifo/.gitignore b/scheds/rust/scx_rlfifo/.gitignore index 186dba259..0f2e3015b 100644 --- a/scheds/rust/scx_rlfifo/.gitignore +++ b/scheds/rust/scx_rlfifo/.gitignore @@ -1,3 +1,4 @@ src/bpf/.output +bpf.rs Cargo.lock target diff --git a/scheds/rust/scx_rlfifo/Cargo.toml b/scheds/rust/scx_rlfifo/Cargo.toml index 2d1591372..6128db1f1 100644 --- a/scheds/rust/scx_rlfifo/Cargo.toml 
+++ b/scheds/rust/scx_rlfifo/Cargo.toml @@ -9,7 +9,6 @@ license = "GPL-2.0-only" [dependencies] anyhow = "1.0.65" ctrlc = { version = "3.1", features = ["termination"] } -hex = "0.4.3" libbpf-rs = "0.22.0" libc = "0.2.137" scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } @@ -17,6 +16,7 @@ scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" [build-dependencies] scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } +scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" } [features] enable_backtrace = [] diff --git a/scheds/rust/scx_rlfifo/build.rs b/scheds/rust/scx_rlfifo/build.rs index 926487817..8b8285790 100644 --- a/scheds/rust/scx_rlfifo/build.rs +++ b/scheds/rust/scx_rlfifo/build.rs @@ -2,10 +2,8 @@ // GNU General Public License version 2. fn main() { - scx_utils::BpfBuilder::new() + scx_rustland_core::RustLandBuilder::new() .unwrap() - .enable_intf("../../../rust/scx_rustland_core/src/bpf/intf.h", "bpf_intf.rs") - .enable_skel("../../../rust/scx_rustland_core/src/bpf/main.bpf.c", "bpf") .build() .unwrap(); } diff --git a/scheds/rust/scx_rlfifo/src/bpf.rs b/scheds/rust/scx_rlfifo/src/bpf.rs deleted file mode 100644 index eb727b303..000000000 --- a/scheds/rust/scx_rlfifo/src/bpf.rs +++ /dev/null @@ -1,4 +0,0 @@ -// This software may be used and distributed according to the terms of the -// GNU General Public License version 2. 
- -include!("../../../../rust/scx_rustland_core/src/bpf.rs"); diff --git a/scheds/rust/scx_rustland/.gitignore b/scheds/rust/scx_rustland/.gitignore index 186dba259..0f2e3015b 100644 --- a/scheds/rust/scx_rustland/.gitignore +++ b/scheds/rust/scx_rustland/.gitignore @@ -1,3 +1,4 @@ src/bpf/.output +bpf.rs Cargo.lock target diff --git a/scheds/rust/scx_rustland/Cargo.toml b/scheds/rust/scx_rustland/Cargo.toml index c51df7bcf..51f8bafbc 100644 --- a/scheds/rust/scx_rustland/Cargo.toml +++ b/scheds/rust/scx_rustland/Cargo.toml @@ -8,11 +8,9 @@ license = "GPL-2.0-only" [dependencies] anyhow = "1.0.65" -bitvec = { version = "1.0", features = ["serde"] } clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] } ctrlc = { version = "3.1", features = ["termination"] } fb_procfs = "0.7.0" -hex = "0.4.3" libbpf-rs = "0.22.0" libc = "0.2.137" log = "0.4.17" @@ -23,6 +21,7 @@ simplelog = "0.12.0" [build-dependencies] scx_utils = { path = "../../../rust/scx_utils", version = "0.6" } +scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" } [features] enable_backtrace = [] diff --git a/scheds/rust/scx_rustland/build.rs b/scheds/rust/scx_rustland/build.rs index 926487817..8b8285790 100644 --- a/scheds/rust/scx_rustland/build.rs +++ b/scheds/rust/scx_rustland/build.rs @@ -2,10 +2,8 @@ // GNU General Public License version 2. fn main() { - scx_utils::BpfBuilder::new() + scx_rustland_core::RustLandBuilder::new() .unwrap() - .enable_intf("../../../rust/scx_rustland_core/src/bpf/intf.h", "bpf_intf.rs") - .enable_skel("../../../rust/scx_rustland_core/src/bpf/main.bpf.c", "bpf") .build() .unwrap(); } diff --git a/scheds/rust/scx_rustland/src/bpf.rs b/scheds/rust/scx_rustland/src/bpf.rs deleted file mode 100644 index eb727b303..000000000 --- a/scheds/rust/scx_rustland/src/bpf.rs +++ /dev/null @@ -1,4 +0,0 @@ -// This software may be used and distributed according to the terms of the -// GNU General Public License version 2. 
- -include!("../../../../rust/scx_rustland_core/src/bpf.rs"); From 06d8170f9f17d6f9a84e999c42153ecb2d4c4cbe Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 28 Feb 2024 08:28:18 +0100 Subject: [PATCH 7/8] scx_utils: introduce Builder() Introduce a Builder() class in scx_utils that can be used by other scx crates (such as scx_rustland_core) to prevent code duplication. Signed-off-by: Andrea Righi --- rust/scx_rustland_core/build.rs | 47 ++----------------------- rust/scx_utils/Cargo.toml | 1 + rust/scx_utils/build.rs | 47 ++----------------------- rust/scx_utils/src/builder.rs | 61 +++++++++++++++++++++++++++++++++ rust/scx_utils/src/lib.rs | 3 ++ 5 files changed, 69 insertions(+), 90 deletions(-) create mode 100644 rust/scx_utils/src/builder.rs diff --git a/rust/scx_rustland_core/build.rs b/rust/scx_rustland_core/build.rs index ad00c19de..1eaf10aba 100644 --- a/rust/scx_rustland_core/build.rs +++ b/rust/scx_rustland_core/build.rs @@ -3,51 +3,8 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2. -use std::env; -use std::fs::File; -use std::path::PathBuf; - -const BPF_H: &str = "bpf_h"; - -fn gen_bpf_h() { - let file = - File::create(PathBuf::from(env::var("OUT_DIR").unwrap()).join(format!("{}.tar", BPF_H))) - .unwrap(); - let mut ar = tar::Builder::new(file); - - ar.follow_symlinks(false); - ar.append_dir_all(".", BPF_H).unwrap(); - ar.finish().unwrap(); - - for ent in walkdir::WalkDir::new(BPF_H) { - let ent = ent.unwrap(); - if !ent.file_type().is_dir() { - println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy()); - } - } -} - -fn gen_bindings() { - // FIXME - bindgen's API changed between 0.68 and 0.69 so that - // `bindgen::CargoCallbacks::new()` should be used instead of - // `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is - // shipping 0.68. To accommodate fedora, allow both 0.68 and 0.69 of - // bindgen and suppress deprecation warning. 
Remove the following once - // fedora can be updated to bindgen >= 0.69. - #[allow(deprecated)] - let bindings = bindgen::Builder::default() - .header("bindings.h") - .allowlist_type("scx_exit_kind") - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - .generate() - .expect("Unable to generate bindings"); - - bindings - .write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs")) - .expect("Couldn't write bindings"); -} +use scx_utils::Builder; fn main() { - gen_bpf_h(); - gen_bindings(); + Builder::new().build() } diff --git a/rust/scx_utils/Cargo.toml b/rust/scx_utils/Cargo.toml index 3ae5b212b..305bc4173 100644 --- a/rust/scx_utils/Cargo.toml +++ b/rust/scx_utils/Cargo.toml @@ -23,6 +23,7 @@ log = "0.4.17" regex = "1.10" sscanf = "0.4" tar = "0.4" +walkdir = "2.4" version-compare = "0.1" [build-dependencies] diff --git a/rust/scx_utils/build.rs b/rust/scx_utils/build.rs index ad00c19de..16696e311 100644 --- a/rust/scx_utils/build.rs +++ b/rust/scx_utils/build.rs @@ -3,51 +3,8 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2. -use std::env; -use std::fs::File; -use std::path::PathBuf; - -const BPF_H: &str = "bpf_h"; - -fn gen_bpf_h() { - let file = - File::create(PathBuf::from(env::var("OUT_DIR").unwrap()).join(format!("{}.tar", BPF_H))) - .unwrap(); - let mut ar = tar::Builder::new(file); - - ar.follow_symlinks(false); - ar.append_dir_all(".", BPF_H).unwrap(); - ar.finish().unwrap(); - - for ent in walkdir::WalkDir::new(BPF_H) { - let ent = ent.unwrap(); - if !ent.file_type().is_dir() { - println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy()); - } - } -} - -fn gen_bindings() { - // FIXME - bindgen's API changed between 0.68 and 0.69 so that - // `bindgen::CargoCallbacks::new()` should be used instead of - // `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is - // shipping 0.68. 
To accommodate fedora, allow both 0.68 and 0.69 of - // bindgen and suppress deprecation warning. Remove the following once - // fedora can be updated to bindgen >= 0.69. - #[allow(deprecated)] - let bindings = bindgen::Builder::default() - .header("bindings.h") - .allowlist_type("scx_exit_kind") - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - .generate() - .expect("Unable to generate bindings"); - - bindings - .write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs")) - .expect("Couldn't write bindings"); -} +include!("src/builder.rs"); fn main() { - gen_bpf_h(); - gen_bindings(); + Builder::new().build() } diff --git a/rust/scx_utils/src/builder.rs b/rust/scx_utils/src/builder.rs new file mode 100644 index 000000000..f55b29ad6 --- /dev/null +++ b/rust/scx_utils/src/builder.rs @@ -0,0 +1,61 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2. + +use std::env; +use std::fs::File; +use std::path::PathBuf; + +const BPF_H: &str = "bpf_h"; + +pub struct Builder; + +impl Builder { + pub fn new() -> Self { + Builder + } + + fn gen_bpf_h(&self) { + let out_dir = env::var("OUT_DIR").unwrap(); + let file = File::create(PathBuf::from(&out_dir).join(format!("{}.tar", BPF_H))).unwrap(); + let mut ar = tar::Builder::new(file); + + ar.follow_symlinks(false); + ar.append_dir_all(".", BPF_H).unwrap(); + ar.finish().unwrap(); + + for ent in walkdir::WalkDir::new(BPF_H) { + let ent = ent.unwrap(); + if !ent.file_type().is_dir() { + println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy()); + } + } + } + + fn gen_bindings(&self) { + let out_dir = env::var("OUT_DIR").unwrap(); + // FIXME - bindgen's API changed between 0.68 and 0.69 so that + // `bindgen::CargoCallbacks::new()` should be used instead of + // `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is + // shipping 0.68. 
To accommodate fedora, allow both 0.68 and 0.69 of + // bindgen and suppress deprecation warning. Remove the following once + // fedora can be updated to bindgen >= 0.69. + #[allow(deprecated)] + let bindings = bindgen::Builder::default() + .header("bindings.h") + .allowlist_type("scx_exit_kind") + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .expect("Unable to generate bindings"); + + bindings + .write_to_file(PathBuf::from(&out_dir).join("bindings.rs")) + .expect("Couldn't write bindings"); + } + + pub fn build(self) { + self.gen_bpf_h(); + self.gen_bindings(); + } +} diff --git a/rust/scx_utils/src/lib.rs b/rust/scx_utils/src/lib.rs index a262d0955..18c7de468 100644 --- a/rust/scx_utils/src/lib.rs +++ b/rust/scx_utils/src/lib.rs @@ -35,6 +35,9 @@ mod bindings; mod bpf_builder; pub use bpf_builder::BpfBuilder; +mod builder; +pub use builder::Builder; + pub mod ravg; mod libbpf_logger; From 0d1c6555a40e41d542239f907a7b34bf0955e47e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 28 Feb 2024 12:56:40 +0100 Subject: [PATCH 8/8] scx_rustland_core: generate source files in-tree There is no need to generate source code in a temporary directory with RustLandBuilder(), we can simply generate code in-tree and exclude the generated source files via .gitignore. Having the generated source files in-tree can help to debug potential build issues (and it also allows to drop the tempfile crate dependency). 
Signed-off-by: Andrea Righi --- rust/scx_rustland_core/Cargo.toml | 2 +- .../scx_rustland_core/src/rustland_builder.rs | 47 ++++++------------- scheds/rust/scx_rlfifo/.gitignore | 2 + scheds/rust/scx_rustland/.gitignore | 2 + 4 files changed, 20 insertions(+), 33 deletions(-) diff --git a/rust/scx_rustland_core/Cargo.toml b/rust/scx_rustland_core/Cargo.toml index 40879a609..91f40415a 100644 --- a/rust/scx_rustland_core/Cargo.toml +++ b/rust/scx_rustland_core/Cargo.toml @@ -10,6 +10,7 @@ description = "Framework to implement sched_ext schedulers running in user space include = [ "src/bpf/intf.h", "src/bpf/main.bpf.c", + "src/bpf.rs", ] [dependencies] @@ -17,7 +18,6 @@ anyhow = "1.0" libbpf-rs = "0.22.0" libc = "0.2.137" buddy-alloc = "0.5.1" -tempfile = "3.10.1" scx_utils = { path = "../scx_utils", version = "0.6" } [build-dependencies] diff --git a/rust/scx_rustland_core/src/rustland_builder.rs b/rust/scx_rustland_core/src/rustland_builder.rs index 11514bbeb..7e7fd88fd 100644 --- a/rust/scx_rustland_core/src/rustland_builder.rs +++ b/rust/scx_rustland_core/src/rustland_builder.rs @@ -5,63 +5,46 @@ use anyhow::Result; -use std::error::Error; use std::fs::File; use std::io::Write; -use std::path::{Path, PathBuf}; - -use tempfile::tempdir; +use std::path::Path; use scx_utils::BpfBuilder; pub struct RustLandBuilder { inner_builder: BpfBuilder, - temp_dir: tempfile::TempDir, } impl RustLandBuilder { pub fn new() -> Result { Ok(Self { inner_builder: BpfBuilder::new()?, - temp_dir: tempdir()?, }) } - fn create_temp_file( - &mut self, - file_name: &str, - content: &[u8], - ) -> Result> { - let mut temp_file = self.temp_dir.as_ref().to_path_buf(); - temp_file.push(file_name); - let mut file = File::create(&temp_file)?; - file.write_all(content)?; - - Ok(temp_file) - } - - fn get_bpf_rs_content(&self) -> &'static str { - include_str!("bpf.rs") + fn create_file(&self, file_name: &str, content: &[u8]) { + let path = Path::new(file_name); + let mut file = 
File::create(&path).expect("Unable to create file"); + file.write_all(content).expect("Unable to write to file"); } pub fn build(&mut self) -> Result<()> { - // Embed the BPF source files in the crate. + // Embed the BPF source files. let intf = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/intf.h")); let skel = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/main.bpf.c")); + let bpf = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf.rs")); - let intf_path = self.create_temp_file("intf.h", intf).unwrap(); - let skel_path = self.create_temp_file("main.bpf.c", skel).unwrap(); + // Generate BPF backend code (C). + self.create_file("intf.h", intf); + self.create_file("main.bpf.c", skel); - self.inner_builder - .enable_intf(intf_path.to_str().unwrap(), "bpf_intf.rs"); - self.inner_builder - .enable_skel(skel_path.to_str().unwrap(), "bpf"); + self.inner_builder.enable_intf("intf.h", "bpf_intf.rs"); + self.inner_builder.enable_skel("main.bpf.c", "bpf"); - let content = self.get_bpf_rs_content(); - let path = Path::new("src/bpf.rs"); - let mut file = File::create(&path).expect("Unable to create file"); - file.write_all(content.as_bytes()).expect("Unable to write to file"); + // Generate user-space BPF connector code (Rust). + self.create_file("src/bpf.rs", bpf); + // Build the scheduler. self.inner_builder.build() } } diff --git a/scheds/rust/scx_rlfifo/.gitignore b/scheds/rust/scx_rlfifo/.gitignore index 0f2e3015b..3afb3353f 100644 --- a/scheds/rust/scx_rlfifo/.gitignore +++ b/scheds/rust/scx_rlfifo/.gitignore @@ -1,4 +1,6 @@ src/bpf/.output +intf.h +main.bpf.c bpf.rs Cargo.lock target diff --git a/scheds/rust/scx_rustland/.gitignore b/scheds/rust/scx_rustland/.gitignore index 0f2e3015b..3afb3353f 100644 --- a/scheds/rust/scx_rustland/.gitignore +++ b/scheds/rust/scx_rustland/.gitignore @@ -1,4 +1,6 @@ src/bpf/.output +intf.h +main.bpf.c bpf.rs Cargo.lock target