From ce094a5d79aa3794f0cdb86ebf03e4897764e1bd Mon Sep 17 00:00:00 2001 From: Ata Fatahi Date: Tue, 17 Dec 2024 06:42:40 -0500 Subject: [PATCH] Clean up GPU memory after killing sglang processes (#2457) Signed-off-by: Ata Fatahi --- .github/workflows/pr-test-rust.yml | 2 +- scripts/killall_sglang.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml index 9cfb5f6d961..928d0efa5b3 100644 --- a/.github/workflows/pr-test-rust.yml +++ b/.github/workflows/pr-test-rust.yml @@ -60,7 +60,7 @@ jobs: pip install --force-reinstall dist/*.whl - name: Run e2e test run: | - bash scripts/killall_sglang.sh + bash scripts/killall_sglang.sh "nuk_gpus" cd sgl-router/py_test python3 run_suite.py diff --git a/scripts/killall_sglang.sh b/scripts/killall_sglang.sh index 3696a1c35f4..4057d2be2fb 100755 --- a/scripts/killall_sglang.sh +++ b/scripts/killall_sglang.sh @@ -11,4 +11,9 @@ kill -9 $(ps aux | grep 'sglang.bench' | grep -v 'grep' | awk '{print $2}') 2>/d # Clean all GPU processes if any argument is provided if [ $# -gt 0 ]; then kill -9 $(nvidia-smi | sed -n '/Processes:/,$p' | grep " [0-9]" | awk '{print $5}') 2>/dev/null + lsof /dev/nvidia* | awk '{print $2}' | xargs kill -9 2>/dev/null fi + + +# Show GPU status after clean up +nvidia-smi