# Patch summary (git unified diff, two files changed).
#
# 1. .github/workflows/pr-test-rust.yml, step "Run e2e test":
#    scripts/killall_sglang.sh is now invoked with one argument, "nuke_gpus".
#
# 2. scripts/killall_sglang.sh, section "Clean all GPU processes":
#    the two cleanup commands are moved inside `if [ $# -gt 0 ]; then ... fi`,
#    so they run only when the script is called with at least one argument:
#      - kill -9 on field 5 of the rows in the nvidia-smi "Processes:" section
#        (presumably the PID column -- confirm against your nvidia-smi version);
#      - kill -9 on field 2 of `lsof /dev/nvidia*` output (presumably the PID).
#    The sglang.bench kill shown as context above the hunk and the final
#    `nvidia-smi` status call are context lines and stay unconditional.
#
# NOTE(review): the guard tests only the argument count; the value "nuke_gpus"
# is never compared, so any argument at all enables the GPU cleanup.
# NOTE(review): the hunk headers declare multi-line hunks (-60,7 +60,7 and
# -9,8 +9,11) but the patch text below sits on a single line; presumably the
# line breaks and indentation were lost in extraction -- restore them before
# trying to apply this patch.
diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml index 5e5ecaf6427..d73e855a3b1 100644 --- a/.github/workflows/pr-test-rust.yml +++ b/.github/workflows/pr-test-rust.yml @@ -60,7 +60,7 @@ jobs: pip install --force-reinstall dist/*.whl - name: Run e2e test run: | - bash scripts/killall_sglang.sh + bash scripts/killall_sglang.sh "nuke_gpus" cd rust/py_test python3 run_suite.py diff --git a/scripts/killall_sglang.sh b/scripts/killall_sglang.sh index da9327e3fd1..69ad190564b 100755 --- a/scripts/killall_sglang.sh +++ b/scripts/killall_sglang.sh @@ -9,8 +9,11 @@ kill -9 $(ps aux | grep 'sglang.launch_server' | grep -v 'grep' | awk '{print $2 kill -9 $(ps aux | grep 'sglang.bench' | grep -v 'grep' | awk '{print $2}') 2>/dev/null # Clean all GPU processes -kill -9 $(nvidia-smi | sed -n '/Processes:/,$p' | grep " [0-9]" | awk '{print $5}') 2>/dev/null -lsof /dev/nvidia* | awk '{print $2}' | xargs kill -9 2>/dev/null +if [ $# -gt 0 ]; then + kill -9 $(nvidia-smi | sed -n '/Processes:/,$p' | grep " [0-9]" | awk '{print $5}') 2>/dev/null + lsof /dev/nvidia* | awk '{print $2}' | xargs kill -9 2>/dev/null +fi + # Show GPU status after clean up nvidia-smi