-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
(ADD): Deepseekv3 experimentation / testing dp attn with fp8
- Loading branch information
Showing
1 changed file
with
81 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,88 @@ | ||
# SGLang basic commands: launch_server, bench_one_batch (standalone, no server
# needed) and bench_serving (requires a running launch_server instance).
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code --port 20000
python3 -m sglang.bench_one_batch --trust-remote-code --model-path deepseek-ai/DeepSeek-V3 --batch 32 --input-len 256 --output-len 32 --tp 8 --result-filename bench_one_batch_result.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --port 20000

# Docker single-node command: (FP8 version) * PROVISIONAL *
# Kept inside a ': <<quoted-string>>' no-op so it is documentation, not executed.
: '
docker run --gpus all \
  --shm-size 32g \
  --network=host \
  -v /mnt/co-research/shared-models:/root/.cache/huggingface \
  --name sglang_singlenodeFP8 \
  -it \
  --rm \
  --env "HF_TOKEN=$HF_TOKEN" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --quantization fp8 --kv-cache-dtype fp8_e5m2 --trust-remote-code --host 0.0.0.0 --port 40000 --enable-dp-attention
'
# Llama 3.1-8B testing (with and without torch.compile, for A/B comparison)
python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --tp-size 1 --trust-remote-code --port 20000 --enable-torch-compile
python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --tp 1 --trust-remote-code --port 20000

# Docker multi-node command: (BF16 version) * PROVISIONAL *
# Node0: * PROVISIONAL * — rank 0 hosts the dist init address.
: '
docker run --gpus all \
  --shm-size 32g \
  --network=host \
  -v /mnt/co-research/shared-models:/root/.cache/huggingface \
  --name sglang_multinode0 \
  -it \
  --rm \
  --env "HF_TOKEN=$HF_TOKEN" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --host 0.0.0.0 --port 40000
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1000 --port 20000 --output-file llama3_1_torch_compile.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1000 --port 20000 --output-file llama3_1_NO_torch_compile.jsonl
'
# Singlenode FP8 (quantized weights + FP8 e5m2 KV cache)
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp-size 8 --trust-remote-code --port 20000 --quantization fp8 --kv-cache-dtype fp8_e5m2

# Node1: * PROVISIONAL * — rank 1 joins the dist init address announced by Node0.
: '
docker run --gpus all \
  --shm-size 32g \
  --network=host \
  -v /mnt/co-research/shared-models:/root/.cache/huggingface \
  --name sglang_multinode1 \
  -it \
  --rm \
  --env "HF_TOKEN=$HF_TOKEN" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
# Multinode testing
ERROR GLOO
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --port 20000
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --port 20000
'
# Not working — kept as a record of failed attempts (legacy --nccl-init flag,
# hostname-based init address). NOTE(review): current SGLang uses
# --dist-init-addr; confirm before retrying these.
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 0
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1

# Docker basic client command: * PROVISIONAL * — benchmark client aimed at a
# server on port 40000.
: '
docker run --gpus all \
  --shm-size 32g \
  --network=host \
  -v /mnt/co-research/shared-models:/root/.cache/huggingface \
  --name sglang_bnchmrk_client \
  -it \
  --rm \
  --env "HF_TOKEN=$HF_TOKEN" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --host 0.0.0.0 --port 40000
'
# ERROR GLOO — pinning GLOO_SOCKET_IFNAME to the host NIC; kept as a record of
# the attempted workaround.
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 0
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1

# 8xH200 FP8/BF16
# Online — request rate sweep (1/2/4/8 req/s, num-prompts scaled to keep ~300s
# of load). NOTE: all four runs append to the same output file.
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 300 --request-rate 1 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 600 --request-rate 2 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 1200 --request-rate 4 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 2400 --request-rate 8 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl

# Offline — no request-rate throttling (max throughput)
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 300 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 600 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 1200 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 2400 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl

# 2x8xH200 BF16 (multi-node server on port 40000)
# Online
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 300 --request-rate 1 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 600 --request-rate 2 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 1200 --request-rate 4 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 2400 --request-rate 8 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl

# Offline
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 300 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 600 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 1200 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompts 2400 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl