Skip to content

Commit

Permalink
(ADD): Deepseekv3 experimentation / testing dp attn with fp8
Browse files Browse the repository at this point in the history
  • Loading branch information
roG0d committed Jan 3, 2025
1 parent 16e46ed commit 4f02c9f
Showing 1 changed file with 81 additions and 21 deletions.
102 changes: 81 additions & 21 deletions sglang/experiments/deepseek_v3.sh
Original file line number Diff line number Diff line change
@@ -1,28 +1,88 @@
# sglang basic commands:
#   launch_server   - starts the server (port 20000 here)
#   bench_one_batch - runs without launch_server; results go to the file named by --result-filename
#   bench_serving   - requires a running launch_server; targets the same port 20000
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code --port 20000
python3 -m sglang.bench_one_batch --trust-remote-code --model-path deepseek-ai/DeepSeek-V3 --batch 32 --input-len 256 --output-len 32 --tp 8 --result-filename bench_one_batch_result.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --port 20000
# Docker single-node command: (FP8 version) * PROVISIONAL *
: '
docker run --gpus all \
--shm-size 32g \
--network=host \
-v /mnt/co-research/shared-models:/root/.cache/huggingface \
--name sglang_singlenodeFP8 \
-it \
--rm \
--env "HF_TOKEN=$HF_TOKEN" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --quantization fp8 --kv-cache-dtype fp8_e5m2 --trust-remote-code --host 0.0.0.0 --port 40000 --enable-dp-attention
'

# Llama 3.1-8B testing
# Two alternative launches of the same model on port 20000:
# the first adds --enable-torch-compile, the second omits it.
# NOTE(review): both bind port 20000, so presumably only one is run at a time — confirm.
python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --tp-size 1 --trust-remote-code --port 20000 --enable-torch-compile
python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --tp 1 --trust-remote-code --port 20000
# Docker multi-node command: (BF16 version) * PROVISIONAL *
# Node0: * PROVISIONAL *
: '
docker run --gpus all \
--shm-size 32g \
--network=host \
-v /mnt/co-research/shared-models:/root/.cache/huggingface \
--name sglang_multinode0 \
-it \
--rm \
--env "HF_TOKEN=$HF_TOKEN" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --host 0.0.0.0 --port 40000
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1000 --port 20000 --output-file llama3_1_torch_compile.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1000 --port 20000 --output-file llama3_1_NO_torch_compile.jsonl
'

# Single-node FP8: DeepSeek-V3 with --tp-size 8, --quantization fp8 and an
# fp8_e5m2 KV cache (--kv-cache-dtype), served on port 20000.
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp-size 8 --trust-remote-code --port 20000 --quantization fp8 --kv-cache-dtype fp8_e5m2
# Node1: * PROVISIONAL *
: '
docker run --gpus all \
--shm-size 32g \
--network=host \
-v /mnt/co-research/shared-models:/root/.cache/huggingface \
--name sglang_multinode1 \
-it \
--rm \
--env "HF_TOKEN=$HF_TOKEN" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
# Multinode testing
ERROR GLOO
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --port 20000
python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --port 20000
'

# Not working: two-node launch attempt (--nnodes 2, one command per node rank)
# with the init address given as the hostname sgl-dev-0:50000.
# NOTE(review): the flag is spelled --nccl-init here but --nccl-init-addr in the
# GLOO attempt in this file — presumably a prefix abbreviation; confirm.
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 0
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
# Docker basic client command: * PROVISIONAL *
: '
docker run --gpus all \
--shm-size 32g \
--network=host \
-v /mnt/co-research/shared-models:/root/.cache/huggingface \
--name sglang_bnchmrk_client \
-it \
--rm \
--env "HF_TOKEN=$HF_TOKEN" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --host 0.0.0.0 --port 40000
'

# ERROR GLOO: two-node launch attempt (one command per node rank) that the
# author recorded as failing with a GLOO error. Each process is started with
# GLOO_SOCKET_IFNAME=eth0 and the init address is an IP:port (192.168.114.10:20000).
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 0
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --tp 4 --nccl-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1
# 8xH200 FP8/BF16
# Client-side benchmark sweep with random 1024-token inputs and outputs
# (--random-input 1024 --random-output 1024 --random-range-ratio 1).
# No --host/--port is passed, so bench_serving's defaults are used.
# NOTE(review): these commands use --num-prompt while other commands in this
# file use --num-prompts — presumably accepted as a prefix abbreviation; confirm.
#
# Online: --request-rate steps through 1, 2, 4, 8 and --num-prompt scales with
# it (300 prompts per unit of rate). All four runs name the same --output-file.
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --request-rate 1 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --request-rate 2 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --request-rate 4 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --request-rate 8 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl

# Offline: the same four prompt counts with no --request-rate given.
# All four runs name the same --output-file.
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl

# 2x8xH200 BF16
# Same sweep as the 8xH200 section, but pointed explicitly at
# --host 0.0.0.0 --port 40000 (the host/port used by the docker launch
# commands in this file) and written to the deepseek_v3_2x8xh200_* files.
# NOTE(review): these commands use --num-prompt while other commands in this
# file use --num-prompts — presumably accepted as a prefix abbreviation; confirm.
#
# Online: --request-rate steps through 1, 2, 4, 8 and --num-prompt scales with
# it (300 prompts per unit of rate). All four runs name the same --output-file.
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --request-rate 1 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --request-rate 2 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --request-rate 4 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --request-rate 8 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl

# Offline: the same four prompt counts with no --request-rate given.
# All four runs name the same --output-file.
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl

0 comments on commit 4f02c9f

Please sign in to comment.