Skip to content

Commit

Permalink
(ADDED): Results and logs for deepseek v3 benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
roG0d committed Jan 5, 2025
1 parent 4f02c9f commit 700675b
Show file tree
Hide file tree
Showing 9 changed files with 606 additions and 24 deletions.
29 changes: 5 additions & 24 deletions sglang/experiments/deepseek_v3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,28 +61,9 @@ docker run --gpus all \
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --host 0.0.0.0 --port 40000
'

# 8xH200 FP8/BF16
# 8xH200/2x8xH200 FP8/BF16
# Online
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --request-rate 1 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --request-rate 2 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --request-rate 4 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --request-rate 8 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_online_output.jsonl

# Offline
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --random-input 1024 --random-output 1024 --output-file deepseek_v3_8xh200_offline_output.jsonl

# 2x8xH200 BF16
# Online
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --request-rate 1 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --request-rate 2 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --request-rate 4 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --request-rate 8 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_online_output.jsonl

# Offline
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_offline_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 300 --request-rate 1 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_FP8_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 600 --request-rate 2 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_FP8_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 1200 --request-rate 4 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_FP8_online_output.jsonl
python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-range-ratio 1 --num-prompt 2400 --request-rate 8 --random-input 1024 --random-output 1024 --host 0.0.0.0 --port 40000 --output-file deepseek_v3_2x8xh200_FP8_online_output.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"backend": "sglang", "dataset_name": "random", "request_rate": 1.0, "max_concurrency": null, "total_input_tokens": 307200, "total_output_tokens": 307200, "total_output_tokens_retokenized": 306053, "mean_e2e_latency_ms": 968448.8521837055, "median_e2e_latency_ms": 971353.9656687062, "median_ttft_ms": 53189.53575310297, "median_itl_ms": 638.6785819195211, "output_throughput": 275.05934570759786, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 1116.849890010897, "completed": 300}
{"backend": "sglang", "dataset_name": "random", "request_rate": 2.0, "max_concurrency": null, "total_input_tokens": 614400, "total_output_tokens": 614400, "total_output_tokens_retokenized": 612299, "mean_e2e_latency_ms": 2003883.862575565, "median_e2e_latency_ms": 2010951.2275049929, "median_ttft_ms": 313373.927626526, "median_itl_ms": 1192.372274119407, "output_throughput": 256.4982169510671, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 2395.338288519997, "completed": 600}
{"backend": "sglang", "dataset_name": "random", "request_rate": 4.0, "max_concurrency": null, "total_input_tokens": 1228800, "total_output_tokens": 1228800, "total_output_tokens_retokenized": 1224692, "mean_e2e_latency_ms": 3206867.3097752165, "median_e2e_latency_ms": 3881082.652960904, "median_ttft_ms": 774460.7280562632, "median_itl_ms": 1178.4203723073006, "output_throughput": 255.4465169783954, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 4810.4002925353125, "completed": 1200}
{"backend": "sglang", "dataset_name": "random", "request_rate": 8.0, "max_concurrency": null, "total_input_tokens": 2457600, "total_output_tokens": 2457600, "total_output_tokens_retokenized": 2449303, "mean_e2e_latency_ms": 6004940.752673052, "median_e2e_latency_ms": 6819185.607663356, "median_ttft_ms": 4072706.7238641903, "median_itl_ms": 1205.5958840064704, "output_throughput": 250.07723760043686, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 9827.363831995986, "completed": 2400}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"backend": "sglang", "dataset_name": "random", "request_rate": 1.0, "max_concurrency": null, "total_input_tokens": 307200, "total_output_tokens": 307200, "total_output_tokens_retokenized": 306092, "mean_e2e_latency_ms": 982681.0571394442, "median_e2e_latency_ms": 985610.623908462, "median_ttft_ms": 56824.06605547294, "median_itl_ms": 662.3261536005884, "output_throughput": 271.5959636429426, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 1131.0919200694188, "completed": 300}
{"backend": "sglang", "dataset_name": "random", "request_rate": 2.0, "max_concurrency": null, "total_input_tokens": 614400, "total_output_tokens": 614400, "total_output_tokens_retokenized": 612142, "mean_e2e_latency_ms": 1978002.6884525253, "median_e2e_latency_ms": 1975371.9891069923, "median_ttft_ms": 305318.36949149147, "median_itl_ms": 1219.141379930079, "output_throughput": 288.41458420567164, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 2130.2667536460795, "completed": 600}
{"backend": "sglang", "dataset_name": "random", "request_rate": 4.0, "max_concurrency": null, "total_input_tokens": 1228800, "total_output_tokens": 1228800, "total_output_tokens_retokenized": 1224515, "mean_e2e_latency_ms": 3929702.0734317033, "median_e2e_latency_ms": 3901390.298462007, "median_ttft_ms": 767082.1364489384, "median_itl_ms": 2189.8306920193136, "output_throughput": 269.19030517752464, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 4564.800352633931, "completed": 1200}
{"backend": "sglang", "dataset_name": "random", "request_rate": 8.0, "max_concurrency": null, "total_input_tokens": 2457600, "total_output_tokens": 2457600, "total_output_tokens_retokenized": 2448836, "mean_e2e_latency_ms": 6079389.87389776, "median_e2e_latency_ms": 7374173.1368335895, "median_ttft_ms": 1680440.4092754703, "median_itl_ms": 2007.022154983133, "output_throughput": 276.74194728875386, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 8880.475201093126, "completed": 2400}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"backend": "sglang", "dataset_name": "random", "request_rate": 1.0, "max_concurrency": null, "total_input_tokens": 307200, "total_output_tokens": 307200, "total_output_tokens_retokenized": 306052, "mean_e2e_latency_ms": 219910.4881566499, "median_e2e_latency_ms": 214924.09367999062, "median_ttft_ms": 587.1520687360317, "median_itl_ms": 159.6419473644346, "output_throughput": 639.9948784795965, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 480.00384117104113, "completed": 300}
{"backend": "sglang", "dataset_name": "random", "request_rate": 2.0, "max_concurrency": null, "total_input_tokens": 614400, "total_output_tokens": 614400, "total_output_tokens_retokenized": 612253, "mean_e2e_latency_ms": 235341.5755853096, "median_e2e_latency_ms": 235524.69775360078, "median_ttft_ms": 598.7704854924232, "median_itl_ms": 162.9884666763246, "output_throughput": 1313.7380027330087, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 467.67315760208294, "completed": 600}
{"backend": "sglang", "dataset_name": "random", "request_rate": 4.0, "max_concurrency": null, "total_input_tokens": 1228800, "total_output_tokens": 1228800, "total_output_tokens_retokenized": 1224646, "mean_e2e_latency_ms": 321625.8439514022, "median_e2e_latency_ms": 324438.43806162477, "median_ttft_ms": 766.6953965090215, "median_itl_ms": 237.9868463613093, "output_throughput": 2378.259950851199, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 516.6802727179602, "completed": 1200}
{"backend": "sglang", "dataset_name": "random", "request_rate": 8.0, "max_concurrency": null, "total_input_tokens": 2457600, "total_output_tokens": 2457600, "total_output_tokens_retokenized": 2449187, "mean_e2e_latency_ms": 654511.2723356115, "median_e2e_latency_ms": 686261.5671905223, "median_ttft_ms": 1191.7396115604788, "median_itl_ms": 255.96281047910452, "output_throughput": 2249.0334558295294, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 1092.736078971997, "completed": 2400}
4 changes: 4 additions & 0 deletions sglang/experiments/deepseek_v3_8xh200_FP8_online_output.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"backend": "sglang", "dataset_name": "random", "request_rate": 1.0, "max_concurrency": null, "total_input_tokens": 307200, "total_output_tokens": 307200, "total_output_tokens_retokenized": 306153, "mean_e2e_latency_ms": 139395.84098776494, "median_e2e_latency_ms": 147735.42626714334, "median_ttft_ms": 563.4104600176215, "median_itl_ms": 101.78019991144538, "output_throughput": 773.1474288758621, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 397.33689659507945, "completed": 300}
{"backend": "sglang", "dataset_name": "random", "request_rate": 2.0, "max_concurrency": null, "total_input_tokens": 614400, "total_output_tokens": 614400, "total_output_tokens_retokenized": 612306, "mean_e2e_latency_ms": 227131.108927244, "median_e2e_latency_ms": 234757.1316829417, "median_ttft_ms": 684.3277416191995, "median_itl_ms": 149.458127329126, "output_throughput": 1401.770501155294, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 438.3028459320776, "completed": 600}
{"backend": "sglang", "dataset_name": "random", "request_rate": 4.0, "max_concurrency": null, "total_input_tokens": 1228800, "total_output_tokens": 1228800, "total_output_tokens_retokenized": 1224403, "mean_e2e_latency_ms": 370518.6826122479, "median_e2e_latency_ms": 376040.6724580098, "median_ttft_ms": 865.2611614670604, "median_itl_ms": 287.9461294505745, "output_throughput": 2214.7587909169233, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 554.8233988457359, "completed": 1200}
{"backend": "sglang", "dataset_name": "random", "request_rate": 8.0, "max_concurrency": null, "total_input_tokens": 2457600, "total_output_tokens": 2457600, "total_output_tokens_retokenized": 2449246, "mean_e2e_latency_ms": 687402.8331566683, "median_e2e_latency_ms": 692710.8259119559, "median_ttft_ms": 1358.7704463861883, "median_itl_ms": 515.1780359447002, "output_throughput": 2864.307071093772, "sharegpt_output_len": null, "random_input_len": 1024, "random_output_len": 1024, "random_range_ratio": 1.0, "duration": 858.0085650738329, "completed": 2400}
Loading

0 comments on commit 700675b

Please sign in to comment.