Fix warmup in bench_offline_throughput.py (#2449)

sgl-project · Dec 11, 2024 · f854829 · f854829
1 parent 959735f
commit f854829
Showing 1 changed file with 5 additions and 5 deletions.
diff --git a/python/sglang/bench_offline_throughput.py b/python/sglang/bench_offline_throughput.py
@@ -201,18 +201,17 @@ def throughput_test_once(
         for r in reqs
     ]
 
-    st = time.perf_counter()
     if profile:
         backend.start_profile()
 
+    st = time.perf_counter()
     gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
+    latency = time.perf_counter() - st
 
     if profile:
         backend.stop_profile()
         monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
 
-    latency = time.perf_counter() - st
-
     if backend_name == "runtime":
         gen_out = json.loads(gen_out)
 
@@ -304,8 +303,8 @@ def throughput_test(
     warmup_requests = sample_random_requests(
         input_len=256,
         output_len=16,
-        num_prompts=16,
-        range_ratio=0.8,
+        num_prompts=min(bench_args.num_prompts, 16),
+        range_ratio=1.0,
         tokenizer=tokenizer,
         dataset_path=bench_args.dataset_path,
     )
@@ -321,6 +320,7 @@ def throughput_test(
             extra_request_body=extra_request_body,
             profile=False,
         )
+        time.sleep(0.5)
 
     logging.info("\nBenchmark...")
     result = throughput_test_once(