[Misc] sort torch profiler table by kernel timing (vllm-project#11813)

frreiss · Jan 10, 2025 · 887282f
1 parent 550b91c
commit 887282f
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
@@ -52,7 +52,7 @@ def run_to_completion(profile_dir: Optional[str] = None):
                 llm.generate(dummy_prompts,
                              sampling_params=sampling_params,
                              use_tqdm=False)
-            print(p.key_averages())
+            print(p.key_averages().table(sort_by="self_cuda_time_total"))
         else:
             start_time = time.perf_counter()
             llm.generate(dummy_prompts,