Skip to content

Commit

Permalink
Enable user marker for vllm profiling (#357)
Browse files Browse the repository at this point in the history
* Enable user marker for vllm profiling

---------

Co-authored-by: Gregory Shtrasberg <[email protected]>
  • Loading branch information
Lzy17 and gshtras authored Jan 16, 2025
1 parent 5976f48 commit 8bd76fb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
31 changes: 31 additions & 0 deletions vllm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,37 @@ def inner(*args, **kwds):
return func


class rpd_user_marker:
    """Emit an rpd (ROCm Profile Data) user marker around a code region.

    Usable either as a context manager (``with rpd_user_marker("x"): ...``)
    or via explicit ``start()`` / ``end()`` calls.  Degrades to a no-op when
    the ``hipScopedMarker`` module is unavailable, so callers may use it
    unconditionally.
    """

    def __init__(self, name=None):
        # Label shown in the profiler trace; a fallback label is substituted
        # in start() when no name is given.
        self.name = name
        # Underlying hipScopedMarker instance, created lazily in start().
        self.marker = None

    def __enter__(self):
        # Delegate to start() so the with-statement and the explicit
        # start()/end() call styles share a single implementation
        # (the original duplicated the body verbatim).
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end(exc_type, exc_val, exc_tb)

    def start(self):
        """Open the marker region; no-op if hipScopedMarker is missing."""
        if is_hipScopedMarker_available():
            # Imported lazily so environments without the profiler
            # never pay the import (or fail on it).
            from hipScopedMarker import hipScopedMarker
            marker_name = self.name if self.name else "UserMarker Undefined"
            self.marker = hipScopedMarker(f"{marker_name}")
            self.marker.__enter__()
        return self

    def end(self, exc_type=0, exc_val=0, exc_tb=0):
        """Close the marker region opened by start()/__enter__().

        Defaults of 0 are kept for backward compatibility with existing
        callers; the context-manager protocol passes real exception info
        (or None) through __exit__ instead.
        """
        if is_hipScopedMarker_available() and self.marker:
            self.marker.__exit__(exc_type, exc_val, exc_tb)


class Device(enum.Enum):
GPU = enum.auto()
CPU = enum.auto()
Expand Down
11 changes: 9 additions & 2 deletions vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
from vllm.utils import (DeviceMemoryProfiler, GiB_bytes, PyObjectCache,
async_tensor_h2d, flatten_2d_lists,
is_pin_memory_available, rpd_mark, supports_dynamo,
weak_ref_tensor)
is_pin_memory_available, rpd_mark, rpd_user_marker,
supports_dynamo, weak_ref_tensor)
from vllm.worker.model_runner_base import (
ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
_add_attn_metadata_broadcastable_dict,
Expand Down Expand Up @@ -1630,6 +1630,12 @@ def execute_model(
assert model_input.attn_metadata is not None
prefill_meta = model_input.attn_metadata.prefill_metadata
decode_meta = model_input.attn_metadata.decode_metadata
if prefill_meta:
marker_instance = rpd_user_marker(name="Prefill")
else:
marker_instance = rpd_user_marker(name="Decode")

marker_instance.start()
# TODO(andoorve): We can remove this once all
# virtual engines share the same kv cache.
virtual_engine = model_input.virtual_engine
Expand Down Expand Up @@ -1765,6 +1771,7 @@ def execute_model(

output.hidden_states = hidden_states

marker_instance.end()
return [output]

def need_recv_kv(self, model_input, kv_caches) -> bool:
Expand Down

0 comments on commit 8bd76fb

Please sign in to comment.