Skip to content

Commit

Permalink
merged
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat committed Jan 12, 2025
1 parent 5f3f3b7 commit e34b9dc
Showing 1 changed file with 42 additions and 19 deletions.
61 changes: 42 additions & 19 deletions tests/v1/engine/test_async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,38 @@
allow_module_level=True)

# Engine arguments used to construct the AsyncLLM instance under test.
ENGINE_ARGS = AsyncEngineArgs(
    model="meta-llama/Llama-3.2-1B",
    enforce_eager=True,
    disable_log_requests=True,
)


async def generate(engine: AsyncLLM, request_id: str,
                   max_tokens: int) -> Tuple[int, str]:
    """Run one request to completion and count the outputs it yields.

    Args:
        engine: Engine whose ``generate`` async iterator is consumed.
        request_id: Identifier passed through to the engine.
        max_tokens: Value forwarded as ``SamplingParams.max_tokens``.

    Returns:
        ``(count, request_id)`` where ``count`` is the number of items
        yielded by ``engine.generate``.
    """
    count = 0
    async for _ in engine.generate(request_id=request_id,
                                   prompt="Hello my name is Robert and",
                                   sampling_params=SamplingParams(
                                       max_tokens=max_tokens, temperature=0)):

        count += 1
        # Hand control back to the event loop between outputs.
        await asyncio.sleep(0.)

    return count, request_id


async def run_example(
    engine: AsyncLLM,
    request_id: str,
    num_tokens: int,
    abort_after: int = 0,
) -> Tuple[int, int, str]:
    """Run one request, optionally cancelling it part-way through.

    Args:
        engine: Engine whose ``generate`` async iterator is consumed.
        request_id: Identifier passed through to the engine.
        num_tokens: Value forwarded as ``SamplingParams.max_tokens``.
        abort_after: Cancel the current task once this many outputs have
            been received. ``count`` is incremented before the comparison,
            so the default of ``0`` never matches and the request is left
            to run to completion.

    Returns:
        ``(count, expected_count, request_id)``. ``expected_count`` is
        ``num_tokens`` when ``abort_after == 0`` and ``abort_after``
        otherwise.

    Raises:
        AssertionError: If the task was cancelled but ``request_id`` is
            still present in ``engine.request_states``.
    """
    generator = engine.generate(
        request_id=request_id,
        prompt="Hello my name is Robert and",
        sampling_params=SamplingParams(max_tokens=num_tokens, temperature=0))

    count = 0
    try:
        # ``engine.generate`` already returns the async iterator; iterate it
        # directly rather than calling it again.
        async for _ in generator:
            count += 1
            print(f"{request_id=}, {count=}, {abort_after=}")
            if count == abort_after:
                # Simulate request cancellation.
                print(f"{request_id=}")
                # ``current_task()`` is Optional for type checkers (mypy
                # union-attr), so narrow it before calling ``cancel()``.
                current = asyncio.current_task()
                assert current is not None, "must run inside an asyncio task"
                current.cancel()
    except asyncio.CancelledError:
        print(f"{request_id=}")
        assert request_id not in engine.request_states

    # Return after the ``try`` statement instead of from a ``finally`` clause
    # (ruff B012): returning from ``finally`` would discard any in-flight
    # exception, including a failure of the assertion above.
    expected_count = num_tokens if abort_after == 0 else abort_after
    return count, expected_count, request_id

Check failure on line 47 in tests/v1/engine/test_async_llm.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (B012)

tests/v1/engine/test_async_llm.py:47:9: B012 `return` inside `finally` blocks cause exceptions to be silenced


@pytest.mark.asyncio
Expand All @@ -40,24 +57,30 @@ async def test_load(monkeypatch):

engine = AsyncLLM.from_engine_args(ENGINE_ARGS)

NUM_REQUESTS = 10000
NUM_REQUESTS = 100
NUM_EXPECTED_TOKENS = 10
# Abort 1/100 requests after 5 tokens.
ABORT_RATE = 100
ABORT_AFTER = 5

request_ids = [f"request-{i}" for i in range(NUM_REQUESTS)]

# Create concurrent requests.
tasks = []
for request_id in request_ids:
tasks.append(
asyncio.create_task(
generate(engine, request_id, NUM_EXPECTED_TOKENS)))
tasks = [
asyncio.create_task(run_example(
engine=engine,
request_id=request_id,
num_tokens=NUM_EXPECTED_TOKENS,
abort_after=(ABORT_AFTER if idx % ABORT_RATE == 0 else 0)
)) for idx, request_id in enumerate(request_ids)
]

# Confirm that we got all the EXPECTED tokens from the requests.
failed_request_id = None
tokens = None
for task in tasks:
num_generated_tokens, request_id = await task
if (num_generated_tokens != NUM_EXPECTED_TOKENS
num_generated_tokens, expected_tokens, request_id = await task
if (num_generated_tokens != expected_tokens
and failed_request_id is None):
failed_request_id = request_id
tokens = num_generated_tokens
Expand Down

0 comments on commit e34b9dc

Please sign in to comment.