Skip to content

Commit

Permalink
add test for payload size
Browse files Browse the repository at this point in the history
Signed-off-by: Ata Fatahi <[email protected]>
  • Loading branch information
MrAta committed Dec 11, 2024
1 parent fb69bc2 commit e736cc5
Showing 1 changed file with 79 additions and 0 deletions.
79 changes: 79 additions & 0 deletions rust/py_test/test_launch_server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import socket
import subprocess
import time
Expand All @@ -20,6 +21,7 @@ def popen_launch_router(
dp_size: int,
timeout: float,
policy: str = "cache_aware",
max_payload_size: int = None,
):
"""
Launch the router server process.
Expand All @@ -30,6 +32,7 @@ def popen_launch_router(
dp_size: Data parallel size
timeout: Server launch timeout
policy: Router policy, one of "cache_aware", "round_robin", "random"
max_payload_size: Maximum payload size in bytes
"""
_, host, port = base_url.split(":")
host = host[2:]
Expand All @@ -52,6 +55,9 @@ def popen_launch_router(
policy,
]

if max_payload_size is not None:
command.extend(["--router-max-payload-size", str(max_payload_size)])

process = subprocess.Popen(command, stdout=None, stderr=None)

start_time = time.time()
Expand Down Expand Up @@ -226,6 +232,79 @@ def test_2_add_and_remove_worker(self):
msg = f"MMLU test {'passed' if passed else 'failed'} with score {score:.3f} (threshold: {THRESHOLD})"
self.assertGreaterEqual(score, THRESHOLD, msg)

def test_3_payload_size(self):
print("Running test_3_payload_size...")
# Start router with default 4MB limit
self.process = popen_launch_router(
self.model,
self.base_url,
dp_size=1,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
policy="round_robin",
)

# Test case 1: Payload just under 4MB should succeed
payload_3mb = {
"text": "x" * (3 * 1024 * 1024), # 3MB of text
"temperature": 0.0,
}

with requests.Session() as session:
response = session.post(
f"{self.base_url}/generate",
json=payload_3mb,
headers={"Content-Type": "application/json"},
)
self.assertEqual(
response.status_code,
200,
f"3MB payload should succeed but got status {response.status_code}",
)

# Test case 2: Payload over 4MB should fail
payload_5mb = {
"text": "x" * (5 * 1024 * 1024), # 5MB of text
"temperature": 0.0,
}

with requests.Session() as session:
response = session.post(
f"{self.base_url}/generate",
json=payload_5mb,
headers={"Content-Type": "application/json"},
)
self.assertEqual(
response.status_code,
413, # Payload Too Large
f"5MB payload should fail with 413 but got status {response.status_code}",
)

# Test case 3: Start router with custom 8MB limit
if self.process:
terminate_and_wait(self.process)

self.process = popen_launch_router(
self.model,
self.base_url,
dp_size=1,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
policy="round_robin",
max_payload_size=8 * 1024 * 1024, # 8MB limit
)

# Now 5MB payload should succeed with 8MB limit
with requests.Session() as session:
response = session.post(
f"{self.base_url}/generate",
json=payload_5mb,
headers={"Content-Type": "application/json"},
)
self.assertEqual(
response.status_code,
200,
f"5MB payload should succeed with 8MB limit but got status {response.status_code}",
)


if __name__ == "__main__":
unittest.main()

0 comments on commit e736cc5

Please sign in to comment.