diff --git a/rust/py_test/test_launch_server.py b/rust/py_test/test_launch_server.py index 2591abb5cdf..ed8ae9faeae 100644 --- a/rust/py_test/test_launch_server.py +++ b/rust/py_test/test_launch_server.py @@ -1,3 +1,4 @@ +import json import socket import subprocess import time @@ -20,6 +21,7 @@ def popen_launch_router( dp_size: int, timeout: float, policy: str = "cache_aware", + max_payload_size: int = None, ): """ Launch the router server process. @@ -30,6 +32,7 @@ def popen_launch_router( dp_size: Data parallel size timeout: Server launch timeout policy: Router policy, one of "cache_aware", "round_robin", "random" + max_payload_size: Maximum payload size in bytes """ _, host, port = base_url.split(":") host = host[2:] @@ -52,6 +55,9 @@ def popen_launch_router( policy, ] + if max_payload_size is not None: + command.extend(["--router-max-payload-size", str(max_payload_size)]) + process = subprocess.Popen(command, stdout=None, stderr=None) start_time = time.time() @@ -226,6 +232,79 @@ def test_2_add_and_remove_worker(self): msg = f"MMLU test {'passed' if passed else 'failed'} with score {score:.3f} (threshold: {THRESHOLD})" self.assertGreaterEqual(score, THRESHOLD, msg) + def test_3_payload_size(self): + print("Running test_3_payload_size...") + # Start router with default 4MB limit + self.process = popen_launch_router( + self.model, + self.base_url, + dp_size=1, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + policy="round_robin", + ) + + # Test case 1: Payload just under 4MB should succeed + payload_3mb = { + "text": "x" * (3 * 1024 * 1024), # 3MB of text + "temperature": 0.0, + } + + with requests.Session() as session: + response = session.post( + f"{self.base_url}/generate", + json=payload_3mb, + headers={"Content-Type": "application/json"}, + ) + self.assertEqual( + response.status_code, + 200, + f"3MB payload should succeed but got status {response.status_code}", + ) + + # Test case 2: Payload over 4MB should fail + payload_5mb = { + "text": "x" * (5 * 1024 * 1024), # 5MB of text + "temperature": 0.0, + } + + with requests.Session() as session: + response = session.post( + f"{self.base_url}/generate", + json=payload_5mb, + headers={"Content-Type": "application/json"}, + ) + self.assertEqual( + response.status_code, + 413, # Payload Too Large + f"5MB payload should fail with 413 but got status {response.status_code}", + ) + + # Test case 3: Start router with custom 8MB limit + if self.process: + terminate_and_wait(self.process) + + self.process = popen_launch_router( + self.model, + self.base_url, + dp_size=1, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + policy="round_robin", + max_payload_size=8 * 1024 * 1024, # 8MB limit + ) + + # Now 5MB payload should succeed with 8MB limit + with requests.Session() as session: + response = session.post( + f"{self.base_url}/generate", + json=payload_5mb, + headers={"Content-Type": "application/json"}, + ) + self.assertEqual( + response.status_code, + 200, + f"5MB payload should succeed with 8MB limit but got status {response.status_code}", + ) + if __name__ == "__main__": unittest.main()