From c9bb428cabe528c5ee05a177746928209ffcaa05 Mon Sep 17 00:00:00 2001 From: ByronHsu Date: Wed, 11 Dec 2024 21:17:46 +0000 Subject: [PATCH 1/3] add 0.1.0 release note --- rust/v0.1.0.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 rust/v0.1.0.md diff --git a/rust/v0.1.0.md b/rust/v0.1.0.md new file mode 100644 index 00000000000..6e765eef287 --- /dev/null +++ b/rust/v0.1.0.md @@ -0,0 +1,55 @@ +# SGLang Router v0.1.0: Dynamic Scaling and Fault Tolerance + +We have released `sglang-router` v0.1.0 equipped with dynamic scaling and fault tolerance! It is essential for the router to be able to dynamically scale the number of workers and handle worker failures. To achieve this, we have implemented the following features: + +1. Dynamic scaling: The router can dynamically scale the number of workers based on the request load. + +We offer `/add_worker` and `/remove_worker` APIs to dynamically add or remove workers from the router. + +- `/add_worker` + +Usage: + +```bash +$ curl -X POST http://localhost:30000/add_worker?url=http://worker_url_1 +``` + +Example: + +```bash +$ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30001 +$ curl -X POST http://localhost:30000/add_worker?url=http://127.0.0.1:30001 +Successfully added worker: http://127.0.0.1:30001 +``` + +- `/remove_worker` + +Usage: + +```bash +$ curl -X POST http://localhost:30000/remove_worker?url=http://worker_url_1 +``` + +Example: + +```bash +$ curl -X POST http://localhost:30000/remove_worker?url=http://127.0.0.1:30001 +Successfully removed worker: http://127.0.0.1:30001 +``` + +Note: + +- For cache-aware router, the worker will be removed from the tree and the queues. + +2. Fault tolerance: The router can handle worker failures and automatically remove the failed worker from the router. + +We provide retries based for failure tolerance. + +1. 
If the request to a worker fails for `max_worker_retries` times, the router will remove the worker from the router and move on to the next worker. +2. If the total number of retries exceeds `max_total_retries`, the router will return an error. + +Note: + +- `max_worker_retries` is 3 and `max_total_retries` is 6 by default. + +Closing remarks: The feature is still under active improvement, so please don't hesitate to raise issues or submit PRs if you have any suggestions or feedback. From 5bea5a0de17facadb8bb121071a3a7e8eccef1e7 Mon Sep 17 00:00:00 2001 From: ByronHsu Date: Wed, 11 Dec 2024 21:18:37 +0000 Subject: [PATCH 2/3] add link --- rust/pyproject.toml | 2 +- rust/v0.1.0.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/pyproject.toml b/rust/pyproject.toml index d1327d9203e..34f0084c29d 100644 --- a/rust/pyproject.toml +++ b/rust/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang-router" -version = "0.0.11" +version = "0.1.0" description = "SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances." authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}] requires-python = ">=3.8" diff --git a/rust/v0.1.0.md b/rust/v0.1.0.md index 6e765eef287..12188d036c7 100644 --- a/rust/v0.1.0.md +++ b/rust/v0.1.0.md @@ -52,4 +52,7 @@ Note: - `max_worker_retries` is 3 and `max_total_retries` is 6 by default. -Closing remarks: The feature is still under active improvement, so please don't hesitate to raise issues or submit PRs if you have any suggestions or feedback. +Closing remarks: + +1. Please read the full usage at https://sgl-project.github.io/router/router.html +2. The feature is still under active improvement, so please don't hesitate to raise issues or submit PRs if you have any suggestions or feedback. 
From b6ef0d4ab66639589a4415cfdc8a7fd9aca8fe74 Mon Sep 17 00:00:00 2001 From: ByronHsu Date: Wed, 11 Dec 2024 21:21:46 +0000 Subject: [PATCH 3/3] more readable --- rust/v0.1.0.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/v0.1.0.md b/rust/v0.1.0.md index 12188d036c7..3f56a0c22db 100644 --- a/rust/v0.1.0.md +++ b/rust/v0.1.0.md @@ -2,7 +2,7 @@ We have released `sglang-router` v0.1.0 equipped with dynamic scaling and fault tolerance! It is essential for the router to be able to dynamically scale the number of workers and handle worker failures. To achieve this, we have implemented the following features: -1. Dynamic scaling: The router can dynamically scale the number of workers based on the request load. +## 1. Dynamic scaling: The router can dynamically scale the number of workers based on the request load. We offer `/add_worker` and `/remove_worker` APIs to dynamically add or remove workers from the router. @@ -41,7 +41,7 @@ Note: - For cache-aware router, the worker will be removed from the tree and the queues. -2. Fault tolerance: The router can handle worker failures and automatically remove the failed worker from the router. +## 2. Fault tolerance: The router can handle worker failures and automatically remove the failed worker from the router. We provide retries based for failure tolerance.