From 7c6e609cd4453362f49a643b5b32ed2943658e36 Mon Sep 17 00:00:00 2001
From: Rodrigo Garcia <32329949+roG0d@users.noreply.github.com>
Date: Thu, 2 Jan 2025 09:43:21 +0100
Subject: [PATCH 1/7] Included Multinode DeepSeekv3

---
 benchmark/deepseek_v3/README.md | 45 +++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index 9c61af88fd2..6f4b0a9b8d6 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -56,18 +56,53 @@ response = client.chat.completions.create(
 )
 print(response)
 ```
-### Example serving with 2 H20*8
-For example, there are two H20 nodes, each with 8 GPUs. The first node's IP is `10.0.0.1`, and the second node's IP is `10.0.0.2`.
+### Example serving with Docker two H200*8 nodes
+Having two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configuring the endpoint to expose it to another docker container with `--host 0.0.0.0` and `--port 40000` and configuring nccl comms with `--nccl-init 192.168.114.10:20000`.
 
 ```bash
 # node 1
-python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 0 --trust-remote-code
+docker run --gpus all \
+    --shm-size 32g \
+    --network=host \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --name sglang_multinode1 \
+    -it \
+    --rm \
+    --env "HF_TOKEN=$HF_TOKEN" \
+    --ipc=host \
+    lmsysorg/sglang:latest \
+    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --host 0.0.0.0 --port 40000
+```
+```bash
 # node 2
-python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 1 --trust-remote-code
+docker run --gpus all \
+    --shm-size 32g \
+    --network=host \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --name sglang_multinode2 \
+    -it \
+    --rm \
+    --env "HF_TOKEN=$HF_TOKEN" \
+    --ipc=host \
+    lmsysorg/sglang:latest \
+    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
 ```
 
-If you have two H100 nodes, the usage is similar to the aforementioned H20.
+To ensure the functionality, we include a testing from a client docker container:
+```bash
+docker run --gpus all \
+    --shm-size 32g \
+    --network=host \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --name sglang_multinode_client \
+    -it \
+    --rm \
+    --env "HF_TOKEN=$HF_TOKEN" \
+    --ipc=host \
+    lmsysorg/sglang:latest \
+    python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 1 --random-output 512 --random-range-ratio 1 --num-prompts 1 --host 0.0.0.0 --port 40000 --output-file "deepseekv3_multinode.jsonl"
+```
 
 ## DeepSeek V3 Optimization Plan

From 5b809e6998759af6b72a9f1ba05d50a32ee06989 Mon Sep 17 00:00:00 2001
From: Rodrigo Garcia <32329949+roG0d@users.noreply.github.com>
Date: Thu, 2 Jan 2025 10:38:00 +0100
Subject: [PATCH 2/7] Reincluded H20 example

---
 benchmark/deepseek_v3/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index 6f4b0a9b8d6..d8244e39e95 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -56,6 +56,19 @@ response = client.chat.completions.create(
 )
 print(response)
 ```
+### Example serving with 2 H20*8
+For example, there are two H20 nodes, each with 8 GPUs. The first node's IP is `10.0.0.1`, and the second node's IP is `10.0.0.2`.
+
+```bash
+# node 1
+python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 0 --trust-remote-code
+
+# node 2
+python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 1 --trust-remote-code
+```
+
+If you have two H100 nodes, the usage is similar to the aforementioned H20.
+
 ### Example serving with Docker two H200*8 nodes
 Having two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configuring the endpoint to expose it to another docker container with `--host 0.0.0.0` and `--port 40000` and configuring nccl comms with `--nccl-init 192.168.114.10:20000`.

From 640b41c16d67aa32a256bd8ec724103d45237761 Mon Sep 17 00:00:00 2001
From: Rodrigo Garcia <32329949+roG0d@users.noreply.github.com>
Date: Thu, 2 Jan 2025 11:16:00 +0100
Subject: [PATCH 3/7] Updated --nccl-init for --dist-init-addr

---
 benchmark/deepseek_v3/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index d8244e39e95..d6d51ac6cb1 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -61,10 +61,10 @@ For example, there are two H20 nodes, each with 8 GPUs. The first node's IP is `
 
 ```bash
 # node 1
-python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 0 --trust-remote-code
+python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 10.0.0.1:5000 --nnodes 2 --node-rank 0 --trust-remote-code
 
 # node 2
-python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 10.0.0.1:5000 --nnodes 2 --node-rank 1 --trust-remote-code
+python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 10.0.0.1:5000 --nnodes 2 --node-rank 1 --trust-remote-code
 ```
 
 If you have two H100 nodes, the usage is similar to the aforementioned H20.
@@ -84,7 +84,7 @@ docker run --gpus all \
     --env "HF_TOKEN=$HF_TOKEN" \
     --ipc=host \
     lmsysorg/sglang:latest \
-    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --host 0.0.0.0 --port 40000
+    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 0 --trust-remote-code --host 0.0.0.0 --port 40000
 ```
 
@@ -99,7 +99,7 @@ docker run --gpus all \
     --env "HF_TOKEN=$HF_TOKEN" \
     --ipc=host \
     lmsysorg/sglang:latest \
-    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --nccl-init 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
+    python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
 ```
 
 To ensure the functionality, we include a testing from a client docker container:

From 9d8c2b4cc8da7238ff6c8b8e006339206413679f Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 2 Jan 2025 22:12:07 +0800
Subject: [PATCH 4/7] upd

---
 benchmark/deepseek_v3/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index d6d51ac6cb1..7199faba34e 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -70,7 +70,7 @@ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --di
 If you have two H100 nodes, the usage is similar to the aforementioned H20.
 
 ### Example serving with Docker two H200*8 nodes
-Having two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configuring the endpoint to expose it to another docker container with `--host 0.0.0.0` and `--port 40000` and configuring nccl comms with `--nccl-init 192.168.114.10:20000`.
+Having two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configuring the endpoint to expose it to another docker container with `--host 0.0.0.0` and `--port 40000` and configuring nccl comms with `--dist-init-addr 192.168.114.10:20000`.

From 2770fe95f2a2d9ab605544096dd62499cb78dea2 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 2 Jan 2025 22:13:24 +0800
Subject: [PATCH 5/7] upd

---
 benchmark/deepseek_v3/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index 7199faba34e..d7907824c02 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -70,7 +70,7 @@ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --di
 If you have two H100 nodes, the usage is similar to the aforementioned H20.
 
 ### Example serving with Docker two H200*8 nodes
-Having two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configuring the endpoint to expose it to another docker container with `--host 0.0.0.0` and `--port 40000` and configuring nccl comms with `--dist-init-addr 192.168.114.10:20000`.
+There are two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configure the endpoint to expose it to another Docker container using `--host 0.0.0.0` and `--port 40000`, and set up communications with `--dist-init-addr 192.168.114.10:20000`.
 
 ```bash
 # node 1

From 438cf62e2377c75ac506182266ecaf92295ff414 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 2 Jan 2025 22:14:29 +0800
Subject: [PATCH 6/7] upd

---
 benchmark/deepseek_v3/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index d7907824c02..bc09c9e63ef 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -102,7 +102,7 @@ docker run --gpus all \
     python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3 --tp 16 --dist-init-addr 192.168.114.10:20000 --nnodes 2 --node-rank 1 --trust-remote-code --host 0.0.0.0 --port 40000
 ```
 
-To ensure the functionality, we include a testing from a client docker container:
+To ensure functionality, we include a test from a client Docker container.
 ```bash
 docker run --gpus all \
     --shm-size 32g \

From 92b49113fde4d4c1e4ceab4917f7df6507302e64 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 2 Jan 2025 22:16:43 +0800
Subject: [PATCH 7/7] upd

---
 benchmark/deepseek_v3/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index bc09c9e63ef..15cf0b26a24 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -71,6 +71,7 @@ If you have two H100 nodes, the usage is similar to the aforementioned H20.
 
 ### Example serving with Docker two H200*8 nodes
 There are two H200 nodes, each with 8 GPUs. The first node's IP is `192.168.114.10`, and the second node's IP is `192.168.114.11`. Configure the endpoint to expose it to another Docker container using `--host 0.0.0.0` and `--port 40000`, and set up communications with `--dist-init-addr 192.168.114.10:20000`.
+A single H200 node with 8 GPUs can run DeepSeek V3; the dual-H200 setup here is just to demonstrate multi-node usage.
 
 ```bash
 # node 1
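
Besides the `bench_serving` smoke test added in these patches, the deployment can be exercised through the OpenAI-compatible endpoint shown at the top of the README. The sketch below only builds the request URL and body; the address `192.168.114.10` and port `40000` are the values used in the patches above, and the payload fields follow the standard chat-completions schema (swap in your own rank-0 node IP):

```python
import json

# Values taken from the patches above; replace with your own rank-0 node IP.
NODE1_IP = "192.168.114.10"
PORT = 40000  # the --port passed to sglang.launch_server

def chat_payload(prompt: str, max_tokens: int = 64) -> dict:
    """Build an OpenAI-style chat-completions request body for DeepSeek-V3."""
    return {
        "model": "deepseek-ai/DeepSeek-V3",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
    }

# The rank-0 node serves the usual OpenAI-compatible route.
url = f"http://{NODE1_IP}:{PORT}/v1/chat/completions"
body = json.dumps(chat_payload("Who are you?")).encode("utf-8")
print(url)
```

Once both containers are up, sending `body` to `url` with any HTTP client (or pointing the `openai` package at `http://192.168.114.10:40000/v1`, as in the README's earlier example) should return a completion.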