diff --git a/docs/Makefile b/docs/Makefile index 50f77a30c09..13d81f4f847 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -19,7 +19,7 @@ compile: echo "Executing $$nb"; \ jupyter nbconvert --to notebook --execute --inplace "$$nb" \ --ExecutePreprocessor.timeout=600 \ - --ExecutePreprocessor.kernel_name=python3; \ + --ExecutePreprocessor.kernel_name=python3 || exit 1; \ fi; \ done diff --git a/docs/backend/native_api.ipynb b/docs/backend/native_api.ipynb index 7207259ea3c..26758f7f975 100644 --- a/docs/backend/native_api.ipynb +++ b/docs/backend/native_api.ipynb @@ -220,19 +220,19 @@ "metadata": {}, "outputs": [], "source": [ - "# failed update with different parameter size\n", + "# failed update with different parameter size or wrong name\n", "\n", "url = \"http://localhost:30010/update_weights_from_disk\"\n", - "data = {\"model_path\": \"meta-llama/Llama-3.2-3B\"}\n", + "data = {\"model_path\": \"meta-llama/Llama-3.2-1B-wrong\"}\n", "\n", "response = requests.post(url, json=data)\n", "response_json = response.json()\n", "print_highlight(response_json)\n", "assert response_json[\"success\"] is False\n", "assert response_json[\"message\"] == (\n", - " \"Failed to update weights: The size of tensor a (2048) must match \"\n", - " \"the size of tensor b (3072) at non-singleton dimension 1.\\n\"\n", - " \"Rolling back to original weights.\"\n", + " \"Failed to get weights iterator: \"\n", + " \"meta-llama/Llama-3.2-1B-wrong\"\n", + " \" (repository not found).\"\n", ")" ] }, diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index fc8ac150b3f..7b91cb69797 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -329,7 +329,7 @@ async def encode_request(obj: EmbeddingReqInput, request: Request): ) -@app.api_route("/encode", methods=["POST", "PUT"]) +@app.api_route("/classify", methods=["POST", "PUT"]) @time_func_latency async def classify_request(obj: EmbeddingReqInput, request: Request): """Handle a reward model request. Now the arguments and return values are the same as embedding models."""