From 84439e62136ead0d561b6b264c154a376cfd0f12 Mon Sep 17 00:00:00 2001
From: chenqianfzh
Date: Fri, 6 Dec 2024 01:08:00 +0000
Subject: [PATCH] fix block-size description

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 0b304658f012c..a842652201752 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -415,7 +415,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument('--block-size',
                             type=int,
                             default=EngineArgs.block_size,
-                            choices=[8, 16, 32, 64, 128],
+                            choices=[8, 16, 32],
                             help='Token block size for contiguous chunks of '
                             'tokens. This is ignored on neuron devices and '
                             'set to max-model-len')