Check gpu availability at server args creation #2340

Merged (1 commit) on Dec 4, 2024
python/sglang/srt/server_args.py (11 changes: 8 additions, 3 deletions)
@@ -20,6 +20,8 @@
 import tempfile
 from typing import List, Optional
 
+import torch
+
 from sglang.srt.hf_transformers_utils import check_gguf_file
 from sglang.srt.utils import (
     get_amdgpu_memory_capacity,
@@ -151,8 +153,11 @@ def __post_init__(self):
 
         if is_hip():
             gpu_mem = get_amdgpu_memory_capacity()
-        else:
+        elif torch.cuda.is_available():
             gpu_mem = get_nvgpu_memory_capacity()
+        else:
+            # GPU memory is not known yet or no GPU is available.
+            gpu_mem = None
 
         # Set mem fraction static, which depends on the tensor parallelism size
         if self.mem_fraction_static is None:
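
The detection order in this hunk can be exercised on its own. Below is a minimal, self-contained sketch, not the sglang code path: `is_hip` and the memory-capacity helpers actually live in `sglang.srt.utils`, so they are stubbed here with placeholder values, and `detect_gpu_mem` is a hypothetical name used only for illustration.

```python
# Minimal sketch of the detection order introduced above.
# Assumptions: torch is installed; the real helpers (is_hip,
# get_amdgpu_memory_capacity, get_nvgpu_memory_capacity) come from
# sglang.srt.utils and are stubbed here with placeholder values in MB.
from typing import Optional

import torch


def is_hip() -> bool:
    # Stub: torch.version.hip is a string on ROCm builds, None otherwise.
    return torch.version.hip is not None


def detect_gpu_mem() -> Optional[float]:
    """Return GPU memory in MB, or None when no GPU is visible."""
    if is_hip():
        return 65_536.0  # placeholder for get_amdgpu_memory_capacity()
    elif torch.cuda.is_available():
        return 24_576.0  # placeholder for get_nvgpu_memory_capacity()
    else:
        # CPU-only host: memory is unknown, so memory-dependent
        # defaults are deferred (this is the new branch in the diff).
        return None


if __name__ == "__main__":
    print(detect_gpu_mem())  # None on a CPU-only machine
```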
@@ -169,14 +174,14 @@
 
         # Set chunked prefill size, which depends on the gpu memory capacity
         if self.chunked_prefill_size is None:
-            if gpu_mem < 25_000:
+            if gpu_mem is not None and gpu_mem < 25_000:
                 self.chunked_prefill_size = 2048
             else:
                 self.chunked_prefill_size = 8192
 
         # Set cuda graph max batch size
         if self.cuda_graph_max_bs is None:
-            if gpu_mem < 25_000:
+            if gpu_mem is not None and gpu_mem < 25_000:
                 self.cuda_graph_max_bs = 8
             else:
                 self.cuda_graph_max_bs = 160
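
With `gpu_mem = None` on a CPU-only host, a bare `gpu_mem < 25_000` would raise a `TypeError`, hence the added `is not None` guards; both settings then fall through to the larger defaults. A quick standalone check of that behavior, where `pick_defaults` is a hypothetical helper that mirrors the two patched conditions (which happen to be identical):

```python
# Standalone check of the None guard; thresholds and values mirror the diff.
# pick_defaults is a hypothetical helper, not part of sglang.
from typing import Optional, Tuple


def pick_defaults(gpu_mem: Optional[float]) -> Tuple[int, int]:
    """Return (chunked_prefill_size, cuda_graph_max_bs) for gpu_mem in MB."""
    if gpu_mem is not None and gpu_mem < 25_000:
        return 2048, 8  # small (< ~25 GB) cards get conservative defaults
    return 8192, 160  # large cards, and hosts with no detectable GPU


print(pick_defaults(None))    # (8192, 160): no GPU detected
print(pick_defaults(16_000))  # (2048, 8):   e.g. a 16 GB card
print(pick_defaults(81_920))  # (8192, 160): e.g. an 80 GB card
```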