Skip to content

Commit

Permalink
feat: add env var to specify the number of layers to offload to GPU (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
nopperl authored May 5, 2024
1 parent 2de3c7f commit 2785a9a
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion crates/llama-cpp-bindings/src/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,18 @@ std::unique_ptr<TextInferenceEngine> create_engine(bool use_gpu, rust::Str model
static BackendInitializer initializer;

llama_model_params model_params = llama_model_default_params();
model_params.n_gpu_layers = use_gpu ? 9999 : 0;
// Decide how many model layers to offload to the GPU.
// When use_gpu is set, the LLAMA_CPP_N_GPU_LAYERS environment variable may
// override the default of "offload everything" (9999 is a sentinel meaning
// "all layers"; llama.cpp clamps it to the model's actual layer count).
int n_gpu_layers = 0;
if (use_gpu) {
if (const char* n_gpu_layers_str = std::getenv("LLAMA_CPP_N_GPU_LAYERS")) {
// Parse with std::strtol instead of std::stoi: stoi throws
// std::invalid_argument / std::out_of_range on a malformed or oversized
// value, which would crash engine creation with an uncaught exception.
// A bad value falls back to offloading all layers, matching the
// behavior when the variable is unset.
char* parse_end = nullptr;
const long parsed = std::strtol(n_gpu_layers_str, &parse_end, 10);
n_gpu_layers = (parse_end != n_gpu_layers_str) ? static_cast<int>(parsed) : 9999;
} else {
// by default, set a high number to offload all layers to GPU
n_gpu_layers = 9999;
}
}
model_params.n_gpu_layers = n_gpu_layers;

llama_model* model = llama_load_model_from_file(std::string(model_path).c_str(), model_params);

if (!model) {
Expand Down

0 comments on commit 2785a9a

Please sign in to comment.