Skip to content

Commit

Permalink
Use a single thread for the reranking service
Browse files Browse the repository at this point in the history
  • Loading branch information
felladrin committed Jan 25, 2025
1 parent e717da0 commit 34d2404
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions server/rerankerService.ts
Original file line number Diff line number Diff line change
@@ -40,9 +40,7 @@ export async function startRerankerService() {
await ensureModelExists(modelPath);
printMessage("Starting service...");

- const contextSize = 8192;
- const threads = Math.min(Math.max(1, os.cpus().length - 2), 16);
- const batchSize = Math.ceil(contextSize / threads);
+ const contextSize = 2048;

const serverProcess = spawn(
"llama-server",
@@ -51,19 +49,18 @@ export async function startRerankerService() {
modelPath,
"--ctx-size",
  contextSize.toString(),
- "--parallel",
- threads.toString(),
  "--batch-size",
- batchSize.toString(),
+ contextSize.toString(),
  "--ubatch-size",
- batchSize.toString(),
+ contextSize.toString(),
"--flash-attn",
"--host",
SERVER_HOST,
"--port",
SERVER_PORT.toString(),
"--log-verbosity",
  VERBOSE_MODE ? "1" : "0",
+ "--no-warmup",
"--reranking",
"--pooling",
"rank",
Expand Down

0 comments on commit 34d2404

Please sign in to comment.