From 7beb80912dc2162ddf2030c01d409e0c28324e70 Mon Sep 17 00:00:00 2001
From: JonBasse
Date: Sat, 21 Feb 2026 08:02:22 +0100
Subject: [PATCH] fix(llm): try all available GPU types before falling back to
 CPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GPU initialization used `.find()` to pick only the first available
GPU type (typically CUDA), and on failure fell directly to CPU —
skipping Vulkan and Metal entirely. On Linux systems with Vulkan but no
CUDA toolkit, this meant queries always ran on CPU despite a working
GPU.

Changed to `.filter()` + loop to try each available GPU in priority
order (CUDA > Metal > Vulkan) before falling back to CPU.

Fixes #213

Co-Authored-By: Claude Opus 4.6
---
 src/llm.ts | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/llm.ts b/src/llm.ts
index 46c62957..6c05590f 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -502,20 +502,21 @@ export class LlamaCpp implements LLM {
     // (likely a binary/build config issue in node-llama-cpp).
     // @ts-expect-error node-llama-cpp API compat
     const gpuTypes = await getLlamaGpuTypes();
 
-    // Prefer CUDA > Metal > Vulkan > CPU
-    const preferred = (["cuda", "metal", "vulkan"] as const).find(g => gpuTypes.includes(g));
+    // Prefer CUDA > Metal > Vulkan > CPU — try each in order
+    const gpuOrder = (["cuda", "metal", "vulkan"] as const).filter(g => gpuTypes.includes(g));
 
-    let llama: Llama;
-    if (preferred) {
+    let llama: Llama | undefined;
+    for (const gpu of gpuOrder) {
       try {
-        llama = await getLlama({ gpu: preferred, logLevel: LlamaLogLevel.error });
+        llama = await getLlama({ gpu, logLevel: LlamaLogLevel.error });
+        break;
       } catch {
-        llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
         process.stderr.write(
-          `QMD Warning: ${preferred} reported available but failed to initialize. Falling back to CPU.\n`
+          `QMD Warning: ${gpu} reported available but failed to initialize. Trying next...\n`
         );
       }
-    } else {
+    }
+    if (!llama) {
       llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
     }