diff --git a/src/llm.ts b/src/llm.ts
index 46c6295..6c05590 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -502,20 +502,21 @@ export class LlamaCpp implements LLM {
     // (likely a binary/build config issue in node-llama-cpp).
     // @ts-expect-error node-llama-cpp API compat
     const gpuTypes = await getLlamaGpuTypes();
-    // Prefer CUDA > Metal > Vulkan > CPU
-    const preferred = (["cuda", "metal", "vulkan"] as const).find(g => gpuTypes.includes(g));
+    // Prefer CUDA > Metal > Vulkan > CPU; try each in order
+    const gpuOrder = (["cuda", "metal", "vulkan"] as const).filter(g => gpuTypes.includes(g));
 
-    let llama: Llama;
-    if (preferred) {
+    let llama: Llama | undefined;
+    for (const gpu of gpuOrder) {
       try {
-        llama = await getLlama({ gpu: preferred, logLevel: LlamaLogLevel.error });
+        llama = await getLlama({ gpu, logLevel: LlamaLogLevel.error });
+        break;
       } catch {
-        llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
         process.stderr.write(
-          `QMD Warning: ${preferred} reported available but failed to initialize. Falling back to CPU.\n`
+          `QMD Warning: ${gpu} reported available but failed to initialize. Trying next...\n`
         );
       }
-    } else {
+    }
+    if (!llama) {
       llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
     }
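
Note (not part of the patch): the same fallback chain could be factored into a standalone helper for readability. The sketch below assumes the `node-llama-cpp` exports used in the diff (`getLlama`, `getLlamaGpuTypes`, `LlamaLogLevel`, `Llama`) behave as shown above; `initLlamaWithFallback` and `GPU_PREFERENCE` are hypothetical names used only for illustration.

```ts
import { getLlama, getLlamaGpuTypes, LlamaLogLevel, type Llama } from "node-llama-cpp";

// Hypothetical helper mirroring the loop in the diff: try CUDA, then Metal,
// then Vulkan, and fall back to CPU if no GPU backend initializes.
const GPU_PREFERENCE = ["cuda", "metal", "vulkan"] as const;

async function initLlamaWithFallback(): Promise<Llama> {
  // Same availability query as in the diff (which keeps a @ts-expect-error
  // workaround for the node-llama-cpp typings).
  const gpuTypes = await getLlamaGpuTypes();
  const candidates = GPU_PREFERENCE.filter(g => gpuTypes.includes(g));

  for (const gpu of candidates) {
    try {
      // First backend that initializes wins.
      return await getLlama({ gpu, logLevel: LlamaLogLevel.error });
    } catch {
      process.stderr.write(
        `QMD Warning: ${gpu} reported available but failed to initialize. Trying next...\n`
      );
    }
  }
  // No candidates, or every candidate threw: run on CPU.
  return getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
}
```

Returning early inside the loop plays the same role as the `break` in the patch, and the final CPU call covers both the case where no GPU type is reported and the case where every reported backend fails.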