17 changes: 9 additions & 8 deletions src/llm.ts
@@ -502,20 +502,21 @@ export class LlamaCpp implements LLM {
     // (likely a binary/build config issue in node-llama-cpp).
     // @ts-expect-error node-llama-cpp API compat
     const gpuTypes = await getLlamaGpuTypes();
-    // Prefer CUDA > Metal > Vulkan > CPU
-    const preferred = (["cuda", "metal", "vulkan"] as const).find(g => gpuTypes.includes(g));
+    // Prefer CUDA > Metal > Vulkan > CPU — try each in order
+    const gpuOrder = (["cuda", "metal", "vulkan"] as const).filter(g => gpuTypes.includes(g));
 
-    let llama: Llama;
-    if (preferred) {
+    let llama: Llama | undefined;
+    for (const gpu of gpuOrder) {
       try {
-        llama = await getLlama({ gpu: preferred, logLevel: LlamaLogLevel.error });
+        llama = await getLlama({ gpu, logLevel: LlamaLogLevel.error });
+        break;
       } catch {
-        llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
         process.stderr.write(
-          `QMD Warning: ${preferred} reported available but failed to initialize. Falling back to CPU.\n`
+          `QMD Warning: ${gpu} reported available but failed to initialize. Trying next...\n`
         );
       }
-    } else {
+    }
+    if (!llama) {
       llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
     }
 
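The change replaces a single-candidate fallback (first detected GPU, else CPU) with an ordered loop that degrades through every detected backend before reaching CPU, which matters on machines where CUDA is detected but its driver fails to initialize. A minimal standalone sketch of the resulting control flow, using the node-llama-cpp calls already present in src/llm.ts; the initLlama wrapper is hypothetical and not part of this PR:

import { getLlama, getLlamaGpuTypes, Llama, LlamaLogLevel } from "node-llama-cpp";

// Hypothetical helper sketching the fallback order introduced above:
// try each detected GPU backend in preference order, then fall back to CPU.
async function initLlama(): Promise<Llama> {
  const gpuTypes = await getLlamaGpuTypes();
  const gpuOrder = (["cuda", "metal", "vulkan"] as const).filter(g => gpuTypes.includes(g));

  for (const gpu of gpuOrder) {
    try {
      // First backend that initializes wins.
      return await getLlama({ gpu, logLevel: LlamaLogLevel.error });
    } catch {
      process.stderr.write(
        `QMD Warning: ${gpu} reported available but failed to initialize. Trying next...\n`
      );
    }
  }
  // Every GPU candidate failed, or none was detected: use CPU.
  return getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
}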