From 645c58df7b360e5e2147c077e822e87af01d9bd7 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 17 Jan 2025 14:41:39 +0100 Subject: [PATCH] local-apps: update llama.cpp snippet (#1103) This change is related to these upstream PRs: - https://github.com/ggerganov/llama.cpp/pull/11195 allows using tag-based repo name like on ollama - https://github.com/ggerganov/llama.cpp/pull/11214 automatically turns on `--conversation` mode for models having a chat template Example: ```sh # for "instruct" model, conversation mode is enabled automatically llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF # for non-instruct model, it runs as completion llama-cli -hf TheBloke/Llama-2-7B-GGUF -p "Once upon a time," ``` --- packages/tasks/src/local-apps.spec.ts | 10 ++-------- packages/tasks/src/local-apps.ts | 15 +++++++++------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts index 23806f668..35a054b8d 100644 --- a/packages/tasks/src/local-apps.spec.ts +++ b/packages/tasks/src/local-apps.spec.ts @@ -13,11 +13,7 @@ describe("local-apps", () => { const snippet = snippetFunc(model); expect(snippet[0].content).toEqual(`# Load and run the model: -llama-cli \\ - --hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\ - --hf-file {{GGUF_FILE}} \\ - -p "You are a helpful assistant" \\ - --conversation`); +llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF`); }); it("llama.cpp non-conversational", async () => { @@ -30,9 +26,7 @@ llama-cli \\ const snippet = snippetFunc(model); expect(snippet[0].content).toEqual(`# Load and run the model: -llama-cli \\ - --hf-repo "mlabonne/gemma-2b-GGUF" \\ - --hf-file {{GGUF_FILE}} \\ +llama-cli -hf mlabonne/gemma-2b-GGUF \\ -p "Once upon a time,"`); }); diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 0f2695c13..ddfff533d 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -95,17 +95,20 @@ 
function isMlxModel(model: ModelData) { } const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => { + let tagName = ""; + if (filepath) { + const quantLabel = parseGGUFQuantLabel(filepath); + tagName = quantLabel ? `:${quantLabel}` : ""; + } const command = (binary: string) => { const snippet = [ "# Load and run the model:", - `${binary} \\`, - ` --hf-repo "${model.id}" \\`, - ` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`, - ` -p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`, + `${binary} -hf ${model.id}${tagName}`, ]; - if (model.tags.includes("conversational")) { + if (!model.tags.includes("conversational")) { + // for non-conversational models, add a prompt snippet[snippet.length - 1] += " \\"; - snippet.push(" --conversation"); + snippet.push(" -p \"Once upon a time,\""); } return snippet.join("\n"); };