From 645c58df7b360e5e2147c077e822e87af01d9bd7 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 17 Jan 2025 14:41:39 +0100 Subject: [PATCH] local-apps: update llama.cpp snippet (#1103) This change is related to these upstream PRs: - https://github.com/ggerganov/llama.cpp/pull/11195 allows using tag-based repo name like on ollama - https://github.com/ggerganov/llama.cpp/pull/11214 automatically turns on `--conversation` mode for models having a chat template Example: ```sh # for "instruct" model, conversation mode is enabled automatically llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF # for non-instruct model, it runs as completion llama-cli -hf TheBloke/Llama-2-7B-GGUF -p "Once upon a time," ``` --- packages/tasks/src/local-apps.spec.ts | 10 ++-------- packages/tasks/src/local-apps.ts | 15 +++++++++------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts index 23806f668..35a054b8d 100644 --- a/packages/tasks/src/local-apps.spec.ts +++ b/packages/tasks/src/local-apps.spec.ts @@ -13,11 +13,7 @@ describe("local-apps", () => { const snippet = snippetFunc(model); expect(snippet[0].content).toEqual(`# Load and run the model: -llama-cli \\ - --hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\ - --hf-file {{GGUF_FILE}} \\ - -p "You are a helpful assistant" \\ - --conversation`); +llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF`); }); it("llama.cpp non-conversational", async () => { @@ -30,9 +26,7 @@ llama-cli \\ const snippet = snippetFunc(model); expect(snippet[0].content).toEqual(`# Load and run the model: -llama-cli \\ - --hf-repo "mlabonne/gemma-2b-GGUF" \\ - --hf-file {{GGUF_FILE}} \\ +llama-cli -hf mlabonne/gemma-2b-GGUF \\ -p "Once upon a time,"`); }); diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 0f2695c13..ddfff533d 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -95,17 +95,20 @@ 
function isMlxModel(model: ModelData) { } const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => { + let tagName = ""; + if (filepath) { + const quantLabel = parseGGUFQuantLabel(filepath); + tagName = quantLabel ? `:${quantLabel}` : ""; + } const command = (binary: string) => { const snippet = [ "# Load and run the model:", - `${binary} \\`, - ` --hf-repo "${model.id}" \\`, - ` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`, - ` -p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`, + `${binary} -hf ${model.id}${tagName}`, ]; - if (model.tags.includes("conversational")) { + if (!model.tags.includes("conversational")) { + // for non-conversational models, add a prompt snippet[snippet.length - 1] += " \\"; - snippet.push(" --conversation"); + snippet.push(" -p \"Once upon a time,\""); } return snippet.join("\n"); };