Commit 01107f9

feat: use new llama wrapper
1 parent c461563 commit 01107f9

File tree

10 files changed (+188 additions, −115 deletions)


.gitmodules

Lines changed: 3 additions & 3 deletions
@@ -1,3 +1,3 @@
-[submodule "cpp/externals/llama-cpp"]
-	path = cpp/externals/llama-cpp
-	url = https://github.com/developer239/llama-wrapped-cmake
+[submodule "cpp/externals/llama-chat"]
+	path = cpp/externals/llama-chat
+	url = https://github.com/developer239/llama-chat

README.md

Lines changed: 69 additions & 30 deletions
@@ -2,7 +2,7 @@
 
 [![npm version](http://img.shields.io/npm/v/llama.cpp-ts.svg?style=flat)](https://www.npmjs.com/package/llama.cpp-ts "View this project on npm")
 
-LlamaCPP-ts is a Node.js binding for the [LlamaCPP](https://github.com/developer239/llama-wrapped-cmake) library, which wraps the [llama.cpp](https://github.com/ggerganov/llama.cpp) framework. It provides an easy-to-use interface for running language models in Node.js applications, supporting both synchronous queries and asynchronous streaming responses.
+LlamaCPP-ts is a Node.js binding for the [LlamaCPP](https://github.com/developer239/llama-wrapped-cmake) library, which wraps the [llama.cpp](https://github.com/ggerganov/llama.cpp) framework. It provides an easy-to-use interface for running language models in Node.js applications, supporting asynchronous streaming responses.
 
 **Supported Systems:**
 
@@ -37,39 +37,32 @@ yarn add llama.cpp-ts
 ### Basic Usage
 
 ```javascript
-import { Llama } from 'llama.cpp-ts';
+const { Llama } = require('llama.cpp-ts');
 
-const llama = new Llama();
-const initialized = llama.initialize('./path/to/your/model.gguf');
-
-if (initialized) {
-  const response: string = llama.runQuery("Tell me a story.", 100);
-  console.log(response);
-} else {
-  console.error("Failed to initialize the model.");
-}
-```
+async function main() {
+  const llama = new Llama();
+  const modelPath = "./path/to/your/model.gguf";
+  const modelParams = { nGpuLayers: 32 };
+  const contextParams = { nContext: 2048 };
+
+  if (!llama.initialize(modelPath, modelParams, contextParams)) {
+    console.error("Failed to initialize the model");
+    return;
+  }
 
-### Streaming Responses
+  llama.setSystemPrompt("You are a helpful assistant. Always provide clear, concise, and accurate answers.");
 
-```javascript
-import { Llama, TokenStream } from 'llama.cpp-ts';
+  const question = "What is the capital of France?";
+  const tokenStream = llama.prompt(question);
 
-async function main() {
-  const llama = new Llama();
-  const initialized: boolean = llama.initialize('./path/to/your/model.gguf');
-
-  if (initialized) {
-    const tokenStream: TokenStream = llama.runQueryStream("Explain quantum computing", 200);
+  console.log("Question:", question);
+  console.log("Answer: ");
 
   while (true) {
-    const token: string | null = await tokenStream.read();
-    if (token === null) break;
-    process.stdout.write(token);
+    const token = await tokenStream.read();
+    if (token === null) break;
+    process.stdout.write(token);
   }
-  } else {
-    console.error("Failed to initialize the model.");
-  }
 }
 
 main().catch(console.error);
@@ -84,9 +77,10 @@ The `Llama` class provides methods to interact with language models loaded throu
 #### Public Methods
 
 - `constructor()`: Creates a new Llama instance.
-- `initialize(modelPath: string, contextSize?: number): boolean`: Initializes the model with the specified path and context size.
-- `runQuery(prompt: string, maxTokens?: number): string`: Runs a query with the given prompt and returns the result as a string.
-- `runQueryStream(prompt: string, maxTokens?: number): TokenStream`: Streams the response to the given prompt, returning a `TokenStream` object.
+- `initialize(modelPath: string, modelParams?: object, contextParams?: object): boolean`: Initializes the model with the specified path and parameters.
+- `setSystemPrompt(systemPrompt: string): void`: Sets the system prompt for the conversation.
+- `prompt(userMessage: string): TokenStream`: Streams the response to the given prompt, returning a `TokenStream` object.
+- `resetConversation(): void`: Resets the conversation history.
 
 ### TokenStream Class
 
@@ -95,3 +89,48 @@ The `TokenStream` class represents a stream of tokens generated by the language
 #### Public Methods
 
 - `read(): Promise<string | null>`: Reads the next token from the stream. Returns `null` when the stream is finished.
+
+## Example
+
+Here's a more comprehensive example demonstrating the usage of the library:
+
+```javascript
+const { Llama } = require('llama.cpp-ts');
+
+async function main() {
+  const llama = new Llama();
+  const modelPath = __dirname + "/models/Meta-Llama-3.1-8B-Instruct-Q3_K_S.gguf";
+  const modelParams = { nGpuLayers: 32 };
+  const contextParams = { nContext: 2048 };
+
+  if (!llama.initialize(modelPath, modelParams, contextParams)) {
+    console.error("Failed to initialize the model");
+    return;
+  }
+
+  llama.setSystemPrompt("You are a helpful assistant. Always provide clear, concise, and accurate answers.");
+
+  const questions = [
+    "What is the capital of France?",
+    "What's the population of that city?",
+    "What country is the city in?"
+  ];
+
+  for (const question of questions) {
+    const tokenStream = llama.prompt(question);
+
+    console.log("Question:", question);
+    console.log("Answer: ");
+
+    while (true) {
+      const token = await tokenStream.read();
+      if (token === null) break;
+      process.stdout.write(token);
+    }
+
+    console.log("\n");
+  }
+}
+
+main().catch(console.error);
+```
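
Note: the updated README examples above never exercise `resetConversation()`. Below is a minimal, hypothetical sketch (not part of this commit) of how it could be used to drop the accumulated history before starting an unrelated conversation; the method names come from the Public Methods list above and the model path and parameters are placeholders.

```javascript
const { Llama } = require('llama.cpp-ts');

async function main() {
  const llama = new Llama();
  // Placeholder path and parameters; adjust for your model.
  if (!llama.initialize("./path/to/your/model.gguf", { nGpuLayers: 32 }, { nContext: 2048 })) {
    console.error("Failed to initialize the model");
    return;
  }

  llama.setSystemPrompt("You are a helpful assistant.");

  // First conversation: the follow-up question relies on shared history.
  for (const question of ["What is the capital of France?", "What's its population?"]) {
    const stream = llama.prompt(question);
    let token;
    while ((token = await stream.read()) !== null) process.stdout.write(token);
    console.log("\n");
  }

  // Clear the history before switching topics.
  llama.resetConversation();

  const stream = llama.prompt("Explain quantum computing in one sentence.");
  let token;
  while ((token = await stream.read()) !== null) process.stdout.write(token);
  console.log();
}

main().catch(console.error);
```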

clone-submodule.sh

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 #!/bin/bash
 
 # Define the directory where the submodule should be cloned
-SUBMODULE_DIR="cpp/externals/llama-cpp"
+SUBMODULE_DIR="cpp/externals/llama-chat"
 
 # Check if the directory already exists
 if [ ! -d "$SUBMODULE_DIR" ]; then
   # Clone the submodule if it doesn't exist
-  git clone --recurse-submodules https://github.com/developer239/llama-wrapped-cmake.git $SUBMODULE_DIR
+  git clone --recurse-submodules https://github.com/developer239/llama-chat.git $SUBMODULE_DIR
 else
   echo "Submodule already exists, pulling latest changes..."
   git -C $SUBMODULE_DIR pull

cpp/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
@@ -8,7 +8,7 @@ set(CMAKE_CXX_STANDARD 17)
 include_directories(${CMAKE_JS_INC})
 
 # Add LlamaCPP submodule library
-add_subdirectory(${CMAKE_SOURCE_DIR}/externals/llama-cpp)
+add_subdirectory(${CMAKE_SOURCE_DIR}/externals/llama-chat)
 
 # Define the Node.js addon target
 add_library(${PROJECT_NAME} SHARED
@@ -23,9 +23,9 @@ add_library(${PROJECT_NAME} SHARED
 # Set target output name to 'addon.node'
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 
-# Link the LlamaCPP library and cmake-js provided Node.js runtime libraries
+# Link the LlamaChat library and cmake-js provided Node.js runtime libraries
 target_link_libraries(${PROJECT_NAME} PRIVATE
-        LlamaCPP
+        LlamaChat
         ${CMAKE_JS_LIB}
 )
 
@@ -44,7 +44,7 @@ string(REGEX REPLACE "[\r\n\"]" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
 target_include_directories(${PROJECT_NAME}
         PRIVATE
         ${NODE_ADDON_API_DIR}
-        ${CMAKE_SOURCE_DIR}/externals/llama-cpp/src # Add this line
+        ${CMAKE_SOURCE_DIR}/externals/llama-chat/src # Add this line
 )
 
 # define NAPI_VERSION

cpp/externals/llama-chat

Submodule llama-chat added at 460dfef

cpp/externals/llama-cpp

Lines changed: 0 additions & 1 deletion
This file was deleted (the old llama-cpp submodule reference was removed).

cpp/src/LlamaCPPBinding.cpp

Lines changed: 61 additions & 37 deletions
@@ -5,12 +5,11 @@
 Napi::FunctionReference LlamaCPPBinding::constructor;
 
 Napi::Object LlamaCPPBinding::Init(Napi::Env env, Napi::Object exports) {
-  Napi::HandleScope scope(env);
-
   Napi::Function func = DefineClass(env, "LlamaCPP", {
     InstanceMethod("initialize", &LlamaCPPBinding::Initialize),
-    InstanceMethod("runQuery", &LlamaCPPBinding::RunQuery),
-    InstanceMethod("runQueryStream", &LlamaCPPBinding::RunQueryStream)
+    InstanceMethod("setSystemPrompt", &LlamaCPPBinding::SetSystemPrompt),
+    InstanceMethod("prompt", &LlamaCPPBinding::PromptStream),
+    InstanceMethod("resetConversation", &LlamaCPPBinding::ResetConversation),
   });
 
   constructor = Napi::Persistent(func);
@@ -20,68 +19,93 @@ Napi::Object LlamaCPPBinding::Init(Napi::Env env, Napi::Object exports) {
   return exports;
 }
 
-LlamaCPPBinding::LlamaCPPBinding(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LlamaCPPBinding>(info) {
-  Napi::Env env = info.Env();
-  Napi::HandleScope scope(env);
-
-  llama_ = std::make_unique<LlamaWrapper>();
+LlamaCPPBinding::LlamaCPPBinding(const Napi::CallbackInfo& info)
+    : Napi::ObjectWrap<LlamaCPPBinding>(info) {
+  llama_ = std::make_unique<LlamaChat>();
 }
 
 Napi::Value LlamaCPPBinding::Initialize(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
+
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "Model path must be a string").ThrowAsJavaScriptException();
     return env.Null();
   }
 
-  std::string model_path = info[0].As<Napi::String>().Utf8Value();
-  size_t context_size = 80000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    context_size = info[1].As<Napi::Number>().Uint32Value();
+  std::string modelPath = info[0].As<Napi::String>().Utf8Value();
+
+  ModelParams modelParams;
+  ContextParams contextParams;
+
+  if (info.Length() > 1 && info[1].IsObject()) {
+    Napi::Object modelParamsObj = info[1].As<Napi::Object>();
+    if (modelParamsObj.Has("nGpuLayers")) {
+      modelParams.nGpuLayers = modelParamsObj.Get("nGpuLayers").As<Napi::Number>().Int32Value();
+    }
+    // Add parsing for other ModelParams if needed
+  }
+
+  if (info.Length() > 2 && info[2].IsObject()) {
+    Napi::Object contextParamsObj = info[2].As<Napi::Object>();
+    if (contextParamsObj.Has("nContext")) {
+      contextParams.nContext = contextParamsObj.Get("nContext").As<Napi::Number>().Uint32Value();
    }
+    // Add parsing for other ContextParams if needed
   }
 
-  bool success = llama_->Initialize(model_path, context_size);
-  return Napi::Boolean::New(env, success);
+  if (!llama_->InitializeModel(modelPath, modelParams)) {
+    Napi::Error::New(env, "Failed to initialize the model").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  if (!llama_->InitializeContext(contextParams)) {
+    Napi::Error::New(env, "Failed to initialize the context").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  return Napi::Boolean::New(env, true);
 }
 
-Napi::Value LlamaCPPBinding::RunQuery(const Napi::CallbackInfo& info) {
+Napi::Value LlamaCPPBinding::SetSystemPrompt(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "System prompt must be a string").ThrowAsJavaScriptException();
    return env.Null();
   }
 
-  std::string prompt = info[0].As<Napi::String>().Utf8Value();
-  size_t max_tokens = 1000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    max_tokens = info[1].As<Napi::Number>().Uint32Value();
-  }
+  std::string systemPrompt = info[0].As<Napi::String>().Utf8Value();
+  llama_->SetSystemPrompt(systemPrompt);
+  return env.Null();
+}
 
-  std::string response = llama_->RunQuery(prompt, max_tokens);
-  return Napi::String::New(env, response);
+Napi::Value LlamaCPPBinding::ResetConversation(const Napi::CallbackInfo& info) {
+  llama_->ResetConversation();
+  return info.Env().Undefined();
 }
 
-Napi::Value LlamaCPPBinding::RunQueryStream(const Napi::CallbackInfo& info) {
+Napi::Value LlamaCPPBinding::PromptStream(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "User message must be a string").ThrowAsJavaScriptException();
     return env.Null();
   }
 
-  std::string prompt = info[0].As<Napi::String>().Utf8Value();
-  size_t max_tokens = 1000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    max_tokens = info[1].As<Napi::Number>().Uint32Value();
-  }
+  std::string userMessage = info[0].As<Napi::String>().Utf8Value();
 
   Napi::Object streamObj = TokenStream::NewInstance(env, env.Null());
   TokenStream* stream = Napi::ObjectWrap<TokenStream>::Unwrap(streamObj);
 
-  std::thread([this, prompt, max_tokens, stream]() {
-    llama_->RunQueryStream(prompt, max_tokens, [stream](const std::string& token) {
-      stream->Push(token);
-    });
-    stream->End();
+  LlamaChat* llama_ptr = llama_.get();
+
+  std::thread([llama_ptr, userMessage, stream]() {
+    try {
+      llama_ptr->Prompt(userMessage, [stream](const std::string& piece) {
+        stream->Push(piece);
+      });
+      stream->End();
+    } catch (const std::exception& e) {
+      stream->End();
+    }
   }).detach();
 
   return streamObj;
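
With this rewrite, `Initialize` raises a JavaScript exception (in addition to returning `false`) when model or context setup fails, and `PromptStream` pushes pieces from a detached worker thread, calling `stream->End()` even when the native `Prompt` call throws. A minimal, hypothetical JavaScript sketch of what that means for callers (assuming the exports shown in the README; the model path and parameters are placeholders):

```javascript
const { Llama } = require('llama.cpp-ts');

async function main() {
  const llama = new Llama();

  // initialize() now throws on failure, so guard it as well as checking the result.
  let ok = false;
  try {
    ok = llama.initialize("./path/to/your/model.gguf", { nGpuLayers: 32 }, { nContext: 2048 });
  } catch (err) {
    console.error("Initialization failed:", err.message);
  }
  if (!ok) return;

  // read() resolves to null once the native side calls End(), including on the
  // error path inside the worker thread, so the loop ends when generation stops or fails.
  const stream = llama.prompt("What is the capital of France?");
  let token;
  while ((token = await stream.read()) !== null) {
    process.stdout.write(token);
  }
  console.log();
}

main().catch(console.error);
```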

cpp/src/LlamaCPPBinding.h

Lines changed: 5 additions & 4 deletions
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <napi.h>
-#include "llama-wrapper.h"
+#include "llama-chat.h"
 #include "TokenStream.h"
 
 class LlamaCPPBinding : public Napi::ObjectWrap<LlamaCPPBinding> {
@@ -13,8 +13,9 @@ class LlamaCPPBinding : public Napi::ObjectWrap<LlamaCPPBinding> {
   static Napi::FunctionReference constructor;
 
   Napi::Value Initialize(const Napi::CallbackInfo& info);
-  Napi::Value RunQuery(const Napi::CallbackInfo& info);
-  Napi::Value RunQueryStream(const Napi::CallbackInfo& info);
+  Napi::Value SetSystemPrompt(const Napi::CallbackInfo& info);
+  Napi::Value PromptStream(const Napi::CallbackInfo& info);
+  Napi::Value ResetConversation(const Napi::CallbackInfo& info);
 
-  std::unique_ptr<LlamaWrapper> llama_;
+  std::unique_ptr<LlamaChat> llama_;
 };
