Commit 01107f9

feat: use new llama wrapper
1 parent c461563 commit 01107f9

File tree

10 files changed (+188 additions, −115 deletions)


.gitmodules

Lines changed: 3 additions & 3 deletions
@@ -1,3 +1,3 @@
-[submodule "cpp/externals/llama-cpp"]
-	path = cpp/externals/llama-cpp
-	url = https://github.com/developer239/llama-wrapped-cmake
+[submodule "cpp/externals/llama-chat"]
+	path = cpp/externals/llama-chat
+	url = https://github.com/developer239/llama-chat

README.md

Lines changed: 69 additions & 30 deletions
@@ -2,7 +2,7 @@
 
 [![npm version](http://img.shields.io/npm/v/llama.cpp-ts.svg?style=flat)](https://www.npmjs.com/package/llama.cpp-ts "View this project on npm")
 
-LlamaCPP-ts is a Node.js binding for the [LlamaCPP](https://github.com/developer239/llama-wrapped-cmake) library, which wraps the [llama.cpp](https://github.com/ggerganov/llama.cpp) framework. It provides an easy-to-use interface for running language models in Node.js applications, supporting both synchronous queries and asynchronous streaming responses.
+LlamaCPP-ts is a Node.js binding for the [LlamaCPP](https://github.com/developer239/llama-wrapped-cmake) library, which wraps the [llama.cpp](https://github.com/ggerganov/llama.cpp) framework. It provides an easy-to-use interface for running language models in Node.js applications, supporting asynchronous streaming responses.
 
 **Supported Systems:**
 
@@ -37,39 +37,32 @@ yarn add llama.cpp-ts
 ### Basic Usage
 
 ```javascript
-import { Llama } from 'llama.cpp-ts';
+const { Llama } = require('llama.cpp-ts');
 
-const llama = new Llama();
-const initialized = llama.initialize('./path/to/your/model.gguf');
-
-if (initialized) {
-  const response: string = llama.runQuery("Tell me a story.", 100);
-  console.log(response);
-} else {
-  console.error("Failed to initialize the model.");
-}
-```
+async function main() {
+  const llama = new Llama();
+  const modelPath = "./path/to/your/model.gguf";
+  const modelParams = { nGpuLayers: 32 };
+  const contextParams = { nContext: 2048 };
+
+  if (!llama.initialize(modelPath, modelParams, contextParams)) {
+    console.error("Failed to initialize the model");
+    return;
+  }
 
-### Streaming Responses
+  llama.setSystemPrompt("You are a helpful assistant. Always provide clear, concise, and accurate answers.");
 
-```javascript
-import { Llama, TokenStream } from 'llama.cpp-ts';
+  const question = "What is the capital of France?";
+  const tokenStream = llama.prompt(question);
 
-async function main() {
-  const llama = new Llama();
-  const initialized: boolean = llama.initialize('./path/to/your/model.gguf');
-
-  if (initialized) {
-    const tokenStream: TokenStream = llama.runQueryStream("Explain quantum computing", 200);
+  console.log("Question:", question);
+  console.log("Answer: ");
 
   while (true) {
-    const token: string | null = await tokenStream.read();
-    if (token === null) break;
-    process.stdout.write(token);
+    const token = await tokenStream.read();
+    if (token === null) break;
+    process.stdout.write(token);
   }
-  } else {
-    console.error("Failed to initialize the model.");
-  }
 }
 
 main().catch(console.error);
@@ -84,9 +77,10 @@ The `Llama` class provides methods to interact with language models loaded throu
 #### Public Methods
 
 - `constructor()`: Creates a new Llama instance.
-- `initialize(modelPath: string, contextSize?: number): boolean`: Initializes the model with the specified path and context size.
-- `runQuery(prompt: string, maxTokens?: number): string`: Runs a query with the given prompt and returns the result as a string.
-- `runQueryStream(prompt: string, maxTokens?: number): TokenStream`: Streams the response to the given prompt, returning a `TokenStream` object.
+- `initialize(modelPath: string, modelParams?: object, contextParams?: object): boolean`: Initializes the model with the specified path and parameters.
+- `setSystemPrompt(systemPrompt: string): void`: Sets the system prompt for the conversation.
+- `prompt(userMessage: string): TokenStream`: Streams the response to the given prompt, returning a `TokenStream` object.
+- `resetConversation(): void`: Resets the conversation history.
 
 ### TokenStream Class
 
@@ -95,3 +89,48 @@ The `TokenStream` class represents a stream of tokens generated by the language
 #### Public Methods
 
 - `read(): Promise<string | null>`: Reads the next token from the stream. Returns `null` when the stream is finished.
+
+## Example
+
+Here's a more comprehensive example demonstrating the usage of the library:
+
+```javascript
+const { Llama } = require('llama.cpp-ts');
+
+async function main() {
+  const llama = new Llama();
+  const modelPath = __dirname + "/models/Meta-Llama-3.1-8B-Instruct-Q3_K_S.gguf";
+  const modelParams = { nGpuLayers: 32 };
+  const contextParams = { nContext: 2048 };
+
+  if (!llama.initialize(modelPath, modelParams, contextParams)) {
+    console.error("Failed to initialize the model");
+    return;
+  }
+
+  llama.setSystemPrompt("You are a helpful assistant. Always provide clear, concise, and accurate answers.");
+
+  const questions = [
+    "What is the capital of France?",
+    "What's the population of that city?",
+    "What country is the city in?"
+  ];
+
+  for (const question of questions) {
+    const tokenStream = llama.prompt(question);
+
+    console.log("Question:", question);
+    console.log("Answer: ");
+
+    while (true) {
+      const token = await tokenStream.read();
+      if (token === null) break;
+      process.stdout.write(token);
+    }
+
+    console.log("\n");
+  }
+}
+
+main().catch(console.error);
+```
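
Note: the updated README examples above never exercise `resetConversation()`. Below is a minimal, hypothetical sketch (not part of this commit) of how it could be used to drop the accumulated history before starting an unrelated conversation; the method names come from the Public Methods list above and the model path and parameters are placeholders.

```javascript
const { Llama } = require('llama.cpp-ts');

async function main() {
  const llama = new Llama();
  // Placeholder path and parameters; adjust for your model.
  if (!llama.initialize("./path/to/your/model.gguf", { nGpuLayers: 32 }, { nContext: 2048 })) {
    console.error("Failed to initialize the model");
    return;
  }

  llama.setSystemPrompt("You are a helpful assistant.");

  // First conversation: the follow-up question relies on shared history.
  for (const question of ["What is the capital of France?", "What's its population?"]) {
    const stream = llama.prompt(question);
    let token;
    while ((token = await stream.read()) !== null) process.stdout.write(token);
    console.log("\n");
  }

  // Clear the history before switching topics.
  llama.resetConversation();

  const stream = llama.prompt("Explain quantum computing in one sentence.");
  let token;
  while ((token = await stream.read()) !== null) process.stdout.write(token);
  console.log();
}

main().catch(console.error);
```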

clone-submodule.sh

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 #!/bin/bash
 
 # Define the directory where the submodule should be cloned
-SUBMODULE_DIR="cpp/externals/llama-cpp"
+SUBMODULE_DIR="cpp/externals/llama-chat"
 
 # Check if the directory already exists
 if [ ! -d "$SUBMODULE_DIR" ]; then
   # Clone the submodule if it doesn't exist
-  git clone --recurse-submodules https://github.com/developer239/llama-wrapped-cmake.git $SUBMODULE_DIR
+  git clone --recurse-submodules https://github.com/developer239/llama-chat.git $SUBMODULE_DIR
 else
   echo "Submodule already exists, pulling latest changes..."
   git -C $SUBMODULE_DIR pull

cpp/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
@@ -8,7 +8,7 @@ set(CMAKE_CXX_STANDARD 17)
 include_directories(${CMAKE_JS_INC})
 
 # Add LlamaCPP submodule library
-add_subdirectory(${CMAKE_SOURCE_DIR}/externals/llama-cpp)
+add_subdirectory(${CMAKE_SOURCE_DIR}/externals/llama-chat)
 
 # Define the Node.js addon target
 add_library(${PROJECT_NAME} SHARED
@@ -23,9 +23,9 @@ add_library(${PROJECT_NAME} SHARED
 # Set target output name to 'addon.node'
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 
-# Link the LlamaCPP library and cmake-js provided Node.js runtime libraries
+# Link the LlamaChat library and cmake-js provided Node.js runtime libraries
 target_link_libraries(${PROJECT_NAME} PRIVATE
-        LlamaCPP
+        LlamaChat
         ${CMAKE_JS_LIB}
 )
 
@@ -44,7 +44,7 @@ string(REGEX REPLACE "[\r\n\"]" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
 target_include_directories(${PROJECT_NAME}
         PRIVATE
         ${NODE_ADDON_API_DIR}
-        ${CMAKE_SOURCE_DIR}/externals/llama-cpp/src # Add this line
+        ${CMAKE_SOURCE_DIR}/externals/llama-chat/src # Add this line
 )
 
 # define NAPI_VERSION

cpp/externals/llama-chat

Submodule llama-chat added at 460dfef

cpp/externals/llama-cpp

Lines changed: 0 additions & 1 deletion
This file was deleted (the old llama-cpp submodule reference was removed).

cpp/src/LlamaCPPBinding.cpp

Lines changed: 61 additions & 37 deletions
@@ -5,12 +5,11 @@
 Napi::FunctionReference LlamaCPPBinding::constructor;
 
 Napi::Object LlamaCPPBinding::Init(Napi::Env env, Napi::Object exports) {
-  Napi::HandleScope scope(env);
-
   Napi::Function func = DefineClass(env, "LlamaCPP", {
     InstanceMethod("initialize", &LlamaCPPBinding::Initialize),
-    InstanceMethod("runQuery", &LlamaCPPBinding::RunQuery),
-    InstanceMethod("runQueryStream", &LlamaCPPBinding::RunQueryStream)
+    InstanceMethod("setSystemPrompt", &LlamaCPPBinding::SetSystemPrompt),
+    InstanceMethod("prompt", &LlamaCPPBinding::PromptStream),
+    InstanceMethod("resetConversation", &LlamaCPPBinding::ResetConversation),
   });
 
   constructor = Napi::Persistent(func);
@@ -20,68 +19,93 @@ Napi::Object LlamaCPPBinding::Init(Napi::Env env, Napi::Object exports) {
   return exports;
 }
 
-LlamaCPPBinding::LlamaCPPBinding(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LlamaCPPBinding>(info) {
-  Napi::Env env = info.Env();
-  Napi::HandleScope scope(env);
-
-  llama_ = std::make_unique<LlamaWrapper>();
+LlamaCPPBinding::LlamaCPPBinding(const Napi::CallbackInfo& info)
+    : Napi::ObjectWrap<LlamaCPPBinding>(info) {
+  llama_ = std::make_unique<LlamaChat>();
 }
 
 Napi::Value LlamaCPPBinding::Initialize(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
+
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "Model path must be a string").ThrowAsJavaScriptException();
     return env.Null();
   }
 
-  std::string model_path = info[0].As<Napi::String>().Utf8Value();
-  size_t context_size = 80000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    context_size = info[1].As<Napi::Number>().Uint32Value();
+  std::string modelPath = info[0].As<Napi::String>().Utf8Value();
+
+  ModelParams modelParams;
+  ContextParams contextParams;
+
+  if (info.Length() > 1 && info[1].IsObject()) {
+    Napi::Object modelParamsObj = info[1].As<Napi::Object>();
+    if (modelParamsObj.Has("nGpuLayers")) {
+      modelParams.nGpuLayers = modelParamsObj.Get("nGpuLayers").As<Napi::Number>().Int32Value();
+    }
+    // Add parsing for other ModelParams if needed
+  }
+
+  if (info.Length() > 2 && info[2].IsObject()) {
+    Napi::Object contextParamsObj = info[2].As<Napi::Object>();
+    if (contextParamsObj.Has("nContext")) {
+      contextParams.nContext = contextParamsObj.Get("nContext").As<Napi::Number>().Uint32Value();
    }
+    // Add parsing for other ContextParams if needed
   }
 
-  bool success = llama_->Initialize(model_path, context_size);
-  return Napi::Boolean::New(env, success);
+  if (!llama_->InitializeModel(modelPath, modelParams)) {
+    Napi::Error::New(env, "Failed to initialize the model").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  if (!llama_->InitializeContext(contextParams)) {
+    Napi::Error::New(env, "Failed to initialize the context").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  return Napi::Boolean::New(env, true);
 }
 
-Napi::Value LlamaCPPBinding::RunQuery(const Napi::CallbackInfo& info) {
+Napi::Value LlamaCPPBinding::SetSystemPrompt(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "System prompt must be a string").ThrowAsJavaScriptException();
    return env.Null();
   }
 
-  std::string prompt = info[0].As<Napi::String>().Utf8Value();
-  size_t max_tokens = 1000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    max_tokens = info[1].As<Napi::Number>().Uint32Value();
-  }
+  std::string systemPrompt = info[0].As<Napi::String>().Utf8Value();
+  llama_->SetSystemPrompt(systemPrompt);
+  return env.Null();
+}
 
-  std::string response = llama_->RunQuery(prompt, max_tokens);
-  return Napi::String::New(env, response);
+Napi::Value LlamaCPPBinding::ResetConversation(const Napi::CallbackInfo& info) {
+  llama_->ResetConversation();
+  return info.Env().Undefined();
 }
 
-Napi::Value LlamaCPPBinding::RunQueryStream(const Napi::CallbackInfo& info) {
+Napi::Value LlamaCPPBinding::PromptStream(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+    Napi::TypeError::New(env, "User message must be a string").ThrowAsJavaScriptException();
     return env.Null();
   }
 
-  std::string prompt = info[0].As<Napi::String>().Utf8Value();
-  size_t max_tokens = 1000;
-  if (info.Length() > 1 && info[1].IsNumber()) {
-    max_tokens = info[1].As<Napi::Number>().Uint32Value();
-  }
+  std::string userMessage = info[0].As<Napi::String>().Utf8Value();
 
   Napi::Object streamObj = TokenStream::NewInstance(env, env.Null());
   TokenStream* stream = Napi::ObjectWrap<TokenStream>::Unwrap(streamObj);
 
-  std::thread([this, prompt, max_tokens, stream]() {
-    llama_->RunQueryStream(prompt, max_tokens, [stream](const std::string& token) {
-      stream->Push(token);
-    });
-    stream->End();
+  LlamaChat* llama_ptr = llama_.get();
+
+  std::thread([llama_ptr, userMessage, stream]() {
+    try {
+      llama_ptr->Prompt(userMessage, [stream](const std::string& piece) {
+        stream->Push(piece);
+      });
+      stream->End();
+    } catch (const std::exception& e) {
+      stream->End();
+    }
   }).detach();
 
   return streamObj;
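
With this rewrite, `Initialize` raises a JavaScript exception (in addition to returning `false`) when model or context setup fails, and `PromptStream` pushes pieces from a detached worker thread, calling `stream->End()` even when the native `Prompt` call throws. A minimal, hypothetical JavaScript sketch of what that means for callers (assuming the exports shown in the README; the model path and parameters are placeholders):

```javascript
const { Llama } = require('llama.cpp-ts');

async function main() {
  const llama = new Llama();

  // initialize() now throws on failure, so guard it as well as checking the result.
  let ok = false;
  try {
    ok = llama.initialize("./path/to/your/model.gguf", { nGpuLayers: 32 }, { nContext: 2048 });
  } catch (err) {
    console.error("Initialization failed:", err.message);
  }
  if (!ok) return;

  // read() resolves to null once the native side calls End(), including on the
  // error path inside the worker thread, so the loop ends when generation stops or fails.
  const stream = llama.prompt("What is the capital of France?");
  let token;
  while ((token = await stream.read()) !== null) {
    process.stdout.write(token);
  }
  console.log();
}

main().catch(console.error);
```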

cpp/src/LlamaCPPBinding.h

Lines changed: 5 additions & 4 deletions
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <napi.h>
-#include "llama-wrapper.h"
+#include "llama-chat.h"
 #include "TokenStream.h"
 
 class LlamaCPPBinding : public Napi::ObjectWrap<LlamaCPPBinding> {
@@ -13,8 +13,9 @@ class LlamaCPPBinding : public Napi::ObjectWrap<LlamaCPPBinding> {
   static Napi::FunctionReference constructor;
 
   Napi::Value Initialize(const Napi::CallbackInfo& info);
-  Napi::Value RunQuery(const Napi::CallbackInfo& info);
-  Napi::Value RunQueryStream(const Napi::CallbackInfo& info);
+  Napi::Value SetSystemPrompt(const Napi::CallbackInfo& info);
+  Napi::Value PromptStream(const Napi::CallbackInfo& info);
+  Napi::Value ResetConversation(const Napi::CallbackInfo& info);
 
-  std::unique_ptr<LlamaWrapper> llama_;
+  std::unique_ptr<LlamaChat> llama_;
 };
