Commit: allow to set GPU layers
mudler committed May 14, 2023
1 parent 7d9b011 commit 3501b34
Showing 6 changed files with 20 additions and 4 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -25,6 +25,12 @@ cd go-llama.cpp
make libbinding.a
```

To build with OpenBLAS, for example:

```
CMAKE_ARGS="-DLLAMA_OPENBLAS=ON" make libbinding.a
```
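
Offloading layers only has an effect when llama.cpp itself is compiled with GPU support. A sketch, assuming the upstream `LLAMA_CUBLAS` CMake flag that llama.cpp exposed at the time (requires the CUDA toolkit):

```
CMAKE_ARGS="-DLLAMA_CUBLAS=ON" make libbinding.a
```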

Now you can run the example with:

```
4 changes: 3 additions & 1 deletion binding.cpp
@@ -369,7 +369,7 @@ void* llama_allocate_params(const char *prompt, int seed, int threads, int token
}


-void* load_model(const char *fname, int n_ctx, int n_parts, int n_seed, bool memory_f16, bool mlock, bool embeddings) {
+void* load_model(const char *fname, int n_ctx, int n_parts, int n_seed, bool memory_f16, bool mlock, bool embeddings, int n_gpu_layers) {
    // load the model
    auto lparams = llama_context_default_params();

@@ -379,6 +379,8 @@ void* load_model(const char *fname, int n_ctx, int n_parts, int n_seed, bool mem
    lparams.f16_kv = memory_f16;
    lparams.embedding = embeddings;
    lparams.use_mlock = mlock;
+   lparams.n_gpu_layers = n_gpu_layers;

    void* res = nullptr;
    try {
        res = llama_init_from_file(fname, lparams);
2 changes: 1 addition & 1 deletion binding.h
@@ -8,7 +8,7 @@ extern "C" {

extern unsigned char tokenCallback(void *, char *);

-void* load_model(const char *fname, int n_ctx, int n_parts, int n_seed, bool memory_f16, bool mlock, bool embeddings);
+void* load_model(const char *fname, int n_ctx, int n_parts, int n_seed, bool memory_f16, bool mlock, bool embeddings, int n_gpu);

int get_embeddings(void* params_ptr, void* state_pr, float * res_embeddings);

2 changes: 1 addition & 1 deletion llama.cpp (submodule pointer update)
2 changes: 1 addition & 1 deletion llama.go
@@ -22,7 +22,7 @@ type LLama struct {
func New(model string, opts ...ModelOption) (*LLama, error) {
	mo := NewModelOptions(opts...)
	modelPath := C.CString(model)
-	result := C.load_model(modelPath, C.int(mo.ContextSize), C.int(mo.Parts), C.int(mo.Seed), C.bool(mo.F16Memory), C.bool(mo.MLock), C.bool(mo.Embeddings))
+	result := C.load_model(modelPath, C.int(mo.ContextSize), C.int(mo.Parts), C.int(mo.Seed), C.bool(mo.F16Memory), C.bool(mo.MLock), C.bool(mo.Embeddings), C.int(mo.NGPULayers))
	if result == nil {
		return nil, fmt.Errorf("failed loading model")
	}
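
With this change, callers can request GPU offload when constructing the model. A minimal usage sketch; the module path and layer count are illustrative assumptions, not part of this commit:

```
package main

import (
	"fmt"

	llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
	// Offload the first 32 transformer layers to the GPU;
	// the remaining layers run on the CPU as before.
	model, err := llama.New("./models/7B/ggml-model-q4_0.bin", llama.SetGPULayers(32))
	if err != nil {
		fmt.Println("failed loading model:", err)
		return
	}
	_ = model
}
```
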
8 changes: 8 additions & 0 deletions options.go
@@ -7,6 +7,7 @@ type ModelOptions struct {
	F16Memory  bool
	MLock      bool
	Embeddings bool
+	NGPULayers int
}

type PredictOptions struct {
@@ -113,6 +114,13 @@ var IgnoreEOS PredictOption = func(p *PredictOptions) {
	p.IgnoreEOS = true
}

+// SetGPULayers sets the number of model layers to offload to the GPU.
+func SetGPULayers(n int) ModelOption {
+	return func(p *ModelOptions) {
+		p.NGPULayers = n
+	}
+}
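
The helper follows the functional-options pattern used throughout options.go: a ModelOption is just a function that mutates ModelOptions, so callers can compose the built-in options or define their own. A sketch under that assumption; the helper name and layer count are hypothetical:

```
// WithFullOffload is a hypothetical caller-defined option that asks
// for a very large layer count, effectively requesting that the
// whole model be offloaded to the GPU.
func WithFullOffload() llama.ModelOption {
	return func(o *llama.ModelOptions) {
		o.NGPULayers = 9999
	}
}
```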

// SetTokenCallback sets a callback that receives each predicted token; returning false stops the prediction.
func SetTokenCallback(fn func(string) bool) PredictOption {
	return func(p *PredictOptions) {
