Feat: Tokenize String, Test / Build cleanup (#164)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
dave-gray101 and mudler authored Aug 15, 2023
1 parent 18f25c2 commit f4f03cc
Showing 6 changed files with 122 additions and 39 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -3,9 +3,13 @@
/build
/ggml-metal.metal
/libbinding.a
prepare

# just in case
.DS_Store
.idea
.vscode
Thumbs.db

# model files downloaded by the test script
ggllm-test-model.bin
10 changes: 10 additions & 0 deletions binding.cpp
@@ -618,6 +618,16 @@ void llama_free_params(void* params_ptr) {
    delete params;
}

int llama_tokenize_string(void* params_ptr, void* state_pr, int* result) {
    gpt_params* params_p = (gpt_params*) params_ptr;
    llama_state* state = (llama_state*) state_pr;
    llama_context* ctx = state->ctx;

    // TODO: make add_bos configurable instead of hard-coding it to true below.

    // Tokenizes params_p->prompt into result, which the caller must size to at
    // least n_ctx ints. Per llama.cpp's llama_tokenize convention, this returns
    // the token count on success, or the negative of the required count when
    // the buffer is too small.
    return llama_tokenize(ctx, params_p->prompt.c_str(), result, params_p->n_ctx, true);
}


std::vector<std::string> create_vector(const char** strings, int count) {
    std::vector<std::string>* vec = new std::vector<std::string>;
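One sizing note on the new llama_tokenize_string wrapper above: the limit it hands to llama_tokenize is params_p->n_ctx, while the Go caller added below in llama.go sizes its result buffer from the po.Tokens option (which defaults to 4096 when unset). The two are not tied together, so a cautious caller keeps the Go buffer at least as large as any plausible context size. A minimal sketch, assuming SetTokens is the binding's option controlling that buffer, with 4096 as a deliberately generous bound:

package main

import llama "github.com/go-skynet/go-llama.cpp"

// tokenizeGenerous sizes the Go-side token buffer generously so it cannot be
// smaller than the n_ctx limit the C side passes to llama_tokenize. Sketch
// only; 4096 mirrors the wrapper's own fallback.
func tokenizeGenerous(model *llama.LLama, prompt string) (int32, []int32, error) {
	return model.TokenizeString(prompt, llama.SetTokens(4096))
}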
2 changes: 2 additions & 0 deletions binding.h
@@ -48,6 +48,8 @@ void llama_free_params(void* params_ptr);

void llama_binding_free_model(void* state);

int llama_tokenize_string(void* params_ptr, void* state_pr, int* result);

int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug);

#ifdef __cplusplus
39 changes: 0 additions & 39 deletions lama_test.go

This file was deleted.

42 changes: 42 additions & 0 deletions llama.go
@@ -259,6 +259,48 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
	return res, nil
}

// TokenizeString has an interesting return property: negative lengths (potentially) have
// meaning, so the length is returned separately from the slice and the error - all three
// can be used together.
func (l *LLama) TokenizeString(text string, opts ...PredictOption) (int32, []int32, error) {
	po := NewPredictOptions(opts...)

	input := C.CString(text)
	if po.Tokens == 0 {
		po.Tokens = 4096 // arbitrary fallback for callers that did not set a token limit
	}
	out := make([]C.int, po.Tokens)

	// nil placeholder for the antiprompt string array, which tokenization does not use
	var fakeDblPtr **C.char

	// Copied from Predict and modified minimally. TODO: simplify this down, or add an
	// "allocate defaults" helper?
	params := C.llama_allocate_params(input, C.int(po.Seed), C.int(po.Threads), C.int(po.Tokens), C.int(po.TopK),
		C.float(po.TopP), C.float(po.Temperature), C.float(po.Penalty), C.int(po.Repeat),
		C.bool(po.IgnoreEOS), C.bool(po.F16KV),
		C.int(po.Batch), C.int(po.NKeep), fakeDblPtr, C.int(0),
		C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
		C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias),
		C.CString(po.PathPromptCache), C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
		C.CString(po.MainGPU), C.CString(po.TensorSplit),
		C.bool(po.PromptCacheRO),
		C.CString(po.Grammar),
		C.float(po.RopeFreqBase), C.float(po.RopeFreqScale), C.float(po.NegativePromptScale), C.CString(po.NegativePrompt),
	)
	defer C.llama_free_params(params) // release the C-side params once tokenization is done

	tokRet := C.llama_tokenize_string(params, l.state, (*C.int)(unsafe.Pointer(&out[0])))

	if tokRet < 0 {
		return int32(tokRet), []int32{}, fmt.Errorf("llama_tokenize_string returned negative count %d", tokRet)
	}

	// Copy the C ints into a Go-owned []int32; an explicit element-wise copy is the
	// portable way to move the data across the cgo boundary.
	gTokRet := int32(tokRet)
	goSlice := make([]int32, gTokRet)
	for i := int32(0); i < gTokRet; i++ {
		goSlice[i] = int32(out[i])
	}

	return gTokRet, goSlice, nil
}

// CGo only allows us to use static calls from C to Go, we can't just dynamically pass in funcs.
// This is the next best thing, we register the callbacks in this map and call tokenCallback from
// the C code. We also attach a finalizer to LLama, so it will unregister the callback when the
// garbage collector deallocates it.
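For reference, a minimal usage sketch of the new API from application code. The model path is hypothetical, the option value is only an example, and Free is assumed to be the binding's usual cleanup method:

package main

import (
	"fmt"

	llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
	// Hypothetical path to a local GGML model file.
	model, err := llama.New("./ggllm-test-model.bin", llama.SetContext(128))
	if err != nil {
		panic(err)
	}
	defer model.Free()

	// On failure the count can come back negative, alongside a non-nil error.
	n, tokens, err := model.TokenizeString("A STRANGE GAME.")
	if err != nil {
		panic(fmt.Sprintf("tokenize failed (count %d): %v", n, err))
	}
	fmt.Printf("%d tokens: %v\n", n, tokens)
}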
64 changes: 64 additions & 0 deletions llama_test.go
@@ -0,0 +1,64 @@
package llama_test

import (
	"os"

	. "github.com/go-skynet/go-llama.cpp"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

var _ = Describe("LLama binding", func() {
	testModelPath := os.Getenv("TEST_MODEL")

	Context("Declaration", func() {
		It("fails with no model", func() {
			model, err := New("not-existing")
			Expect(err).To(HaveOccurred())
			Expect(model).To(BeNil())
		})
	})
	Context("Inferencing tests (using "+testModelPath+") ", func() {
		getModel := func() (*LLama, error) {
			model, err := New(
				testModelPath,
				EnableF16Memory,
				SetContext(128),
				SetMMap(true),
				SetNBatch(512),
			)
			Expect(err).ToNot(HaveOccurred())
			Expect(model).ToNot(BeNil())
			return model, err
		}

		It("predicts successfully", func() {
			if testModelPath == "" {
				Skip("test skipped - only makes sense if the TEST_MODEL environment variable is set.")
			}

			model, err := getModel()
			text, err := model.Predict(`Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction: How much is 2+2?
### Response: `, SetRopeFreqBase(10000.0), SetRopeFreqScale(1))
			Expect(err).ToNot(HaveOccurred())
			Expect(text).To(ContainSubstring("4"))
		})

		It("tokenizes strings successfully", func() {
			if testModelPath == "" {
				Skip("test skipped - only makes sense if the TEST_MODEL environment variable is set.")
			}

			model, err := getModel()
			l, tokens, err := model.TokenizeString("A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?",
				SetRopeFreqBase(10000.0), SetRopeFreqScale(1))

			Expect(err).ToNot(HaveOccurred())
			Expect(l).To(BeNumerically(">", 0))
			Expect(int(l)).To(Equal(len(tokens)))
		})
	})
})
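Both model-backed specs above repeat the same TEST_MODEL guard. A shared helper along these lines (a sketch, not part of this commit) would keep them in sync:

package llama_test

import (
	"os"

	. "github.com/onsi/ginkgo/v2"
)

// skipWithoutModel gates specs that need a real model on disk behind the
// TEST_MODEL environment variable.
func skipWithoutModel() {
	if os.Getenv("TEST_MODEL") == "" {
		Skip("test skipped - only makes sense if the TEST_MODEL environment variable is set.")
	}
}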
