diff --git a/binding.cpp b/binding.cpp
index b23ac4e..d0cccb9 100644
--- a/binding.cpp
+++ b/binding.cpp
@@ -479,7 +479,9 @@ int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug) {
 
         for (auto id : embd) {
             const std::string token_str = llama_token_to_piece(ctx, id);
-            printf("%s", token_str.c_str());
+            if (debug) {
+                printf("%s", token_str.c_str());
+            }
 
             if (embd.size() > 1) {
                 input_tokens.push_back(id);
diff --git a/llama.go b/llama.go
index 2fef3b7..c1ebc2c 100644
--- a/llama.go
+++ b/llama.go
@@ -332,7 +332,8 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 	return res, nil
 }
 
-// tokenize has an interesting return property: negative lengths (potentially) have meaning. Therefore, return the length seperate from the slice and error - all three can be used together
+// tokenize has an interesting return property: negative lengths (potentially) have meaning.
+// Therefore, return the length separate from the slice and error - all three can be used together
 func (l *LLama) TokenizeString(text string, opts ...PredictOption) (int32, []int32, error) {
 	po := NewPredictOptions(opts...)
 
@@ -396,14 +397,14 @@ func (l *LLama) SetTokenCallback(callback func(token string) bool) {
 }
 
 var (
-	m         sync.Mutex
+	m         sync.RWMutex
 	callbacks = map[uintptr]func(string) bool{}
 )
 
 //export tokenCallback
 func tokenCallback(statePtr unsafe.Pointer, token *C.char) bool {
-	m.RLock()
-	defer m.RUnlock()
+	m.RLock()
+	defer m.RUnlock()
 
 	if callback, ok := callbacks[uintptr(statePtr)]; ok {
 		return callback(C.GoString(token))
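
Note on the sync.RWMutex change in llama.go: tokenCallback only reads the callbacks map, so it can safely take the read lock, as long as the registration side (SetTokenCallback, whose body is outside this hunk) presumably keeps taking the write lock. A minimal standalone sketch of that pattern follows; the names register and dispatch are illustrative, not part of the library's API.

package main

import (
	"fmt"
	"sync"
)

// The same locking pattern as the hunk above: a callback map guarded
// by an RWMutex so read-only lookups do not serialize on each other.
var (
	mu        sync.RWMutex
	callbacks = map[uintptr]func(string) bool{}
)

// register mutates the map, so it needs the exclusive write lock
// (assumed to mirror what SetTokenCallback does).
func register(key uintptr, cb func(string) bool) {
	mu.Lock()
	defer mu.Unlock()
	callbacks[key] = cb
}

// dispatch only reads the map; RLock lets concurrent predictions
// invoke their callbacks without blocking one another.
func dispatch(key uintptr, token string) bool {
	mu.RLock()
	defer mu.RUnlock()
	if cb, ok := callbacks[key]; ok {
		return cb(token)
	}
	return true
}

func main() {
	register(1, func(tok string) bool {
		fmt.Println("token:", tok)
		return true
	})
	dispatch(1, "hello")
}

Under this scheme only registration briefly blocks readers; the hot per-token path contends only with writers, not with other readers.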