diff --git a/binding.cpp b/binding.cpp
index b23ac4e..d0cccb9 100644
--- a/binding.cpp
+++ b/binding.cpp
@@ -479,7 +479,9 @@ int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug) {
 
         for (auto id : embd) {
             const std::string token_str = llama_token_to_piece(ctx, id);
-            printf("%s", token_str.c_str());
+            if (debug) {
+                printf("%s", token_str.c_str());
+            }
 
             if (embd.size() > 1) {
                 input_tokens.push_back(id);
diff --git a/llama.go b/llama.go
index 2fef3b7..c1ebc2c 100644
--- a/llama.go
+++ b/llama.go
@@ -332,7 +332,8 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 	return res, nil
 }
 
-// tokenize has an interesting return property: negative lengths (potentially) have meaning. Therefore, return the length seperate from the slice and error - all three can be used together
+// tokenize has an interesting return property: negative lengths (potentially) have meaning.
+// Therefore, return the length separate from the slice and error - all three can be used together
 func (l *LLama) TokenizeString(text string, opts ...PredictOption) (int32, []int32, error) {
 	po := NewPredictOptions(opts...)
 
@@ -396,14 +397,14 @@ func (l *LLama) SetTokenCallback(callback func(token string) bool) {
 }
 
 var (
-	m         sync.Mutex
+	m         sync.RWMutex
 	callbacks = map[uintptr]func(string) bool{}
 )
 
 //export tokenCallback
 func tokenCallback(statePtr unsafe.Pointer, token *C.char) bool {
-	m.RLock()
-	defer m.RUnlock()
+	m.RLock()
+	defer m.RUnlock()
 
 	if callback, ok := callbacks[uintptr(statePtr)]; ok {
 		return callback(C.GoString(token))
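
Note on the sync.RWMutex change in llama.go: tokenCallback only reads the callbacks map, so it can safely take the read lock, as long as the registration side (SetTokenCallback, whose body is outside this hunk) presumably keeps taking the write lock. A minimal standalone sketch of that pattern follows; the names register and dispatch are illustrative, not part of the library's API.

package main

import (
	"fmt"
	"sync"
)

// The same locking pattern as the hunk above: a callback map guarded
// by an RWMutex so read-only lookups do not serialize on each other.
var (
	mu        sync.RWMutex
	callbacks = map[uintptr]func(string) bool{}
)

// register mutates the map, so it needs the exclusive write lock
// (assumed to mirror what SetTokenCallback does).
func register(key uintptr, cb func(string) bool) {
	mu.Lock()
	defer mu.Unlock()
	callbacks[key] = cb
}

// dispatch only reads the map; RLock lets concurrent predictions
// invoke their callbacks without blocking one another.
func dispatch(key uintptr, token string) bool {
	mu.RLock()
	defer mu.RUnlock()
	if cb, ok := callbacks[key]; ok {
		return cb(token)
	}
	return true
}

func main() {
	register(1, func(tok string) bool {
		fmt.Println("token:", tok)
		return true
	})
	dispatch(1, "hello")
}

Under this scheme only registration briefly blocks readers; the hot per-token path contends only with writers, not with other readers.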