From 8fb11f0c76c734ac32e38f58f7c5216072a44e6f Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Fri, 26 Apr 2024 11:27:45 -0400
Subject: [PATCH 1/3] Update version to 0.2.4 in buildspec.json and skip tokens wrapped in '<' ... '>'

---
 buildspec.json                           | 2 +-
 src/whisper-utils/whisper-processing.cpp | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/buildspec.json b/buildspec.json
index 8ba5638..a1babb6 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -38,7 +38,7 @@
     },
     "name": "obs-localvocal",
     "displayName": "OBS Localvocal",
-    "version": "0.2.3",
+    "version": "0.2.4",
     "author": "Roy Shilkrot",
     "website": "https://github.com/occ-ai/obs-localvocal",
     "email": "roy.shil@gmail.com",
diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
index 9970619..227dafc 100644
--- a/src/whisper-utils/whisper-processing.cpp
+++ b/src/whisper-utils/whisper-processing.cpp
@@ -282,6 +282,10 @@ struct DetectionResultWithText run_whisper_inference(struct transcription_filter
 			if (token_str[0] == '[' && token_str[strlen(token_str) - 1] == ']') {
 				keep = false;
 			}
+			// if the token starts with '<|' and ends with '|>', don't keep it
+			if (token_str[0] == '<' && token_str[strlen(token_str) - 1] == '>') {
+				keep = false;
+			}
 			if ((j == n_tokens - 2 || j == n_tokens - 3) && token.p < 0.5) {
 				keep = false;
 			}

From 099cc43ebdf8c6a9e8e8aa808a06ecab2f7fcbb5 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Fri, 26 Apr 2024 14:48:04 -0400
Subject: [PATCH 2/3] Update special token handling in whisper-processing.cpp

---
 src/whisper-utils/whisper-processing.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
index 227dafc..3905fb3 100644
--- a/src/whisper-utils/whisper-processing.cpp
+++ b/src/whisper-utils/whisper-processing.cpp
@@ -282,8 +282,8 @@ struct DetectionResultWithText run_whisper_inference(struct transcription_filter
 			if (token_str[0] == '[' && token_str[strlen(token_str) - 1] == ']') {
 				keep = false;
 			}
-			// if the token starts with '<|' and ends with '|>', don't keep it
-			if (token_str[0] == '<' && token_str[strlen(token_str) - 1] == '>') {
+			// if this is a special token, don't keep it
+			if (token.id > 50256) {
 				keep = false;
 			}
 			if ((j == n_tokens - 2 || j == n_tokens - 3) && token.p < 0.5) {

From 33be2814efe2ee31f9c3360e34d504f01718f650 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Fri, 26 Apr 2024 15:27:05 -0400
Subject: [PATCH 3/3] Update special token handling and sentence suppression; add split() utility

---
 src/transcription-utils.cpp              | 14 ++++++++++++++
 src/transcription-utils.h                |  2 ++
 src/whisper-utils/whisper-processing.cpp | 21 ++++++++++-----------
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/transcription-utils.cpp b/src/transcription-utils.cpp
index c7f9d40..1cdde5a 100644
--- a/src/transcription-utils.cpp
+++ b/src/transcription-utils.cpp
@@ -2,6 +2,7 @@
 
 #include <sstream>
 #include <algorithm>
+#include <vector>
 
 #define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0)
 #define is_trail_byte(c) (((c)&0xc0) == 0x80)
@@ -102,3 +103,16 @@ std::string remove_leading_trailing_nonalpha(const std::string &str)
 		       }));
 	return str_copy;
 }
+
+// Split `string` on `delimiter` and return the non-empty pieces in their original order.
+// Empty pieces (from adjacent, leading or trailing delimiters) are dropped.
+std::vector<std::string> split(const std::string &string, char delimiter)
+{
+	std::vector<std::string> pieces;
+	std::istringstream input(string);
+	for (std::string piece; std::getline(input, piece, delimiter);) {
+		if (!piece.empty())
+			pieces.push_back(piece);
+	}
+	return pieces;
+}
diff --git a/src/transcription-utils.h b/src/transcription-utils.h
index 5e2e500..c4dce8a 100644
--- a/src/transcription-utils.h
+++ b/src/transcription-utils.h
@@ -2,8 +2,10 @@
 #define TRANSCRIPTION_UTILS_H
 
 #include <string>
+#include <vector>
 
 std::string fix_utf8(const std::string &str);
 std::string remove_leading_trailing_nonalpha(const std::string &str);
+std::vector<std::string> split(const std::string &string, char delimiter);
 
 #endif // TRANSCRIPTION_UTILS_H
diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
index 3905fb3..7d46275 100644
--- a/src/whisper-utils/whisper-processing.cpp
+++ b/src/whisper-utils/whisper-processing.cpp
@@ -6,6 +6,7 @@
 #include "transcription-filter-data.h"
 #include "whisper-processing.h"
 #include "whisper-utils.h"
+#include "transcription-utils.h"
 
 #include <algorithm>
 #include <cctype>
@@ -283,7 +284,7 @@ struct DetectionResultWithText run_whisper_inference(struct transcription_filter
 				keep = false;
 			}
 			// if this is a special token, don't keep it
-			if (token.id > 50256) {
+			if (token.id >= 50256) {
 				keep = false;
 			}
 			if ((j == n_tokens - 2 || j == n_tokens - 3) && token.p < 0.5) {
@@ -316,20 +317,18 @@ struct DetectionResultWithText run_whisper_inference(struct transcription_filter
 
 		// if suppression is enabled, check if the text is in the suppression list
 		if (!gf->suppress_sentences.empty()) {
-			std::string suppress_sentences_copy = gf->suppress_sentences;
-			size_t pos = 0;
-			std::string token;
-			while ((pos = suppress_sentences_copy.find("\n")) != std::string::npos) {
-				token = suppress_sentences_copy.substr(0, pos);
-				suppress_sentences_copy.erase(0, pos + 1);
-				if (text == suppress_sentences_copy) {
-					obs_log(gf->log_level, "Suppressing sentence: %s",
+			// split the suppression list by newline into individual sentences
+			std::vector<std::string> suppress_sentences_list =
+				split(gf->suppress_sentences, '\n');
+			// check if the text is in the suppression list
+			for (const std::string &suppress_sentence : suppress_sentences_list) {
+				if (text.find(suppress_sentence) != std::string::npos) {
+					obs_log(gf->log_level, "Suppressed sentence: '%s'",
 						text.c_str());
-					return {DETECTION_RESULT_SUPPRESSED, "", 0, 0, {}};
+					return {DETECTION_RESULT_UNKNOWN, "", 0, 0, {}};
 				}
 			}
 		}
-
 		if (gf->log_words) {
 			obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(),
 				to_timestamp(t1).c_str(), sentence_p, text.c_str());