From 0c973df5fee9586663a72ccedae2cc9610802d0e Mon Sep 17 00:00:00 2001
From: madara88645 <163588475+madara88645@users.noreply.github.com>
Date: Wed, 18 Mar 2026 09:27:43 +0000
Subject: [PATCH 1/2] Optimize regex and string matching in psycholinguist.py

Replace `re.search` with the `in` operator where the pattern is a plain
substring, and pre-compile the regexes used by the cultural, sentiment,
formality, and ambiguity heuristics. Scoring behaviour is unchanged.
---
 .jules/bolt.md                            |  4 ++
 app/heuristics/handlers/psycholinguist.py | 82 +++++++++++++++--------
 2 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/.jules/bolt.md b/.jules/bolt.md
index 38f48bad..03ad84f9 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -9,3 +9,7 @@
 ## 2024-07-26 - Fast Vector Dot Products in Python
 **Learning:** For vector dot products in Python (without numpy), using `sum(map(operator.mul, vec_a, vec_b))` is approximately 30% to 40% faster than list comprehensions inside `sum([a * b for a, b in zip(vec_a, vec_b)])`. This is because it avoids the overhead of allocating an intermediate list in memory and pushes both iteration and multiplication to optimized C-level implementations.
 **Action:** When calculating similarity scores or dot products on vectors represented as Python lists, always prefer `map(operator.mul, a, b)` wrapped in `sum()` over list comprehensions or generator expressions.
+
+## 2024-08-14 - Optimizing Multiple Regex Pattern Matching Logic
+**Learning:** When optimizing Python loops that count distinct regex pattern matches (e.g., `sum(1 for p in PATTERNS if re.search(p, text))`), joining all patterns into a single compiled regex (`re.compile('a|b').findall(text)`) introduces a functional regression because it counts the *total occurrences* of any pattern, not the *number of distinct patterns* matched.
+**Action:** To safely optimize this logic while preserving exact functionality, pre-compile a list of distinct regular expressions at the module level and iterate through them: `sum(1 for r in COMPILED_REGEXES if r.search(text))`. Additionally, replace `re.search(p, text)` with the native `in` operator (`p in text`) only when `p` is a plain literal containing no regex metacharacters, anchors, or `\b` word boundaries; in that case the two checks are equivalent and `in` is significantly faster.
diff --git a/app/heuristics/handlers/psycholinguist.py b/app/heuristics/handlers/psycholinguist.py
index 7f853bd0..7f9200d6 100644
--- a/app/heuristics/handlers/psycholinguist.py
+++ b/app/heuristics/handlers/psycholinguist.py
@@ -143,23 +143,23 @@ class CognitiveLoadResult:
 
 # Cultural/Regional patterns
 UK_SPELLING = [
-    r"colour",
-    r"flavour",
-    r"centre",
-    r"metre",
-    r"organise",
-    r"realise",
-    r"defence",
+    "colour",
+    "flavour",
+    "centre",
+    "metre",
+    "organise",
+    "realise",
+    "defence",
 ]
 
 US_SPELLING = [
-    r"color",
-    r"flavor",
-    r"center",
-    r"meter",
-    r"organize",
-    r"realize",
-    r"defense",
+    "color",
+    "flavor",
+    "center",
+    "meter",
+    "organize",
+    "realize",
+    "defense",
 ]
 
 CURRENCY_PATTERNS = {
@@ -170,13 +170,20 @@ class CognitiveLoadResult:
 }
 
 
+# Pre-compiled regexes for performance
+_CURRENCY_REGEXES = {
+    region: re.compile("|".join(patterns)) for region, patterns in CURRENCY_PATTERNS.items()
+}
+
+
 def detect_cultural_context(text: str) -> Optional[str]:
     """Detect cultural context based on spelling and currency."""
     text_lower = text.lower()
 
     # Check spelling
-    uk_score = sum(1 for p in UK_SPELLING if re.search(p, text_lower))
-    us_score = sum(1 for p in US_SPELLING if re.search(p, text_lower))
+    # Bolt Optimization: entries are plain literals, so `in` matches exactly like `re.search`, only faster
+    uk_score = sum(1 for p in UK_SPELLING if p in text_lower)
+    us_score = sum(1 for p in US_SPELLING if p in text_lower)
 
     if uk_score > us_score:
         return "British"
@@ -184,8 +191,9 @@ def detect_cultural_context(text: str) -> Optional[str]:
         return "American"
 
     # Check currency
-    for region, patterns in CURRENCY_PATTERNS.items():
-        if any(re.search(p, text_lower) for p in patterns):
+    for region, regex in _CURRENCY_REGEXES.items():
+        # Bolt Optimization: compiled regex `search` on joined patterns avoids overhead of multiple `re.search` calls
+        if regex.search(text_lower):
             if region == "TR":
                 return "Turkish"
             if region == "US":
@@ -198,34 +206,43 @@ def detect_cultural_context(text: str) -> Optional[str]:
     return None
 
 
+_FRUSTRATION_REGEX = re.compile("|".join(FRUSTRATION_PATTERNS), re.IGNORECASE)
+_CASUAL_REGEX = re.compile("|".join(CASUAL_PATTERNS))
+
+
 def detect_sentiment(text: str) -> UserSentiment:
     """Analyze text to detect user sentiment."""
     text_lower = text.lower()
 
     # Check for urgency
+    # Bolt Optimization: urgency keeps its plain `in` check; the regex checks below are pre-compiled
     for kw in URGENT_KEYWORDS:
         if kw in text_lower:
             return UserSentiment.URGENT
 
     # Check for frustration (patterns on original text for CAPS detection)
-    for pattern in FRUSTRATION_PATTERNS:
-        if re.search(pattern, text, re.IGNORECASE):
-            return UserSentiment.FRUSTRATED
+    if _FRUSTRATION_REGEX.search(text):
+        return UserSentiment.FRUSTRATED
 
     # Check for casual tone
-    for pattern in CASUAL_PATTERNS:
-        if re.search(pattern, text_lower):
-            return UserSentiment.CASUAL
+    if _CASUAL_REGEX.search(text_lower):
+        return UserSentiment.CASUAL
 
     return UserSentiment.NEUTRAL
 
 
+_TR_FORMAL_REGEXES = [re.compile(p) for p in TR_FORMAL_PATTERNS]
+_TR_INFORMAL_REGEXES = [re.compile(p) for p in TR_INFORMAL_PATTERNS]
+
+
 def detect_formality(text: str) -> FormalityLevel:
     """Detect Turkish formality level (Siz vs Sen)."""
     text_lower = text.lower()
 
-    formal_score = sum(1 for p in TR_FORMAL_PATTERNS if re.search(p, text_lower))
-    informal_score = sum(1 for p in TR_INFORMAL_PATTERNS if re.search(p, text_lower))
+    # Bolt Optimization: patterns are compiled once at import time, not resolved on every call
+    # Kept as separate regexes to match original logic of counting distinct matched patterns
+    formal_score = sum(1 for r in _TR_FORMAL_REGEXES if r.search(text_lower))
+    informal_score = sum(1 for r in _TR_INFORMAL_REGEXES if r.search(text_lower))
 
     if formal_score > informal_score:
         return FormalityLevel.FORMAL
@@ -297,16 +314,23 @@ class AmbiguityResult:
     suggestions: list[str] = field(default_factory=list)
 
 
+_AMBIGUOUS_REGEXES = {
+    key: (re.compile(rule["pattern"]), rule["suggestion"])
+    for key, rule in AMBIGUOUS_PATTERNS.items()
+}
+
+
 def detect_ambiguity(text: str) -> AmbiguityResult:
     """Detect vague or ambiguous terms in the prompt."""
     text_lower = text.lower()
     result = AmbiguityResult()
 
-    for key, rule in AMBIGUOUS_PATTERNS.items():
-        if re.search(rule["pattern"], text_lower):
+    # Bolt Optimization: use regexes compiled once at import instead of calling re.search per rule
+    for key, (regex, suggestion) in _AMBIGUOUS_REGEXES.items():
+        if regex.search(text_lower):
             result.is_ambiguous = True
             result.ambiguous_terms.append(key)
-            result.suggestions.append(rule["suggestion"])
+            result.suggestions.append(suggestion)
 
     return result
 

From 575888ece9fe48681db19940f990c7e0333f067a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mehmet=20=C3=96zel?=
 <163588475+madara88645@users.noreply.github.com>
Date: Wed, 18 Mar 2026 20:45:13 +0000
Subject: [PATCH 2/2] Update frustration-check comment in detect_sentiment

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 app/heuristics/handlers/psycholinguist.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/heuristics/handlers/psycholinguist.py b/app/heuristics/handlers/psycholinguist.py
index 7f9200d6..d0113131 100644
--- a/app/heuristics/handlers/psycholinguist.py
+++ b/app/heuristics/handlers/psycholinguist.py
@@ -220,7 +220,7 @@ def detect_sentiment(text: str) -> UserSentiment:
         if kw in text_lower:
             return UserSentiment.URGENT
 
-    # Check for frustration (patterns on original text for CAPS detection)
+    # Check for frustration (case-insensitive pre-compiled regex, run on the original text)
     if _FRUSTRATION_REGEX.search(text):
         return UserSentiment.FRUSTRATED