Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
**Learning:** For vector dot products in Python (without numpy), using `sum(map(operator.mul, vec_a, vec_b))` is approximately 30% to 40% faster than summing an intermediate list comprehension such as `sum([a * b for a, b in zip(vec_a, vec_b)])`. This is because it avoids the overhead of allocating an intermediate list in memory and pushes both iteration and multiplication to optimized C-level implementations.
**Action:** When calculating similarity scores or dot products on vectors represented as Python lists, always prefer `map(operator.mul, a, b)` wrapped in `sum()` over list comprehensions or generator expressions.

## 2024-08-14 - Optimizing Sparse Dictionary Intersections in Math Hot Loops
**Learning:** When computing cosine similarity or dot products for sparse dictionaries (like TF-IDF score mappings) in Python, creating sets for key intersection (`set(v1.keys()) & set(v2.keys())`) adds significant overhead due to set allocation and hashing. Iterating directly over the items of the smaller dictionary with a single lookup into the larger dictionary (`val = v2.get(k, sentinel)`) avoids double hashing, keeps O(min(N, M)) complexity, and is roughly 30-40% faster in execution time while still handling `0.0` values correctly.
**Action:** Replace `set()` intersection calls with smaller-dictionary iteration logic (`if len(v1) > len(v2): v1, v2 = v2, v1`) and use a sentinel-backed `dict.get` to keep one lookup per key: `sentinel = object(); dot = sum(v * val for k, v in v1.items() if (val := v2.get(k, sentinel)) is not sentinel)` in tight performance paths.

## 2024-08-14 - Optimizing Multiple Regex Pattern Matching Logic
**Learning:** When optimizing Python loops that count distinct regex pattern matches (e.g., `sum(1 for p in PATTERNS if re.search(p, text))`), joining all patterns into a single compiled regex (`re.compile('a|b').findall(text)`) introduces a functional regression because it counts the *total occurrences* of any pattern, not the *number of distinct patterns* matched.
**Action:** To safely optimize this logic while preserving exact functionality, pre-compile a list of distinct regular expressions at the module level and iterate through them: `sum(1 for r in COMPILED_REGEXES if r.search(text))`. Additionally, replace `re.search` with the native `in` operator (`p in text`) for exact string literals that don't rely on regex word boundaries, as it is significantly faster.
84 changes: 54 additions & 30 deletions app/heuristics/handlers/psycholinguist.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,23 @@ class CognitiveLoadResult:

# Cultural/Regional patterns
# British spelling variants used for cultural-context scoring.
# Plain substrings (not regex patterns): callers match them with `in`
# against lowercased text, so no escaping, anchors, or raw strings needed.
UK_SPELLING = [
    "colour",
    "flavour",
    "centre",
    "metre",
    "organise",
    "realise",
    "defence",
]

# American spelling variants, parallel to UK_SPELLING (one-to-one by index).
# Plain substrings matched with `in` against lowercased text.
US_SPELLING = [
    "color",
    "flavor",
    "center",
    "meter",
    "organize",
    "realize",
    "defense",
]

CURRENCY_PATTERNS = {
Expand All @@ -170,22 +170,30 @@ class CognitiveLoadResult:
}


# Pre-compiled currency regexes, one alternation per region, built once at
# import time so the hot path avoids per-call pattern-cache lookups.
# Each pattern is wrapped in a non-capturing group: a bare "|".join() would
# splice any alternation INSIDE an individual pattern into the wrong
# branches, changing which strings match.
_CURRENCY_REGEXES = {
    region: re.compile("|".join(f"(?:{p})" for p in patterns))
    for region, patterns in CURRENCY_PATTERNS.items()
}


def detect_cultural_context(text: str) -> Optional[str]:
"""Detect cultural context based on spelling and currency."""
text_lower = text.lower()

# Check spelling
uk_score = sum(1 for p in UK_SPELLING if re.search(p, text_lower))
us_score = sum(1 for p in US_SPELLING if re.search(p, text_lower))
# Bolt Optimization: direct string `in` check is faster than `re.search` without word boundaries
uk_score = sum(1 for p in UK_SPELLING if p in text_lower)
us_score = sum(1 for p in US_SPELLING if p in text_lower)

if uk_score > us_score:
return "British"
if us_score > uk_score:
return "American"

# Check currency
for region, patterns in CURRENCY_PATTERNS.items():
if any(re.search(p, text_lower) for p in patterns):
for region, regex in _CURRENCY_REGEXES.items():
# Bolt Optimization: compiled regex `search` on joined patterns avoids overhead of multiple `re.search` calls
if regex.search(text_lower):
if region == "TR":
return "Turkish"
if region == "US":
Expand All @@ -198,34 +206,43 @@ def detect_cultural_context(text: str) -> Optional[str]:
return None


# Compiled once at import time. Each source pattern is wrapped in a
# non-capturing group so alternation inside an individual pattern cannot
# bleed across pattern boundaries. These regexes are used only for boolean
# search (does ANY pattern match), so collapsing the list into a single
# alternation preserves the original per-pattern loop semantics.
_FRUSTRATION_REGEX = re.compile(
    "|".join(f"(?:{p})" for p in FRUSTRATION_PATTERNS), re.IGNORECASE
)
_CASUAL_REGEX = re.compile("|".join(f"(?:{p})" for p in CASUAL_PATTERNS))


def detect_sentiment(text: str) -> UserSentiment:
    """Analyze text to detect user sentiment.

    Precedence: URGENT beats FRUSTRATED beats CASUAL beats NEUTRAL —
    the first category that matches wins.

    Args:
        text: Raw user text. Original casing is preserved for the
            frustration check because CAPS is itself a signal there.

    Returns:
        The first matching ``UserSentiment`` member, or ``NEUTRAL``.
    """
    text_lower = text.lower()

    # Urgency: plain substring membership on lowercased text is faster
    # than regex search for literal keywords.
    for kw in URGENT_KEYWORDS:
        if kw in text_lower:
            return UserSentiment.URGENT

    # Frustration runs on the ORIGINAL text so CAPS-based patterns can
    # fire; the pre-compiled regex carries re.IGNORECASE for the rest.
    if _FRUSTRATION_REGEX.search(text):
        return UserSentiment.FRUSTRATED

    # Casual tone is checked against lowercased text.
    if _CASUAL_REGEX.search(text_lower):
        return UserSentiment.CASUAL

    return UserSentiment.NEUTRAL


# Compiled once at import time. Deliberately kept as one regex PER pattern
# (not a joined alternation) because detect_formality counts how many
# DISTINCT patterns match, which a single combined regex cannot report.
_TR_FORMAL_REGEXES = list(map(re.compile, TR_FORMAL_PATTERNS))
_TR_INFORMAL_REGEXES = list(map(re.compile, TR_INFORMAL_PATTERNS))


def detect_formality(text: str) -> FormalityLevel:
"""Detect Turkish formality level (Siz vs Sen)."""
text_lower = text.lower()

formal_score = sum(1 for p in TR_FORMAL_PATTERNS if re.search(p, text_lower))
informal_score = sum(1 for p in TR_INFORMAL_PATTERNS if re.search(p, text_lower))
# Bolt Optimization: pre-compiled regexes avoid looping `re.compile`
# Kept as separate regexes to match original logic of counting distinct matched patterns
formal_score = sum(1 for r in _TR_FORMAL_REGEXES if r.search(text_lower))
informal_score = sum(1 for r in _TR_INFORMAL_REGEXES if r.search(text_lower))

if formal_score > informal_score:
return FormalityLevel.FORMAL
Expand Down Expand Up @@ -297,16 +314,23 @@ class AmbiguityResult:
suggestions: list[str] = field(default_factory=list)


# Built once at import time: maps each ambiguous-term key to a
# (compiled pattern, suggestion) pair so the detection loop does no
# per-call compilation or rule-dict lookups.
_AMBIGUOUS_REGEXES = {
    term: (re.compile(rule["pattern"]), rule["suggestion"])
    for term, rule in AMBIGUOUS_PATTERNS.items()
}


def detect_ambiguity(text: str) -> AmbiguityResult:
    """Detect vague or ambiguous terms in the prompt.

    Args:
        text: Raw prompt text; matching is done on its lowercased form.

    Returns:
        An ``AmbiguityResult`` with ``is_ambiguous`` set if any pattern
        matched, plus the matched term keys and their suggestions
        (the two lists are aligned by index).
    """
    text_lower = text.lower()
    result = AmbiguityResult()

    # Pre-compiled (regex, suggestion) pairs avoid re-compiling each
    # pattern and re-reading the rule dict on every call.
    for key, (regex, suggestion) in _AMBIGUOUS_REGEXES.items():
        if regex.search(text_lower):
            result.is_ambiguous = True
            result.ambiguous_terms.append(key)
            result.suggestions.append(suggestion)

    return result

Expand Down
Loading