Merged
54 changes: 50 additions & 4 deletions app/services/va_fusion.py
@@ -1,4 +1,4 @@
from math import exp, sqrt
from math import exp, sqrt, log
from typing import Dict, Tuple, Optional

# Emotion anchors for Valence (V) and Arousal (A)
@@ -199,6 +199,33 @@ def apply_zero_prob_mask(
return out


def compute_entropy(probs: Dict[str, float]) -> float:
"""Compute normalized entropy (0–1).

The closer the emotion distribution is to uniform (all emotions with
similar probabilities), the closer the value is to 1; the more it concentrates on a single emotion, the closer it is to 0.

Args:
probs: dictionary of per-emotion probabilities (need not sum to 1)

Returns:
Normalized entropy value (0–1)
"""
eps = 1e-10
# Normalize
total = sum(max(0.0, p) for p in probs.values())
if total <= 0:
return 1.0 # if every value is 0, treat as maximum entropy (uniform)

normalized = {k: max(0.0, v) / total for k, v in probs.items()}

# Compute entropy
h = -sum(p * log(p + eps) for p in normalized.values() if p > 0)
max_h = log(len(probs)) if len(probs) > 0 else 1.0 # maximum entropy for a uniform distribution

return h / max_h if max_h > 0 else 0.0
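As a sanity check, the normalization behavior can be exercised standalone. The function is reproduced below (verbatim from this diff) so the snippet runs on its own; the sample distributions are illustrative:

```python
from math import log
from typing import Dict

def compute_entropy(probs: Dict[str, float]) -> float:
    # Reproduced from the diff above so this snippet is self-contained.
    eps = 1e-10
    total = sum(max(0.0, p) for p in probs.values())
    if total <= 0:
        return 1.0  # all-zero input is treated as maximally uniform
    normalized = {k: max(0.0, v) / total for k, v in probs.items()}
    h = -sum(p * log(p + eps) for p in normalized.values() if p > 0)
    max_h = log(len(probs)) if len(probs) > 0 else 1.0
    return h / max_h if max_h > 0 else 0.0

uniform = {"happy": 0.25, "sad": 0.25, "angry": 0.25, "fear": 0.25}
peaked = {"happy": 0.97, "sad": 0.01, "angry": 0.01, "fear": 0.01}
print(round(compute_entropy(uniform), 3))  # 1.0
print(round(compute_entropy(peaked), 3))   # 0.121
```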


def fuse_VA(audio_probs: Dict[str, float], text_score: float, text_magnitude: float) -> Dict[str, object]:
"""Fuse audio (emotion probabilities) and text (score,magnitude) into composite VA.

@@ -231,8 +258,8 @@ def fuse_VA(audio_probs: Dict[str, float], text_score: float, text_magnitude: fl
"happy": pos * mag,
"sad": neg * mag,
"neutral": max(0.0, neutral_base),
"angry": neg * mag * 0.8,
"fear": neg * mag * 0.7,
"angry": neg * mag, # same weight for all negative emotions
"fear": neg * mag, # same weight for all negative emotions
"surprise": pos * mag * 0.8,
}
# For positive text (v_text > 0): dynamically boost happy and damp surprise, then renormalize
@@ -297,7 +324,26 @@ def fuse_VA(audio_probs: Dict[str, float], text_score: float, text_magnitude: fl
neutral_factor = max(0.3, neutral_base_factor - extra_down)
else:
neutral_factor = neutral_base_factor
composite_score["neutral"] = composite_score.get("neutral", 0.0) * neutral_factor * 0.7

# Conflict detection: when v_audio and v_text have opposite signs, the emotions cancel each other out
# In that case neutral gets over-estimated, so suppress it further
is_conflict = (v_audio * v_text) < 0
Copilot AI Dec 12, 2025
The conflict detection logic may incorrectly trigger when either v_audio or v_text is zero. When one of them is zero (neutral valence), the product will be zero, which is not less than zero, so is_conflict will be False. However, if you want to detect conflicts only when both have opposite non-zero signs, you should check that both values are non-zero before comparing signs. Consider checking if abs(v_audio) and abs(v_text) are above a small threshold before determining conflict.

Suggested change
is_conflict = (v_audio * v_text) < 0
threshold = 1e-3
is_conflict = (abs(v_audio) > threshold and abs(v_text) > threshold and (v_audio * v_text) < 0)
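To see the difference concretely, here is a small standalone comparison of the two checks (the function names are illustrative, not from the codebase):

```python
def is_conflict_naive(v_audio: float, v_text: float) -> bool:
    # Original check: relies purely on the sign of the product.
    return (v_audio * v_text) < 0

def is_conflict_thresholded(v_audio: float, v_text: float, threshold: float = 1e-3) -> bool:
    # Suggested check: both valences must be meaningfully non-zero.
    return abs(v_audio) > threshold and abs(v_text) > threshold and (v_audio * v_text) < 0

# Clearly opposite signs: both versions agree.
print(is_conflict_naive(0.5, -0.4), is_conflict_thresholded(0.5, -0.4))    # True True

# Near-zero valence with tiny negative noise: the naive check fires, the thresholded one does not.
print(is_conflict_naive(0.5, -1e-9), is_conflict_thresholded(0.5, -1e-9))  # True False
```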

if is_conflict:
conflict_factor = 0.1 # on conflict, strongly suppress neutral to 0.1x
else:
conflict_factor = 1.0

# Entropy-based suppression: the flatter the emotion distribution (higher entropy), the more neutral is suppressed
entropy = compute_entropy(composite_score)
Copilot AI Dec 12, 2025
The entropy computation is called on composite_score which contains weighted emotion scores, not normalized probabilities. Since compute_entropy normalizes internally, this works, but calling it on already heavily modified scores (after multiple weighted adjustments) may not accurately reflect the original distribution uniformity. Consider whether entropy should be computed on the pre-weighted composite_score or if a snapshot should be taken earlier in the fusion process for more meaningful entropy measurement.
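One consequence of the internal normalization is that compute_entropy is scale-invariant: multiplying every score by the same positive constant does not change the result, so only the shape of composite_score matters, not its magnitude. A standalone check (the function is reproduced from the diff; the score values are illustrative):

```python
from math import log
from typing import Dict

def compute_entropy(probs: Dict[str, float]) -> float:
    # Reproduced from the diff so the snippet runs standalone.
    eps = 1e-10
    total = sum(max(0.0, p) for p in probs.values())
    if total <= 0:
        return 1.0
    normalized = {k: max(0.0, v) / total for k, v in probs.items()}
    h = -sum(p * log(p + eps) for p in normalized.values() if p > 0)
    max_h = log(len(probs)) if len(probs) > 0 else 1.0
    return h / max_h if max_h > 0 else 0.0

scores = {"happy": 2.0, "sad": 0.5, "neutral": 0.5, "angry": 1.0}
scaled = {k: 10.0 * v for k, v in scores.items()}
print(abs(compute_entropy(scores) - compute_entropy(scaled)) < 1e-9)  # True
```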

if entropy > 0.8:
entropy_factor = 0.3 # high entropy: scale by 0.3
elif entropy > 0.6:
entropy_factor = 0.6 # medium entropy: scale by 0.6
Comment on lines +338 to +341
Copilot AI Dec 12, 2025
The magic numbers 0.8 and 0.6 for entropy thresholds lack explanation or justification. These thresholds determine when neutral suppression is applied, but there's no documentation explaining why these specific values were chosen or how they relate to the emotion distribution characteristics. Consider adding inline comments explaining the rationale for these threshold values, or defining them as named constants with descriptive names.
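One way to address this, sketched under the assumption that 0.8/0.6 were tuned empirically (the constant and function names below are invented for illustration; the numeric values are unchanged from the diff):

```python
# Hypothetical named constants replacing the bare 0.8 / 0.6 thresholds.
HIGH_ENTROPY_THRESHOLD = 0.8  # distribution nearly uniform -> no dominant emotion, suppress neutral strongly
MID_ENTROPY_THRESHOLD = 0.6   # moderately flat distribution -> suppress neutral mildly

def entropy_suppression_factor(entropy: float) -> float:
    if entropy > HIGH_ENTROPY_THRESHOLD:
        return 0.3
    if entropy > MID_ENTROPY_THRESHOLD:
        return 0.6
    return 1.0

print(entropy_suppression_factor(0.9), entropy_suppression_factor(0.7), entropy_suppression_factor(0.5))  # 0.3 0.6 1.0
```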

else:
entropy_factor = 1.0

# Final neutral suppression: base factor + conflict + entropy
composite_score["neutral"] = composite_score.get("neutral", 0.0) * neutral_factor * 0.7 * conflict_factor * entropy_factor
Copilot AI Dec 12, 2025
The compound multiplication of suppression factors (neutral_factor * 0.7 * conflict_factor * entropy_factor) can result in extremely small values. For example, with neutral_factor=0.3, conflict_factor=0.1, and entropy_factor=0.3, the neutral score gets multiplied by 0.3 * 0.7 * 0.1 * 0.3 = 0.0063, reducing it to less than 1% of its original value. This aggressive suppression might completely eliminate legitimate neutral emotions. Consider using additive or bounded multiplicative approaches, or adding a floor value to prevent over-suppression.

Suggested change
composite_score["neutral"] = composite_score.get("neutral", 0.0) * neutral_factor * 0.7 * conflict_factor * entropy_factor
# Prevent over-suppression: ensure neutral is not reduced below 5% of its original value
original_neutral = composite_score.get("neutral", 0.0)
suppressed_neutral = original_neutral * neutral_factor * 0.7 * conflict_factor * entropy_factor
composite_score["neutral"] = max(suppressed_neutral, original_neutral * 0.05)
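The arithmetic in the comment and the effect of the suggested 5% floor can be checked directly; suppress_neutral is a hypothetical helper wrapping the same chain of multiplications as the diff:

```python
def suppress_neutral(neutral: float, neutral_factor: float, conflict_factor: float,
                     entropy_factor: float, floor_ratio: float = 0.05) -> float:
    # Same multiplication chain as in the diff, plus the suggested floor.
    suppressed = neutral * neutral_factor * 0.7 * conflict_factor * entropy_factor
    return max(suppressed, neutral * floor_ratio)

# Worst case cited in the review: 0.3 * 0.7 * 0.1 * 0.3
raw = 1.0 * 0.3 * 0.7 * 0.1 * 0.3
print(round(raw, 6))                         # 0.0063  -> under 1% of the original value
print(suppress_neutral(1.0, 0.3, 0.1, 0.3))  # 0.05    -> the floor takes over
```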

composite_score["surprise"] = composite_score.get("surprise", 0.0) * 0.9

per_emotion_bps = _normalize_to_bps(composite_score)
4 changes: 2 additions & 2 deletions app/voice_service.py
@@ -303,7 +303,7 @@ async def _process_stt_and_nlp_background(self, file_content: bytes, filename: s
db = SessionLocal()
try:
logger.log_step("(비동기 작업) STT 작업 시작", category="async")
deadline = time.monotonic() + 20.0
deadline = time.monotonic() + 30.0
⚠️ Potential issue | 🟡 Minor

Extending the overall STT→NLP deadline to 30 seconds is reasonable, but the inline comment still says "within 20 seconds", which is confusing.

Please update the comment on line 319 from (전체 stt->nlp 20초 내) to (전체 stt->nlp 30초 내) so it matches the new deadline.



# 1. STT 처리 (스레드 풀에서 실행하여 실제 병렬 처리 가능)
file_obj_for_stt = BytesIO(file_content)
@@ -322,7 +322,7 @@ def __init__(self, content, filename):
try:
stt_result = await asyncio.wait_for(stt_coro, timeout=remaining)
except asyncio.TimeoutError:
print(f"STT 타임아웃: voice_id={voice_id} after 20s")
print(f"STT 타임아웃: voice_id={voice_id} after 30s")
Copilot AI Dec 12, 2025
The updated timeout message still uses a hardcoded string "30s" instead of using the actual timeout value from the deadline variable. If the timeout value changes in the future (line 306), this message will need manual updating. Consider using an f-string with the actual timeout value or defining it as a constant that both the deadline calculation and error message can reference.
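A minimal sketch of that suggestion, assuming a module-level constant (the name STT_NLP_DEADLINE_SECONDS is invented here, and the log text is anglicized for the example):

```python
import time

# Single source of truth for both the deadline arithmetic and the log message.
STT_NLP_DEADLINE_SECONDS = 30.0

voice_id = 123  # placeholder value for illustration
deadline = time.monotonic() + STT_NLP_DEADLINE_SECONDS
msg = f"STT timeout: voice_id={voice_id} after {STT_NLP_DEADLINE_SECONDS:.0f}s"
print(msg)  # STT timeout: voice_id=123 after 30s
```

Changing the constant then updates the deadline and the message together.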

logger.log_step("stt 타임아웃", category="async")
mark_text_done(db, voice_id)
try_aggregate(db, voice_id)