Skip to content

Commit bd1cd84

Browse files
committed
Refactor mock speaker recognition client and improve testing structure
- Replaced the direct import of the mock client with a structured import from the new testing module.
- Introduced a dedicated `mock_speaker_client.py` that provides a mock implementation for speaker recognition, enabling testing without heavy dependencies.
- Added an `__init__.py` file in the testing directory to organize testing utilities and mocks.
1 parent ef719b1 commit bd1cd84

File tree

3 files changed

+163
-10
lines changed

3 files changed

+163
-10
lines changed

backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,8 @@ def __init__(self, service_url: Optional[str] = None):
3939
# Check if we should use mock client (for testing)
4040
if os.getenv("USE_MOCK_SPEAKER_CLIENT") == "true":
4141
try:
42-
# Import mock client from tests directory
43-
import sys
44-
from pathlib import Path
45-
46-
# Add tests directory to Python path
47-
tests_dir = Path(__file__).resolve().parents[5] / "tests"
48-
if str(tests_dir) not in sys.path:
49-
sys.path.insert(0, str(tests_dir))
50-
51-
from mocks.mock_speaker_client import MockSpeakerRecognitionClient
42+
# Import mock client from testing module
43+
from advanced_omi_backend.testing.mock_speaker_client import MockSpeakerRecognitionClient
5244

5345
self._mock_client = MockSpeakerRecognitionClient()
5446
self.enabled = True
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Testing utilities and mocks for Chronicle backend."""
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""Mock speaker recognition client for testing without heavy ML dependencies."""
2+
3+
import logging
4+
from typing import Dict, Optional
5+
6+
logger = logging.getLogger(__name__)


class MockSpeakerRecognitionClient:
    """
    Mock speaker recognition client that returns pre-computed segments.

    Used in test environments to avoid running the resource-intensive speaker
    recognition service. Segments are based on test_data.py expectations.

    Each segment is a flat dict with the structure expected by the backend:
        {
            "start": float,          # Start time in seconds
            "end": float,            # End time in seconds
            "text": str,             # Transcript text for this segment
            "speaker": int,          # Speaker label (0, 1, 2, etc.)
            "identified_as": str,    # Speaker name or "Unknown"
            "confidence": float      # Optional confidence score
        }
    """

    # Audio file whose transcript is recognised by the "glass" keyword match.
    _GLASS_BLOWING_FILE = "DIY_Experts_Glass_Blowing_16khz_mono_1min.wav"

    # Segment length (seconds) used when the transcript carries no word timings.
    _DEFAULT_DURATION = 60.0

    # Map audio filenames to mock segment data.
    MOCK_SEGMENTS = {
        _GLASS_BLOWING_FILE: [
            {
                "start": 0.0,
                "end": 10.08,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "The pumpkin that'll last for forever. Finally. Does it count? Today, we're taking a glass blowing class.",
                "confidence": 0.95,
            },
            {
                "start": 10.28,
                "end": 20.255,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "I'm sweating already. We've worked with a lot of materials before, but we've only scratched the surface",
                "confidence": 0.93,
            },
            {
                "start": 20.455,
                "end": 21.895,
                "speaker": 1,
                "identified_as": "Unknown",
                "text": "when it comes to glass",
                "confidence": 0.91,
            },
            {
                "start": 22.095,
                "end": 23.615,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "and that's because",
                "confidence": 0.94,
            },
            {
                "start": 23.815,
                "end": 28.135,
                "speaker": 1,
                "identified_as": "Unknown",
                "text": "a little intimidating. We've got about 400 pounds",
                "confidence": 0.92,
            },
            {
                "start": 28.335,
                "end": 43.08,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "of liquid glass in this furnace right here. Nick's gonna really help us out. Nick, I'm excited and nervous. Me too.",
                "confidence": 0.96,
            },
            {
                "start": 43.28,
                "end": 44.48,
                "speaker": 1,
                "identified_as": "Unknown",
                "text": "So we're gonna",
                "confidence": 0.90,
            },
            {
                "start": 44.68,
                "end": 46.76,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "make what's called a trumpet",
                "confidence": 0.95,
            },
            {
                "start": 46.96,
                "end": 50.24,
                "speaker": 0,
                "identified_as": "Unknown",
                "text": "flower. We're using gravity as a tool.",
                "confidence": 0.93,
            },
        ]
    }

    def __init__(self):
        """Initialize mock client."""
        logger.info("🎤 Mock speaker recognition client initialized")

    async def diarize_identify_match(
        self,
        conversation_id: str,
        backend_token: str,
        transcript_data: Dict,
        user_id: Optional[str] = None,
    ) -> Dict:
        """
        Return pre-computed mock segments for known test audio files.

        The audio file is recognised from the transcript text. When nothing
        matches, a single generic segment spanning the transcript is returned.

        Args:
            conversation_id: Only used for logging (first 12 characters).
            backend_token: Not used in mock.
            transcript_data: Dict with 'text' and 'words'. 'text' is used to
                identify the audio file; the last entry of 'words' supplies
                the fallback segment's end time via its 'end' key.
            user_id: Not used in mock.

        Returns:
            Dictionary with a 'segments' list. The list and its dicts are
            fresh copies, so callers may mutate them without affecting
            MOCK_SEGMENTS.
        """
        logger.info(
            "🎤 Mock speaker client processing conversation: %s...",
            str(conversation_id)[:12],
        )

        # 'text' may be missing or explicitly None; normalise both to "".
        raw_text = transcript_data.get("text") or ""

        # Match by transcript content. "glass" also covers "glass blowing".
        if "glass" in raw_text.lower():
            segments = self.MOCK_SEGMENTS.get(self._GLASS_BLOWING_FILE)
            if segments is not None:
                logger.info(
                    "🎤 Mock returning %d segments for DIY Glass Blowing audio",
                    len(segments),
                )
                # Segments are flat dicts, so a per-dict copy is enough to
                # keep the class-level fixture isolated from callers.
                return {"segments": [dict(segment) for segment in segments]}

        # Fallback: create a single generic segment.
        logger.warning("🎤 Mock: No pre-computed segments found, creating generic segment")

        # Use the end time of the last word when available.
        duration = self._DEFAULT_DURATION
        words = transcript_data.get("words") or []
        if words:
            last_end = words[-1].get("end")
            if last_end is not None:
                duration = last_end

        return {
            "segments": [
                {
                    "start": 0.0,
                    "end": duration,
                    "speaker": 0,
                    "identified_as": "Unknown",
                    "text": raw_text,
                    "confidence": 0.85,
                }
            ]
        }

0 commit comments

Comments
 (0)