Skip to content

Commit f518ec5

Browse files
authored
feat: add energy thresh and max phrase limits to speech server (#241)
1 parent dd3d699 commit f518ec5

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

common/speech/lasr_speech_recognition_msgs/action/TranscribeSpeech.action

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
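# Energy threshold for silence detection; a value > 0.0 overrides the server's configured threshold and disables automatic adjustment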
2+
float32 energy_threshold
3+
4+
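# Max phrase limit in seconds; a value > 0.0 is applied by the server as the recogniser's pause threshold (non-speaking audio that ends a phrase)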
5+
float32 max_phrase_limit
16
---
27
#result definition
38
string sequence

common/speech/lasr_speech_recognition_whisper/nodes/transcribe_microphone_server

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,34 @@ class TranscribeSpeechAction(object):
115115
f"Could not find microphone with name: {self._model_params.mic_device}"
116116
)
117117

118-
def _configure_recogniser(self) -> sr.Recognizer:
118+
def _configure_recogniser(
119+
self,
120+
energy_threshold: Optional[float] = None,
121+
pause_threshold: Optional[float] = None,
122+
) -> sr.Recognizer:
119123
"""Configures the speech recogniser object.
120124
125+
Args:
126+
energy_threshold (float): Energy threshold for silence detection. Using this disables automatic adjustment.
127+
pause_threshold (float): Seconds of non-speaking audio before a phrase is considered complete.
128+
121129
Returns:
122130
sr.Recognizer: speech recogniser object.
123131
"""
124132
self._listening = True
125133
recogniser = sr.Recognizer()
126134

127-
if self._model_params.pause_threshold:
135+
if pause_threshold:
136+
recogniser.pause_threshold = pause_threshold
137+
138+
elif self._model_params.pause_threshold:
128139
recogniser.pause_threshold = self._model_params.pause_threshold
129140

141+
if energy_threshold:
142+
recogniser.dynamic_energy_threshold = False
143+
recogniser.energy_threshold = energy_threshold
144+
return recogniser
145+
130146
if self._model_params.energy_threshold:
131147
recogniser.dynamic_energy_threshold = False
132148
recogniser.energy_threshold = self._model_params.energy_threshold
@@ -160,6 +176,18 @@ class TranscribeSpeechAction(object):
160176
rospy.loginfo("Request Received")
161177
if self._action_server.is_preempt_requested():
162178
return
179+
180+
if goal.energy_threshold > 0.0 and goal.max_phrase_limit > 0.0:
181+
self.recogniser = self._configure_recogniser(
182+
goal.energy_threshold, goal.max_phrase_limit
183+
)
184+
elif goal.energy_threshold > 0.0:
185+
self.recogniser = self._configure_recogniser(goal.energy_threshold)
186+
elif goal.max_phrase_limit > 0.0:
187+
self.recogniser = self._configure_recogniser(
188+
pause_threshold=goal.max_phrase_limit
189+
)
190+
163191
with self._configure_microphone() as src:
164192
self._listening = True
165193
wav_data = self.recogniser.listen(

0 commit comments

Comments
 (0)