@@ -115,18 +115,34 @@ class TranscribeSpeechAction(object):
115
115
f"Could not find microphone with name: { self ._model_params .mic_device } "
116
116
)
117
117
118
- def _configure_recogniser (self ) -> sr .Recognizer :
118
+ def _configure_recogniser (
119
+ self ,
120
+ energy_threshold : Optional [float ] = None ,
121
+ pause_threshold : Optional [float ] = None ,
122
+ ) -> sr .Recognizer :
119
123
"""Configures the speech recogniser object.
120
124
125
+ Args:
126
+ energy_threshold (float): Energy threshold for silence detection. Using this disables automatic adjustment.
127
+ pause_threshold (float): Seconds of non-speaking audio before a phrase is considered complete.
128
+
121
129
Returns:
122
130
sr.Recognizer: speech recogniser object.
123
131
"""
124
132
self ._listening = True
125
133
recogniser = sr .Recognizer ()
126
134
127
- if self ._model_params .pause_threshold :
135
+ if pause_threshold :
136
+ recogniser .pause_threshold = pause_threshold
137
+
138
+ elif self ._model_params .pause_threshold :
128
139
recogniser .pause_threshold = self ._model_params .pause_threshold
129
140
141
+ if energy_threshold :
142
+ recogniser .dynamic_energy_threshold = False
143
+ recogniser .energy_threshold = energy_threshold
144
+ return recogniser
145
+
130
146
if self ._model_params .energy_threshold :
131
147
recogniser .dynamic_energy_threshold = False
132
148
recogniser .energy_threshold = self ._model_params .energy_threshold
@@ -160,6 +176,18 @@ class TranscribeSpeechAction(object):
160
176
rospy .loginfo ("Request Received" )
161
177
if self ._action_server .is_preempt_requested ():
162
178
return
179
+
180
+ if goal .energy_threshold > 0.0 and goal .max_phrase_limit > 0.0 :
181
+ self .recogniser = self ._configure_recogniser (
182
+ goal .energy_threshold , goal .max_phrase_limit
183
+ )
184
+ elif goal .energy_threshold > 0.0 :
185
+ self .recogniser = self ._configure_recogniser (goal .energy_threshold )
186
+ elif goal .max_phrase_limit > 0.0 :
187
+ self .recogniser = self ._configure_recogniser (
188
+ pause_threshold = goal .max_phrase_limit
189
+ )
190
+
163
191
with self ._configure_microphone () as src :
164
192
self ._listening = True
165
193
wav_data = self .recogniser .listen (
0 commit comments