Skip to content

Commit

Permalink
fix lang detect
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed May 4, 2023
1 parent 49502a6 commit a7d8302
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions ovos_stt_plugin_fasterwhisper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,21 @@ def __init__(self, config=None):
device = "cpu"
self.engine = WhisperModel(model, device=device, compute_type=self.compute_type)

@staticmethod
def audiochunk2array(audio_data):
    """Convert a buffer of 16-bit PCM samples into a normalised float32 array.

    Args:
        audio_data: bytes-like object holding signed 16-bit PCM samples
            (little-endian, as in WAV data).

    Returns:
        One-dimensional ``numpy.float32`` array with one entry per sample,
        scaled into the range [-1.0, 1.0). An empty buffer yields an empty
        array.
    """
    # A chunk with an odd byte count ends in the middle of a sample;
    # numpy.frombuffer would raise ValueError on it, so drop the dangling byte.
    trailing = len(audio_data) % 2
    if trailing:
        audio_data = audio_data[:-trailing]

    # "<i2" pins little-endian int16 so the result does not depend on the
    # byte order of the host CPU.
    samples = numpy.frombuffer(audio_data, dtype="<i2")

    # Dividing by 2 ** 15 maps the int16 range onto [-1.0, 1.0).
    return samples.astype(numpy.float32) / numpy.float32(2 ** 15)

# plugin api
def transform(self, audio_data):
    """Detect the spoken language of an audio chunk.

    Args:
        audio_data: bytes-like buffer of 16-bit PCM samples; it is decoded
            with ``audiochunk2array`` before being handed to the engine.

    Returns:
        A tuple ``(audio_data, metadata)`` where ``audio_data`` is returned
        unchanged and ``metadata`` carries the detected language under
        ``"stt_lang"`` and its probability under ``"lang_probability"``.
    """
    samples = self.audiochunk2array(audio_data)
    # segments is an iterator, transcription is not done here; only the
    # language info returned alongside it is used.
    _, info = self.engine.transcribe(samples, beam_size=self.beam_size)
    LOG.info(f"Detected speech language '{info.language}' with probability {info.language_probability}")
    return audio_data, {"stt_lang": info.language, "lang_probability": info.language_probability}

Expand Down Expand Up @@ -158,14 +169,7 @@ def __init__(self, *args, **kwargs):
@staticmethod
def audiodata2array(audio_data):
    """Convert an ``AudioData`` object into a normalised float32 sample array.

    Args:
        audio_data: ``AudioData`` instance; anything else trips the assert.

    Returns:
        The array produced by ``FasterWhisperLangClassifier.audiochunk2array``
        for the bytes returned by ``audio_data.get_wav_data()``.
    """
    assert isinstance(audio_data, AudioData)
    # NOTE(review): get_wav_data() presumably returns a complete WAV file,
    # header included, so the header bytes would be decoded as samples too.
    # Confirm whether get_raw_data() is what should be passed here.
    return FasterWhisperLangClassifier.audiochunk2array(audio_data.get_wav_data())

def execute(self, audio, language=None):
segments, _ = self.engine.transcribe(self.audiodata2array(audio), beam_size=self.beam_size)
Expand Down

0 comments on commit a7d8302

Please sign in to comment.