-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscription.py
134 lines (104 loc) · 4.79 KB
/
transcription.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from kivy.lang import Builder
from kivy.uix.boxlayout import BoxLayout
from kivy.properties import StringProperty, ListProperty
from fileChooser import FileViewer, LoadDialog
from kivy.uix.popup import Popup
from kivy.app import App
import concurrent.futures
from kivy.clock import Clock
from functools import partial
from vosk import Model, KaldiRecognizer
from typing import List, TypedDict
import os, json
import subprocess
Builder.load_file("layouts/transcription.kv")
class LoadFolderDialog(LoadDialog):
filters = ListProperty([''])
class ModelChooser(FileViewer):
path_label = StringProperty("Please select model folder")
def show_load(self):
content = LoadFolderDialog(load=self.load, cancel=self.dismiss_popup)
self._popup = Popup(title="Select model folder", content=content,
size_hint=(0.9, 0.9))
self._popup.open()
def load(self, path, filename):
app = App.get_running_app()
app.model_path = path
self.path_label = f"Model folder: {path}"
self.dismiss_popup()
class SpeechChooser(FileViewer):
path_label = StringProperty("Please select folder with speech files")
def show_load(self):
content = LoadFolderDialog(load=self.load, cancel=self.dismiss_popup)
self._popup = Popup(title="Select folder with speech files", content=content,
size_hint=(0.9, 0.9))
self._popup.open()
def load(self, path, filename):
app = App.get_running_app()
app.speech_path = path
self.path_label = f"Speech folder: {path}"
self.dismiss_popup()
class ResultsChooser(FileViewer):
path_label = StringProperty("Please select folder for the transcription results")
def show_load(self):
content = LoadFolderDialog(load=self.load, cancel=self.dismiss_popup)
self._popup = Popup(title="Select folder for vosk results", content=content,
size_hint=(0.9, 0.9))
self._popup.open()
def load(self, path, filename):
app = App.get_running_app()
app.transcription_res_path = path
self.path_label = f"Transcription folder: {path}"
self.dismiss_popup()
class WordInfo(TypedDict):
conf: float #confidence for specific word (from 0 to 1)
end: float #end time for spoken word in sec
start: float #start time of spoken word in sec
word: str #detected word
class VoskResult(TypedDict):
result: List[WordInfo] #list of statistics for each word
text: str #spoken text
class Transcription(BoxLayout):
def vosk_output(self):
app = App.get_running_app()
sample_rate=16000
model = Model(app.model_path)
rec = KaldiRecognizer(model, sample_rate)
rec.SetWords(True)
print("Wait for vosk to analyze the audio file(s)...")
for audio_file in os.listdir(app.speech_path):
print(f"Analyzing audio file: {audio_file}")
# converts audio file to appropriate format (ffmpeg needs to be installed)
process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i',
f"{app.speech_path}/{audio_file}",
'-ar', str(sample_rate) , '-ac', '1', '-f', 's16le', '-'],
stdout=subprocess.PIPE, stdin=subprocess.DEVNULL)
json_array = []
while True:
data = process.stdout.read(4000)
if len(data) == 0:
break
if rec.AcceptWaveform(data):
res = json.loads(rec.Result())
if 'result' in res:
json_array.append(VoskResult(result=[WordInfo(**info) for info in res['result']] , text=res['text']))
else:
pass
final = json.loads(rec.FinalResult())
if 'result' in final:
json_array.append(VoskResult(result=[WordInfo(**info) for info in final['result']] , text=final['text']))
with open(f'{app.transcription_res_path}/{audio_file[:-4]}_results.json', 'w') as json_res:
json.dump(json_array, json_res)
print("Created vosk results file!")
print("Vosk finished audio file processing!")
def future_is_running(self, future, dt):
if not future.running():
self.ids.start_btn.disabled = False
self.ids.start_btn.text = "Start transcription"
return False
def schedule_transcription(self):
self.ids.start_btn.disabled = True
self.ids.start_btn.text = "Please wait..."
executor = concurrent.futures.ThreadPoolExecutor()
future = executor.submit(self.vosk_output)
Clock.schedule_interval(partial(self.future_is_running, future), 1)