# WhisperRecorder.py
import threading
import wave
import pyaudio
import whisper
import os
import logging
import time
import json
from pynput import keyboard
class WhisperRecorder:
    """Record microphone audio on demand and transcribe it with Whisper.

    A recording is started and stopped with :meth:`toggle_recording`. While a
    recording is running, audio is captured on a background thread and a
    second thread sends periodic keystrokes through ``keyboard_controller``.
    When the recording stops, the captured audio is written to a WAV file in
    ``recordings_folder``, passed to the Whisper model, and the resulting text
    is returned and shown in the UI.

    Only the newest ``max_history_size`` recordings are kept on disk.
    """

    def __init__(self, ui, keyboard_controller, model_name="tiny"):
        """Set up audio capture, load the Whisper model and open the main UI.

        Args:
            ui: UI object. This class calls ``change_state_indicator``,
                ``flash_indicator``, ``update_transcription_box``,
                ``create_main_ui_window`` and ``close_loading_screen`` on it
                and reads ``chat_mode.get()``.
            keyboard_controller: Object with ``press(key)`` / ``release(key)``
                used to send keystrokes (presumably a pynput keyboard
                ``Controller`` -- confirm against the caller).
            model_name: Whisper model to load when ``config.json`` does not
                provide a usable ``"model"`` entry.
        """
        # Transcription config
        self.ui = ui
        self.keyboard_controller = keyboard_controller
        self.is_recording = False
        self.stream = None
        self.audio_frames = []
        self.audio = pyaudio.PyAudio()
        self.temp_wav_file = "recording.wav"

        # Worker threads; created when a recording starts.
        self.recording_thread = None
        self.keystroke_thread = None

        # Audio recording config
        self.chunk_size = 4096
        self.audio_format = pyaudio.paInt16
        self.channels = 1
        self.sample_rate = 16000

        # Recording history
        self.recordings_history = []
        self.max_history_size = 3
        self.recordings_folder = "recordings"
        self.ensure_recordings_folder()

        # Model config: the model is loaded exactly once. A model named in
        # config.json wins; otherwise the requested model_name is used.
        self.model_name = model_name
        self.model = None
        self.load_model_from_config()

        logging.info("Recorder finished loading, close loading box.")
        self.ui.create_main_ui_window()
        self.ui.close_loading_screen()
        logging.info("Whisper model loaded")

    def start_recording(self):
        """Capture audio until ``is_recording`` becomes false.

        This call blocks for the whole duration of the recording, so it is
        meant to run on a worker thread (see :meth:`toggle_recording`).
        """
        self.is_recording = True
        self._capture_audio()

    def _capture_audio(self):
        """Open the input stream and collect chunks while recording is on.

        The caller is responsible for setting ``is_recording`` to true first;
        this keeps a stop request that arrives before the stream is open from
        being overwritten.
        """
        logging.info('Recording started...')
        self.audio_frames = []
        try:
            self.stream = self.audio.open(
                format=self.audio_format,
                channels=self.channels,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size,
            )
        except Exception:
            # Without a stream there is nothing to stop later, so clear the
            # flag and let the next toggle start a fresh attempt.
            self.is_recording = False
            logging.exception('Could not open the audio input stream.')
            raise

        # Start a separate thread for simulating keystrokes
        self.keystroke_thread = threading.Thread(target=self.simulate_keystrokes)
        self.keystroke_thread.start()

        while self.is_recording:
            data = self.stream.read(self.chunk_size)
            self.audio_frames.append(data)

    def stop_recording_and_transcribe(self):
        """Stop capturing, save the audio as a WAV file and transcribe it.

        Returns:
            The ``'text'`` entry of the Whisper transcription result.
        """
        logging.info('Halting Recording...')
        self.ui.change_state_indicator("yellow", text="Recording Ending")

        # Make sure both workers have finished before the stream is closed,
        # so the reader is never inside stream.read() on a closed stream.
        self.is_recording = False
        self._join_worker(self.recording_thread)
        self._join_worker(self.keystroke_thread)

        # Close the stream
        logging.info('Close Stream.')
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None
        self.ui.change_state_indicator("yellow", text="Recording Complete")

        # Open/save the audio file
        logging.info('Open/save file.')
        self.ui.change_state_indicator("Orange", text="Preparing Audio...")

        # Generate a unique file name for this recording
        unique_file_name = self.get_unique_file_name()
        recording_file_name = self.get_recording_file_path(unique_file_name)
        # The context manager closes the file even if writing fails.
        with wave.open(recording_file_name, 'wb') as wavefile:
            wavefile.setnchannels(self.channels)
            wavefile.setsampwidth(self.audio.get_sample_size(self.audio_format))
            wavefile.setframerate(self.sample_rate)
            wavefile.writeframes(b''.join(self.audio_frames))
        self.ui.change_state_indicator("Orange", text="Audio Prepared...")

        # Add the new recording to the history
        self.update_recordings_history(unique_file_name)

        # Starting Transcription
        logging.info('Starting Transcription')
        self.ui.change_state_indicator("purple", text="Transcription Starting...")
        # task="translate" requests English output from Whisper.
        result = self.model.transcribe(recording_file_name, task="translate")
        self.ui.change_state_indicator("green", text="Transcription Complete!")
        self.ui.change_state_indicator("green", text="Complete!")
        return result['text']

    def toggle_recording(self):
        """Start a recording if idle, otherwise stop it and transcribe.

        Returns:
            ``""`` when a recording was started, or the transcription text
            when a recording was stopped.
        """
        if not self.is_recording:
            self.ui.change_state_indicator("yellow", text="Recording Starting...")
            # Set the flag here, not in the worker, so a second toggle that
            # arrives before the worker runs is treated as "stop" instead of
            # starting a second recorder thread.
            self.is_recording = True
            self.recording_thread = threading.Thread(target=self._capture_audio)
            self.recording_thread.start()
            return ""  # No transcription when starting

        # Stop recording and wait for the thread to finish
        self.is_recording = False
        self._join_worker(self.recording_thread)
        transcription = self.stop_recording_and_transcribe()
        self.ui.change_state_indicator("grey", text="Ready")
        self.ui.flash_indicator()
        self.ui.update_transcription_box(transcription)
        return transcription

    @staticmethod
    def _join_worker(thread):
        """Wait for ``thread`` unless it is missing or is the calling thread."""
        if thread is not None and thread is not threading.current_thread():
            thread.join()

    def terminate(self):
        """Release the PyAudio instance."""
        self.audio.terminate()

    def update_recordings_history(self, unique_file_name):
        """Record a new file in the history, evicting the oldest if full.

        Evicted recordings are deleted from disk; a file that is already gone
        is ignored.

        Args:
            unique_file_name: File name (not a path) inside
                ``recordings_folder``.
        """
        full_path = self.get_recording_file_path(unique_file_name)
        while (self.recordings_history
               and len(self.recordings_history) >= self.max_history_size):
            oldest_recording = self.recordings_history.pop(0)
            try:
                os.remove(oldest_recording)
            except FileNotFoundError:
                pass
        self.recordings_history.append(full_path)

    def ensure_recordings_folder(self):
        """Create ``recordings_folder`` if it does not exist yet."""
        os.makedirs(self.recordings_folder, exist_ok=True)

    def get_recording_file_path(self, file_name):
        """Return ``file_name`` joined onto ``recordings_folder``."""
        return os.path.join(self.recordings_folder, file_name)

    def get_unique_file_name(self):
        """Return a WAV file name stamped with the current local time.

        The stamp has one-second resolution (``YYYYMMDD-HHMMSS``).
        """
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        return f"recording_{timestamp}.wav"

    def update_model(self, new_model_name):
        """Switch to the Whisper model called ``new_model_name``."""
        self.model_name = new_model_name
        self.model = whisper.load_model(self.model_name)

    def load_model_from_config(self):
        """Load the model named under ``"model"`` in ``config.json``.

        When the file is missing, unreadable, not a JSON object, has no
        ``"model"`` entry, or names a model that fails to load, the current
        ``model_name`` is used instead (``'tiny'`` if none is set).
        ``model_name`` is updated to the model that was actually loaded.
        """
        fallback = getattr(self, "model_name", None) or 'tiny'
        model_name = fallback
        try:
            with open('config.json', 'r') as config_file:
                config = json.load(config_file)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            logging.warning("Error loading model from config: %s", e)
        else:
            if isinstance(config, dict):
                model_name = config.get('model', fallback)
            else:
                logging.warning(
                    "Error loading model from config: expected a JSON object")

        try:
            self.model = whisper.load_model(model_name)
        except Exception as e:
            if model_name == fallback:
                raise
            logging.warning("Error loading model from config: %s", e)
            model_name = fallback
            self.model = whisper.load_model(model_name)
        self.model_name = model_name

    def simulate_keystrokes(self):
        """Send keystrokes for as long as a recording is running.

        Does nothing unless the UI's chat mode is on. In chat mode it taps
        't' once, then taps space followed by backspace about once per second
        until ``is_recording`` becomes false.
        """
        if not self.ui.chat_mode.get():
            return

        # Press 'T' once to open the chat window
        chat_key = keyboard.KeyCode.from_char('t')
        self.keyboard_controller.press(chat_key)
        self.keyboard_controller.release(chat_key)
        time.sleep(0.5)  # Short delay before starting the loop

        # Cycle space and backspace while recording
        while self.is_recording:
            self.keyboard_controller.press(keyboard.Key.space)
            self.keyboard_controller.release(keyboard.Key.space)
            time.sleep(0.1)  # Short delay between space and backspace
            self.keyboard_controller.press(keyboard.Key.backspace)
            self.keyboard_controller.release(keyboard.Key.backspace)
            time.sleep(0.9)  # Continue the loop every second