Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New CLI print functions and config options #7

Merged
merged 9 commits into from
Mar 1, 2024
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "off"
"python.analysis.typeCheckingMode": "basic"
}
27 changes: 20 additions & 7 deletions src/audio_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
choose_sample_rate,
find_supported_sample_rates,
)
from src.cli_interface import CliInterface
from src.cli_interface import CliInterface, start_pause_message
from src.config import FRAMES_PER_BUFFER
from src.whisper_transcription import WhisperTranscription

Expand All @@ -24,8 +24,10 @@ def __init__(self, model_size):
self.pyaudio_instance = pyaudio.PyAudio()
self.stream = None
self.device_index, self.chosen_sample_rate = self.setup_audio_device()
self.whisper_transcription = WhisperTranscription(model_size, self.chosen_sample_rate)
CliInterface.print_welcome()
self.whisper_transcription = WhisperTranscription(model_size, self.chosen_sample_rate)

CliInterface.print_info(start_pause_message)

def setup_audio_device(self):
"""
Expand All @@ -34,6 +36,11 @@ def setup_audio_device(self):
"""
device_index = choose_audio_device(self.pyaudio_instance)
supported_rates = find_supported_sample_rates(self.pyaudio_instance, device_index)
# If no supported sample rates were found, print an error message, terminate PyAudio, and exit.
if not supported_rates:
CliInterface.print_error("No supported sample rates found for the device.")
self.pyaudio_instance.terminate()
exit(1)
chosen_sample_rate = choose_sample_rate(supported_rates)
return device_index, chosen_sample_rate

Expand All @@ -44,18 +51,18 @@ def toggle_recording(self):
"""
if self.recording:
self.pause_recording()
CliInterface.print_recording_paused()
print(CliInterface.colorize("\r\n\u23f8", bold=True) + " Recording paused. " + start_pause_message)
else:
self.start_recording()
CliInterface.print_recording_started()
print(CliInterface.colorize("\r\n\u25cf", red=True) + " Recording started. " + start_pause_message)

def start_recording(self):
"""
Start recording audio. Opens a new stream with the chosen audio device and sample rate.
The stream's callback function is set to the transcription service's audio callback function.
"""
if not self.recording:
CliInterface.print_initialize_recording()
CliInterface.print_info("Initializing recording...")
self.stream = self.pyaudio_instance.open(
format=pyaudio.paInt16,
channels=1,
Expand All @@ -72,6 +79,13 @@ def pause_recording(self, stop=False):
"""
Pause recording audio. Stop the audio stream and wait for the transcription service to finish processing.
"""

CliInterface.print_info(
"Pausing recording... wait for processing to complete"
if not stop
else "Stopping recording... wait for processing to complete"
)

if self.recording:
if self.stream:
self.stream.stop_stream()
Expand All @@ -80,7 +94,6 @@ def pause_recording(self, stop=False):
self.whisper_transcription.finalize_recording()
self.recording = False

CliInterface.print_recording_pausing(stop)
while not self.whisper_transcription.is_processing_completed():
time.sleep(0.1) # Adjust sleep time as necessary

Expand All @@ -105,5 +118,5 @@ def run(self):
"""
Start the main loop of the application. Listens for key press events and handles them with the on_key_press function.
"""
with keyboard.Listener(on_press=self.on_key_press) as listener:
with keyboard.Listener(on_press=self.on_key_press) as listener: # type: ignore
listener.join()
25 changes: 15 additions & 10 deletions src/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ def choose_audio_device(pyaudio_instance):
:return: The index of the chosen audio device.
"""
devices = get_audio_devices(pyaudio_instance)
CliInterface.print_devices(devices)
device_index = int(input("Enter the index of the desired audio device: "))
return device_index
CliInterface.print_info("Available audio devices:\n")
for i, name in enumerate(devices, start=1):
print(CliInterface.colorize(f"{i})", bold=True) + f" {name[1]}")
choice = int(input("\n" + CliInterface.question("Enter the number corresponding to the desired device: ")))
return devices[choice - 1][0]


def find_supported_sample_rates(pyaudio_instance, device_index):
Expand All @@ -51,7 +53,9 @@ def find_supported_sample_rates(pyaudio_instance, device_index):
supported_rates.append(rate)
except Exception:
continue
CliInterface.print_supported_sample_rates(supported_rates)
CliInterface.print_info("Supported sample rates for the device:")
for rate in supported_rates:
CliInterface.print_success(f"Supported: {rate} Hz")
return supported_rates


Expand All @@ -61,15 +65,16 @@ def choose_sample_rate(supported_rates):
:param supported_rates: A list of supported sample rates.
:return: The chosen sample rate.
"""
CliInterface.print_sample_rate_options(supported_rates)
CliInterface.print_info("Choose a sample rate for recording:\n")
for i, rate in enumerate(supported_rates, start=1):
print(f"{i}) {rate} Hz")
try:
CliInterface.print_enter_number()
choice = int(input())
choice = int(input("\n" + CliInterface.question("Enter the number corresponding to the desired sample rate: ")))
if 1 <= choice <= len(supported_rates):
return supported_rates[choice - 1]
else:
CliInterface.print_invalid_selection()
CliInterface.print_warning("Invalid selection. Please enter a number from the list.")
return choose_sample_rate(supported_rates)
except ValueError:
CliInterface.print_invalid_number()
except ValueError as e:
CliInterface.print_error(e)
return choose_sample_rate(supported_rates)
119 changes: 48 additions & 71 deletions src/cli_interface.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,72 @@
class CliInterface:
@staticmethod
def print_welcome():
print("Welcome to the Whisper Audio Transcription Service")
print("-------------------------------------------------")
print("Press Space to start/stop recording, Esc to exit.")

@staticmethod
def print_initialize_recording():
print("Initializing recording...", end="", flush=True)

@staticmethod
def print_recording_started():
print("\r\033[91m●\033[0m Recording started. Press Space to pause...\n")
def colorize(string, bold=False, red=False, yellow=False, green=False, cyan=False):
"""
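Return the string wrapped in ANSI escape codes for optional bold and color (red, yellow, green, cyan) formatting. If several colors are specified, the color precedence is:
red > yellow > green > cyan. A reset code is always appended after the string.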
:param string: The input string.
:param bold: Whether to use bold formatting.
:param red: Whether to use red formatting.
:param yellow: Whether to use yellow formatting.
:param green: Whether to use green formatting.
:param cyan: Whether to use cyan formatting.
"""
b = "\033[1m" if bold else ""
r = "\033[91m" if red else ""
y = "\033[93m" if yellow else ""
g = "\033[92m" if green else ""
c = "\033[96m" if cyan else ""
e = "\033[0m"

return f"{b}{c}{g}{y}{r}{string}{e}"

@staticmethod
def print_recording_pausing(stop=False):
print(
"\nPausing" if not stop else "\nStopping",
"recording... please wait for processing to finish.",
)

@staticmethod
def print_recording_paused():
print("\n\u23F8 Recording paused. Press Space to start or Esc to exit.")
def print_welcome():
print("\n--------------------------------------------")
print("| " + CliInterface.colorize("Welcome to the Whisper Audio Transcriber", bold=True) + " |")
print("--------------------------------------------")

@staticmethod
def print_exit():
print("\nExiting application... Thank you for using our service!")

@staticmethod
def print_processing_chunk(volume_db, chunk_size):
print(f"\r>> Processing chunk (Volume: {volume_db:.2f} dB, Size: {chunk_size} bytes)...")

@staticmethod
def print_processed_chunk(volume_db, chunk_size):
print(f"\nProcessed audio chunk with volume {volume_db:.2f} dB and size {chunk_size}.")

@staticmethod
def print_transcription_attempt(attempt):
print(f"\nTranscription attempt {attempt}...")

@staticmethod
def print_transcription_failed():
print("\nFailed to transcribe after several attempts.")

@staticmethod
def print_finalizing():
print("\nFinalizing recording...")

@staticmethod
def print_transcription_complete():
print("\nTranscription completed successfully.")
print("\r\nExiting application...")
print("\n-------------------------------------------------")
print("| " + CliInterface.colorize("Thank you for using Whisper Audio Transcriber", bold=True) + " |")
print("-------------------------------------------------")

@staticmethod
def print_error(e):
print(f"\nError: {e}")
print("\n" + CliInterface.colorize("!", red=True) + f" Error: {e}")

@staticmethod
def print_output_path(path):
print(f"\nTranscription results have been written to: {path}")
def print_warning(warning):
print("\n" + CliInterface.colorize("⚠", yellow=True) + f" Warning: {warning}")

@staticmethod
def print_output(json_output):
print(json_output)
def print_success(message):
print("\n" + CliInterface.colorize("✔", green=True) + f" {message}")

@staticmethod
def print_devices(devices):
print("Available audio devices:")
for index, name in devices:
print(f"{index}: {name}")
def print_info(message):
print("\n" + CliInterface.colorize("i", cyan=True) + f" {message}")

@staticmethod
def print_supported_sample_rates(rates):
print("Testing supported sample rates for the device:")
for rate in rates:
print(f"Supported: {rate} Hz")
def print_question(message):
print("\n" + CliInterface.colorize("?", bold=True) + f" {message}")

@staticmethod
def print_sample_rate_options(supported_rates):
print("Supported sample rates: ")
for i, rate in enumerate(supported_rates, start=1):
print(f"{i}) {rate} Hz")
def question(message):
return CliInterface.colorize("?", bold=True) + f" {message}"

@staticmethod
def print_invalid_selection():
print("Invalid selection. Please enter a number from the list.")
def print_error_message(message):
print("\n" + CliInterface.colorize("!", red=True) + f" {message}")

@staticmethod
def print_enter_number():
print("Enter the number corresponding to the desired sample rate: ")

@staticmethod
def print_invalid_number():
print("Please enter a valid number.")
start_pause_message = (
"Press "
+ CliInterface.colorize("Space", bold=True)
+ " to start/pause recording."
+ " Press "
+ CliInterface.colorize("Esc", bold=True)
+ " to exit."
)
9 changes: 9 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
# Model size to use for Whisper transcription. Options: "tiny", "base", "small", "medium", "large"
MODEL_SIZE = "base"

# Language code to use for Whisper transcription
LANGUAGE_CODE = "en"

# Whisper prompt to use for transcription
PROMPT = ""

# Sample rates to consider for testing device capabilities (in Hz)
SAMPLE_RATES = [8000, 16000, 32000, 44100, 48000]

Expand All @@ -20,3 +26,6 @@

# Path to the file where the transcription results are written
OUTPUT_FILE_PATH = "transcription_results.json"

# Whether to export raw transcriptions to a file
EXPORT_RAW_TRANSCRIPTIONS = True
Loading
Loading