hypercliq · msanguineti · Mar 1, 2024 · Mar 1, 2024 · Mar 1, 2024 · Mar 1, 2024
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,4 +1,4 @@
 {
   "python.analysis.autoImportCompletions": true,
-  "python.analysis.typeCheckingMode": "off"
+  "python.analysis.typeCheckingMode": "basic"
 }
diff --git a/src/audio_transcriber.py b/src/audio_transcriber.py
@@ -8,7 +8,7 @@
     choose_sample_rate,
     find_supported_sample_rates,
 )
-from src.cli_interface import CliInterface
+from src.cli_interface import CliInterface, start_pause_message
 from src.config import FRAMES_PER_BUFFER
 from src.whisper_transcription import WhisperTranscription
 
@@ -24,8 +24,10 @@ def __init__(self, model_size):
         self.pyaudio_instance = pyaudio.PyAudio()
         self.stream = None
         self.device_index, self.chosen_sample_rate = self.setup_audio_device()
-        self.whisper_transcription = WhisperTranscription(model_size, self.chosen_sample_rate)
         CliInterface.print_welcome()
+        self.whisper_transcription = WhisperTranscription(model_size, self.chosen_sample_rate)
+
+        CliInterface.print_info(start_pause_message)
 
     def setup_audio_device(self):
         """
@@ -34,6 +36,11 @@ def setup_audio_device(self):
         """
         device_index = choose_audio_device(self.pyaudio_instance)
         supported_rates = find_supported_sample_rates(self.pyaudio_instance, device_index)
+        # No supported rates means there is nothing to choose from: report the error, release PyAudio, and exit.
+        if not supported_rates:
+            CliInterface.print_error("No supported sample rates found for the device.")
+            self.pyaudio_instance.terminate()
+            exit(1)
         chosen_sample_rate = choose_sample_rate(supported_rates)
         return device_index, chosen_sample_rate
 
@@ -44,18 +51,18 @@ def toggle_recording(self):
         """
         if self.recording:
             self.pause_recording()
-            CliInterface.print_recording_paused()
+            print(CliInterface.colorize("\r\n\u23f8", bold=True) + " Recording paused. " + start_pause_message)
         else:
             self.start_recording()
-            CliInterface.print_recording_started()
+            print(CliInterface.colorize("\r\n\u25cf", red=True) + " Recording started. " + start_pause_message)
 
     def start_recording(self):
         """
         Start recording audio. Opens a new stream with the chosen audio device and sample rate.
         The stream's callback function is set to the transcription service's audio callback function.
         """
         if not self.recording:
-            CliInterface.print_initialize_recording()
+            CliInterface.print_info("Initializing recording...")
             self.stream = self.pyaudio_instance.open(
                 format=pyaudio.paInt16,
                 channels=1,
@@ -72,6 +79,13 @@ def pause_recording(self, stop=False):
         """
         Pause recording audio. Stop the audio stream and wait for the transcription service to finish processing.
         """
+
+        CliInterface.print_info(
+            "Pausing recording... wait for processing to complete"
+            if not stop
+            else "Stopping recording... wait for processing to complete"
+        )
+
         if self.recording:
             if self.stream:
                 self.stream.stop_stream()
@@ -80,7 +94,6 @@ def pause_recording(self, stop=False):
             self.whisper_transcription.finalize_recording()
             self.recording = False
 
-        CliInterface.print_recording_pausing(stop)
         while not self.whisper_transcription.is_processing_completed():
             time.sleep(0.1)  # Adjust sleep time as necessary
 
@@ -105,5 +118,5 @@ def run(self):
         """
         Start the main loop of the application. Listens for key press events and handles them with the on_key_press function.
         """
-        with keyboard.Listener(on_press=self.on_key_press) as listener:
+        with keyboard.Listener(on_press=self.on_key_press) as listener:  # type: ignore
             listener.join()
diff --git a/src/audio_utils.py b/src/audio_utils.py
@@ -23,9 +23,11 @@ def choose_audio_device(pyaudio_instance):
     :return: The index of the chosen audio device.
     """
     devices = get_audio_devices(pyaudio_instance)
-    CliInterface.print_devices(devices)
-    device_index = int(input("Enter the index of the desired audio device: "))
-    return device_index
+    CliInterface.print_info("Available audio devices:\n")
+    for i, name in enumerate(devices, start=1):
+        print(CliInterface.colorize(f"{i})", bold=True) + f" {name[1]}")
+    choice = int(input("\n" + CliInterface.question("Enter the number corresponding to the desired device: ")))
+    return devices[choice - 1][0]
 
 
 def find_supported_sample_rates(pyaudio_instance, device_index):
@@ -51,7 +53,9 @@ def find_supported_sample_rates(pyaudio_instance, device_index):
             supported_rates.append(rate)
         except Exception:
             continue
-    CliInterface.print_supported_sample_rates(supported_rates)
+    CliInterface.print_info("Supported sample rates for the device:")
+    for rate in supported_rates:
+        CliInterface.print_success(f"Supported: {rate} Hz")
     return supported_rates
 
 
@@ -61,15 +65,16 @@ def choose_sample_rate(supported_rates):
     :param supported_rates: A list of supported sample rates.
     :return: The chosen sample rate.
     """
-    CliInterface.print_sample_rate_options(supported_rates)
+    CliInterface.print_info("Choose a sample rate for recording:\n")
+    for i, rate in enumerate(supported_rates, start=1):
+        print(f"{i}) {rate} Hz")
     try:
-        CliInterface.print_enter_number()
-        choice = int(input())
+        choice = int(input("\n" + CliInterface.question("Enter the number corresponding to the desired sample rate: ")))
         if 1 <= choice <= len(supported_rates):
             return supported_rates[choice - 1]
         else:
-            CliInterface.print_invalid_selection()
+            CliInterface.print_warning("Invalid selection. Please enter a number from the list.")
             return choose_sample_rate(supported_rates)
-    except ValueError:
-        CliInterface.print_invalid_number()
+    except ValueError as e:
+        CliInterface.print_error(e)
         return choose_sample_rate(supported_rates)
diff --git a/src/cli_interface.py b/src/cli_interface.py
@@ -1,95 +1,72 @@
 class CliInterface:
     @staticmethod
-    def print_welcome():
-        print("Welcome to the Whisper Audio Transcription Service")
-        print("-------------------------------------------------")
-        print("Press Space to start/stop recording, Esc to exit.")
-
-    @staticmethod
-    def print_initialize_recording():
-        print("Initializing recording...", end="", flush=True)
-
-    @staticmethod
-    def print_recording_started():
-        print("\r\033[91m●\033[0m Recording started. Press Space to pause...\n")
+    def colorize(string, bold=False, red=False, yellow=False, green=False, cyan=False):
+        """
+        Return the string with optional bold and color (red, yellow, green, cyan) formatting, followed by a reset code.
+        If several colors are specified, the color precedence is: red > yellow > green > cyan.
+        :param string: The input string.
+        :param bold: Whether to use bold formatting.
+        :param red: Whether to use red formatting.
+        :param yellow: Whether to use yellow formatting.
+        :param green: Whether to use green formatting.
+        :param cyan: Whether to use cyan formatting.
+        """
+        b = "\033[1m" if bold else ""
+        r = "\033[91m" if red else ""
+        y = "\033[93m" if yellow else ""
+        g = "\033[92m" if green else ""
+        c = "\033[96m" if cyan else ""
+        e = "\033[0m"
+
+        return f"{b}{c}{g}{y}{r}{string}{e}"
 
     @staticmethod
-    def print_recording_pausing(stop=False):
-        print(
-            "\nPausing" if not stop else "\nStopping",
-            "recording... please wait for processing to finish.",
-        )
-
-    @staticmethod
-    def print_recording_paused():
-        print("\n\u23F8 Recording paused. Press Space to start or Esc to exit.")
+    def print_welcome():
+        print("\n--------------------------------------------")
+        print("| " + CliInterface.colorize("Welcome to the Whisper Audio Transcriber", bold=True) + " |")
+        print("--------------------------------------------")
 
     @staticmethod
     def print_exit():
-        print("\nExiting application... Thank you for using our service!")
-
-    @staticmethod
-    def print_processing_chunk(volume_db, chunk_size):
-        print(f"\r>> Processing chunk (Volume: {volume_db:.2f} dB, Size: {chunk_size} bytes)...")
-
-    @staticmethod
-    def print_processed_chunk(volume_db, chunk_size):
-        print(f"\nProcessed audio chunk with volume {volume_db:.2f} dB and size {chunk_size}.")
-
-    @staticmethod
-    def print_transcription_attempt(attempt):
-        print(f"\nTranscription attempt {attempt}...")
-
-    @staticmethod
-    def print_transcription_failed():
-        print("\nFailed to transcribe after several attempts.")
-
-    @staticmethod
-    def print_finalizing():
-        print("\nFinalizing recording...")
-
-    @staticmethod
-    def print_transcription_complete():
-        print("\nTranscription completed successfully.")
+        print("\r\nExiting application...")
+        print("\n-------------------------------------------------")
+        print("| " + CliInterface.colorize("Thank you for using Whisper Audio Transcriber", bold=True) + " |")
+        print("-------------------------------------------------")
 
     @staticmethod
     def print_error(e):
-        print(f"\nError: {e}")
+        print("\n" + CliInterface.colorize("!", red=True) + f" Error: {e}")
 
     @staticmethod
-    def print_output_path(path):
-        print(f"\nTranscription results have been written to: {path}")
+    def print_warning(warning):
+        print("\n" + CliInterface.colorize("⚠", yellow=True) + f" Warning: {warning}")
 
     @staticmethod
-    def print_output(json_output):
-        print(json_output)
+    def print_success(message):
+        print("\n" + CliInterface.colorize("✔", green=True) + f" {message}")
 
     @staticmethod
-    def print_devices(devices):
-        print("Available audio devices:")
-        for index, name in devices:
-            print(f"{index}: {name}")
+    def print_info(message):
+        print("\n" + CliInterface.colorize("i", cyan=True) + f" {message}")
 
     @staticmethod
-    def print_supported_sample_rates(rates):
-        print("Testing supported sample rates for the device:")
-        for rate in rates:
-            print(f"Supported: {rate} Hz")
+    def print_question(message):
+        print("\n" + CliInterface.colorize("?", bold=True) + f" {message}")
 
     @staticmethod
-    def print_sample_rate_options(supported_rates):
-        print("Supported sample rates: ")
-        for i, rate in enumerate(supported_rates, start=1):
-            print(f"{i}) {rate} Hz")
+    def question(message):
+        return CliInterface.colorize("?", bold=True) + f" {message}"
 
     @staticmethod
-    def print_invalid_selection():
-        print("Invalid selection. Please enter a number from the list.")
+    def print_error_message(message):
+        print("\n" + CliInterface.colorize("!", red=True) + f" {message}")
 
-    @staticmethod
-    def print_enter_number():
-        print("Enter the number corresponding to the desired sample rate: ")
 
-    @staticmethod
-    def print_invalid_number():
-        print("Please enter a valid number.")
+start_pause_message = (
+    "Press "
+    + CliInterface.colorize("Space", bold=True)
+    + " to start/pause recording."
+    + " Press "
+    + CliInterface.colorize("Esc", bold=True)
+    + " to exit."
+)
diff --git a/src/config.py b/src/config.py
@@ -3,6 +3,12 @@
 # Model size to use for Whisper transcription. Options: "tiny", "base", "small", "medium", "large"
 MODEL_SIZE = "base"
 
+# Language code to use for Whisper transcription
+LANGUAGE_CODE = "en"
+
+# Whisper prompt to use for transcription
+PROMPT = ""
+
 # Sample rates to consider for testing device capabilities (in Hz)
 SAMPLE_RATES = [8000, 16000, 32000, 44100, 48000]
 
@@ -20,3 +26,6 @@
 
 # Path to the file to print the transcription results
 OUTPUT_FILE_PATH = "transcription_results.json"
+
+# Whether to export raw transcriptions to a file
+EXPORT_RAW_TRANSCRIPTIONS = True