Update the dictation tool and add a dictation refiner prompt

yorevs · Dec 16, 2024 · 7e15c22 · 7e15c22
1 parent bca4908
commit 7e15c22
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 14 deletions.
diff --git a/src/main/askai/core/component/recorder.py b/src/main/askai/core/component/recorder.py
@@ -17,6 +17,7 @@
 
 import pause
 from hspylib.core.tools.commons import sysout
+from hspylib.core.tools.text_tools import ensure_endswith
 
 from askai.core.askai_configs import configs
 from askai.core.askai_events import events
@@ -215,47 +216,49 @@ def dictate(
         """
         phrase: str = ""
         dictated_text: str = ""
-        limit: int = 10
+        limit: float = 10.0
+        noise_limit: float = 0.2
+        stop_event: threading.Event = threading.Event()
+        audio_path: Path = Path(f"{REC_DIR}/askai-dictate-{now_ms()}.wav")
 
         def _countdown_(sec: int):
             i = sec
             sysout(msg.listening() + " ", end="")
             sysout(f"{i}", end="")
-            while (i := (i - 1)) >= 0:
+            while not stop_event.is_set() and (i := (i - 1)) >= 0:
                 pause.seconds(1)
                 sysout(f"%CUB({len(str(i + 1))})%{i}%EL0%", end="")
-            sysout(f"%CUB({len(str(i + 1))})%%EL0%", end="")
+            if not stop_event.is_set():
+                sysout(f"%CUB({len(str(i + 1))})%%EL0%", end="")
 
         events.listening.emit()
         while True:
             with Microphone(device_index=self._device_index) as mic_source:
                 try:
-                    new_thread = threading.Thread(target=_countdown_, args=[limit])
+                    stop_event.clear()
+                    new_thread = threading.Thread(target=_countdown_, args=(limit,))
                     sysout(("…" if dictated_text else "") + phrase)
-                    audio_path = Path(f"{REC_DIR}/askai-dictate-{now_ms()}.wav")
-                    self._rec.adjust_for_ambient_noise(mic_source, duration=0.2)
+                    self._rec.adjust_for_ambient_noise(mic_source, duration=noise_limit)
                     new_thread.start()
                     audio: AudioData = self._rec.listen(mic_source, phrase_time_limit=limit)
                     sysout(f"%CUB({len(str(limit))})%%EL0%   ")
-                    phrase: str = self._write_audio_file(audio, audio_path, language, recognition_api, True)
+                    phrase = self._write_audio_file(audio, audio_path, language, recognition_api, True)
                 except (WaitTimeoutError, UnknownValueError):
                     phrase = ""
                 except AttributeError as err:
                     raise InvalidInputDevice(str(err)) from err
                 except RequestError as err:
                     raise InvalidRecognitionApiError(str(err)) from err
                 finally:
-                    if phrase:
-                        if phrase in ["quit", "exit"]:
-                            dictated_text += ". "
-                            break
-                        dictated_text += ". " + phrase.capitalize()
-                    else:
+                    stop_event.set()
+                    if not phrase or phrase in ["quit", "exit"]:
                         break
+                    else:
+                        dictated_text += (". " if dictated_text else "") + phrase.capitalize()
 
         events.listening.emit(listening=False)
 
-        return dictated_text + os.linesep
+        return ensure_endswith(dictated_text, "." + os.linesep)
 
     def _write_audio_file(
         self,

diff --git a/src/main/askai/resources/prompts/dictation-refiner.txt b/src/main/askai/resources/prompts/dictation-refiner.txt
@@ -0,0 +1,24 @@
+You are a Text Refiner Assistant. Your task is to refine and correct dictated text. Keep in mind that users may make mistakes while dictating; you must identify and resolve these yourself, because speech recognition tools cannot handle them.
+
+**Instructions:**
+
+- Detect and fix errors in semantics, spelling, grammar, and word choice.
+- Do not alter the meaning of any phrase or rephrase unnecessarily.
+- Treat consecutive repetitions as the user's attempts to correct themselves. For example:
+    - Input: "I'm putting some shits on the bad ... some sheets on the bed."
+    - Output: "I'm putting some sheets on the bed."
+- Identify human names and capitalize them properly.
+- Always prioritize the latest instance in cases of repetition.
+- Recognize and correct missing or truncated words while maintaining coherence.
+
+**Additional Guidelines:**
+
+- Do not infer or invent information beyond the input provided.
+- Ensure the final output is coherent, error-free, and preserves the intended meaning.
+- The user may provide specific instructions for corrections. Implement these precisely. For example: "Remove the last occurrence of the word 'export'."
+- Additionally, the user may request the removal of unnecessary words from sentences, requiring precise adjustments to ensure clarity and brevity.
+
+**Final Refinement:**
+
+- At the end of the interaction, analyze the given phrase to confirm its coherence and clarity.
+- Adjust the text to align accurately with the user's intended meaning, prioritizing clarity and precision, while preserving the original wording unless it is essential to replace terms for proper context or meaning.