From d661a58a545d10208a25e04e841cc1af06b582fd Mon Sep 17 00:00:00 2001 From: Hugo Saporetti Junior Date: Wed, 18 Dec 2024 16:32:11 -0300 Subject: [PATCH] Add the dictate command --- src/main/askai/core/commander/commander.py | 7 ++ .../core/commander/commands/tts_stt_cmd.py | 88 ++++++++++--------- src/main/askai/core/component/recorder.py | 42 ++++----- .../resources/prompts/dictation-refiner.txt | 1 + 4 files changed, 74 insertions(+), 64 deletions(-) diff --git a/src/main/askai/core/commander/commander.py b/src/main/askai/core/commander/commander.py index 789403d3..b470775e 100644 --- a/src/main/askai/core/commander/commander.py +++ b/src/main/askai/core/commander/commander.py @@ -386,6 +386,13 @@ def tts(text: str, dest_dir: str | None = None, playback: bool = True) -> None: TtsSttCmd.tts(text.strip(), dirname(dest_dir), playback) +@ask_commander.command() +@click.argument("dest_file", default="") +def dictate(dest_file: str | None) -> None: + """TODO""" + TtsSttCmd.dictate(dest_file) + + @ask_commander.command() @click.argument("folder") @click.argument("glob", default="**/*") diff --git a/src/main/askai/core/commander/commands/tts_stt_cmd.py b/src/main/askai/core/commander/commands/tts_stt_cmd.py index 4eed30eb..8a9b5558 100644 --- a/src/main/askai/core/commander/commands/tts_stt_cmd.py +++ b/src/main/askai/core/commander/commands/tts_stt_cmd.py @@ -2,17 +2,25 @@ # -*- coding: utf-8 -*- """ - @project: HsPyLib-AskAI - @package: askai.core.commander.tts_stt_cmd - @file: tts_stt_cmd.py - @created: Thu, 25 Apr 2024 - @author: Hugo Saporetti Junior - @site: https://github.com/yorevs/askai - @license: MIT - Please refer to - - Copyright (c) 2024, AskAI +@project: HsPyLib-AskAI +@package: askai.core.commander.tts_stt_cmd + @file: tts_stt_cmd.py +@created: Thu, 25 Apr 2024 + @author: Hugo Saporetti Junior + @site: https://github.com/yorevs/askai +@license: MIT - Please refer to + +Copyright (c) 2024, AskAI """ from abc import ABC +from pathlib import Path +import os +from textwrap import dedent + +from hspylib.core.metaclass.classpath import AnyPath +from clitt.core.tui.mselect.mselect import mselect +import pause + from askai.core.askai_configs import configs from askai.core.askai_settings import settings from askai.core.component.audio_player import player @@ -20,11 +28,6 @@ from askai.core.support.shared_instances import shared from askai.core.support.text_formatter import text_formatter from askai.core.support.utilities import copy_file -from clitt.core.tui.mselect.mselect import mselect -from pathlib import Path - -import os -import pause class TtsSttCmd(ABC): @@ -52,9 +55,7 @@ def voice_set(name_or_index: str | int | None = None) -> None: if name_or_index in all_voices: settings.put("openai.text.to.speech.voice", name_or_index) shared.engine.configs().tts_voice = name_or_index - text_formatter.commander_print( - f"`Speech-To-Text` voice changed to %GREEN%{name_or_index.title()}%NC%" - ) + text_formatter.commander_print(f"`Speech-To-Text` voice changed to %GREEN%{name_or_index.title()}%NC%") else: text_formatter.commander_print(f"%RED%Invalid voice: '{name_or_index}'%NC%") @@ -83,16 +84,10 @@ def tempo(speed: int | None = None) -> None: elif 1 <= speed <= 3: settings.put("askai.text.to.speech.tempo", speed) configs.tempo = speed - tempo_str: str = ( - "Normal" if speed == 1 else ("Fast" if speed == 2 else "Ultra") - ) - text_formatter.commander_print( - f"`Speech-To-Text` **tempo** changed to %GREEN%{tempo_str} ({speed})%NC%" - ) + tempo_str: str = "Normal" if speed == 1 else ("Fast" if speed == 2 else "Ultra") + text_formatter.commander_print(f"`Speech-To-Text` **tempo** changed to %GREEN%{tempo_str} ({speed})%NC%") else: - text_formatter.commander_print( - f"%RED%Invalid tempo value: '{speed}'. Please choose between [1..3].%NC%" - ) + text_formatter.commander_print(f"%RED%Invalid tempo value: '{speed}'. Please choose between [1..3].%NC%") @staticmethod def device_list() -> None: @@ -116,13 +111,9 @@ def device_set(name_or_index: str | int | None = None) -> None: def _set_device(_device) -> bool: if recorder.set_device(_device): - text_formatter.commander_print( - f"`Text-To-Speech` device changed to %GREEN%{_device}%NC%" - ) + text_formatter.commander_print(f"`Text-To-Speech` device changed to %GREEN%{_device}%NC%") return True - text_formatter.commander_print( - f"%HOM%%ED2%Error: '{_device}' is not an Audio Input device!%NC%" - ) + text_formatter.commander_print(f"%HOM%%ED2%Error: '{_device}' is not an Audio Input device!%NC%") all_devices.remove(_device) pause.seconds(2) return False @@ -136,9 +127,7 @@ def _set_device(_device) -> bool: name_or_index = all_devices[int(name_or_index)][1] device = next((dev for dev in all_devices if dev[1] == name_or_index), None) if not (device and _set_device(device)): - text_formatter.commander_print( - f"%RED%Invalid audio input device: '{name_or_index}'%NC%" - ) + text_formatter.commander_print(f"%RED%Invalid audio input device: '{name_or_index}'%NC%") @staticmethod def tts(text: str, dest_dir: str = os.getcwd(), playback: bool = True) -> None: @@ -147,15 +136,28 @@ def tts(text: str, dest_dir: str = os.getcwd(), playback: bool = True) -> None: :param dest_dir: The directory where the audio file will be saved (default is the current working directory). :param playback: Whether to play back the generated speech after conversion (default is True). """ - if ( - audio_path := shared.engine.text_to_speech( - text, stream=False, playback=playback - ) - ) and audio_path.exists(): + if (audio_path := shared.engine.text_to_speech(text, stream=False, playback=playback)) and audio_path.exists(): if dest_dir and ((dest_path := Path(dest_dir)) and dest_path.exists()): audio_path = copy_file(audio_path, dest_dir) - text_formatter.commander_print( - f"File %GREEN%'{audio_path}' was successfully saved!%NC%" - ) + text_formatter.commander_print(f"File %GREEN%'{audio_path}' was successfully saved!%NC%") else: text_formatter.commander_print(f"%RED%Unable to convert text to file !%NC%") + + @staticmethod + def dictate(dest_file: str | None) -> None: + """TODO""" + file_info: str = f"Dictated text saved: *'{dest_file}'*" if dest_file else "" + dictated_text: str | None = recorder.dictate() + if dictated_text: + if dest_file and os.path.exists(os.path.dirname(dest_file)): + with open(dest_file, "w") as f_dictation: + f_dictation.write(dictated_text + os.linesep) + # fmt: off + text_formatter.commander_print( + dedent(f"""\ + {file_info} + Dictated text: ` {dictated_text} ` + """).strip()) + # fmt: on + else: + text_formatter.commander_print(f"Dictation *produced no text* !") diff --git a/src/main/askai/core/component/recorder.py b/src/main/askai/core/component/recorder.py index 09a6ea1d..ad7a0177 100644 --- a/src/main/askai/core/component/recorder.py +++ b/src/main/askai/core/component/recorder.py @@ -14,27 +14,20 @@ """ import os import threading +from pathlib import Path +from typing import Callable, Optional, TypeAlias +import logging as log +import operator +import sys import pause -from hspylib.core.tools.commons import sysout from hspylib.core.tools.text_tools import ensure_endswith - -from askai.core.askai_configs import configs -from askai.core.askai_events import events -from askai.core.askai_messages import msg -from askai.core.component.cache_service import REC_DIR -from askai.core.component.scheduler import scheduler -from askai.core.model.ai_reply import AIReply -from askai.core.support.utilities import display_text, seconds -from askai.exception.exceptions import InvalidInputDevice, InvalidRecognitionApiError -from askai.language.language import Language from hspylib.core.enums.enumeration import Enumeration from hspylib.core.metaclass.classpath import AnyPath from hspylib.core.metaclass.singleton import Singleton from hspylib.core.preconditions import check_argument, check_state from hspylib.core.zoned_datetime import now_ms from hspylib.modules.application.exit_status import ExitStatus -from pathlib import Path from speech_recognition import ( AudioData, Microphone, @@ -43,11 +36,16 @@ UnknownValueError, WaitTimeoutError, ) -from typing import Callable, Optional, TypeAlias -import logging as log -import operator -import sys +from askai.core.askai_configs import configs +from askai.core.askai_events import events +from askai.core.askai_messages import msg +from askai.core.component.cache_service import REC_DIR +from askai.core.component.scheduler import scheduler +from askai.core.model.ai_reply import AIReply +from askai.core.support.utilities import display_text, seconds +from askai.exception.exceptions import InvalidInputDevice, InvalidRecognitionApiError +from askai.language.language import Language InputDevice: TypeAlias = tuple[int, str] @@ -248,13 +246,15 @@ def dictate( while True: _, phrase = self.listen(recognition_api, language, audio_path, False, False, msg.dictating()) - if not phrase or phrase in [msg.t("quit"), msg.t("exit"), msg.t("bye")]: - break - else: - sysout(f" {('…' if dictated_text else '') + phrase}  ") + if phrase: + display_text(f" `{('…' if dictated_text else '') + phrase}`  ", markdown=True) dictated_text += (". " if dictated_text else "") + phrase.capitalize() + if phrase.endswith((msg.t("quit"), msg.t("exit"), msg.t("bye"), msg.t("end"))): + break + else: + break - return ensure_endswith(dictated_text, "." + os.linesep) if dictated_text else dictated_text + return ensure_endswith(dictated_text.capitalize(), "." + os.linesep) if dictated_text else dictated_text def _write_audio_file( self, diff --git a/src/main/askai/resources/prompts/dictation-refiner.txt b/src/main/askai/resources/prompts/dictation-refiner.txt index 02b108bb..dad42828 100644 --- a/src/main/askai/resources/prompts/dictation-refiner.txt +++ b/src/main/askai/resources/prompts/dictation-refiner.txt @@ -17,6 +17,7 @@ You are a Text Refiner Assistant. Your task is to refine and correct dictated te - Ensure the final output is coherent, error-free, and preserves the intended meaning. - The user may provide specific instructions for corrections. Implement these precisely. For example: "Remove the last occurrence of the word 'export.'" - Additionally, the user may request the removal of unnecessary words from sentences, requiring precise adjustments to ensure clarity and brevity. +- Remove any ending "quit," "exit," "end," or "bye," as these are hot-words for terminating dictation. **Final Refinement:**