Skip to content

Commit

Permalink
Add the dictate command
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Dec 18, 2024
1 parent 577205a commit d661a58
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 64 deletions.
7 changes: 7 additions & 0 deletions src/main/askai/core/commander/commander.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,13 @@ def tts(text: str, dest_dir: str | None = None, playback: bool = True) -> None:
TtsSttCmd.tts(text.strip(), dirname(dest_dir), playback)


@ask_commander.command()
@click.argument("dest_file", default="")
def dictate(dest_file: str | None) -> None:
    """Transcribe dictated speech and, optionally, save the resulting text to a file.
    :param dest_file: The file where the dictated text will be saved (optional; nothing is saved when omitted).
    """
    # The docstring above doubles as the command's help text, so it must describe the command.
    TtsSttCmd.dictate(dest_file)


@ask_commander.command()
@click.argument("folder")
@click.argument("glob", default="**/*")
Expand Down
88 changes: 45 additions & 43 deletions src/main/askai/core/commander/commands/tts_stt_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,32 @@
# -*- coding: utf-8 -*-

"""
@project: HsPyLib-AskAI
@package: askai.core.commander.tts_stt_cmd
@file: tts_stt_cmd.py
@created: Thu, 25 Apr 2024
@author: <B>H</B>ugo <B>S</B>aporetti <B>J</B>unior
@site: https://github.com/yorevs/askai
@license: MIT - Please refer to <https://opensource.org/licenses/MIT>
Copyright (c) 2024, AskAI
@project: HsPyLib-AskAI
@package: askai.core.commander.tts_stt_cmd
@file: tts_stt_cmd.py
@created: Thu, 25 Apr 2024
@author: <B>H</B>ugo <B>S</B>aporetti <B>J</B>unior
@site: https://github.com/yorevs/askai
@license: MIT - Please refer to <https://opensource.org/licenses/MIT>
Copyright (c) 2024, AskAI
"""
from abc import ABC
from pathlib import Path
import os
from textwrap import dedent

from hspylib.core.metaclass.classpath import AnyPath
from clitt.core.tui.mselect.mselect import mselect
import pause

from askai.core.askai_configs import configs
from askai.core.askai_settings import settings
from askai.core.component.audio_player import player
from askai.core.component.recorder import InputDevice, recorder
from askai.core.support.shared_instances import shared
from askai.core.support.text_formatter import text_formatter
from askai.core.support.utilities import copy_file
from clitt.core.tui.mselect.mselect import mselect
from pathlib import Path

import os
import pause


class TtsSttCmd(ABC):
Expand Down Expand Up @@ -52,9 +55,7 @@ def voice_set(name_or_index: str | int | None = None) -> None:
if name_or_index in all_voices:
settings.put("openai.text.to.speech.voice", name_or_index)
shared.engine.configs().tts_voice = name_or_index
text_formatter.commander_print(
f"`Speech-To-Text` voice changed to %GREEN%{name_or_index.title()}%NC%"
)
text_formatter.commander_print(f"`Speech-To-Text` voice changed to %GREEN%{name_or_index.title()}%NC%")
else:
text_formatter.commander_print(f"%RED%Invalid voice: '{name_or_index}'%NC%")

Expand Down Expand Up @@ -83,16 +84,10 @@ def tempo(speed: int | None = None) -> None:
elif 1 <= speed <= 3:
settings.put("askai.text.to.speech.tempo", speed)
configs.tempo = speed
tempo_str: str = (
"Normal" if speed == 1 else ("Fast" if speed == 2 else "Ultra")
)
text_formatter.commander_print(
f"`Speech-To-Text` **tempo** changed to %GREEN%{tempo_str} ({speed})%NC%"
)
tempo_str: str = "Normal" if speed == 1 else ("Fast" if speed == 2 else "Ultra")
text_formatter.commander_print(f"`Speech-To-Text` **tempo** changed to %GREEN%{tempo_str} ({speed})%NC%")
else:
text_formatter.commander_print(
f"%RED%Invalid tempo value: '{speed}'. Please choose between [1..3].%NC%"
)
text_formatter.commander_print(f"%RED%Invalid tempo value: '{speed}'. Please choose between [1..3].%NC%")

@staticmethod
def device_list() -> None:
Expand All @@ -116,13 +111,9 @@ def device_set(name_or_index: str | int | None = None) -> None:

def _set_device(_device) -> bool:
if recorder.set_device(_device):
text_formatter.commander_print(
f"`Text-To-Speech` device changed to %GREEN%{_device}%NC%"
)
text_formatter.commander_print(f"`Text-To-Speech` device changed to %GREEN%{_device}%NC%")
return True
text_formatter.commander_print(
f"%HOM%%ED2%Error: '{_device}' is not an Audio Input device!%NC%"
)
text_formatter.commander_print(f"%HOM%%ED2%Error: '{_device}' is not an Audio Input device!%NC%")
all_devices.remove(_device)
pause.seconds(2)
return False
Expand All @@ -136,9 +127,7 @@ def _set_device(_device) -> bool:
name_or_index = all_devices[int(name_or_index)][1]
device = next((dev for dev in all_devices if dev[1] == name_or_index), None)
if not (device and _set_device(device)):
text_formatter.commander_print(
f"%RED%Invalid audio input device: '{name_or_index}'%NC%"
)
text_formatter.commander_print(f"%RED%Invalid audio input device: '{name_or_index}'%NC%")

@staticmethod
def tts(text: str, dest_dir: str = os.getcwd(), playback: bool = True) -> None:
Expand All @@ -147,15 +136,28 @@ def tts(text: str, dest_dir: str = os.getcwd(), playback: bool = True) -> None:
:param dest_dir: The directory where the audio file will be saved (default is the current working directory).
:param playback: Whether to play back the generated speech after conversion (default is True).
"""
if (
audio_path := shared.engine.text_to_speech(
text, stream=False, playback=playback
)
) and audio_path.exists():
if (audio_path := shared.engine.text_to_speech(text, stream=False, playback=playback)) and audio_path.exists():
if dest_dir and ((dest_path := Path(dest_dir)) and dest_path.exists()):
audio_path = copy_file(audio_path, dest_dir)
text_formatter.commander_print(
f"File %GREEN%'{audio_path}' was successfully saved!%NC%"
)
text_formatter.commander_print(f"File %GREEN%'{audio_path}' was successfully saved!%NC%")
else:
text_formatter.commander_print(f"%RED%Unable to convert text to file !%NC%")

@staticmethod
def dictate(dest_file: str | None) -> None:
    """Record a dictation, print the transcribed text and, optionally, save it to a file.
    :param dest_file: The file where the dictated text will be saved. When None or empty, the text is only
                      printed. A bare file name (no directory part) is saved in the current working directory.
    """
    dictated_text: str | None = recorder.dictate()
    if not dictated_text:
        text_formatter.commander_print("Dictation *produced no text* !")
        return

    report: list[str] = []
    if dest_file:
        # os.path.dirname() returns "" for a bare file name and os.path.exists("") is False, so fall back
        # to the current directory instead of silently skipping the save.
        parent_dir: str = os.path.dirname(dest_file) or os.curdir
        if os.path.isdir(parent_dir):
            with open(dest_file, "w", encoding="utf-8") as f_dictation:
                f_dictation.write(dictated_text + os.linesep)
            # Only claim the file was saved once the write has actually succeeded.
            report.append(f"Dictated text saved: *'{dest_file}'*")
        else:
            report.append(f"%RED%Unable to save dictated text, directory not found: '{parent_dir}'%NC%")

    # The message is assembled from already-formatted lines (no dedent after interpolation), so line
    # breaks inside the dictated text cannot disturb the layout.
    report.append(f"Dictated text: ` {dictated_text.strip()} `")
    text_formatter.commander_print("\n".join(report))
42 changes: 21 additions & 21 deletions src/main/askai/core/component/recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,20 @@
"""
import os
import threading
from pathlib import Path
from typing import Callable, Optional, TypeAlias
import logging as log
import operator
import sys

import pause
from hspylib.core.tools.commons import sysout
from hspylib.core.tools.text_tools import ensure_endswith

from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
from askai.core.component.cache_service import REC_DIR
from askai.core.component.scheduler import scheduler
from askai.core.model.ai_reply import AIReply
from askai.core.support.utilities import display_text, seconds
from askai.exception.exceptions import InvalidInputDevice, InvalidRecognitionApiError
from askai.language.language import Language
from hspylib.core.enums.enumeration import Enumeration
from hspylib.core.metaclass.classpath import AnyPath
from hspylib.core.metaclass.singleton import Singleton
from hspylib.core.preconditions import check_argument, check_state
from hspylib.core.zoned_datetime import now_ms
from hspylib.modules.application.exit_status import ExitStatus
from pathlib import Path
from speech_recognition import (
AudioData,
Microphone,
Expand All @@ -43,11 +36,16 @@
UnknownValueError,
WaitTimeoutError,
)
from typing import Callable, Optional, TypeAlias

import logging as log
import operator
import sys
from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
from askai.core.component.cache_service import REC_DIR
from askai.core.component.scheduler import scheduler
from askai.core.model.ai_reply import AIReply
from askai.core.support.utilities import display_text, seconds
from askai.exception.exceptions import InvalidInputDevice, InvalidRecognitionApiError
from askai.language.language import Language

InputDevice: TypeAlias = tuple[int, str]

Expand Down Expand Up @@ -248,13 +246,15 @@ def dictate(

while True:
_, phrase = self.listen(recognition_api, language, audio_path, False, False, msg.dictating())
if not phrase or phrase in [msg.t("quit"), msg.t("exit"), msg.t("bye")]:
break
else:
sysout(f" {('…' if dictated_text else '') + phrase}  ")
if phrase:
display_text(f" `{('…' if dictated_text else '') + phrase}`  ", markdown=True)
dictated_text += (". " if dictated_text else "") + phrase.capitalize()
if phrase.endswith((msg.t("quit"), msg.t("exit"), msg.t("bye"), msg.t("end"))):
break
else:
break

return ensure_endswith(dictated_text, "." + os.linesep) if dictated_text else dictated_text
return ensure_endswith(dictated_text.capitalize(), "." + os.linesep) if dictated_text else dictated_text

def _write_audio_file(
self,
Expand Down
1 change: 1 addition & 0 deletions src/main/askai/resources/prompts/dictation-refiner.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ You are a Text Refiner Assistant. Your task is to refine and correct dictated te
- Ensure the final output is coherent, error-free, and preserves the intended meaning.
- The user may provide specific instructions for corrections. Implement these precisely. For example: "Remove the last occurrence of the word 'export.'"
- Additionally, the user may request the removal of unnecessary words from sentences, requiring precise adjustments to ensure clarity and brevity.
- Remove any trailing "quit", "exit", "end", or "bye", since these are the hot-words used to terminate dictation.

**Final Refinement:**

Expand Down

0 comments on commit d661a58

Please sign in to comment.