Recorder, camera and demo fixes. Also, some new audio files

yorevs · Dec 18, 2024 · b8c134b · b8c134b
1 parent d661a58
commit b8c134b
Show file tree

Hide file tree

Showing 15 changed files with 227 additions and 136 deletions.
diff --git a/src/demo/components/camera_demo.py b/src/demo/components/camera_demo.py
@@ -57,7 +57,7 @@
             results: list[ImageMetadata] = store.query_face(query)
             for photo in results:
                 cursor.write()
-                cursor.write(f"Showing face: {photo.caption} URI: {photo.uri} DIST:", photo.distance)
+                cursor.write(f"Showing face: {photo.caption} URI: {photo.uri} DIST: {photo.distance}")
                 open_command(photo.uri)
         if opt == "5":
             count: int = store.sync_store(re_caption=False)

diff --git a/src/demo/devel/eleven_labs.py b/src/demo/devel/eleven_labs.py
@@ -0,0 +1,23 @@
+import os
+
+import requests
+
+# ElevenLabs API key, read from the environment; None when the variable is unset.
+ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+
+
+def get_available_voices(timeout: float = 30.0):
+    """Print the name and ID of every voice returned by the ElevenLabs voices endpoint.
+
+    Voices are printed one per line, sorted by name. When the request cannot be
+    completed or the response status is not 200, a diagnostic message is printed
+    instead.
+
+    :param timeout: Seconds to wait for the ElevenLabs API before giving up.
+    """
+    url = "https://api.elevenlabs.io/v1/voices"
+    headers = {
+        "xi-api-key": ELEVENLABS_API_KEY,
+    }
+    try:
+        # Without an explicit timeout, requests may wait indefinitely on a stalled connection.
+        response = requests.get(url, headers=headers, timeout=timeout)
+    except requests.RequestException as err:
+        # Connection errors and timeouts are reported the same way as HTTP failures.
+        print(f"Failed to retrieve voices: {err}")
+        return
+    if response.status_code != 200:
+        print(f"Failed to retrieve voices: {response.status_code}, {response.text}")
+        return
+    voices = response.json().get("voices", [])
+    for voice in sorted(voices, key=lambda v: v["name"]):
+        print(f"Name: {voice['name']}, ID: {voice['voice_id']}")
+
+
+# Run the voice listing only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    get_available_voices()
diff --git a/src/demo/devel/eleven_labs_recon.py b/src/demo/devel/eleven_labs_recon.py
@@ -0,0 +1,96 @@
+import os
+
+import speech_recognition as sr
+import requests
+from langchain import OpenAI, LLMChain, PromptTemplate
+
+# -------------------- Configuration --------------------
+
+# Project-local audio player, used to play the synthesized response.
+from askai.core.component.audio_player import player
+
+# API keys are read from the environment; each is None when its variable is unset.
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+
+# ElevenLabs voice ID, interpolated into the text-to-speech URL.
+# NOTE(review): presumably one of the IDs listed by the ElevenLabs /v1/voices endpoint; confirm.
+VOICE_ID = "4DHVFVPkvJPP4FNkikun"
+
+# File the synthesized audio is written to (a relative path, so it resolves against the current working directory)
+AUDIO_OUTPUT_PATH = "response_audio.mp3"
+
+# -------------------------------------------------------
+
+# Initialize LangChain with OpenAI. This runs at import time.
+# NOTE(review): confirm the installed LangChain `OpenAI` wrapper accepts a chat model name such as "gpt-4o-mini".
+llm = OpenAI(
+    openai_api_key=OPENAI_API_KEY,
+    model_name="gpt-4o-mini",  # You can choose other models like 'gpt-3.5-turbo'
+    temperature=0.7,  # Adjust for creativity
+)
+
+# Define a prompt template for LangChain; {input_text} is the only template variable.
+prompt_template = PromptTemplate(
+    input_variables=["input_text"],
+    template="You are a helpful assistant. Respond to the following input:\n\n{input_text}",
+)
+
+# Chain combining the LLM and the prompt; used by process_with_langchain().
+chain = LLMChain(llm=llm, prompt=prompt_template)
+
+
+def recognize_speech():
+    """Record one utterance from the microphone and transcribe it.
+
+    :return: The recognized text, or None when the audio could not be understood
+             or the recognition service could not be reached.
+    """
+    rec = sr.Recognizer()
+    with sr.Microphone() as mic:
+        print("Adjusting for ambient noise... Please wait.")
+        rec.adjust_for_ambient_noise(mic, duration=0.2)
+        print("Listening... Please speak into the microphone.")
+        captured = rec.listen(mic)
+    try:
+        # Transcription is delegated to Google Speech Recognition.
+        transcript = rec.recognize_google(captured)
+    except sr.UnknownValueError:
+        print("Google Speech Recognition could not understand audio.")
+        return None
+    except sr.RequestError as err:
+        print(f"Could not request results from Google Speech Recognition service; {err}")
+        return None
+    print(f"Recognized Text: {transcript}")
+    return transcript
+
+
+def process_with_langchain(input_text):
+    """Run the module-level LangChain chain on the given text.
+
+    :param input_text: The text handed to the chain.
+    :return: Whatever ``chain.run`` returns for that text.
+    """
+    print("Processing input with LangChain...")
+    answer = chain.run(input_text)
+    print(f"LangChain Response: {answer}")
+    return answer
+
+
+def synthesize_speech(text, timeout: float = 60.0):
+    """Convert text to speech with the ElevenLabs API, save the audio and play it.
+
+    On success the response body is written to AUDIO_OUTPUT_PATH and handed to the
+    audio player. When the request cannot be completed or the response status is
+    not 200, a diagnostic message is printed and nothing is written or played.
+
+    :param text: The text to synthesize.
+    :param timeout: Seconds to wait for the ElevenLabs API before giving up.
+    """
+    print("Synthesizing speech with ElevenLabs...")
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
+
+    headers = {
+        "Content-Type": "application/json",
+        "xi-api-key": ELEVENLABS_API_KEY,
+    }
+
+    data = {"text": text, "voice_settings": {"stability": 0.75, "similarity_boost": 0.75}}
+
+    try:
+        # Without an explicit timeout, requests may wait indefinitely on a stalled connection.
+        response = requests.post(url, headers=headers, json=data, timeout=timeout)
+    except requests.RequestException as err:
+        # Connection errors and timeouts are reported the same way as HTTP failures.
+        print(f"Failed to synthesize speech: {err}")
+        return
+
+    if response.status_code != 200:
+        print(f"Failed to synthesize speech: {response.status_code}, {response.text}")
+        return
+
+    # The response body is the encoded audio; write it as raw bytes.
+    with open(AUDIO_OUTPUT_PATH, "wb") as f:
+        f.write(response.content)
+    print(f"Audio synthesized and saved as {AUDIO_OUTPUT_PATH}")
+    # Play the audio
+    player.play_audio_file(AUDIO_OUTPUT_PATH)
+
+
+def main():
+    """Listen for speech and speak the recognized text back through ElevenLabs."""
+    spoken = recognize_speech()
+    if not spoken:
+        # Nothing was recognized, so there is nothing to synthesize.
+        return
+    # The LangChain step (process_with_langchain) is currently bypassed:
+    # the recognized text itself is synthesized.
+    synthesize_speech(spoken)
+
+
+# Run the recognize-and-speak demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/src/demo/devel/response_audio.mp3 b/src/demo/devel/response_audio.mp3
diff --git a/src/main/askai/core/component/audio_player.py b/src/main/askai/core/component/audio_player.py
@@ -2,15 +2,15 @@
 # -*- coding: utf-8 -*-
 
 """
-   @project: HsPyLib-AskAI
-   @package: askai.core.component
-      @file: audio_player.py
-   @created: Wed, 22 Feb 2024
-    @author: <B>H</B>ugo <B>S</B>aporetti <B>J</B>unior
-      @site: https://github.com/yorevs/askai
-   @license: MIT - Please refer to <https://opensource.org/licenses/MIT>
-
-   Copyright (c) 2024, AskAI
+@project: HsPyLib-AskAI
+@package: askai.core.component
+   @file: audio_player.py
+@created: Wed, 22 Feb 2024
+ @author: <B>H</B>ugo <B>S</B>aporetti <B>J</B>unior
+   @site: https://github.com/yorevs/askai
+@license: MIT - Please refer to <https://opensource.org/licenses/MIT>
+
+Copyright (c) 2024, AskAI
 """
 from askai.__classpath__ import classpath
 from clitt.core.term.terminal import Terminal
@@ -39,28 +39,44 @@ class AudioPlayer(metaclass=Singleton):
     SFX_DIR = str(classpath.resource_path) + "/sound-fx"
 
     @staticmethod
-    def play_audio_file(path_to_audio_file: str | Path, tempo: int = 1) -> bool:
+    def play_audio_file(path_to_audio_file: str | Path, tempo: int = 1, loop: float | None = None) -> bool:
         """Play the specified audio file using the ffplay (ffmpeg) application.
         :param path_to_audio_file: The path to the audio file (e.g., MP3) to be played.
         :param tempo: The playback speed (default is 1).
+        :param loop: Value passed to ffplay's ``-loop`` option; when None or 0 the option is omitted.
         :return: True if the audio file is played successfully, otherwise False.
         """
         if file_is_not_empty(str(path_to_audio_file)):
             try:
+                # A falsy loop (None or 0) adds no -loop argument to the command line.
+                loop_args = f"-loop {loop} " if loop else ""
                 _, _, code = Terminal.shell_exec(
-                    f'ffplay -af "atempo={tempo}" -v 0 -nodisp -autoexit {path_to_audio_file}'
+                    f'ffplay -af "atempo={tempo}" -v 0 -nodisp -autoexit {loop_args}{path_to_audio_file}'
                 )
                 return code == ExitStatus.SUCCESS
             except FileNotFoundError:
                 log.error("Audio file was not found: %s !", path_to_audio_file)
 
         return False
 
-    def __init__(self):
+    @staticmethod
+    def play_sfx(filename: str, file_ext: Literal[".mp3", ".wav", ".m4a"] = ".mp3", loop: float | None = None) -> bool:
+        """Play a sound effect audio file.
+        :param filename: The name of the sound effect file (without the extension).
+        :param file_ext: The file extension of the sound effect (default is ".mp3").
+        :param loop: Loop value forwarded unchanged to play_audio_file (None for no looping).
+        :return: True if the sound effect is played successfully, otherwise False.
+        """
+        filename = f"{AudioPlayer.SFX_DIR}/{ensure_endswith(filename, file_ext)}"
         check_argument(
-            which("ffplay") is not None, "ffmpeg::ffplay is required to play audio"
+            file_is_not_empty(filename),
+            f"Sound effects file does not exist: {filename}",
         )
 
+        return AudioPlayer.play_audio_file(filename, loop=loop)
+
+    def __init__(self):
+        check_argument(which("ffplay") is not None, "ffmpeg::ffplay is required to play audio")
+
     @lru_cache
     def start_delay(self) -> float:
         """Determine the amount of delay before start streaming the text."""
@@ -93,21 +109,5 @@ def audio_length(self, path_to_audio_file: str) -> float:
 
         return out
 
-    def play_sfx(
-        self, filename: str, file_ext: Literal[".mp3", ".wav", ".m4a"] = ".mp3"
-    ) -> bool:
-        """Play a sound effect audio file.
-        :param filename: The name of the sound effect file (without the extension).
-        :param file_ext: The file extension of the sound effect (default is ".mp3").
-        :return: True if the sound effect is played successfully, otherwise False.
-        """
-        filename = f"{self.SFX_DIR}/{ensure_endswith(filename, file_ext)}"
-        check_argument(
-            file_is_not_empty(filename),
-            f"Sound effects file does not exist: {filename}",
-        )
-
-        return self.play_audio_file(filename)
-
 
 assert (player := AudioPlayer().INSTANCE) is not None