Fix the webcam capturer
yorevs committed Nov 19, 2024
1 parent b81b36c commit 92f54f0
Showing 10 changed files with 61 additions and 88 deletions.
4 changes: 2 additions & 2 deletions src/demo/components/webcam_demo.py
@@ -21,6 +21,6 @@
sysout("-=" * 40)
sysout("AskAI WebCam Demo")
sysout("-=" * 40)
# info: str = webcam_capturer("hugo", True)
info: str = webcam_identifier()
info: str = webcam_capturer("hugo", True, "Is the person happy?")
# info: str = webcam_identifier()
sysout(info, markdown=True)
2 changes: 1 addition & 1 deletion src/main/askai/core/model/image_result.py
@@ -11,7 +11,7 @@ class ImageResult(BaseModel):
main_objects: list[str] = Field(description="List of the main objects on the picture")
env_description: str = Field(description="Description of the atmosphere of the environment")
people_description: list[str] = Field(description="List of people description")
user_response: list[str] = Field(description="A response to the user question")
user_response: str = Field(description="A response to the user question")

@staticmethod
def of(image_caption: AnyStr) -> "ImageResult":
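For context, a minimal sketch of what the narrowed schema means for callers: `user_response` is now a single string rather than a list. The field values and the pydantic-v2 JSON parsing below are illustrative assumptions; the project's own `ImageResult.of` factory may differ.

```python
# Hypothetical caption payload; pydantic v2 API (model_validate_json) assumed.
from pydantic import BaseModel, Field

class ImageResultSketch(BaseModel):
    main_objects: list[str] = Field(description="List of the main objects on the picture")
    env_description: str = Field(description="Description of the atmosphere of the environment")
    people_description: list[str] = Field(description="List of people description")
    user_response: str = Field(description="A response to the user question")

caption = (
    '{"main_objects": ["desk", "lamp"], '
    '"env_description": "a bright office", '
    '"people_description": ["a smiling person wearing glasses"], '
    '"user_response": "Yes, the person appears happy."}'
)
result = ImageResultSketch.model_validate_json(caption)
print(result.user_response)  # a single answer string, no longer a list
```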
29 changes: 14 additions & 15 deletions src/main/askai/core/processors/splitter/splitter_actions.py
@@ -12,6 +12,16 @@
Copyright (c) 2024, HomeSetup
"""
from pathlib import Path
from types import SimpleNamespace
from typing import Optional
import logging as log

from hspylib.core.metaclass.singleton import Singleton
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables import RunnableWithMessageHistory, Runnable

from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
@@ -25,21 +35,11 @@
from askai.core.model.action_plan import ActionPlan
from askai.core.model.ai_reply import AIReply
from askai.core.model.model_result import ModelResult
from askai.core.router.agent_tools import features
from askai.core.router.task_agent import agent
from askai.core.router.tools.general import final_answer
from askai.core.support.langchain_support import lc_llm
from askai.core.support.shared_instances import shared
from askai.core.support.text_formatter import text_formatter
from hspylib.core.metaclass.singleton import Singleton
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables import RunnableWithMessageHistory
from pathlib import Path
from types import SimpleNamespace
from typing import Optional

import logging as log


class SplitterActions(metaclass=Singleton):
@@ -125,7 +125,7 @@ def splitter_template(self, query: str) -> ChatPromptTemplate:

evaluation: str = str(shared.context.flat("EVALUATION"))
template = PromptTemplate(
input_variables=["os_type", "shell", "datetime", "home", "agent_tools", "rag"],
input_variables=["os_type", "shell", "datetime", "home", "rag"],
template=prompt.read_prompt("task-splitter.txt"),
)

@@ -138,7 +138,6 @@ def splitter_template(self, query: str) -> ChatPromptTemplate:
shell=prompt.shell,
datetime=geo_location.datetime,
home=Path.home(),
agent_tools=features.available_tools,
rag=self._rag.get_rag_examples(query),
),
),
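As a standalone illustration of the trimmed template inputs (the `agent_tools` variable is gone, so the prompt no longer interpolates a tool listing), here is a hedged sketch; the inline template string is only a stand-in for `resources/prompts/task-splitter.txt`.

```python
# Sketch only: the real template text is loaded from task-splitter.txt.
from pathlib import Path
from langchain_core.prompts import PromptTemplate

template = PromptTemplate(
    input_variables=["os_type", "shell", "datetime", "home", "rag"],
    template="OS: {os_type} ({shell}) | Today: {datetime} | Home: {home}\nExamples:\n{rag}",
)
print(template.format(
    os_type="linux", shell="bash", datetime="2024-11-19 10:00",
    home=Path.home(), rag="- locate the photo\n- describe the photo",
))
```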
@@ -155,11 +154,11 @@ def split(self, question: str, model: ModelResult = ModelResult.default()) -> Op
:return: An optional ActionPlan generated from the provided question.
"""

runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp)
runnable = RunnableWithMessageHistory(
response: AIMessage
runnable: Runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp)
runnable: Runnable = RunnableWithMessageHistory(
runnable, shared.context.flat, input_messages_key="input", history_messages_key="chat_history"
)
response: AIMessage
if response := runnable.invoke({"input": question}, config={"configurable": {"session_id": "HISTORY"}}):
answer: str = str(response.content)
log.info("Router::[RESPONSE] Received from AI: \n%s.", answer)
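For readers unfamiliar with the pattern above, here is a self-contained sketch of piping a prompt into a chat model and wrapping it in `RunnableWithMessageHistory`. `ChatOpenAI` and the in-memory history store are assumptions for illustration; askai instead wires in its own LLM factory (`lc_llm`) and `shared.context.flat` as the history provider.

```python
# Minimal chain-with-history sketch (assumes recent langchain-core / langchain-openai).
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import Runnable
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_messages([
    ("system", "Split the user's request into numbered, single-action tasks."),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

store: dict[str, InMemoryChatMessageHistory] = {}

def get_history(session_id: str) -> InMemoryChatMessageHistory:
    # One history object per session id, created on demand.
    return store.setdefault(session_id, InMemoryChatMessageHistory())

runnable: Runnable = prompt | ChatOpenAI(temperature=0.0)
chain = RunnableWithMessageHistory(
    runnable, get_history, input_messages_key="input", history_messages_key="chat_history"
)
response = chain.invoke(
    {"input": "Take a webcam photo and tell me if the person looks happy"},
    config={"configurable": {"session_id": "HISTORY"}},
)
print(response.content)
```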
17 changes: 9 additions & 8 deletions src/main/askai/core/processors/splitter/splitter_executor.py
@@ -35,6 +35,15 @@
class SplitterExecutor(Thread):
"""Responsible for executing a TaskSplitter pipeline."""

@staticmethod
def display(text: str, force: bool = False) -> None:
"""Display a debug message if debugging mode is active.
:param text: The debug message to display
:param force: Force displaying the message regardless of the debug flag.
"""
if force or is_debugging():
tf.console.print(Text.from_markup(text))

def __init__(self, query: str):
super().__init__()
self._pipeline = SplitterPipeline(query)
@@ -45,14 +54,6 @@ def __init__(self, query: str):
def pipeline(self) -> SplitterPipeline:
return self._pipeline

def display(self, text: str, force: bool = False) -> None:
"""Display a debug message if debugging mode is active.
:param text: The debug message to display
:param force: Force displaying the message regardless of the debug flag.
"""
if force or is_debugging():
tf.console.print(Text.from_markup(text))

def interrupt(self, ev: Event) -> None:
"""Interrupt the active execution pipeline.
:param ev: The interruption event,
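The `display` helper moved from an instance method to a `@staticmethod`, which fits because it touches no executor state. A hedged sketch of the resulting call pattern follows; the rich console wiring and the `is_debugging` stub are assumptions of this sketch, not askai's exact code.

```python
# Sketch: a stateless debug printer exposed as a staticmethod.
from rich.console import Console
from rich.text import Text

console = Console()

def is_debugging() -> bool:
    return True  # stand-in for askai's real debug flag

class ExecutorSketch:
    @staticmethod
    def display(text: str, force: bool = False) -> None:
        """Print a markup-formatted debug message when debugging (or forced)."""
        if force or is_debugging():
            console.print(Text.from_markup(text))

# Callable without constructing an executor instance:
ExecutorSketch.display("[green]pipeline started[/green]", force=True)
```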
3 changes: 2 additions & 1 deletion src/main/askai/core/processors/task_splitter.py
@@ -39,7 +39,8 @@ class TaskSplitter(metaclass=Singleton):
InaccurateResponse,
InvalidArgumentError,
ValidationError
) # fmt: on
)
# fmt: on

def process(self, question: str, **_) -> Optional[str]:
"""Process the user question by splitting complex tasks into smaller single actionable tasks.
18 changes: 9 additions & 9 deletions src/main/askai/core/router/agent_tools.py
@@ -133,20 +133,20 @@ def image_captioner(self, image_path: str) -> str:
image_path=image_path, image_caption=os.linesep.join(image_caption) if image_caption else ""
)

def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, query: str | None = None) -> str:
"""Capture a photo using the webcam, and save it locally. This tool is useful for taking photos, detect people's
faces, and, describing what is in front of the webcam.
Usage: `webcam_capturer(photo_name, detect_faces)`
:param photo_name: The name of the photo file (without the extension). If None, a default name will be used.
def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, question: str | None = None) -> str:
"""Capture a photo via the webcam, and save it locally. Also provide a description of the objects and people
depicted in the picture. An additional question may address specific details regarding the photo.
Usage: `webcam_capturer(photo_name, detect_faces, question)`
:param photo_name: Optional name of the photo image file (without the extension).
:param detect_faces: Whether to detect and describe all faces in the photo (default is False).
:param query: Optional query about the photo taken.
:param question: Optional specific question about the photo taken (default is None).
:return: The file path of the saved JPEG image.
"""
return webcam_capturer(photo_name, detect_faces, query)
return webcam_capturer(photo_name, detect_faces, question)

def webcam_identifier(self) -> str:
"""Identify the person in front of the webcam using a pre-stored set of faces and provide a description. This
tool is useful for recognizing individuals and generating descriptions based on pre-stored facial data.
"""Capture a photo via the webcam and compare it to a pre-stored set of images to determine if the current
subject matches any stored faces.
Usage: `webcam_identifier()`
:return: A string containing the identification and description of the person.
"""
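A hypothetical direct call with the renamed third parameter (`query` became `question`): the argument values mirror the demo at the top of this commit, and `features` is assumed to be the agent-tools singleton imported elsewhere in the repository.

```python
# Illustrative only: takes a photo named "hugo", runs face detection,
# and asks a follow-up question about the captured frame.
from askai.core.router.agent_tools import features

info: str = features.webcam_capturer("hugo", detect_faces=True,
                                     question="Is the person happy?")
print(info)  # markdown description of the scene plus the answer
```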
5 changes: 4 additions & 1 deletion src/main/askai/core/router/task_agent.py
@@ -13,8 +13,11 @@
Copyright (c) 2024, HomeSetup
"""
from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
from askai.core.askai_prompt import prompt
from askai.core.engine.openai.temperature import Temperature
from askai.core.model.ai_reply import AIReply
from askai.core.router.agent_tools import features
from askai.core.support.langchain_support import lc_llm
from askai.core.support.shared_instances import shared
@@ -62,7 +65,7 @@ def invoke(self, task: str) -> Optional[str]:
:return: The agent's response as a string.
"""
output: str | None = None
# events.reply.emit(reply=AIReply.debug(msg.task(task)))
events.reply.emit(reply=AIReply.debug(msg.task(task)))
shared.context.push("HISTORY", task, "assistant")
if (response := self._exec_task(task)) and (output := response["output"]):
log.info("Router::[RESPONSE] Received from AI: \n%s.", output)
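The previously commented-out debug emission is now active, so each routed task is echoed through the reply event bus. A rough, self-contained sketch of that publish/subscribe shape follows; the event bus and the `AIReply.debug` stand-in below are assumptions, not askai's actual events implementation.

```python
# Stand-in event bus: askai's events/AIReply differ, this only shows the shape.
from dataclasses import dataclass
from typing import Callable

@dataclass
class AIReply:
    message: str
    is_debug: bool = False

    @staticmethod
    def debug(message: str) -> "AIReply":
        return AIReply(message=message, is_debug=True)

_listeners: list[Callable[[AIReply], None]] = []

def emit(reply: AIReply) -> None:
    for listener in _listeners:
        listener(reply)

_listeners.append(lambda r: print(f"[debug] {r.message}" if r.is_debug else r.message))
emit(AIReply.debug("Executing task: capture a webcam photo"))
```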
11 changes: 8 additions & 3 deletions src/main/askai/core/router/tools/vision.py
@@ -102,16 +102,21 @@ def parse_caption(image_caption: str) -> list[str]:
if image_caption:
result: ImageResult = ImageResult.of(image_caption)
ln: str = os.linesep
people_desc: str = ""
people_desc: list[str] = []
user_response_desc: list[str] = []
if result.people_description:
people_desc: list[str] = [
people_desc = [
f"- **People:** `({result.people_count})`",
indent(f"- {'- '.join([f'`{ppl}{ln}`' + ln for ppl in result.people_description])}", " "),
]
if result.user_response:
user_response_desc = [f"- **Answer**: `{result.user_response}`"]
# fmt: off
return [
f"- **Description:** `{result.env_description}`",
f"- **Objects:** `{', '.join(result.main_objects)}`",
] + people_desc
] + people_desc + user_response_desc
# fmt: on

return [msg.no_caption()]

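To visualize what the reworked `parse_caption` now returns, here is a sketch with illustrative values; the new `- **Answer**` bullet only appears when the model produced a `user_response`.

```python
# Illustrative values only; mirrors the list assembly in the diff above.
import os
from textwrap import indent

ln = os.linesep
people = ["a smiling person wearing glasses"]
people_desc = [
    f"- **People:** `({len(people)})`",
    indent(f"- {'- '.join([f'`{p}{ln}`' + ln for p in people])}", "  "),
]
user_response_desc = ["- **Answer**: `Yes, the person appears happy.`"]
caption_lines = [
    "- **Description:** `a bright, tidy office`",
    "- **Objects:** `desk, lamp`",
] + people_desc + user_response_desc
print(ln.join(caption_lines))
```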
48 changes: 6 additions & 42 deletions src/main/askai/resources/prompts/task-splitter.txt
@@ -15,7 +15,7 @@ When the context of the question is not explicit, refer to past events to clarif
Today is "{datetime}". Use this information if it is relevant to the response.


Guidelines (in order) to break down complex tasks:
**Guidelines (in order) to break down complex tasks**:

Step 1. Identify the primary goal and all sub-goals. For each goal and sub-goal, create one task to address it. Typically, the number of tasks will match the primary goal plus the sub-goals. Ensure that the tasks are specific, actionable, and aligned with the identified goals.

@@ -27,37 +27,6 @@ Step 4. Provide direct instructions without explaining its purpose.

Step 5. Separate tasks clearly, avoiding the use of conjunctions like 'and', 'or' and implicit multi-step processes within a single task.

Step 6. Follow this workflow to guide you in breaking down the tasks:

"""
Group 0. Chat History and Context
- Recall the chat history and context for tasks already performed to avoid wasting resources and time.
Group 1. Locate, List, Generate, or Summarize
- Locate resources (e.g., files and folders. Use the chat history; prefer 'find' over 'search').
- List the contents of folders and sub-folders (specify the max recursion depth).
- Summarize files and folders (only upon explicit user request).
- Generate content by giving the prompt (instructions).
Group 2. Open, Read, Save
- Open necessary files (playback included).
- Read (show) file or folder contents.
- Save generated content.
Group 3. Identify, or Extract
- Identify text or resources (like files and folders).
- Extract required data/text (explain how to extract).
Group 4. Analyze or Display
- Analyze output, content, or extracted data/text.
- Display the information and details to the human.
Group 5. Revise and Refine
- Revise your task list, ensuring tasks for the primary goal and sub-goals.
- Refine your task list, providing a direct and explicit answer to the user's question.

Notes:

- Actions within the same group must not be repeated as they will produce the same result.
- Ensure that your answer is unbiased and does not rely on stereotypes.
- Think step by step.
"""


**Task Requirements:**

@@ -94,23 +63,18 @@
6. Terminal access (execute {shell} commands).


**You have access to the following Tools:**

---
{agent_tools}
---


**Retrieval-Augmented Generation:**

The following examples can be used to help your decisions (note that it is not a source of truth).
The following examples can be used to help your decisions.

---
{rag}
---


The response should follow this format:
**Final Response Format:**

The final response should follow this format:


@thought: "<your thoughts>"
@@ -135,4 +99,4 @@ The response should follow this format:
**THE RESPONSE FORMAT IS CRUCIAL, ALTERING IT WILL CAUSE THE PARSER TO FAIL.**


Begin splittings the tasks!
Begin!