From 92f54f0677544b1d40d02cadbf87e682fb871168 Mon Sep 17 00:00:00 2001 From: Hugo Saporetti Junior Date: Tue, 19 Nov 2024 19:13:51 -0300 Subject: [PATCH] Fix the webcam capturer --- src/demo/components/webcam_demo.py | 4 +- src/main/askai/core/model/image_result.py | 2 +- .../processors/splitter/splitter_actions.py | 29 ++++++----- .../processors/splitter/splitter_executor.py | 17 +++---- .../askai/core/processors/task_splitter.py | 3 +- src/main/askai/core/router/agent_tools.py | 18 +++---- src/main/askai/core/router/task_agent.py | 5 +- src/main/askai/core/router/tools/vision.py | 11 +++-- .../askai/resources/prompts/task-splitter.txt | 48 +++---------------- .../askai/resources/rag/task-splitter.csv | 12 ++--- 10 files changed, 61 insertions(+), 88 deletions(-) diff --git a/src/demo/components/webcam_demo.py b/src/demo/components/webcam_demo.py index 290b967d..d4b6eaf9 100644 --- a/src/demo/components/webcam_demo.py +++ b/src/demo/components/webcam_demo.py @@ -21,6 +21,6 @@ sysout("-=" * 40) sysout("AskAI WebCam Demo") sysout("-=" * 40) - # info: str = webcam_capturer("hugo", True) - info: str = webcam_identifier() + info: str = webcam_capturer("hugo", True, "Is the person happy?") + # info: str = webcam_identifier() sysout(info, markdown=True) diff --git a/src/main/askai/core/model/image_result.py b/src/main/askai/core/model/image_result.py index ac264081..18096c01 100644 --- a/src/main/askai/core/model/image_result.py +++ b/src/main/askai/core/model/image_result.py @@ -11,7 +11,7 @@ class ImageResult(BaseModel): main_objects: list[str] = Field(description="List of the main objects on the picture") env_description: str = Field(description="Description of the atmosphere of the environment") people_description: list[str] = Field(description="List of people description") - user_response: list[str] = Field(description="A response to the user question") + user_response: str = Field(description="A response to the user question") @staticmethod def of(image_caption: 
AnyStr) -> "ImageResult": diff --git a/src/main/askai/core/processors/splitter/splitter_actions.py b/src/main/askai/core/processors/splitter/splitter_actions.py index 7c098ac0..0da87533 100644 --- a/src/main/askai/core/processors/splitter/splitter_actions.py +++ b/src/main/askai/core/processors/splitter/splitter_actions.py @@ -12,6 +12,16 @@ Copyright (c) 2024, HomeSetup """ +from pathlib import Path +from types import SimpleNamespace +from typing import Optional +import logging as log + +from hspylib.core.metaclass.singleton import Singleton +from langchain_core.messages import AIMessage +from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate +from langchain_core.runnables import RunnableWithMessageHistory, Runnable + from askai.core.askai_configs import configs from askai.core.askai_events import events from askai.core.askai_messages import msg @@ -25,21 +35,11 @@ from askai.core.model.action_plan import ActionPlan from askai.core.model.ai_reply import AIReply from askai.core.model.model_result import ModelResult -from askai.core.router.agent_tools import features from askai.core.router.task_agent import agent from askai.core.router.tools.general import final_answer from askai.core.support.langchain_support import lc_llm from askai.core.support.shared_instances import shared from askai.core.support.text_formatter import text_formatter -from hspylib.core.metaclass.singleton import Singleton -from langchain_core.messages import AIMessage -from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate -from langchain_core.runnables import RunnableWithMessageHistory -from pathlib import Path -from types import SimpleNamespace -from typing import Optional - -import logging as log class SplitterActions(metaclass=Singleton): @@ -125,7 +125,7 @@ def splitter_template(self, query: str) -> ChatPromptTemplate: evaluation: str = str(shared.context.flat("EVALUATION")) template = PromptTemplate( - 
input_variables=["os_type", "shell", "datetime", "home", "agent_tools", "rag"], + input_variables=["os_type", "shell", "datetime", "home", "rag"], template=prompt.read_prompt("task-splitter.txt"), ) @@ -138,7 +138,6 @@ def splitter_template(self, query: str) -> ChatPromptTemplate: shell=prompt.shell, datetime=geo_location.datetime, home=Path.home(), - agent_tools=features.available_tools, rag=self._rag.get_rag_examples(query), ), ), @@ -155,11 +154,11 @@ def split(self, question: str, model: ModelResult = ModelResult.default()) -> Op :return: An optional ActionPlan generated from the provided question. """ - runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp) - runnable = RunnableWithMessageHistory( + response: AIMessage + runnable: Runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp) + runnable: Runnable = RunnableWithMessageHistory( runnable, shared.context.flat, input_messages_key="input", history_messages_key="chat_history" ) - response: AIMessage if response := runnable.invoke({"input": question}, config={"configurable": {"session_id": "HISTORY"}}): answer: str = str(response.content) log.info("Router::[RESPONSE] Received from AI: \n%s.", answer) diff --git a/src/main/askai/core/processors/splitter/splitter_executor.py b/src/main/askai/core/processors/splitter/splitter_executor.py index 4b6151f8..ad915eb0 100644 --- a/src/main/askai/core/processors/splitter/splitter_executor.py +++ b/src/main/askai/core/processors/splitter/splitter_executor.py @@ -35,6 +35,15 @@ class SplitterExecutor(Thread): """Responsible for executing a TaskSplitter pipeline.""" + @staticmethod + def display(text: str, force: bool = False) -> None: + """Display a debug message if debugging mode is active. + :param text: The debug message to display + :param force: Force displaying the message regardless of the debug flag. 
+ """ + if force or is_debugging(): + tf.console.print(Text.from_markup(text)) + def __init__(self, query: str): super().__init__() self._pipeline = SplitterPipeline(query) @@ -45,14 +54,6 @@ def __init__(self, query: str): def pipeline(self) -> SplitterPipeline: return self._pipeline - def display(self, text: str, force: bool = False) -> None: - """Display a debug message if debugging mode is active. - :param text: The debug message to display - :param force: Force displaying the message regardless of the debug flag. - """ - if force or is_debugging(): - tf.console.print(Text.from_markup(text)) - def interrupt(self, ev: Event) -> None: """Interrupt the active execution pipeline. :param ev: The interruption event, diff --git a/src/main/askai/core/processors/task_splitter.py b/src/main/askai/core/processors/task_splitter.py index 37f11016..77c3b62d 100644 --- a/src/main/askai/core/processors/task_splitter.py +++ b/src/main/askai/core/processors/task_splitter.py @@ -39,7 +39,8 @@ class TaskSplitter(metaclass=Singleton): InaccurateResponse, InvalidArgumentError, ValidationError - ) # fmt: on + ) + # fmt: on def process(self, question: str, **_) -> Optional[str]: """Process the user question by splitting complex tasks into smaller single actionable tasks. diff --git a/src/main/askai/core/router/agent_tools.py b/src/main/askai/core/router/agent_tools.py index 39f9a41f..355d2a36 100644 --- a/src/main/askai/core/router/agent_tools.py +++ b/src/main/askai/core/router/agent_tools.py @@ -133,20 +133,20 @@ def image_captioner(self, image_path: str) -> str: image_path=image_path, image_caption=os.linesep.join(image_caption) if image_caption else "" ) - def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, query: str | None = None) -> str: - """Capture a photo using the webcam, and save it locally. This tool is useful for taking photos, detect people's - faces, and, describing what is in front of the webcam. 
- Usage: `webcam_capturer(photo_name, detect_faces)` - :param photo_name: The name of the photo file (without the extension). If None, a default name will be used. + def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, question: str | None = None) -> str: + """Capture a photo via the webcam, and save it locally. Also provide a description of the objects and people + depicted in the picture. An additional question may address specific details regarding the photo. + Usage: `webcam_capturer(photo_name, detect_faces, question)` + :param photo_name: Optional name of the photo image file (without the extension). :param detect_faces: Whether to detect and describe all faces in the photo (default is False). - :param query: Optional query about the photo taken. + :param question: Optional specific question about the photo taken (default is None). :return: The file path of the saved JPEG image. """ - return webcam_capturer(photo_name, detect_faces, query) + return webcam_capturer(photo_name, detect_faces, question) def webcam_identifier(self) -> str: - """Identify the person in front of the webcam using a pre-stored set of faces and provide a description. This - tool is useful for recognizing individuals and generating descriptions based on pre-stored facial data. + """Capture a photo via the webcam and compare it to a pre-stored set of images to determine if the current + subject matches any stored faces. Usage: `webcam_identifier()` :return: A string containing the identification and description of the person. 
""" diff --git a/src/main/askai/core/router/task_agent.py b/src/main/askai/core/router/task_agent.py index 9efad493..704f125e 100644 --- a/src/main/askai/core/router/task_agent.py +++ b/src/main/askai/core/router/task_agent.py @@ -13,8 +13,11 @@ Copyright (c) 2024, HomeSetup """ from askai.core.askai_configs import configs +from askai.core.askai_events import events +from askai.core.askai_messages import msg from askai.core.askai_prompt import prompt from askai.core.engine.openai.temperature import Temperature +from askai.core.model.ai_reply import AIReply from askai.core.router.agent_tools import features from askai.core.support.langchain_support import lc_llm from askai.core.support.shared_instances import shared @@ -62,7 +65,7 @@ def invoke(self, task: str) -> Optional[str]: :return: The agent's response as a string. """ output: str | None = None - # events.reply.emit(reply=AIReply.debug(msg.task(task))) + events.reply.emit(reply=AIReply.debug(msg.task(task))) shared.context.push("HISTORY", task, "assistant") if (response := self._exec_task(task)) and (output := response["output"]): log.info("Router::[RESPONSE] Received from AI: \n%s.", output) diff --git a/src/main/askai/core/router/tools/vision.py b/src/main/askai/core/router/tools/vision.py index ca916e57..e68d60c7 100644 --- a/src/main/askai/core/router/tools/vision.py +++ b/src/main/askai/core/router/tools/vision.py @@ -102,16 +102,21 @@ def parse_caption(image_caption: str) -> list[str]: if image_caption: result: ImageResult = ImageResult.of(image_caption) ln: str = os.linesep - people_desc: str = "" + people_desc: list[str] = [] + user_response_desc: list[str] = [] if result.people_description: - people_desc: list[str] = [ + people_desc = [ f"- **People:** `({result.people_count})`", indent(f"- {'- '.join([f'`{ppl}{ln}`' + ln for ppl in result.people_description])}", " "), ] + if result.user_response: + user_response_desc = [f"- **Answer**: `{result.user_response}`"] + # fmt: off return [ f"- 
**Description:** `{result.env_description}`", f"- **Objects:** `{', '.join(result.main_objects)}`", - ] + people_desc + ] + people_desc + user_response_desc + # fmt: on return [msg.no_caption()] diff --git a/src/main/askai/resources/prompts/task-splitter.txt b/src/main/askai/resources/prompts/task-splitter.txt index 230f07f6..49f0b02a 100644 --- a/src/main/askai/resources/prompts/task-splitter.txt +++ b/src/main/askai/resources/prompts/task-splitter.txt @@ -15,7 +15,7 @@ When the context of the question is not explicit, refer to past events to clarif Today is "{datetime}". Use this information if it is relevant to the response. -Guidelines (in order) to break down complex tasks: +**Guidelines (in order) to break down complex tasks**: Step 1. Identify the primary goal and all sub-goals. For each goal and sub-goal, create one task to address it. Typically, the number of tasks will match the primary goal plus the sub-goals. Ensure that the tasks are specific, actionable, and aligned with the identified goals. @@ -27,37 +27,6 @@ Step 4. Provide direct instructions without explaining its purpose. Step 5. Separate tasks clearly, avoiding the use of conjunctions like 'and', 'or' and implicit multi-step processes within a single task. -Step 6. Follow this workflow to guide you in breaking down the tasks: - -""" -Group 0. Chat History and Context - - Recall the chat history and context for tasks already performed to avoid wasting resources and time. -Group 1. Locate, List, Generate, or Summarize - - Locate resources (e.g., files and folders. Use the chat history; prefer 'find' over 'search'). - - List the contents of folders and sub-folders (specify the max recursion depth). - - Summarize files and folders (only upon explicit user request). - - Generate content by giving the prompt (instructions). -Group 2. Open, Read, Save - - Open necessary files (playback included). - - Read (show) file or folder contents. - - Save generated content. -Group 3. 
Identify, or Extract - Identify text or resources (like files and folders). - Extract required data/text (explain how to extract). -Group 4. Analyze or Display - Analyze output, content, or extracted data/text. - Display the information and details to the human. -Group 5. Revise and Refine - Revise your task list, ensuring tasks for the primary goal and sub-goals. - Refine your task list, providing a direct and explicit answer to the user's question. - -Notes: - -- Actions within the same group must not be repeated as they will produce the same result. -- Ensure that your answer is unbiased and does not rely on stereotypes. -- Think step by step. -""" - **Task Requirements:** @@ -94,23 +63,18 @@ Notes: 6. Terminal access (execute {shell} commands). -**You have access to the following Tools:** - ---- -{agent_tools} ---- - - **Retrieval-Augmented Generation:** -The following examples can be used to help your decisions (note that it is not a source of truth). +The following examples can be used to help your decisions. --- {rag} --- -The response should follow this format: +**Final Response Format:** + +The final response should follow this format: @thought: "" @@ -135,4 +99,4 @@ The response should follow this format: **THE RESPONSE FORMAT IS CRUCIAL, ALTERING IT WILL CAUSE THE PARSER TO FAIL.** -Begin splittings the tasks! +Begin! diff --git a/src/main/askai/resources/rag/task-splitter.csv b/src/main/askai/resources/rag/task-splitter.csv index 8ba9e2b9..4e792600 100644 --- a/src/main/askai/resources/rag/task-splitter.csv +++ b/src/main/askai/resources/rag/task-splitter.csv @@ -1,5 +1,5 @@ Human Question;Reasoning;Direct Response;Task List -Is the man in front of the computer happy;The user requests verification of a man’s presence in front of the computer and an assessment of his happiness, achievable via the webcam_capturer tool.;N/A;[{{1. Use `webcam_capturer` to capture a photo via the webcam.}},{{2. 
Identify the presence of a smiling man.}}] +Is the man in front of the computer happy;The user requests verification of a man’s presence in front of the computer and an assessment of his happiness, achievable via the webcam_capturer tool.;N/A;[{{1. Use the webcam_capturer tool to capture a photo via the webcam.}},{{2. Identify the presence of a smiling man.}}] Describe the first image in my Downloads folder.;The user requests that I locate the first image in their downloads folder and provide a description if one is found.;N/A;[{{1. List the user downloads folder}},{{2. Identify image files}},{{Describe the first image file}}] What is the size of the Moon?;The user is inquiring about the size of the moon, a commonly known fact. I will provide a concise and direct response.;Direct: The moon has a diameter of about 3.474 kilometers (2.159 miles).;N/A Hello, who are you?;The user is engaging in a casual conversation. The response should be straightforward.;Direct: Hello, I am Taius the AskAI helpful and kind assistant.;N/A @@ -11,14 +11,14 @@ Open the first reminder file in my Downloads and tell me what I need to do first list my music library and check if there's any AC/DC song. If found, show the file name and play it.;The user requests that I list my music files and then locate and open any AC/DC song found at this location.;N/A;[{{1. List the user music folder ${{HOME}}/Music}},{{2. Identify AC/DC song files}},{{3. Display the AC/DC song file names}},{{4. Open any AC/DC song file identified}}] I have downloaded a QR logo, open it for me.;The user requests that I locate a QR logo he downloaded. The file name is likely to include 'QR' and 'logo.' I will search for this pattern and open the file if it is found.;N/A;[{{1. List the user ${{HOME}}/Downloads folder}},{{2. Identify files containing 'QR' and 'logo'}},{{3. Open the QR logo file if found}}] Create a thanks note on my behalf and save it as thank-you.;The user wants me to create a note to thank someone. 
He did not specify the format, so I will assume plain text.;N/A;[{{1. Generate a thanks note and save it as 'thank-you.txt' in the current folder}}] -Describe what you see using the webcam;The user requests that I describe what I observe using the webcam. I can utilize the webcam_capturer tool for this purpose.;N/A;[{{1. Use the webcam_capturer to take a photo using the webcam}}] -What you see?;The user requests that I describe what I observe. I can utilize the webcam_capturer tool to accomplish this.;N/A;[{{1. Use the webcam_capturer to take a photo using the webcam}}] -Identify who is in front of the webcam;The user wants me to identify the individual in front of the webcam. I can utilize the webcam_identifier tool for this purpose.;N/A;[{{1. Use the webcam_capturer to take a photo using the webcam}}] +Describe what you see using the webcam;The user requests that I describe what I observe using the webcam. I can utilize the webcam_capturer tool for this purpose.;N/A;[{{1. Use the webcam_capturer tool to capture a photo via the webcam.}}] +What you see?;The user requests that I describe what I observe. I can utilize the webcam_capturer tool to accomplish this.;N/A;[{{1. Use the webcam_capturer tool to capture a photo via the webcam.}}] +Identify who is in front of the webcam;The user wants me to identify the individual in front of the webcam. I can utilize the webcam_identifier tool for this purpose.;N/A;[{{1. Use the webcam_identifier tool to capture a photo via the webcam.}}] Find my backup and log files from folder /tmp/logs.;The user requests that I locate all of his backup and log files. I will assume the common extensions *.bak and *.log and will issue a command to search for them.;N/A;[{{1. 
Execute on terminal: 'find /tmp/logs -type f \( -name *.bak -o -name *.log \)}}] Open the first reminder you find at my downloads and tell me what I must do first.;The user requests that I find the first reminder file in his $HOME/Downloads folder and identify the first task outlined in it.;N/A;[{{1. List the user ${{HOME}}/Downloads folder}},{{2. Identify any reminder file (*reminder*)}},{{3. Open the first reminder file found}},{{4. Identify the first task to be done}}] Create a small Python program to calculate speed given time and distance, and save it as 'dist.py'.;The user wants me to create a Python program that calculates speed based on time and distance. I will use the generate_content tool for this purpose, keeping in mind that this tool automatically saves the generated content.;N/A;[{{1. Generate a program to calculate the speed given the time and distance and save it as 'dist.py'}}] rm -f /tmp/myfile.txt /tmp/trash.bak;The user provided a direct terminal command. There are no syntax errors, so I will simply forward it.;N/A;[{{1. Execute on terminal: 'rm -f /tmp/myfile.txt /tmp/trash.bak'}}] -Describe me;The user requests to describe him, and I can utilize the webcam_capturer for this purpose.;N/A;{{1. Use the webcam_capturer to describe the person in front of it}} +Describe me;The user requests to describe him, and I can utilize the webcam_capturer for this purpose.;N/A;[{{1. Use the webcam_capturer tool to capture a photo via the webcam.}},{{2. Describe the person in the photo.}}] Who is Hugo Saporetti Junior;I'm unable to provide real-time information about individuals, so I will search Google for Hugo Saporetti junior.;N/A;"[{{1. Search google for: "Hugo Saporetti Junior" }}]" Open Hugo Saporetti junior's linkedin page;The user requests that I open the LinkedIn page of 'Hugo Saporetti Junior', therefore, I need to search on linkedin for it and open it if found.;N/A;[{{1. 
Search google for: linkedin.com/search/results/people/?keywords=Hugo%20Saporetti%20Junior }}, {{2. Open the URL if found.}}] Open the official wikipedia website;The user wants me to open the official wikipedia website, which is a widely known URL.;Direct: Open the URL https://www.wikipedia.org;N/A @@ -32,6 +32,6 @@ Check for updates in my Git repositories.;The user requests a check for Git upda When is the next Los Angeles Lakers match?;The user wants to know the date and time of the next Los Angeles Lakers game. I'll search online to find it.;N/A;[{{1. Search google for: "Los Angeles Lakers agenda 2024, November 13th". }}] Tell me who is currently logged into my computer.;The user wants to know, who is currently logged into his computer. I can request a terminal command for that.;N/A;[{{1. Execute on terminal: 'who'}},{{2. Check who is currently logged.}}] Describe the dragon image in my Downloads folder.;The user requests that I locate the dragon image in their downloads folder and provide a description if one is found. If multiple is found, I will select the first one;N/A;[{{1. List the user downloads folder}},{{2. Identify image files with "dragon" in their names.}},{{Describe the first dragon image file found.}}] -Tell me who you see using the webcam. Respond as if addressing an audience.;The user requests that I describe who I observe using the webcam. I can utilize the webcam_identifier tool for this purpose.;N/A;[{{1. Use the webcam_identifier to take a photo using the webcam}},{{2. Identify who is in front of the webcam.}}] +Tell me who you see using the webcam. Respond as if addressing an audience.;The user requests that I describe who I observe using the webcam. I can utilize the webcam_identifier tool for this purpose.;N/A;[{{1. Use the webcam_identifier to capture a photo via the webcam.}},{{2. Identify who is in front of the webcam.}}] Search my Documents folder for any budget spreadsheets.;The user requests that I locate their budget spreadsheets. 
I will assume commonly used file extensions containing "budget" in the file names.;N/A;[{{1. Execute on terminal: "find ${{HOME}}/Documents -type f \( -iname '*budget*.xls' -o -iname '*budget*.xlsx' -o -iname '*budget*.csv' \)"}}] Adjust my system volume to 50%.;The user requests adjusting their system volume to 50% on macOS, achievable via osascript.;N/A;[{{1. Execute on terminal: 'osascript -e "set volume output volume 50"'}}]