Fix the webcam capturer
yorevs committed Nov 19, 2024
1 parent b81b36c commit 92f54f0
Showing 10 changed files with 61 additions and 88 deletions.
4 changes: 2 additions & 2 deletions src/demo/components/webcam_demo.py
@@ -21,6 +21,6 @@
sysout("-=" * 40)
sysout("AskAI WebCam Demo")
sysout("-=" * 40)
# info: str = webcam_capturer("hugo", True)
info: str = webcam_identifier()
info: str = webcam_capturer("hugo", True, "Is the person happy?")
# info: str = webcam_identifier()
sysout(info, markdown=True)
2 changes: 1 addition & 1 deletion src/main/askai/core/model/image_result.py
@@ -11,7 +11,7 @@ class ImageResult(BaseModel):
main_objects: list[str] = Field(description="List of the main objects on the picture")
env_description: str = Field(description="Description of the atmosphere of the environment")
people_description: list[str] = Field(description="List of people description")
user_response: list[str] = Field(description="A response to the user question")
user_response: str = Field(description="A response to the user question")

@staticmethod
def of(image_caption: AnyStr) -> "ImageResult":
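For context, a minimal sketch of what the narrowed schema means for callers: `user_response` is now a single string rather than a list. The field values and the pydantic-v2 JSON parsing below are illustrative assumptions; the project's own `ImageResult.of` factory may differ.

```python
# Hypothetical caption payload; pydantic v2 API (model_validate_json) assumed.
from pydantic import BaseModel, Field

class ImageResultSketch(BaseModel):
    main_objects: list[str] = Field(description="List of the main objects on the picture")
    env_description: str = Field(description="Description of the atmosphere of the environment")
    people_description: list[str] = Field(description="List of people description")
    user_response: str = Field(description="A response to the user question")

caption = (
    '{"main_objects": ["desk", "lamp"], '
    '"env_description": "a bright office", '
    '"people_description": ["a smiling person wearing glasses"], '
    '"user_response": "Yes, the person appears happy."}'
)
result = ImageResultSketch.model_validate_json(caption)
print(result.user_response)  # a single answer string, no longer a list
```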
29 changes: 14 additions & 15 deletions src/main/askai/core/processors/splitter/splitter_actions.py
@@ -12,6 +12,16 @@
Copyright (c) 2024, HomeSetup
"""
from pathlib import Path
from types import SimpleNamespace
from typing import Optional
import logging as log

from hspylib.core.metaclass.singleton import Singleton
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables import RunnableWithMessageHistory, Runnable

from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
@@ -25,21 +35,11 @@
from askai.core.model.action_plan import ActionPlan
from askai.core.model.ai_reply import AIReply
from askai.core.model.model_result import ModelResult
from askai.core.router.agent_tools import features
from askai.core.router.task_agent import agent
from askai.core.router.tools.general import final_answer
from askai.core.support.langchain_support import lc_llm
from askai.core.support.shared_instances import shared
from askai.core.support.text_formatter import text_formatter
from hspylib.core.metaclass.singleton import Singleton
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables import RunnableWithMessageHistory
from pathlib import Path
from types import SimpleNamespace
from typing import Optional

import logging as log


class SplitterActions(metaclass=Singleton):
@@ -125,7 +125,7 @@ def splitter_template(self, query: str) -> ChatPromptTemplate:

evaluation: str = str(shared.context.flat("EVALUATION"))
template = PromptTemplate(
input_variables=["os_type", "shell", "datetime", "home", "agent_tools", "rag"],
input_variables=["os_type", "shell", "datetime", "home", "rag"],
template=prompt.read_prompt("task-splitter.txt"),
)

@@ -138,7 +138,6 @@ def splitter_template(self, query: str) -> ChatPromptTemplate:
shell=prompt.shell,
datetime=geo_location.datetime,
home=Path.home(),
agent_tools=features.available_tools,
rag=self._rag.get_rag_examples(query),
),
),
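As a standalone illustration of the trimmed template inputs (the `agent_tools` variable is gone, so the prompt no longer interpolates a tool listing), here is a hedged sketch; the inline template string is only a stand-in for `resources/prompts/task-splitter.txt`.

```python
# Sketch only: the real template text is loaded from task-splitter.txt.
from pathlib import Path
from langchain_core.prompts import PromptTemplate

template = PromptTemplate(
    input_variables=["os_type", "shell", "datetime", "home", "rag"],
    template="OS: {os_type} ({shell}) | Today: {datetime} | Home: {home}\nExamples:\n{rag}",
)
print(template.format(
    os_type="linux", shell="bash", datetime="2024-11-19 10:00",
    home=Path.home(), rag="- locate the photo\n- describe the photo",
))
```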
@@ -155,11 +154,11 @@ def split(self, question: str, model: ModelResult = ModelResult.default()) -> Op
:return: An optional ActionPlan generated from the provided question.
"""

runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp)
runnable = RunnableWithMessageHistory(
response: AIMessage
runnable: Runnable = self.splitter_template(question) | lc_llm.create_chat_model(Temperature.COLDEST.temp)
runnable: Runnable = RunnableWithMessageHistory(
runnable, shared.context.flat, input_messages_key="input", history_messages_key="chat_history"
)
response: AIMessage
if response := runnable.invoke({"input": question}, config={"configurable": {"session_id": "HISTORY"}}):
answer: str = str(response.content)
log.info("Router::[RESPONSE] Received from AI: \n%s.", answer)
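For readers unfamiliar with the pattern above, here is a self-contained sketch of piping a prompt into a chat model and wrapping it in `RunnableWithMessageHistory`. `ChatOpenAI` and the in-memory history store are assumptions for illustration; askai instead wires in its own LLM factory (`lc_llm`) and `shared.context.flat` as the history provider.

```python
# Minimal chain-with-history sketch (assumes recent langchain-core / langchain-openai).
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import Runnable
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_messages([
    ("system", "Split the user's request into numbered, single-action tasks."),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

store: dict[str, InMemoryChatMessageHistory] = {}

def get_history(session_id: str) -> InMemoryChatMessageHistory:
    # One history object per session id, created on demand.
    return store.setdefault(session_id, InMemoryChatMessageHistory())

runnable: Runnable = prompt | ChatOpenAI(temperature=0.0)
chain = RunnableWithMessageHistory(
    runnable, get_history, input_messages_key="input", history_messages_key="chat_history"
)
response = chain.invoke(
    {"input": "Take a webcam photo and tell me if the person looks happy"},
    config={"configurable": {"session_id": "HISTORY"}},
)
print(response.content)
```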
17 changes: 9 additions & 8 deletions src/main/askai/core/processors/splitter/splitter_executor.py
@@ -35,6 +35,15 @@
class SplitterExecutor(Thread):
"""Responsible for executing a TaskSplitter pipeline."""

@staticmethod
def display(text: str, force: bool = False) -> None:
"""Display a debug message if debugging mode is active.
:param text: The debug message to display
:param force: Force displaying the message regardless of the debug flag.
"""
if force or is_debugging():
tf.console.print(Text.from_markup(text))

def __init__(self, query: str):
super().__init__()
self._pipeline = SplitterPipeline(query)
@@ -45,14 +54,6 @@ def __init__(self, query: str):
def pipeline(self) -> SplitterPipeline:
return self._pipeline

def display(self, text: str, force: bool = False) -> None:
"""Display a debug message if debugging mode is active.
:param text: The debug message to display
:param force: Force displaying the message regardless of the debug flag.
"""
if force or is_debugging():
tf.console.print(Text.from_markup(text))

def interrupt(self, ev: Event) -> None:
"""Interrupt the active execution pipeline.
:param ev: The interruption event,
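The `display` helper moved from an instance method to a `@staticmethod`, which fits because it touches no executor state. A hedged sketch of the resulting call pattern follows; the rich console wiring and the `is_debugging` stub are assumptions of this sketch, not askai's exact code.

```python
# Sketch: a stateless debug printer exposed as a staticmethod.
from rich.console import Console
from rich.text import Text

console = Console()

def is_debugging() -> bool:
    return True  # stand-in for askai's real debug flag

class ExecutorSketch:
    @staticmethod
    def display(text: str, force: bool = False) -> None:
        """Print a markup-formatted debug message when debugging (or forced)."""
        if force or is_debugging():
            console.print(Text.from_markup(text))

# Callable without constructing an executor instance:
ExecutorSketch.display("[green]pipeline started[/green]", force=True)
```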
3 changes: 2 additions & 1 deletion src/main/askai/core/processors/task_splitter.py
@@ -39,7 +39,8 @@ class TaskSplitter(metaclass=Singleton):
InaccurateResponse,
InvalidArgumentError,
ValidationError
) # fmt: on
)
# fmt: on

def process(self, question: str, **_) -> Optional[str]:
"""Process the user question by splitting complex tasks into smaller single actionable tasks.
18 changes: 9 additions & 9 deletions src/main/askai/core/router/agent_tools.py
@@ -133,20 +133,20 @@ def image_captioner(self, image_path: str) -> str:
image_path=image_path, image_caption=os.linesep.join(image_caption) if image_caption else ""
)

def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, query: str | None = None) -> str:
"""Capture a photo using the webcam, and save it locally. This tool is useful for taking photos, detect people's
faces, and, describing what is in front of the webcam.
Usage: `webcam_capturer(photo_name, detect_faces)`
:param photo_name: The name of the photo file (without the extension). If None, a default name will be used.
def webcam_capturer(self, photo_name: str | None, detect_faces: bool = False, question: str | None = None) -> str:
"""Capture a photo via the webcam, and save it locally. Also provide a description of the objects and people
depicted in the picture. An additional question may address specific details regarding the photo.
Usage: `webcam_capturer(photo_name, detect_faces, question)`
:param photo_name: Optional name of the photo image file (without the extension).
:param detect_faces: Whether to detect and describe all faces in the photo (default is False).
:param query: Optional query about the photo taken.
:param question: Optional specific question about the photo taken (default is None).
:return: The file path of the saved JPEG image.
"""
return webcam_capturer(photo_name, detect_faces, query)
return webcam_capturer(photo_name, detect_faces, question)

def webcam_identifier(self) -> str:
"""Identify the person in front of the webcam using a pre-stored set of faces and provide a description. This
tool is useful for recognizing individuals and generating descriptions based on pre-stored facial data.
"""Capture a photo via the webcam and compare it to a pre-stored set of images to determine if the current
subject matches any stored faces.
Usage: `webcam_identifier()`
:return: A string containing the identification and description of the person.
"""
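A hypothetical direct call with the renamed third parameter (`query` became `question`): the argument values mirror the demo at the top of this commit, and `features` is assumed to be the agent-tools singleton imported elsewhere in the repository.

```python
# Illustrative only: takes a photo named "hugo", runs face detection,
# and asks a follow-up question about the captured frame.
from askai.core.router.agent_tools import features

info: str = features.webcam_capturer("hugo", detect_faces=True,
                                     question="Is the person happy?")
print(info)  # markdown description of the scene plus the answer
```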
5 changes: 4 additions & 1 deletion src/main/askai/core/router/task_agent.py
@@ -13,8 +13,11 @@
Copyright (c) 2024, HomeSetup
"""
from askai.core.askai_configs import configs
from askai.core.askai_events import events
from askai.core.askai_messages import msg
from askai.core.askai_prompt import prompt
from askai.core.engine.openai.temperature import Temperature
from askai.core.model.ai_reply import AIReply
from askai.core.router.agent_tools import features
from askai.core.support.langchain_support import lc_llm
from askai.core.support.shared_instances import shared
@@ -62,7 +65,7 @@ def invoke(self, task: str) -> Optional[str]:
:return: The agent's response as a string.
"""
output: str | None = None
# events.reply.emit(reply=AIReply.debug(msg.task(task)))
events.reply.emit(reply=AIReply.debug(msg.task(task)))
shared.context.push("HISTORY", task, "assistant")
if (response := self._exec_task(task)) and (output := response["output"]):
log.info("Router::[RESPONSE] Received from AI: \n%s.", output)
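The previously commented-out debug emission is now active, so each routed task is echoed through the reply event bus. A rough, self-contained sketch of that publish/subscribe shape follows; the event bus and the `AIReply.debug` stand-in below are assumptions, not askai's actual events implementation.

```python
# Stand-in event bus: askai's events/AIReply differ, this only shows the shape.
from dataclasses import dataclass
from typing import Callable

@dataclass
class AIReply:
    message: str
    is_debug: bool = False

    @staticmethod
    def debug(message: str) -> "AIReply":
        return AIReply(message=message, is_debug=True)

_listeners: list[Callable[[AIReply], None]] = []

def emit(reply: AIReply) -> None:
    for listener in _listeners:
        listener(reply)

_listeners.append(lambda r: print(f"[debug] {r.message}" if r.is_debug else r.message))
emit(AIReply.debug("Executing task: capture a webcam photo"))
```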
11 changes: 8 additions & 3 deletions src/main/askai/core/router/tools/vision.py
@@ -102,16 +102,21 @@ def parse_caption(image_caption: str) -> list[str]:
if image_caption:
result: ImageResult = ImageResult.of(image_caption)
ln: str = os.linesep
people_desc: str = ""
people_desc: list[str] = []
user_response_desc: list[str] = []
if result.people_description:
people_desc: list[str] = [
people_desc = [
f"- **People:** `({result.people_count})`",
indent(f"- {'- '.join([f'`{ppl}{ln}`' + ln for ppl in result.people_description])}", " "),
]
if result.user_response:
user_response_desc = [f"- **Answer**: `{result.user_response}`"]
# fmt: off
return [
f"- **Description:** `{result.env_description}`",
f"- **Objects:** `{', '.join(result.main_objects)}`",
] + people_desc
] + people_desc + user_response_desc
# fmt: on

return [msg.no_caption()]

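To visualize what the reworked `parse_caption` now returns, here is a sketch with illustrative values; the new `- **Answer**` bullet only appears when the model produced a `user_response`.

```python
# Illustrative values only; mirrors the list assembly in the diff above.
import os
from textwrap import indent

ln = os.linesep
people = ["a smiling person wearing glasses"]
people_desc = [
    f"- **People:** `({len(people)})`",
    indent(f"- {'- '.join([f'`{p}{ln}`' + ln for p in people])}", "  "),
]
user_response_desc = ["- **Answer**: `Yes, the person appears happy.`"]
caption_lines = [
    "- **Description:** `a bright, tidy office`",
    "- **Objects:** `desk, lamp`",
] + people_desc + user_response_desc
print(ln.join(caption_lines))
```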
48 changes: 6 additions & 42 deletions src/main/askai/resources/prompts/task-splitter.txt
@@ -15,7 +15,7 @@ When the context of the question is not explicit, refer to past events to clarif
Today is "{datetime}". Use this information if it is relevant to the response.


Guidelines (in order) to break down complex tasks:
**Guidelines (in order) to break down complex tasks**:

Step 1. Identify the primary goal and all sub-goals. For each goal and sub-goal, create one task to address it. Typically, the number of tasks will match the primary goal plus the sub-goals. Ensure that the tasks are specific, actionable, and aligned with the identified goals.

@@ -27,37 +27,6 @@ Step 4. Provide direct instructions without explaining its purpose.

Step 5. Separate tasks clearly, avoiding the use of conjunctions like 'and', 'or' and implicit multi-step processes within a single task.

Step 6. Follow this workflow to guide you in breaking down the tasks:

"""
Group 0. Chat History and Context
- Recall the chat history and context for tasks already performed to avoid wasting resources and time.
Group 1. Locate, List, Generate, or Summarize
- Locate resources (e.g., files and folders. Use the chat history; prefer 'find' over 'search').
- List the contents of folders and sub-folders (specify the max recursion depth).
- Summarize files and folders (only upon explicit user request).
- Generate content by giving the prompt (instructions).
Group 2. Open, Read, Save
- Open necessary files (playback included).
- Read (show) file or folder contents.
- Save generated content.
Group 3. Identify, or Extract
- Identify text or resources (like files and folders).
- Extract required data/text (explain how to extract).
Group 4. Analyze or Display
- Analyze output, content, or extracted data/text.
- Display the information and details to the human.
Group 5. Revise and Refine
- Revise your task list, ensuring tasks for the primary goal and sub-goals.
- Refine your task list, providing a direct and explicit answer to the user's question.

Notes:

- Actions within the same group must not be repeated as they will produce the same result.
- Ensure that your answer is unbiased and does not rely on stereotypes.
- Think step by step.
"""


**Task Requirements:**

@@ -94,23 +63,18 @@
6. Terminal access (execute {shell} commands).


**You have access to the following Tools:**

---
{agent_tools}
---


**Retrieval-Augmented Generation:**

The following examples can be used to help your decisions (note that it is not a source of truth).
The following examples can be used to help your decisions.

---
{rag}
---


The response should follow this format:
**Final Response Format:**

The final response should follow this format:


@thought: "<your thoughts>"
@@ -135,4 +99,4 @@ The response should follow this format:
**THE RESPONSE FORMAT IS CRUCIAL, ALTERING IT WILL CAUSE THE PARSER TO FAIL.**


Begin splittings the tasks!
Begin!