diff --git a/dependencies.hspd b/dependencies.hspd index fb95df96..012ca741 100644 --- a/dependencies.hspd +++ b/dependencies.hspd @@ -4,7 +4,7 @@ Created: 05th Jan, 2024 !! Syntax for dependencies !! - package: , version: , mode: + package: , version: , mode: */ /* Module dependencies */ @@ -14,26 +14,27 @@ package: hspylib-clitt, version: 0.9.117, mode: ge /* Application dependencies */ -package: retry, version: 0.9.2, mode: eq -package: pause, version: 0.3, mode: eq -package: requests, version: 2.31.0, mode: eq -package: argostranslate, version: 1.9.1, mode: eq -package: protobuf, version: 4.22.1, mode: eq -package: torch, version: 2.1.2, mode: eq -package: stanza, version: 1.1.1, mode: eq -package: soundfile, version: 0.12.1, mode: eq -package: urllib3, version: 1.26.18, mode: eq +package: retry, version: 0.9.2, mode: compat +package: pause, version: 0.3, mode: compat +package: requests, version: 2.31.0, mode: compat +package: argostranslate, version: 1.9.1, mode: compat +package: protobuf, version: 4.22.1, mode: compat +package: torch, version: 2.1.2, mode: compat +package: stanza, version: 1.1.1, mode: compat +package: soundfile, version: 0.12.1, mode: compat +package: urllib3, version: 1.26.18, mode: compat package: langchain, version: 0.1.12, mode: ge -package: langchain-openai, version: 0.0.8, mode: eq -package: langchain-community, version: 0.0.28, mode: eq -package: unstructured, version: 0.12.6, mode: eq -package: chromadb, version: 0.4.24, mode: eq -package: python-magic-bin, version: 0.4.14, mode: eq -package: tiktoken, version: 0.6.0, mode: eq -package: aiohttp, version: 3.9.3, mode: eq -package: html2text, version: 2024.2.26, mode: eq -package: PyAudio, version: 0.2.14, mode: eq -package: SpeechRecognition, version: 3.10.1, mode: eq -package: openai-whisper, version: 20231117, mode: eq -package: google-api-python-client, version: 2.122.0, mode: eq -package: openai, version: 1.14.1, mode: eq +package: langchain-openai, version: 0.0.8, mode: compat 
+package: langchain-community, version: 0.0.28, mode: compat +package: unstructured, version: 0.12.6, mode: compat +package: unstructured[md], version: 0.12.6, mode: compat +package: chromadb, version: 0.4.24, mode: ge +package: python-magic-bin, version: 0.4.14, mode: compat +package: tiktoken, version: 0.6.0, mode: compat +package: aiohttp, version: 3.9.3, mode: compat +package: html2text, version: 2024.2.26, mode: compat +package: PyAudio, version: 0.2.14, mode: compat +package: SpeechRecognition, version: 3.10.1, mode: compat +package: openai-whisper, version: 20231117, mode: compat +package: google-api-python-client, version: 2.122.0, mode: compat +package: openai, version: 1.14.1, mode: compat diff --git a/gradle b/gradle index 6e42086e..49111aea 160000 --- a/gradle +++ b/gradle @@ -1 +1 @@ -Subproject commit 6e42086e7aed4a39743362bf3a6c623b1af90409 +Subproject commit 49111aea7f0e93d1950901d510ab9a7bb1132ffb diff --git a/src/main/askai/core/askai.py b/src/main/askai/core/askai.py index f031d0b3..baff05d1 100644 --- a/src/main/askai/core/askai.py +++ b/src/main/askai/core/askai.py @@ -15,6 +15,7 @@ import logging as log import os import sys +from functools import partial from threading import Thread from typing import List, Optional @@ -30,7 +31,7 @@ from askai.__classpath__ import classpath from askai.core.askai_configs import configs -from askai.core.askai_events import ASKAI_BUS_NAME, AskAiEvents, REPLY_EVENT +from askai.core.askai_events import ASKAI_BUS_NAME, AskAiEvents, REPLY_EVENT, REPLY_ERROR_EVENT from askai.core.askai_messages import msg from askai.core.component.audio_player import player from askai.core.component.cache_service import cache, CACHE_DIR @@ -145,11 +146,14 @@ def reply_error(self, message: str) -> None: else: display_text(f"{shared.nickname}: %RED%{message}%NC%") - def _cb_reply_event(self, ev: Event) -> None: + def _cb_reply_event(self, ev: Event, error: bool = False) -> None: """Callback to handle reply events.""" - if 
ev.args.erase_last: - cursor.erase_line() - self.reply(ev.args.message) + if error: + self.reply_error(ev.args.message) + else: + if ev.args.erase_last: + cursor.erase_line() + self.reply(ev.args.message) def _splash(self) -> None: """Display the AskAI splash screen.""" @@ -174,6 +178,7 @@ def _startup(self) -> None: cache.read_query_history() askai_bus = AskAiEvents.get_bus(ASKAI_BUS_NAME) askai_bus.subscribe(REPLY_EVENT, self._cb_reply_event) + askai_bus.subscribe(REPLY_ERROR_EVENT, partial(self._cb_reply_event, error=True)) if configs.is_speak: player.start_delay() self._ready = True diff --git a/src/main/askai/core/askai_events.py b/src/main/askai/core/askai_events.py index 62d0d1f9..62732354 100644 --- a/src/main/askai/core/askai_events.py +++ b/src/main/askai/core/askai_events.py @@ -48,7 +48,7 @@ def __str__(self): ASKAI_BUS = _EventBus( ASKAI_BUS_NAME, reply=_Event(REPLY_EVENT, erase_last=False), - reply_error=_Event(REPLY_EVENT) + reply_error=_Event(REPLY_ERROR_EVENT) ) # fmt: on diff --git a/src/main/askai/core/askai_messages.py b/src/main/askai/core/askai_messages.py index 38b70419..281a55b8 100644 --- a/src/main/askai/core/askai_messages.py +++ b/src/main/askai/core/askai_messages.py @@ -113,6 +113,10 @@ def cmd_no_exist(self, command: str) -> str: def cmd_failed(self, cmd_line: str) -> str: return self.translate(f"Error: Sorry! 
Command `{cmd_line}' failed to execute !") + @lru_cache + def missing_package(self, err: ImportError) -> str: + return self.translate(f"Error: Unable to summarize => {str(err)}' !") + @lru_cache def intelligible(self, question: str) -> str: return self.translate(f"Error: Your question '{question}' is not clear, please reformulate !") diff --git a/src/main/askai/core/component/summarizer.py b/src/main/askai/core/component/summarizer.py index 407ebdec..fd8d62c2 100644 --- a/src/main/askai/core/component/summarizer.py +++ b/src/main/askai/core/component/summarizer.py @@ -94,7 +94,7 @@ def text_splitter(self) -> TextSplitter: return self._text_splitter @lru_cache - def generate(self, folder: str | Path, glob: str) -> None: + def generate(self, folder: str | Path, glob: str) -> bool: """Generate a summarization of the folder contents. :param folder: The base folder of the summarization. :param glob: The glob pattern or file of the summarization. @@ -104,20 +104,27 @@ def generate(self, folder: str | Path, glob: str) -> None: AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.summarizing(self.sum_path)) embeddings = lc_llm.create_embeddings() - if self.persist_dir.exists(): - log.info("Recovering vector store from: '%s'", self.persist_dir) - v_store = Chroma(persist_directory=str(self.persist_dir), embedding_function=embeddings) - else: - log.info("Summarizing documents from '%s'", self.sum_path) - documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load() - if len(documents) <= 0: - raise DocumentsNotFound(f"Unable to find any document to summarize at: '{self.sum_path}'") - texts: List[Document] = self._text_splitter.split_documents(documents) - v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(self.persist_dir)) - - self._retriever = RetrievalQA.from_chain_type( - llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever() - ) + try: + if self.persist_dir.exists(): + log.info("Recovering vector store 
from: '%s'", self.persist_dir) + v_store = Chroma(persist_directory=str(self.persist_dir), embedding_function=embeddings) + else: + log.info("Summarizing documents from '%s'", self.sum_path) + documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load() + if len(documents) <= 0: + raise DocumentsNotFound(f"Unable to find any document to summarize at: '{self.sum_path}'") + texts: List[Document] = self._text_splitter.split_documents(documents) + v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(self.persist_dir)) + + self._retriever = RetrievalQA.from_chain_type( + llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever() + ) + return True + except ImportError as err: + log.error("Unable to summarize '%s' => %s", self.sum_path, err) + AskAiEvents.ASKAI_BUS.events.reply_error.emit(message=msg.missing_package(err)) + + return False def query(self, *queries: str) -> Optional[List[SummaryResult]]: """Answer questions about the summarized content. 
diff --git a/src/main/askai/core/model/processor_response.py b/src/main/askai/core/model/processor_response.py index 03c59afe..5836b6a7 100644 --- a/src/main/askai/core/model/processor_response.py +++ b/src/main/askai/core/model/processor_response.py @@ -26,6 +26,7 @@ class ProcessorResponse: response: str = "" terminating: bool = False intelligible: bool = False + unclear: bool = False require_internet: bool = False require_summarization: bool = False commands: List[TerminalCommand] = field(default_factory=list) diff --git a/src/main/askai/core/processor/instances/summary_processor.py b/src/main/askai/core/processor/instances/summary_processor.py index e2e60d73..0981938f 100644 --- a/src/main/askai/core/processor/instances/summary_processor.py +++ b/src/main/askai/core/processor/instances/summary_processor.py @@ -88,7 +88,8 @@ def process(self, query_response: ProcessorResponse) -> Tuple[bool, Optional[str else: shared.context.clear("SUMMARY") if not summarizer.exists(summary.folder, summary.glob): - summarizer.generate(summary.folder, summary.glob) + if not summarizer.generate(summary.folder, summary.glob): + return True, "%ORANGE%Sorry, summarization was not possible !%NC%" else: summarizer.folder = summary.folder summarizer.glob = summary.glob diff --git a/src/main/askai/core/support/utilities.py b/src/main/askai/core/support/utilities.py index ebb36290..c4bfb341 100644 --- a/src/main/askai/core/support/utilities.py +++ b/src/main/askai/core/support/utilities.py @@ -32,7 +32,7 @@ from askai.language.language import Language CHAT_ICONS = { - '': '\n\n%RED% Error: ', + '': '\n%RED% Error: ', '': '\n\n%BLUE% Hints & Tips: ', '': '\n\n%BLUE% Analysis: ', '': '\n\n%BLUE% Summary: ', diff --git a/src/main/askai/resources/assets/prompts/command-prompt.txt b/src/main/askai/resources/assets/prompts/command-prompt.txt index 36086b0c..93e9ef05 100644 --- a/src/main/askai/resources/assets/prompts/command-prompt.txt +++ 
b/src/main/askai/resources/assets/prompts/command-prompt.txt @@ -16,7 +16,7 @@ Before responding to the user, it is imperative that you follow the step-by-step 7. For all other file management queries you must use `open'; Example: "open 'my-doc.doc' &>/dev/null". -8. Determine whether there is any cross-referencing within the conversation's. When user requests information about specific items like 'open 1,' 'play 2,' or 'open it.' Utilize the entire chat history, starting from the most recent entry and proceeding to the oldest, to locate the reference associated with the specified file or folder name. Follow the list numbers sequentially without skipping any item. Once you find the reference, discontinue further search. +8. Determine whether there is any cross-referencing within the conversation. When the user requests information about specific items like ['open 1', 'play 2', 'show me 3', 'show it' or 'open it'], utilize the entire chat history, starting from the most recent entry and proceeding to the oldest, to locate the reference associated with the specified file or folder name. Follow the list numbers sequentially without skipping any item. Once you find the reference, discontinue further search. 9. When I explicitly refer to: my file(s) or my folder(s) in the query, assume they are referring to files and folders within their HOME (~) directory for navigation or file management purposes. diff --git a/src/main/askai/resources/assets/prompts/proxy-prompt.txt b/src/main/askai/resources/assets/prompts/proxy-prompt.txt index 37d155ef..523cf1e6 100644 --- a/src/main/askai/resources/assets/prompts/proxy-prompt.txt +++ b/src/main/askai/resources/assets/prompts/proxy-prompt.txt @@ -4,11 +4,11 @@ As 'Taius', the AI query proxy. 
Your task is to analyze and categorize the types - "SummarizationQuery" (Examples: [summarize my documents, summarize the file /tmp/the-file.md]) -- "AnalysisQuery" (Examples: [is there any image, how many reminders, what is the total size]) +- "AnalysisQuery" (Examples: [is there any image, how many reminders, what is the total size, what should I do]) - "CommandQuery" (Examples: [list my images, open 1, play it, show me it]) -- "ConversationQuery" (Examples: [what is the size of the moon, who are you]) +- "GenericQuery" (Examples: [what is the size of the moon, who are you]) Before responding to the user, you must follow the step-by-step instructions provided below in sequential order: @@ -20,12 +20,14 @@ Before responding to the user, you must follow the step-by-step instructions pro 4. Determine if the query requires summarization of files and folders to complete your reply. This query will consistently commence with "summarize" or a synonymous term. -5. If you don't have an answer so far, or, haven't decided yet, select the "GenericQuery". +5. If the user has provided a terminal command in a clear manner, select the 'CommandQuery'. -6. The final response is a formatted JSON with no additional description or context. +6. If you haven't found an answer yet or are still undecided, choose either 'GenericQuery' or 'AnalysisQuery' and include the boolean field 'unclear' set to true. -7. Do not use markdown to format the response message. Use plain JSON. +7. The final response is a formatted JSON with no additional description or context. -8. The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_summarization', 'require_internet'. +8. Do not use markdown to format the response message. Use plain JSON. -9. The final response 'JSON' must contain the string fields: 'query_type', and 'question'. +9. 
The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_summarization', 'require_internet'. + +10. The final response 'JSON' must contain the string fields: 'query_type', and 'question'. diff --git a/src/main/requirements.txt b/src/main/requirements.txt index 1179d65b..ab7bff91 100644 --- a/src/main/requirements.txt +++ b/src/main/requirements.txt @@ -1,4 +1,4 @@ -###### Requirements file for AskAi ###### +###### AUTO-GENERATED Requirements file for: AskAi ###### hspylib>=1.12.35 hspylib-clitt>=0.9.117 @@ -15,7 +15,8 @@ langchain>=0.1.12 langchain-openai==0.0.8 langchain-community==0.0.28 unstructured==0.12.6 -chromadb==0.4.24 +unstructured[md]==0.12.6 +chromadb>=0.4.24 python-magic-bin==0.4.14 tiktoken==0.6.0 aiohttp==3.9.3