Skip to content

Commit

Permalink
New proxy structure - Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Mar 26, 2024
1 parent f97623b commit c917067
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 57 deletions.
47 changes: 24 additions & 23 deletions dependencies.hspd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Created: 05th Jan, 2024

!! Syntax for dependencies !!
package: <pkg_name>, version: <latest|versionNum>, mode: <gt|ge|eq|ne|compat|none>
package: <pkg_name>, version: <latest|versionNum>, mode: <gt|ge|eq|ne|compat|none>
*/

/* Module dependencies */
Expand All @@ -14,26 +14,27 @@ package: hspylib-clitt, version: 0.9.117, mode: ge

/* Application dependencies */

package: retry, version: 0.9.2, mode: eq
package: pause, version: 0.3, mode: eq
package: requests, version: 2.31.0, mode: eq
package: argostranslate, version: 1.9.1, mode: eq
package: protobuf, version: 4.22.1, mode: eq
package: torch, version: 2.1.2, mode: eq
package: stanza, version: 1.1.1, mode: eq
package: soundfile, version: 0.12.1, mode: eq
package: urllib3, version: 1.26.18, mode: eq
package: retry, version: 0.9.2, mode: compat
package: pause, version: 0.3, mode: compat
package: requests, version: 2.31.0, mode: compat
package: argostranslate, version: 1.9.1, mode: compat
package: protobuf, version: 4.22.1, mode: compat
package: torch, version: 2.1.2, mode: compat
package: stanza, version: 1.1.1, mode: compat
package: soundfile, version: 0.12.1, mode: compat
package: urllib3, version: 1.26.18, mode: compat
package: langchain, version: 0.1.12, mode: ge
package: langchain-openai, version: 0.0.8, mode: eq
package: langchain-community, version: 0.0.28, mode: eq
package: unstructured, version: 0.12.6, mode: eq
package: chromadb, version: 0.4.24, mode: eq
package: python-magic-bin, version: 0.4.14, mode: eq
package: tiktoken, version: 0.6.0, mode: eq
package: aiohttp, version: 3.9.3, mode: eq
package: html2text, version: 2024.2.26, mode: eq
package: PyAudio, version: 0.2.14, mode: eq
package: SpeechRecognition, version: 3.10.1, mode: eq
package: openai-whisper, version: 20231117, mode: eq
package: google-api-python-client, version: 2.122.0, mode: eq
package: openai, version: 1.14.1, mode: eq
package: langchain-openai, version: 0.0.8, mode: compat
package: langchain-community, version: 0.0.28, mode: compat
package: unstructured, version: 0.12.6, mode: compat
package: unstructured[md], version: 0.12.6, mode: compat
package: chromadb, version: 0.4.24, mode: ge
package: python-magic-bin, version: 0.4.14, mode: compat
package: tiktoken, version: 0.6.0, mode: compat
package: aiohttp, version: 3.9.3, mode: compat
package: html2text, version: 2024.2.26, mode: compat
package: PyAudio, version: 0.2.14, mode: compat
package: SpeechRecognition, version: 3.10.1, mode: compat
package: openai-whisper, version: 20231117, mode: compat
package: google-api-python-client, version: 2.122.0, mode: compat
package: openai, version: 1.14.1, mode: compat
2 changes: 1 addition & 1 deletion gradle
Submodule gradle updated 1 files
+9 −9 dependencies.gradle
15 changes: 10 additions & 5 deletions src/main/askai/core/askai.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import logging as log
import os
import sys
from functools import partial
from threading import Thread
from typing import List, Optional

Expand All @@ -30,7 +31,7 @@

from askai.__classpath__ import classpath
from askai.core.askai_configs import configs
from askai.core.askai_events import ASKAI_BUS_NAME, AskAiEvents, REPLY_EVENT
from askai.core.askai_events import ASKAI_BUS_NAME, AskAiEvents, REPLY_EVENT, REPLY_ERROR_EVENT
from askai.core.askai_messages import msg
from askai.core.component.audio_player import player
from askai.core.component.cache_service import cache, CACHE_DIR
Expand Down Expand Up @@ -145,11 +146,14 @@ def reply_error(self, message: str) -> None:
else:
display_text(f"{shared.nickname}: %RED%{message}%NC%")

def _cb_reply_event(self, ev: Event) -> None:
def _cb_reply_event(self, ev: Event, error: bool = False) -> None:
"""Callback to handle reply events."""
if ev.args.erase_last:
cursor.erase_line()
self.reply(ev.args.message)
if error:
self.reply_error(ev.args.message)
else:
if ev.args.erase_last:
cursor.erase_line()
self.reply(ev.args.message)

def _splash(self) -> None:
"""Display the AskAI splash screen."""
Expand All @@ -174,6 +178,7 @@ def _startup(self) -> None:
cache.read_query_history()
askai_bus = AskAiEvents.get_bus(ASKAI_BUS_NAME)
askai_bus.subscribe(REPLY_EVENT, self._cb_reply_event)
askai_bus.subscribe(REPLY_ERROR_EVENT, partial(self._cb_reply_event, error=True))
if configs.is_speak:
player.start_delay()
self._ready = True
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/askai_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __str__(self):
ASKAI_BUS = _EventBus(
ASKAI_BUS_NAME,
reply=_Event(REPLY_EVENT, erase_last=False),
reply_error=_Event(REPLY_EVENT)
reply_error=_Event(REPLY_ERROR_EVENT)
)

# fmt: on
Expand Down
4 changes: 4 additions & 0 deletions src/main/askai/core/askai_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ def cmd_no_exist(self, command: str) -> str:
def cmd_failed(self, cmd_line: str) -> str:
return self.translate(f"Error: Sorry! Command `{cmd_line}' failed to execute !")

@lru_cache
def missing_package(self, err: ImportError) -> str:
return self.translate(f"Error: Unable to summarize => {str(err)}' !")

@lru_cache
def intelligible(self, question: str) -> str:
return self.translate(f"Error: Your question '{question}' is not clear, please reformulate !")
Expand Down
37 changes: 22 additions & 15 deletions src/main/askai/core/component/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def text_splitter(self) -> TextSplitter:
return self._text_splitter

@lru_cache
def generate(self, folder: str | Path, glob: str) -> None:
def generate(self, folder: str | Path, glob: str) -> bool:
"""Generate a summarization of the folder contents.
:param folder: The base folder of the summarization.
:param glob: The glob pattern or file of the summarization.
Expand All @@ -104,20 +104,27 @@ def generate(self, folder: str | Path, glob: str) -> None:
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.summarizing(self.sum_path))
embeddings = lc_llm.create_embeddings()

if self.persist_dir.exists():
log.info("Recovering vector store from: '%s'", self.persist_dir)
v_store = Chroma(persist_directory=str(self.persist_dir), embedding_function=embeddings)
else:
log.info("Summarizing documents from '%s'", self.sum_path)
documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load()
if len(documents) <= 0:
raise DocumentsNotFound(f"Unable to find any document to summarize at: '{self.sum_path}'")
texts: List[Document] = self._text_splitter.split_documents(documents)
v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(self.persist_dir))

self._retriever = RetrievalQA.from_chain_type(
llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever()
)
try:
if self.persist_dir.exists():
log.info("Recovering vector store from: '%s'", self.persist_dir)
v_store = Chroma(persist_directory=str(self.persist_dir), embedding_function=embeddings)
else:
log.info("Summarizing documents from '%s'", self.sum_path)
documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load()
if len(documents) <= 0:
raise DocumentsNotFound(f"Unable to find any document to summarize at: '{self.sum_path}'")
texts: List[Document] = self._text_splitter.split_documents(documents)
v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(self.persist_dir))

self._retriever = RetrievalQA.from_chain_type(
llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever()
)
return True
except ImportError as err:
log.error("Unable to summarize '%s' => %s", self.sum_path, err)
AskAiEvents.ASKAI_BUS.events.reply_error.emit(message=msg.missing_package(err))

return False

def query(self, *queries: str) -> Optional[List[SummaryResult]]:
"""Answer questions about the summarized content.
Expand Down
1 change: 1 addition & 0 deletions src/main/askai/core/model/processor_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class ProcessorResponse:
response: str = ""
terminating: bool = False
intelligible: bool = False
unclear: bool = False
require_internet: bool = False
require_summarization: bool = False
commands: List[TerminalCommand] = field(default_factory=list)
Expand Down
3 changes: 2 additions & 1 deletion src/main/askai/core/processor/instances/summary_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def process(self, query_response: ProcessorResponse) -> Tuple[bool, Optional[str
else:
shared.context.clear("SUMMARY")
if not summarizer.exists(summary.folder, summary.glob):
summarizer.generate(summary.folder, summary.glob)
if not summarizer.generate(summary.folder, summary.glob):
return True, "%ORANGE%Sorry, summarization was not possible !%NC%"
else:
summarizer.folder = summary.folder
summarizer.glob = summary.glob
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/support/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from askai.language.language import Language

CHAT_ICONS = {
'': '\n\n%RED% Error: ',
'': '\n%RED% Error: ',
'': '\n\n%BLUE% Hints & Tips: ',
'': '\n\n%BLUE% Analysis: ',
'': '\n\n%BLUE% Summary: ',
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/resources/assets/prompts/command-prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Before responding to the user, it is imperative that you follow the step-by-step

7. For all other file management queries you must use `open'; Example: "open 'my-doc.doc' &>/dev/null".

8. Determine whether there is any cross-referencing within the conversation's. When user requests information about specific items like 'open 1,' 'play 2,' or 'open it.' Utilize the entire chat history, starting from the most recent entry and proceeding to the oldest, to locate the reference associated with the specified file or folder name. Follow the list numbers sequentially without skipping any item. Once you find the reference, discontinue further search.
8. Determine whether there is any cross-referencing within the conversation. When the user requests information about specific items, such as 'open 1', 'play 2', 'show me 3', 'show it' or 'open it', utilize the entire chat history, starting from the most recent entry and proceeding to the oldest, to locate the reference associated with the specified file or folder name. Follow the list numbers sequentially without skipping any item. Once you find the reference, discontinue further search.

9. When I explicitly refer to: my file(s) or my folder(s) in the query, assume they are referring to files and folders within their HOME (~) directory for navigation or file management purposes.

Expand Down
16 changes: 9 additions & 7 deletions src/main/askai/resources/assets/prompts/proxy-prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ As 'Taius', the AI query proxy. Your task is to analyze and categorize the types

- "SummarizationQuery" (Examples: [summarize my documents, summarize the file /tmp/the-file.md])

- "AnalysisQuery" (Examples: [is there any image, how many reminders, what is the total size])
- "AnalysisQuery" (Examples: [is there any image, how many reminders, what is the total size, what should I do])

- "CommandQuery" (Examples: [list my images, open 1, play it, show me it])

- "ConversationQuery" (Examples: [what is the size of the moon, who are you])
- "GenericQuery" (Examples: [what is the size of the moon, who are you])

Before responding to the user, you must follow the step-by-step instructions provided below in sequential order:

Expand All @@ -20,12 +20,14 @@ Before responding to the user, you must follow the step-by-step instructions pro

4. Determine if the query requires summarization of files and folders to complete your reply. This query will consistently commence with "summarize" or a synonymous term.

5. If you don't have an answer so far, or, haven't decided yet, select the "GenericQuery".
5. If the user has provided a terminal command in a clear manner, select the 'CommandQuery'.

6. The final response is a formatted JSON with no additional description or context.
6. If you haven't found an answer yet or are still undecided, choose either 'GenericQuery' or 'AnalysisQuery' and include the boolean field 'unclear' set to true.

7. Do not use markdown to format the response message. Use plain JSON.
7. The final response is a formatted JSON with no additional description or context.

8. The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_summarization', 'require_internet'.
8. Do not use markdown to format the response message. Use plain JSON.

9. The final response 'JSON' must contain the string fields: 'query_type', and 'question'.
9. The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_summarization', 'require_internet'.

10. The final response 'JSON' must contain the string fields: 'query_type', and 'question'.
5 changes: 3 additions & 2 deletions src/main/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
###### Requirements file for AskAi ######
###### AUTO-GENERATED Requirements file for: AskAi ######

hspylib>=1.12.35
hspylib-clitt>=0.9.117
Expand All @@ -15,7 +15,8 @@ langchain>=0.1.12
langchain-openai==0.0.8
langchain-community==0.0.28
unstructured==0.12.6
chromadb==0.4.24
unstructured[md]==0.12.6
chromadb>=0.4.24
python-magic-bin==0.4.14
tiktoken==0.6.0
aiohttp==3.9.3
Expand Down

0 comments on commit c917067

Please sign in to comment.