Improving processors and bugfixes - 2
Hugo Saporetti Junior committed Mar 15, 2024
1 parent 4c9cd6f commit a1509aa
Showing 4 changed files with 24 additions and 26 deletions.
16 changes: 11 additions & 5 deletions src/main/askai/core/component/summarizer.py
@@ -45,10 +45,16 @@ class Summarizer(metaclass=Singleton):
 
     @staticmethod
     def _extract_result(result: dict) -> Tuple[str, str]:
-        """TODO"""
-        query = result['query'] if 'query' in result else result['question']
-        result = result['result'] if 'result' in result else result['answer']
-        return query, result
+        """Extract the question and answer from the summarization result."""
+        question = result['query'] if 'query' in result else result['question']
+        answer = result['result'] if 'result' in result else result['answer']
+        return question, answer
+
+    @staticmethod
+    def exists(folder: str | Path, glob: str) -> bool:
+        """Return whether or not the summary already exists."""
+        summary_hash = hash_text(f"{ensure_endswith(folder, '/')}{glob}")
+        return Path(f"{PERSIST_DIR}/{summary_hash}").exists()
 
     def __init__(self):
         nltk.download('averaged_perceptron_tagger')
@@ -81,7 +87,7 @@ def text_splitter(self) -> TextSplitter:
         return self._text_splitter
 
     @lru_cache
-    def generate(self, folder: str | Path, glob: str = None) -> None:
+    def generate(self, folder: str | Path, glob: str) -> None:
         """Generate a summarization of the folder contents.
         :param folder: The base folder of the summarization.
         :param glob: The glob pattern or file of the summarization.
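Editor's note: the new `exists` helper lets callers skip a costly re-summarization when a persisted store for the same folder/glob pair is already on disk. Below is a minimal standalone sketch of that idea; `hash_text`, `ensure_endswith`, and `PERSIST_DIR` come from hspylib/askai, so their stand-ins here (an MD5 digest, a trailing-slash normalizer, a home-directory cache path) are assumptions, not the library's actual implementations.

```python
import hashlib
from pathlib import Path

# Assumed cache root; in askai this is the PERSIST_DIR constant.
PERSIST_DIR = Path.home() / ".askai" / "cache"


def hash_text(text: str) -> str:
    """Stand-in for hspylib's hash_text: a stable digest of the key string."""
    return hashlib.md5(text.encode("utf-8")).hexdigest()


def summary_exists(folder: str, glob: str) -> bool:
    """Mirror Summarizer.exists: key = '<folder>/<glob>', persisted under PERSIST_DIR/<hash>."""
    key = f"{folder.rstrip('/') + '/'}{glob}"  # mimics ensure_endswith(folder, '/')
    return (PERSIST_DIR / hash_text(key)).exists()


if __name__ == "__main__":
    if not summary_exists("/tmp/docs", "**/*.md"):
        print("No cached summary found - this is where summarizer.generate() would run.")
```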
2 changes: 1 addition & 1 deletion src/main/askai/core/processor/internet_processor.py
@@ -50,7 +50,7 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
             search: SearchResult = object_mapper.of_json(response.message, SearchResult)
             if not isinstance(search, SearchResult):
                 log.error(msg.invalid_response(search))
-                output = response.message
+                output = response.message.strip()
             else:
                 query = " + ".join(search.keywords)
                 if output := internet.search_google(query, *search.sites):
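Editor's note: the change here only strips whitespace from the fallback output, but the surrounding pattern is the interesting part: the LLM reply is mapped onto a `SearchResult`, and when the mapping fails the raw (now stripped) message is used instead. A rough, self-contained sketch of that fallback follows, using a hypothetical `SearchResult` dataclass and plain `json` in place of askai's `object_mapper`.

```python
import json
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class SearchResult:
    keywords: List[str]
    sites: List[str]


def parse_search(message: str) -> Optional[SearchResult]:
    """Try to map the LLM reply onto SearchResult; return None when it is not usable JSON."""
    try:
        data = json.loads(message)
        return SearchResult(keywords=data["keywords"], sites=data["sites"])
    except (json.JSONDecodeError, KeyError, TypeError):
        return None


def process(message: str) -> str:
    search = parse_search(message)
    if search is None:
        # Mapping failed: fall back to the raw reply, stripped (the fix in this commit).
        return message.strip()
    return " + ".join(search.keywords)


print(process('{"keywords": ["python", "3.12"], "sites": ["docs.python.org"]}'))
print(process("  Sorry, I could not build a search query.  "))
```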
26 changes: 9 additions & 17 deletions src/main/askai/core/processor/summary_processor.py
@@ -13,6 +13,7 @@
    Copyright (c) 2024, HSPyLib
 """
 import logging as log
+import os
 from typing import Tuple, Optional
 
 from langchain_core.prompts import PromptTemplate
@@ -54,12 +55,14 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
                 log.error(msg.invalid_response(SummaryResult))
                 output = response.message
             else:
-                summarizer.generate(summary.folder, summary.glob)
-                if results := summarizer.query('Give me an overview of all the summarized content'):
-                    summary.results = results
-                    output = self._wrap_output(query_response, summary)
-                    shared.context.set("CONTEXT", output, "assistant")
-                    cache.save_reply(query_response.question, output)
+                if not summarizer.exists(summary.folder, summary.glob):
+                    summarizer.generate(summary.folder, summary.glob)
+                    if results := summarizer.query('Give me an overview of all the summarized content'):
+                        output = os.linesep.join([r.answer for r in results]).strip()
+                        shared.context.set("CONTEXT", output, "assistant")
+                        cache.save_reply(query_response.question, output)
+                else:
+                    log.info("Reusing existing summary: '%s'/'%s'", summary.folder, summary.glob)
                 status = True
         else:
             output = msg.llm_error(response.message)
@@ -68,14 +71,3 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
             status = True
 
         return status, output
-
-    def _wrap_output(self, query_response: QueryResponse, summary_result: SummaryResult) -> str:
-        """Wrap the output into a new string to be forwarded to the next processor.
-        :param query_response: The query response provided by the AI.
-        :param summary_result: The summary results.
-        """
-        query_response.query_type = self.next_in_chain().query_type()
-        query_response.require_summarization = False
-        query_response.require_internet = False
-        query_response.response = str(summary_result)
-        return str(query_response)
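Editor's note: taken together, the summary processor now (1) generates a summary only when no persisted one exists, (2) joins the answers of the follow-up query with `os.linesep` instead of routing through the removed `_wrap_output`, and (3) logs when a cached summary is reused. Below is a condensed sketch of that control flow with the summarizer stubbed out; the class names and return shapes are assumptions based on the diff, not askai's real API.

```python
import logging as log
import os
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class QueryAnswer:
    question: str
    answer: str


class StubSummarizer:
    """Stand-in exposing the interface the processor relies on."""

    def __init__(self) -> None:
        self._generated = False

    def exists(self, folder: str, glob: str) -> bool:
        return self._generated

    def generate(self, folder: str, glob: str) -> None:
        self._generated = True

    def query(self, question: str) -> List[QueryAnswer]:
        return [QueryAnswer(question, "Doc A covers setup."),
                QueryAnswer(question, "Doc B covers usage.")]


def summarize(summarizer: StubSummarizer, folder: str, glob: str) -> Optional[str]:
    if not summarizer.exists(folder, glob):
        summarizer.generate(folder, glob)
        if results := summarizer.query("Give me an overview of all the summarized content"):
            # Join the per-document answers into one reply (replaces the old _wrap_output).
            return os.linesep.join(r.answer for r in results).strip()
        return None
    log.info("Reusing existing summary: '%s'/'%s'", folder, glob)
    return None


print(summarize(StubSummarizer(), "/tmp/docs", "**/*.md"))
```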
6 changes: 3 additions & 3 deletions src/main/askai/resources/assets/prompts/proxy-prompt.txt
@@ -4,16 +4,16 @@ ${persona}
 
 Before responding to the user, it is imperative that you follow the step-by-step instructions provided below in sequential order:
 
-- It's crucial to review the entire chat history. The context provided previously may influence the outcome.
-
 - Determine whether the query clear and intelligible.
 
 - Determine whether the query suggests the user intends to end the conversation.
 
-- Determine whether real-time data are required for ensuring the utmost accuracy and relevance in responses. This pertains specifically to situations where the required information may be outdated or cannot be retrieved from the chat history or your existing database.
+- Determine whether real-time data are required for ensuring the utmost accuracy and relevance in responses. This pertains specifically to situations where the required information MAY BE OUTDATED or cannot be retrieved from the chat history or your existing database.
 
 - Determine whether summarizing documents or web sites is necessary to provide an accurate and comprehensive response.
 
+- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.
+
 - The final response is a formatted JSON with no additional description or context.
 
 - The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_internet' and 'require_summarization'.
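Editor's note: the proxy prompt asks the model to reply with a bare JSON object whose boolean fields drive routing. A hedged example of what such a reply might look like, and how a processor could act on it, follows; the field names come from the prompt itself, while the routing helper is purely illustrative and not askai's actual code.

```python
import json

# Example of the kind of reply the proxy prompt requests from the model.
reply = """
{
  "intelligible": true,
  "terminating": false,
  "require_internet": true,
  "require_summarization": false
}
"""

flags = json.loads(reply)
if not flags["intelligible"]:
    print("Ask the user to rephrase the question.")
elif flags["require_summarization"]:
    print("Route to the summary processor.")
elif flags["require_internet"]:
    print("Route to the internet processor.")
else:
    print("Answer from the chat history / model knowledge.")
```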
