Improving processors and bugfixes - 2
Hugo Saporetti Junior committed Mar 15, 2024
1 parent 4c9cd6f commit a1509aa
Showing 4 changed files with 24 additions and 26 deletions.
16 changes: 11 additions & 5 deletions src/main/askai/core/component/summarizer.py
@@ -45,10 +45,16 @@ class Summarizer(metaclass=Singleton):
 
     @staticmethod
     def _extract_result(result: dict) -> Tuple[str, str]:
-        """TODO"""
-        query = result['query'] if 'query' in result else result['question']
-        result = result['result'] if 'result' in result else result['answer']
-        return query, result
+        """Extract the question and answer from the summarization result."""
+        question = result['query'] if 'query' in result else result['question']
+        answer = result['result'] if 'result' in result else result['answer']
+        return question, answer
+
+    @staticmethod
+    def exists(folder: str | Path, glob: str) -> bool:
+        """Return whether or not the summary already exists."""
+        summary_hash = hash_text(f"{ensure_endswith(folder, '/')}{glob}")
+        return Path(f"{PERSIST_DIR}/{summary_hash}").exists()
 
     def __init__(self):
         nltk.download('averaged_perceptron_tagger')
@@ -81,7 +87,7 @@ def text_splitter(self) -> TextSplitter:
         return self._text_splitter
 
     @lru_cache
-    def generate(self, folder: str | Path, glob: str = None) -> None:
+    def generate(self, folder: str | Path, glob: str) -> None:
         """Generate a summarization of the folder contents.
         :param folder: The base folder of the summarization.
         :param glob: The glob pattern or file of the summarization.
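Editor's note: the new `exists` helper lets callers skip a costly re-summarization when a persisted store for the same folder/glob pair is already on disk. Below is a minimal standalone sketch of that idea; `hash_text`, `ensure_endswith`, and `PERSIST_DIR` come from hspylib/askai, so their stand-ins here (an MD5 digest, a trailing-slash normalizer, a home-directory cache path) are assumptions, not the library's actual implementations.

```python
import hashlib
from pathlib import Path

# Assumed cache root; in askai this is the PERSIST_DIR constant.
PERSIST_DIR = Path.home() / ".askai" / "cache"


def hash_text(text: str) -> str:
    """Stand-in for hspylib's hash_text: a stable digest of the key string."""
    return hashlib.md5(text.encode("utf-8")).hexdigest()


def summary_exists(folder: str, glob: str) -> bool:
    """Mirror Summarizer.exists: key = '<folder>/<glob>', persisted under PERSIST_DIR/<hash>."""
    key = f"{folder.rstrip('/') + '/'}{glob}"  # mimics ensure_endswith(folder, '/')
    return (PERSIST_DIR / hash_text(key)).exists()


if __name__ == "__main__":
    if not summary_exists("/tmp/docs", "**/*.md"):
        print("No cached summary found - this is where summarizer.generate() would run.")
```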
2 changes: 1 addition & 1 deletion src/main/askai/core/processor/internet_processor.py
@@ -50,7 +50,7 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
             search: SearchResult = object_mapper.of_json(response.message, SearchResult)
             if not isinstance(search, SearchResult):
                 log.error(msg.invalid_response(search))
-                output = response.message
+                output = response.message.strip()
             else:
                 query = " + ".join(search.keywords)
                 if output := internet.search_google(query, *search.sites):
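Editor's note: the change here only strips whitespace from the fallback output, but the surrounding pattern is the interesting part: the LLM reply is mapped onto a `SearchResult`, and when the mapping fails the raw (now stripped) message is used instead. A rough, self-contained sketch of that fallback follows, using a hypothetical `SearchResult` dataclass and plain `json` in place of askai's `object_mapper`.

```python
import json
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class SearchResult:
    keywords: List[str]
    sites: List[str]


def parse_search(message: str) -> Optional[SearchResult]:
    """Try to map the LLM reply onto SearchResult; return None when it is not usable JSON."""
    try:
        data = json.loads(message)
        return SearchResult(keywords=data["keywords"], sites=data["sites"])
    except (json.JSONDecodeError, KeyError, TypeError):
        return None


def process(message: str) -> str:
    search = parse_search(message)
    if search is None:
        # Mapping failed: fall back to the raw reply, stripped (the fix in this commit).
        return message.strip()
    return " + ".join(search.keywords)


print(process('{"keywords": ["python", "3.12"], "sites": ["docs.python.org"]}'))
print(process("  Sorry, I could not build a search query.  "))
```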
26 changes: 9 additions & 17 deletions src/main/askai/core/processor/summary_processor.py
@@ -13,6 +13,7 @@
    Copyright (c) 2024, HSPyLib
 """
 import logging as log
+import os
 from typing import Tuple, Optional
 
 from langchain_core.prompts import PromptTemplate
@@ -54,12 +55,14 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
                 log.error(msg.invalid_response(SummaryResult))
                 output = response.message
             else:
-                summarizer.generate(summary.folder, summary.glob)
-                if results := summarizer.query('Give me an overview of all the summarized content'):
-                    summary.results = results
-                    output = self._wrap_output(query_response, summary)
-                    shared.context.set("CONTEXT", output, "assistant")
-                    cache.save_reply(query_response.question, output)
+                if not summarizer.exists(summary.folder, summary.glob):
+                    summarizer.generate(summary.folder, summary.glob)
+                    if results := summarizer.query('Give me an overview of all the summarized content'):
+                        output = os.linesep.join([r.answer for r in results]).strip()
+                        shared.context.set("CONTEXT", output, "assistant")
+                        cache.save_reply(query_response.question, output)
+                else:
+                    log.info("Reusing existing summary: '%s'/'%s'", summary.folder, summary.glob)
                 status = True
         else:
             output = msg.llm_error(response.message)
@@ -68,14 +71,3 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
             status = True
 
         return status, output
-
-    def _wrap_output(self, query_response: QueryResponse, summary_result: SummaryResult) -> str:
-        """Wrap the output into a new string to be forwarded to the next processor.
-        :param query_response: The query response provided by the AI.
-        :param summary_result: The summary results.
-        """
-        query_response.query_type = self.next_in_chain().query_type()
-        query_response.require_summarization = False
-        query_response.require_internet = False
-        query_response.response = str(summary_result)
-        return str(query_response)
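Editor's note: taken together, the summary processor now (1) generates a summary only when no persisted one exists, (2) joins the answers of the follow-up query with `os.linesep` instead of routing through the removed `_wrap_output`, and (3) logs when a cached summary is reused. Below is a condensed sketch of that control flow with the summarizer stubbed out; the class names and return shapes are assumptions based on the diff, not askai's real API.

```python
import logging as log
import os
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class QueryAnswer:
    question: str
    answer: str


class StubSummarizer:
    """Stand-in exposing the interface the processor relies on."""

    def __init__(self) -> None:
        self._generated = False

    def exists(self, folder: str, glob: str) -> bool:
        return self._generated

    def generate(self, folder: str, glob: str) -> None:
        self._generated = True

    def query(self, question: str) -> List[QueryAnswer]:
        return [QueryAnswer(question, "Doc A covers setup."),
                QueryAnswer(question, "Doc B covers usage.")]


def summarize(summarizer: StubSummarizer, folder: str, glob: str) -> Optional[str]:
    if not summarizer.exists(folder, glob):
        summarizer.generate(folder, glob)
        if results := summarizer.query("Give me an overview of all the summarized content"):
            # Join the per-document answers into one reply (replaces the old _wrap_output).
            return os.linesep.join(r.answer for r in results).strip()
        return None
    log.info("Reusing existing summary: '%s'/'%s'", folder, glob)
    return None


print(summarize(StubSummarizer(), "/tmp/docs", "**/*.md"))
```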
6 changes: 3 additions & 3 deletions src/main/askai/resources/assets/prompts/proxy-prompt.txt
@@ -4,16 +4,16 @@ ${persona}
 
 Before responding to the user, it is imperative that you follow the step-by-step instructions provided below in sequential order:
 
-- It's crucial to review the entire chat history. The context provided previously may influence the outcome.
-
 - Determine whether the query clear and intelligible.
 
 - Determine whether the query suggests the user intends to end the conversation.
 
-- Determine whether real-time data are required for ensuring the utmost accuracy and relevance in responses. This pertains specifically to situations where the required information may be outdated or cannot be retrieved from the chat history or your existing database.
+- Determine whether real-time data are required for ensuring the utmost accuracy and relevance in responses. This pertains specifically to situations where the required information MAY BE OUTDATED or cannot be retrieved from the chat history or your existing database.
 
 - Determine whether summarizing documents or web sites is necessary to provide an accurate and comprehensive response.
 
+- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.
+
 - The final response is a formatted JSON with no additional description or context.
 
 - The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_internet' and 'require_summarization'.
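Editor's note: the proxy prompt asks the model to reply with a bare JSON object whose boolean fields drive routing. A hedged example of what such a reply might look like, and how a processor could act on it, follows; the field names come from the prompt itself, while the routing helper is purely illustrative and not askai's actual code.

```python
import json

# Example of the kind of reply the proxy prompt requests from the model.
reply = """
{
  "intelligible": true,
  "terminating": false,
  "require_internet": true,
  "require_summarization": false
}
"""

flags = json.loads(reply)
if not flags["intelligible"]:
    print("Ask the user to rephrase the question.")
elif flags["require_summarization"]:
    print("Route to the summary processor.")
elif flags["require_internet"]:
    print("Route to the internet processor.")
else:
    print("Answer from the chat history / model knowledge.")
```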
